Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_file.c | 2
-rw-r--r--  fs/adfs/super.c | 2
-rw-r--r--  fs/affs/affs.h | 20
-rw-r--r--  fs/affs/amigaffs.c | 23
-rw-r--r--  fs/affs/dir.c | 28
-rw-r--r--  fs/affs/namei.c | 32
-rw-r--r--  fs/affs/super.c | 8
-rw-r--r--  fs/autofs4/dev-ioctl.c | 3
-rw-r--r--  fs/bfs/inode.c | 2
-rw-r--r--  fs/binfmt_elf.c | 4
-rw-r--r--  fs/bio-integrity.c | 22
-rw-r--r--  fs/btrfs/file.c | 1
-rw-r--r--  fs/ceph/cache.c | 1
-rw-r--r--  fs/ceph/cache.h | 10
-rw-r--r--  fs/ceph/caps.c | 9
-rw-r--r--  fs/ceph/debugfs.c | 5
-rw-r--r--  fs/ceph/dir.c | 53
-rw-r--r--  fs/ceph/export.c | 267
-rw-r--r--  fs/ceph/file.c | 8
-rw-r--r--  fs/ceph/inode.c | 76
-rw-r--r--  fs/ceph/ioctl.c | 5
-rw-r--r--  fs/ceph/locks.c | 98
-rw-r--r--  fs/ceph/mds_client.c | 97
-rw-r--r--  fs/ceph/mds_client.h | 4
-rw-r--r--  fs/ceph/strings.c | 1
-rw-r--r--  fs/ceph/super.c | 1
-rw-r--r--  fs/ceph/super.h | 3
-rw-r--r--  fs/ceph/xattr.c | 48
-rw-r--r--  fs/cifs/file.c | 1
-rw-r--r--  fs/dcache.c | 1
-rw-r--r--  fs/exec.c | 26
-rw-r--r--  fs/ext2/acl.c | 1
-rw-r--r--  fs/ext2/ialloc.c | 2
-rw-r--r--  fs/ext2/super.c | 2
-rw-r--r--  fs/ext2/xattr_security.c | 4
-rw-r--r--  fs/ext3/balloc.c | 5
-rw-r--r--  fs/ext3/dir.c | 2
-rw-r--r--  fs/ext3/ialloc.c | 2
-rw-r--r--  fs/ext3/inode.c | 86
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext3/xattr_security.c | 5
-rw-r--r--  fs/ext4/file.c | 1
-rw-r--r--  fs/f2fs/acl.c | 8
-rw-r--r--  fs/f2fs/checkpoint.c | 208
-rw-r--r--  fs/f2fs/data.c | 106
-rw-r--r--  fs/f2fs/debug.c | 12
-rw-r--r--  fs/f2fs/dir.c | 85
-rw-r--r--  fs/f2fs/f2fs.h | 105
-rw-r--r--  fs/f2fs/file.c | 32
-rw-r--r--  fs/f2fs/gc.c | 16
-rw-r--r--  fs/f2fs/inline.c | 4
-rw-r--r--  fs/f2fs/inode.c | 27
-rw-r--r--  fs/f2fs/namei.c | 9
-rw-r--r--  fs/f2fs/node.c | 334
-rw-r--r--  fs/f2fs/node.h | 25
-rw-r--r--  fs/f2fs/recovery.c | 37
-rw-r--r--  fs/f2fs/segment.c | 222
-rw-r--r--  fs/f2fs/segment.h | 75
-rw-r--r--  fs/f2fs/super.c | 97
-rw-r--r--  fs/f2fs/xattr.c | 7
-rw-r--r--  fs/fuse/cuse.c | 4
-rw-r--r--  fs/fuse/file.c | 1
-rw-r--r--  fs/gfs2/file.c | 1
-rw-r--r--  fs/isofs/inode.c | 2
-rw-r--r--  fs/jffs2/compr_rtime.c | 4
-rw-r--r--  fs/jffs2/fs.c | 9
-rw-r--r--  fs/jffs2/nodelist.h | 2
-rw-r--r--  fs/jffs2/nodemgmt.c | 14
-rw-r--r--  fs/lockd/svc.c | 1
-rw-r--r--  fs/ncpfs/dir.c | 69
-rw-r--r--  fs/ncpfs/file.c | 24
-rw-r--r--  fs/ncpfs/getopt.c | 12
-rw-r--r--  fs/ncpfs/inode.c | 32
-rw-r--r--  fs/ncpfs/ioctl.c | 17
-rw-r--r--  fs/ncpfs/mmap.c | 2
-rw-r--r--  fs/ncpfs/ncp_fs.h | 30
-rw-r--r--  fs/ncpfs/ncplib_kernel.c | 28
-rw-r--r--  fs/ncpfs/sock.c | 49
-rw-r--r--  fs/ncpfs/symlink.c | 2
-rw-r--r--  fs/nfs/callback_proc.c | 19
-rw-r--r--  fs/nfs/dir.c | 62
-rw-r--r--  fs/nfs/file.c | 1
-rw-r--r--  fs/nfs/inode.c | 34
-rw-r--r--  fs/nfs/internal.h | 8
-rw-r--r--  fs/nfs/nfs3proc.c | 36
-rw-r--r--  fs/nfs/nfs4_fs.h | 11
-rw-r--r--  fs/nfs/nfs4client.c | 7
-rw-r--r--  fs/nfs/nfs4proc.c | 197
-rw-r--r--  fs/nfs/nfs4state.c | 6
-rw-r--r--  fs/nfs/nfs4xdr.c | 3
-rw-r--r--  fs/nfs/pnfs.c | 17
-rw-r--r--  fs/nfs/proc.c | 25
-rw-r--r--  fs/nfs/unlink.c | 35
-rw-r--r--  fs/nfsd/acl.h | 10
-rw-r--r--  fs/nfsd/nfs4acl.c | 13
-rw-r--r--  fs/nfsd/nfs4callback.c | 19
-rw-r--r--  fs/nfsd/nfs4proc.c | 39
-rw-r--r--  fs/nfsd/nfs4state.c | 28
-rw-r--r--  fs/nfsd/nfs4xdr.c | 30
-rw-r--r--  fs/nfsd/nfsctl.c | 5
-rw-r--r--  fs/nfsd/nfsd.h | 2
-rw-r--r--  fs/nfsd/nfsfh.h | 14
-rw-r--r--  fs/nfsd/nfsxdr.c | 2
-rw-r--r--  fs/nfsd/vfs.c | 15
-rw-r--r--  fs/nfsd/xdr4.h | 2
-rw-r--r--  fs/nilfs2/file.c | 1
-rw-r--r--  fs/ntfs/debug.c | 58
-rw-r--r--  fs/ntfs/debug.h | 7
-rw-r--r--  fs/ntfs/super.c | 28
-rw-r--r--  fs/ocfs2/cluster/sys.c | 2
-rw-r--r--  fs/ocfs2/stackglue.c | 8
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  fs/proc/base.c | 19
-rw-r--r--  fs/proc/fd.c | 6
-rw-r--r--  fs/proc/inode.c | 2
-rw-r--r--  fs/proc/meminfo.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 3
-rw-r--r--  fs/proc/vmcore.c | 3
-rw-r--r--  fs/quota/Kconfig | 7
-rw-r--r--  fs/reiserfs/dir.c | 6
-rw-r--r--  fs/ubifs/file.c | 1
-rw-r--r--  fs/udf/super.c | 8
-rw-r--r--  fs/ufs/balloc.c | 12
-rw-r--r--  fs/ufs/ialloc.c | 4
-rw-r--r--  fs/ufs/super.c | 8
-rw-r--r--  fs/xfs/xfs_file.c | 1
126 files changed, 2089 insertions, 1426 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a16b0ff497ca..d8223209d4b1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -832,6 +832,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
 
 static const struct vm_operations_struct v9fs_file_vm_ops = {
 	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
 	.page_mkwrite = v9fs_vm_page_mkwrite,
 	.remap_pages = generic_file_remap_pages,
 };
@@ -839,6 +840,7 @@ static const struct vm_operations_struct v9fs_file_vm_ops = {
 static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
 	.close = v9fs_mmap_vm_close,
 	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
 	.page_mkwrite = v9fs_vm_page_mkwrite,
 	.remap_pages = generic_file_remap_pages,
 };
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 952aeb048349..9852bdf34d76 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -266,7 +266,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-static int init_inodecache(void)
+static int __init init_inodecache(void)
 {
 	adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
 					     sizeof(struct adfs_inode_info),
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 3952121f2f28..25b23b1e7f22 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -5,14 +5,6 @@
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 
-/* AmigaOS allows file names with up to 30 characters length.
- * Names longer than that will be silently truncated. If you
- * want to disallow this, comment out the following #define.
- * Creating filesystem objects with longer names will then
- * result in an error (ENAMETOOLONG).
- */
-/*#define AFFS_NO_TRUNCATE */
-
 /* Ugly macros make the code more pretty. */
 
 #define GET_END_PTR(st,p,sz)		((st *)((char *)(p)+((sz)-sizeof(st))))
@@ -28,7 +20,6 @@
 
 #define AFFS_CACHE_SIZE		PAGE_SIZE
 
-#define AFFS_MAX_PREALLOC	32
 #define AFFS_LC_SIZE		(AFFS_CACHE_SIZE/sizeof(u32)/2)
 #define AFFS_AC_SIZE		(AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2)
 #define AFFS_AC_MASK		(AFFS_AC_SIZE-1)
@@ -118,6 +109,7 @@ struct affs_sb_info {
 #define SF_OFS		0x0200		/* Old filesystem */
 #define SF_PREFIX	0x0400		/* Buffer for prefix is allocated */
 #define SF_VERBOSE	0x0800		/* Talk about fs when mounting */
+#define SF_NO_TRUNCATE	0x1000		/* Don't truncate filenames */
 
 /* short cut to get to the affs specific sb data */
 static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
@@ -137,9 +129,13 @@ extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
 extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
 extern umode_t prot_to_mode(u32 prot);
 extern void mode_to_prot(struct inode *inode);
-extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
-extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
-extern int affs_check_name(const unsigned char *name, int len);
+extern void affs_error(struct super_block *sb, const char *function,
+		const char *fmt, ...);
+extern void affs_warning(struct super_block *sb, const char *function,
+		const char *fmt, ...);
+extern bool affs_nofilenametruncate(const struct dentry *dentry);
+extern int affs_check_name(const unsigned char *name, int len,
+		bool notruncate);
 extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry);
 
 /* bitmap. c */
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index d9a43674cb94..533a322c41c0 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -471,20 +471,27 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
 		function,ErrorBuffer);
 }
 
+bool
+affs_nofilenametruncate(const struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
+
+}
+
 /* Check if the name is valid for a affs object. */
 
 int
-affs_check_name(const unsigned char *name, int len)
+affs_check_name(const unsigned char *name, int len, bool notruncate)
 {
 	int i;
 
-	if (len > 30)
-#ifdef AFFS_NO_TRUNCATE
-		return -ENAMETOOLONG;
-#else
-		len = 30;
-#endif
-
+	if (len > 30) {
+		if (notruncate)
+			return -ENAMETOOLONG;
+		else
+			len = 30;
+	}
 	for (i = 0; i < len; i++) {
 		if (name[i] < ' ' || name[i] == ':'
 		    || (name[i] > 0x7e && name[i] < 0xa0))
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index f1eba8c3644e..cbbda476a805 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -52,8 +52,10 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	int			 hash_pos;
 	int			 chain_pos;
 	u32			 ino;
+	int			 error = 0;
 
-	pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos);
+	pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",
+		 inode->i_ino, (unsigned long)ctx->pos);
 
 	if (ctx->pos < 2) {
 		file->private_data = (void *)0;
@@ -72,7 +74,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 	}
 	dir_bh = affs_bread(sb, inode->i_ino);
 	if (!dir_bh)
-		goto readdir_out;
+		goto out_unlock_dir;
 
 	/* If the directory hasn't changed since the last call to readdir(),
 	 * we can jump directly to where we left off.
@@ -88,7 +90,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
 		fh_bh = affs_bread(sb, ino);
 		if (!fh_bh) {
 			affs_error(sb, "readdir","Cannot read block %d", i);
-			return -EIO;
+			error = -EIO;
+			goto out_brelse_dir;
 		}
 		ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
 		affs_brelse(fh_bh);
@@ -107,29 +110,34 @@ inside:
 		do {
 			fh_bh = affs_bread(sb, ino);
 			if (!fh_bh) {
-				affs_error(sb, "readdir","Cannot read block %d", ino);
+				affs_error(sb, "readdir",
+					   "Cannot read block %d", ino);
 				break;
 			}
 
 			namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
 			name = AFFS_TAIL(sb, fh_bh)->name + 1;
-			pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n",
+			pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", "
+				 "ino=%u), hash=%d, f_pos=%x\n",
 				namelen, name, ino, hash_pos, (u32)ctx->pos);
+
 			if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN))
-				goto readdir_done;
+				goto done;
 			ctx->pos++;
 			ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
 			affs_brelse(fh_bh);
 			fh_bh = NULL;
 		} while (ino);
 	}
-readdir_done:
+done:
 	file->f_version = inode->i_version;
 	file->private_data = (void *)(long)ino;
+	affs_brelse(fh_bh);
 
-readdir_out:
+out_brelse_dir:
 	affs_brelse(dir_bh);
-	affs_brelse(fh_bh);
+
+out_unlock_dir:
 	affs_unlock_dir(inode);
-	return 0;
+	return error;
 }
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index c36cbb4537a2..6dae1ccd176d 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -60,13 +60,13 @@ affs_get_toupper(struct super_block *sb)
  * Note: the dentry argument is the parent dentry.
  */
 static inline int
-__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper, bool notruncate)
 {
 	const u8 *name = qstr->name;
 	unsigned long hash;
 	int i;
 
-	i = affs_check_name(qstr->name, qstr->len);
+	i = affs_check_name(qstr->name, qstr->len, notruncate);
 	if (i)
 		return i;
 
@@ -82,16 +82,22 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
 static int
 affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 {
-	return __affs_hash_dentry(qstr, affs_toupper);
+	return __affs_hash_dentry(qstr, affs_toupper,
+				  affs_nofilenametruncate(dentry));
+
 }
+
 static int
 affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 {
-	return __affs_hash_dentry(qstr, affs_intl_toupper);
+	return __affs_hash_dentry(qstr, affs_intl_toupper,
+				  affs_nofilenametruncate(dentry));
+
 }
 
 static inline int __affs_compare_dentry(unsigned int len,
-		const char *str, const struct qstr *name, toupper_t toupper)
+		const char *str, const struct qstr *name, toupper_t toupper,
+		bool notruncate)
 {
 	const u8 *aname = str;
 	const u8 *bname = name->name;
@@ -101,7 +107,7 @@ static inline int __affs_compare_dentry(unsigned int len,
 	 * must be valid. 'name' must be validated first.
 	 */
 
-	if (affs_check_name(name->name, name->len))
+	if (affs_check_name(name->name, name->len, notruncate))
 		return 1;
 
 	/*
@@ -126,13 +132,18 @@ static int
 affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(len, str, name, affs_toupper);
+
+	return __affs_compare_dentry(len, str, name, affs_toupper,
+				     affs_nofilenametruncate(parent));
 }
+
 static int
 affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(len, str, name, affs_intl_toupper);
+	return __affs_compare_dentry(len, str, name, affs_intl_toupper,
+				     affs_nofilenametruncate(parent));
+
 }
 
 /*
@@ -411,7 +422,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		(u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
 		(u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
 
-	retval = affs_check_name(new_dentry->d_name.name,new_dentry->d_name.len);
+	retval = affs_check_name(new_dentry->d_name.name,
+				 new_dentry->d_name.len,
+				 affs_nofilenametruncate(old_dentry));
+
 	if (retval)
 		return retval;
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 307453086c3f..6d589f28bf9b 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -128,7 +128,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-static int init_inodecache(void)
+static int __init init_inodecache(void)
 {
 	affs_inode_cachep = kmem_cache_create("affs_inode_cache",
 					     sizeof(struct affs_inode_info),
@@ -163,7 +163,7 @@ static const struct super_operations affs_sops = {
 };
 
 enum {
-	Opt_bs, Opt_mode, Opt_mufs, Opt_prefix, Opt_protect,
+	Opt_bs, Opt_mode, Opt_mufs, Opt_notruncate, Opt_prefix, Opt_protect,
 	Opt_reserved, Opt_root, Opt_setgid, Opt_setuid,
 	Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
 };
@@ -172,6 +172,7 @@ static const match_table_t tokens = {
 	{Opt_bs, "bs=%u"},
 	{Opt_mode, "mode=%o"},
 	{Opt_mufs, "mufs"},
+	{Opt_notruncate, "nofilenametruncate"},
 	{Opt_prefix, "prefix=%s"},
 	{Opt_protect, "protect"},
 	{Opt_reserved, "reserved=%u"},
@@ -233,6 +234,9 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
 		case Opt_mufs:
 			*mount_opts |= SF_MUFS;
 			break;
+		case Opt_notruncate:
+			*mount_opts |= SF_NO_TRUNCATE;
+			break;
 		case Opt_prefix:
 			*prefix = match_strdup(&args[0]);
 			if (!*prefix)
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 3182c0e68b42..232e03d4780d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -103,6 +103,9 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
 	if (tmp.size < sizeof(tmp))
 		return ERR_PTR(-EINVAL);
 
+	if (tmp.size > (PATH_MAX + sizeof(tmp)))
+		return ERR_PTR(-ENAMETOOLONG);
+
 	return memdup_user(in, tmp.size);
 }
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 29aa5cf6639b..7041ac35ace8 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -266,7 +266,7 @@ static void init_once(void *foo)
 	inode_init_once(&bi->vfs_inode);
 }
 
-static int init_inodecache(void)
+static int __init init_inodecache(void)
 {
 	bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
 					     sizeof(struct bfs_inode_info),
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0f59799fa105..aa3cb626671e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -584,7 +584,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long reloc_func_desc __maybe_unused = 0;
 	int executable_stack = EXSTACK_DEFAULT;
-	unsigned long def_flags = 0;
 	struct pt_regs *regs = current_pt_regs();
 	struct {
 		struct elfhdr elf_ex;
@@ -724,9 +723,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	if (retval)
 		goto out_free_dentry;
 
-	/* OK, This is the point of no return */
-	current->mm->def_flags = def_flags;
-
 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
 	   may depend on the personality. */
 	SET_PERSONALITY(loc->elf_ex);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 29696b78d1f4..1c2ce0c87711 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -182,6 +182,9 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw)
  */
 int bio_integrity_enabled(struct bio *bio)
 {
+	if (!bio_is_rw(bio))
+		return 0;
+
 	/* Already protected? */
 	if (bio_integrity(bio))
 		return 0;
@@ -309,10 +312,9 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
-	struct bio_vec bv;
-	struct bvec_iter iter;
+	struct bio_vec *bv;
 	sector_t sector;
-	unsigned int sectors, ret = 0;
+	unsigned int sectors, ret = 0, i;
 	void *prot_buf = bio->bi_integrity->bip_buf;
 
 	if (operate)
@@ -323,16 +325,16 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
 
-	bio_for_each_segment(bv, bio, iter) {
-		void *kaddr = kmap_atomic(bv.bv_page);
-		bix.data_buf = kaddr + bv.bv_offset;
-		bix.data_size = bv.bv_len;
+	bio_for_each_segment_all(bv, bio, i) {
+		void *kaddr = kmap_atomic(bv->bv_page);
+		bix.data_buf = kaddr + bv->bv_offset;
+		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
 		bix.sector = sector;
 
-		if (operate) {
+		if (operate)
 			bi->generate_fn(&bix);
-		} else {
+		else {
 			ret = bi->verify_fn(&bix);
 			if (ret) {
 				kunmap_atomic(kaddr);
@@ -340,7 +342,7 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
 			}
 		}
 
-		sectors = bv.bv_len / bi->sector_size;
+		sectors = bv->bv_len / bi->sector_size;
 		sector += sectors;
 		prot_buf += sectors * bi->tuple_size;
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e1ffb1e22898..c660527af838 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2025,6 +2025,7 @@ out:
 
 static const struct vm_operations_struct btrfs_file_vm_ops = {
 	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
 	.page_mkwrite = btrfs_page_mkwrite,
 	.remap_pages = generic_file_remap_pages,
 };
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 8c44fdd4e1c3..834f9f3723fb 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -205,6 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
 	ci->fscache = fscache_acquire_cookie(fsc->fscache,
 					     &ceph_fscache_inode_object_def,
 					     ci, true);
+	fscache_check_consistency(ci->fscache);
 done:
 	mutex_unlock(&inode->i_mutex);
 
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index da95f61b7a09..5ac591bd012b 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -48,6 +48,12 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
 void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
 void ceph_queue_revalidate(struct inode *inode);
 
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	fscache_attr_changed(ci->fscache);
+}
+
 static inline void ceph_fscache_invalidate(struct inode *inode)
 {
 	fscache_invalidate(ceph_inode(inode)->fscache);
@@ -135,6 +141,10 @@ static inline void ceph_readpage_to_fscache(struct inode *inode,
 {
 }
 
+static inline void ceph_fscache_update_objectsize(struct inode *inode)
+{
+}
+
 static inline void ceph_fscache_invalidate(struct inode *inode)
 {
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 17543383545c..2e5e648eb5c3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -622,8 +622,10 @@ retry:
 
 	if (flags & CEPH_CAP_FLAG_AUTH) {
 		if (ci->i_auth_cap == NULL ||
-		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0)
+		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
 			ci->i_auth_cap = cap;
+			cap->mds_wanted = wanted;
+		}
 		ci->i_cap_exporting_issued = 0;
 	} else {
 		WARN_ON(ci->i_auth_cap == cap);
@@ -885,7 +887,10 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
 		cap = rb_entry(p, struct ceph_cap, ci_node);
 		if (!__cap_is_valid(cap))
 			continue;
-		mds_wanted |= cap->mds_wanted;
+		if (cap == ci->i_auth_cap)
+			mds_wanted |= cap->mds_wanted;
+		else
+			mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
 	}
 	return mds_wanted;
 }
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 6d59006bfa27..16b54aa31f08 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -93,6 +93,8 @@ static int mdsc_show(struct seq_file *s, void *p)
 	} else if (req->r_path1) {
 		seq_printf(s, " #%llx/%s", req->r_ino1.ino,
 			   req->r_path1);
+	} else {
+		seq_printf(s, " #%llx", req->r_ino1.ino);
 	}
 
 	if (req->r_old_dentry) {
@@ -102,7 +104,8 @@ static int mdsc_show(struct seq_file *s, void *p)
 			path = NULL;
 		spin_lock(&req->r_old_dentry->d_lock);
 		seq_printf(s, " #%llx/%.*s (%s)",
-			   ceph_ino(req->r_old_dentry_dir),
+			   req->r_old_dentry_dir ?
+			   ceph_ino(req->r_old_dentry_dir) : 0,
 			   req->r_old_dentry->d_name.len,
 			   req->r_old_dentry->d_name.name,
 			   path ? path : "");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 45eda6d7a40c..766410a12c2c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -119,7 +119,8 @@ static int fpos_cmp(loff_t l, loff_t r)
 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
 * the MDS if/when the directory is modified).
 */
-static int __dcache_readdir(struct file *file, struct dir_context *ctx)
+static int __dcache_readdir(struct file *file, struct dir_context *ctx,
+			    u32 shared_gen)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct dentry *parent = file->f_dentry;
@@ -133,8 +134,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx)
 	last = fi->dentry;
 	fi->dentry = NULL;
 
-	dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos,
-	     last);
+	dout("__dcache_readdir %p v%u at %llu (last %p)\n",
+	     dir, shared_gen, ctx->pos, last);
 
 	spin_lock(&parent->d_lock);
 
@@ -161,7 +162,8 @@ more:
 		goto out_unlock;
 	}
 	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-	if (!d_unhashed(dentry) && dentry->d_inode &&
+	if (di->lease_shared_gen == shared_gen &&
+	    !d_unhashed(dentry) && dentry->d_inode &&
 	    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
 	    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
 	    fpos_cmp(ctx->pos, di->offset) <= 0)
@@ -190,7 +192,7 @@ more:
 	if (last) {
 		/* remember our position */
 		fi->dentry = last;
-		fi->next_offset = di->offset;
+		fi->next_offset = fpos_off(di->offset);
 	}
 	dput(dentry);
 	return 0;
@@ -252,8 +254,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	int err;
 	u32 ftype;
 	struct ceph_mds_reply_info_parsed *rinfo;
-	const int max_entries = fsc->mount_options->max_readdir;
-	const int max_bytes = fsc->mount_options->max_readdir_bytes;
 
 	dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off);
 	if (fi->flags & CEPH_F_ATEND)
@@ -291,8 +291,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    __ceph_dir_is_complete(ci) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
+		u32 shared_gen = ci->i_shared_gen;
 		spin_unlock(&ci->i_ceph_lock);
-		err = __dcache_readdir(file, ctx);
+		err = __dcache_readdir(file, ctx, shared_gen);
 		if (err != -EAGAIN)
 			return err;
 	} else {
@@ -322,14 +323,16 @@ more:
 		fi->last_readdir = NULL;
 	}
 
-	/* requery frag tree, as the frag topology may have changed */
-	frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
-
 	dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
 	     ceph_vinop(inode), frag, fi->last_name);
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	err = ceph_alloc_readdir_reply_buffer(req, inode);
+	if (err) {
+		ceph_mdsc_put_request(req);
+		return err;
+	}
 	req->r_inode = inode;
 	ihold(inode);
 	req->r_dentry = dget(file->f_dentry);
@@ -340,9 +343,6 @@ more:
 	req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
 	req->r_readdir_offset = fi->next_offset;
 	req->r_args.readdir.frag = cpu_to_le32(frag);
-	req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-	req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
-	req->r_num_caps = max_entries + 1;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	if (err < 0) {
 		ceph_mdsc_put_request(req);
@@ -369,9 +369,9 @@ more:
 			fi->next_offset = 0;
 		off = fi->next_offset;
 	}
+	fi->frag = frag;
 	fi->offset = fi->next_offset;
 	fi->last_readdir = req;
-	fi->frag = frag;
 
 	if (req->r_reply_info.dir_end) {
 		kfree(fi->last_name);
@@ -454,7 +454,7 @@ more:
 	return 0;
 }
 
-static void reset_readdir(struct ceph_file_info *fi)
+static void reset_readdir(struct ceph_file_info *fi, unsigned frag)
 {
 	if (fi->last_readdir) {
 		ceph_mdsc_put_request(fi->last_readdir);
@@ -462,7 +462,10 @@ static void reset_readdir(struct ceph_file_info *fi)
 	}
 	kfree(fi->last_name);
 	fi->last_name = NULL;
-	fi->next_offset = 2;  /* compensate for . and .. */
+	if (ceph_frag_is_leftmost(frag))
+		fi->next_offset = 2;  /* compensate for . and .. */
+	else
+		fi->next_offset = 0;
 	if (fi->dentry) {
 		dput(fi->dentry);
 		fi->dentry = NULL;
@@ -474,7 +477,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
-	loff_t old_offset = offset;
+	loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
 	loff_t retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -491,7 +494,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 		goto out;
 	}
 
-	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
+	if (offset >= 0) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = 0;
@@ -504,14 +507,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 		 * seek to new frag, or seek prior to current chunk.
 		 */
 		if (offset == 0 ||
-		    fpos_frag(offset) != fpos_frag(old_offset) ||
+		    fpos_frag(offset) != fi->frag ||
 		    fpos_off(offset) < fi->offset) {
 			dout("dir_llseek dropping %p content\n", file);
-			reset_readdir(fi);
+			reset_readdir(fi, fpos_frag(offset));
 		}
 
 		/* bump dir_release_count if we did a forward seek */
-		if (offset > old_offset)
+		if (fpos_cmp(offset, old_offset) > 0)
 			fi->dir_release_count--;
 	}
 out:
@@ -812,8 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 	}
 	req->r_dentry = dget(dentry);
 	req->r_num_caps = 2;
-	req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
-	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
+	req->r_old_dentry = dget(old_dentry);
 	req->r_locked_dir = dir;
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -911,10 +913,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	ihold(old_dir);
 	req->r_dentry = dget(new_dentry);
 	req->r_num_caps = 2;
 	req->r_old_dentry = dget(old_dentry);
-	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
+	req->r_old_dentry_dir = old_dir;
 	req->r_locked_dir = new_dir;
 	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 16796be53ca5..00d6af6a32ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -8,23 +8,6 @@
 #include "mds_client.h"
 
 /*
- * NFS export support
- *
- * NFS re-export of a ceph mount is, at present, only semireliable.
- * The basic issue is that the Ceph architectures doesn't lend itself
- * well to generating filehandles that will remain valid forever.
- *
- * So, we do our best. If you're lucky, your inode will be in the
- * client's cache. If it's not, and you have a connectable fh, then
- * the MDS server may be able to find it for you. Otherwise, you get
- * ESTALE.
- *
- * There are ways to this more reliable, but in the non-connectable fh
- * case, we won't every work perfectly, and in the connectable case,
- * some changes are needed on the MDS side to work better.
- */
-
-/*
  * Basic fh
  */
 struct ceph_nfs_fh {
@@ -32,22 +15,12 @@ struct ceph_nfs_fh {
 } __attribute__ ((packed));
 
 /*
- * Larger 'connectable' fh that includes parent ino and name hash.
- * Use this whenever possible, as it works more reliably.
+ * Larger fh that includes parent ino.
 */
 struct ceph_nfs_confh {
 	u64 ino, parent_ino;
-	u32 parent_name_hash;
 } __attribute__ ((packed));
 
-/*
- * The presence of @parent_inode here tells us whether NFS wants a
- * connectable file handle. However, we want to make a connectionable
- * file handle unconditionally so that the MDS gets as much of a hint
- * as possible. That means we only use @parent_dentry to indicate
- * whether nfsd wants a connectable fh, and whether we should indicate
- * failure from a too-small @max_len.
- */
 static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
 			  struct inode *parent_inode)
 {
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
 	struct ceph_nfs_confh *cfh = (void *)rawfh;
 	int connected_handle_length = sizeof(*cfh)/4;
 	int handle_length = sizeof(*fh)/4;
-	struct dentry *dentry;
-	struct dentry *parent;
 
 	/* don't re-export snaps */
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EINVAL;
 
-	dentry = d_find_alias(inode);
+	if (parent_inode && (*max_len < connected_handle_length)) {
+		*max_len = connected_handle_length;
+		return FILEID_INVALID;
+	} else if (*max_len < handle_length) {
+		*max_len = handle_length;
+		return FILEID_INVALID;
+	}
 
-	/* if we found an alias, generate a connectable fh */
-	if (*max_len >= connected_handle_length && dentry) {
-		dout("encode_fh %p connectable\n", dentry);
-		spin_lock(&dentry->d_lock);
-		parent = dentry->d_parent;
+	if (parent_inode) {
+		dout("encode_fh %llx with parent %llx\n",
+		     ceph_ino(inode), ceph_ino(parent_inode));
 		cfh->ino = ceph_ino(inode);
-		cfh->parent_ino = ceph_ino(parent->d_inode);
-		cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
-							 dentry);
+		cfh->parent_ino = ceph_ino(parent_inode);
 		*max_len = connected_handle_length;
-		type = 2;
-		spin_unlock(&dentry->d_lock);
-	} else if (*max_len >= handle_length) {
-		if (parent_inode) {
-			/* nfsd wants connectable */
-			*max_len = connected_handle_length;
-			type = FILEID_INVALID;
-		} else {
-			dout("encode_fh %p\n", dentry);
-			fh->ino = ceph_ino(inode);
-			*max_len = handle_length;
-			type = 1;
-		}
+		type = FILEID_INO32_GEN_PARENT;
 	} else {
+		dout("encode_fh %llx\n", ceph_ino(inode));
+		fh->ino = ceph_ino(inode);
 		*max_len = handle_length;
-		type = FILEID_INVALID;
+		type = FILEID_INO32_GEN;
 	}
-	if (dentry)
-		dput(dentry);
 	return type;
 }
 
-/*
- * convert regular fh to dentry
- *
- * FIXME: we should try harder by querying the mds for the ino.
- */
-static struct dentry *__fh_to_dentry(struct super_block *sb,
-				     struct ceph_nfs_fh *fh, int fh_len)
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 	struct ceph_vino vino;
 	int err;
 
-	if (fh_len < sizeof(*fh) / 4)
-		return ERR_PTR(-ESTALE);
-
-	dout("__fh_to_dentry %llx\n", fh->ino);
-	vino.ino = fh->ino;
+	vino.ino = ino;
 	vino.snap = CEPH_NOSNAP;
 	inode = ceph_find_inode(sb, vino);
 	if (!inode) {
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
-		pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
-		       fh->ino, inode);
 		iput(inode);
 		return dentry;
 	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
-		iput(inode);
+		dput(dentry);
 		return ERR_PTR(err);
 	}
-	dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
+	dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry);
 	return dentry;
 }
 
 /*
- * convert connectable fh to dentry
+ * convert regular fh to dentry
 */
-static struct dentry *__cfh_to_dentry(struct super_block *sb,
-				      struct ceph_nfs_confh *cfh, int fh_len)
+static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
+					struct fid *fid,
+					int fh_len, int fh_type)
+{
+	struct ceph_nfs_fh *fh = (void *)fid->raw;
+
+	if (fh_type != FILEID_INO32_GEN &&
+	    fh_type != FILEID_INO32_GEN_PARENT)
+		return NULL;
+	if (fh_len < sizeof(*fh) / 4)
+		return NULL;
+
+	dout("fh_to_dentry %llx\n", fh->ino);
+	return __fh_to_dentry(sb, fh->ino);
+}
+
+static struct dentry *__get_parent(struct super_block *sb,
+				   struct dentry *child, u64 ino)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+	struct ceph_mds_request *req;
 	struct inode *inode;
 	struct dentry *dentry;
-	struct ceph_vino vino;
 	int err;
 
-	if (fh_len < sizeof(*cfh) / 4)
-		return ERR_PTR(-ESTALE);
-
-	dout("__cfh_to_dentry %llx (%llx/%x)\n",
-	     cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
-
-	vino.ino = cfh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode) {
-		struct ceph_mds_request *req;
-
-		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
-					       USE_ANY_MDS);
-		if (IS_ERR(req))
-			return ERR_CAST(req);
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
+				       USE_ANY_MDS);
+	if (IS_ERR(req))
+		return ERR_CAST(req);
 
-		req->r_ino1 = vino;
-		req->r_ino2.ino = cfh->parent_ino;
-		req->r_ino2.snap = CEPH_NOSNAP;
-		req->r_path2 = kmalloc(16, GFP_NOFS);
-		snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
-		req->r_num_caps = 1;
-		err = ceph_mdsc_do_request(mdsc, NULL, req);
-		inode = req->r_target_inode;
-		if (inode)
-			ihold(inode);
-		ceph_mdsc_put_request(req);
-		if (!inode)
-			return ERR_PTR(err ? err : -ESTALE);
+	if (child) {
+		req->r_inode = child->d_inode;
+		ihold(child->d_inode);
+	} else {
+		req->r_ino1 = (struct ceph_vino) {
+			.ino = ino,
+			.snap = CEPH_NOSNAP,
+		};
 	}
+	req->r_num_caps = 1;
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	inode = req->r_target_inode;
+	if (inode)
+		ihold(inode);
+	ceph_mdsc_put_request(req);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
-		pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
-		       cfh->ino, inode);
 		iput(inode);
 		return dentry;
 	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
-		iput(inode);
+		dput(dentry);
 		return ERR_PTR(err);
 	}
-	dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
+	dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
+	     child ? ceph_ino(child->d_inode) : ino,
+	     dentry, ceph_vinop(inode));
 	return dentry;
 }
 
-static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
-					int fh_len, int fh_type)
+struct dentry *ceph_get_parent(struct dentry *child)
 {
-	if (fh_type == 1)
-		return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw,
-								fh_len);
-	else
-		return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw,
-								fh_len);
+	/* don't re-export snaps */
+	if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+		return ERR_PTR(-EINVAL);
+
+	dout("get_parent %p ino %llx.%llx\n",
+	     child, ceph_vinop(child->d_inode));
+	return __get_parent(child->d_sb, child, 0);
 }
 
 /*
- * get parent, if possible.
- *
- * FIXME: we could do better by querying the mds to discover the
- * parent.
+ * convert regular fh to parent
 */
 static struct dentry *ceph_fh_to_parent(struct super_block *sb,
					struct fid *fid,
					int fh_len, int fh_type)
 {
 	struct ceph_nfs_confh *cfh = (void *)fid->raw;
-	struct ceph_vino vino;
-	struct inode *inode;
 	struct dentry *dentry;
-	int err;
 
-	if (fh_type == 1)
-		return ERR_PTR(-ESTALE);
+	if (fh_type != FILEID_INO32_GEN_PARENT)
+		return NULL;
 	if (fh_len < sizeof(*cfh) / 4)
-		return ERR_PTR(-ESTALE);
+		return NULL;
 
-	pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
-		 cfh->parent_name_hash);
+	dout("fh_to_parent %llx\n", cfh->parent_ino);
+	dentry = __get_parent(sb, NULL, cfh->ino);
+	if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+		dentry = __fh_to_dentry(sb, cfh->parent_ino);
+	return dentry;
+}
 
-	vino.ino = cfh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode)
-		return ERR_PTR(-ESTALE);
+static int ceph_get_name(struct dentry *parent, char *name,
			 struct dentry *child)
+{
+	struct ceph_mds_client *mdsc;
+	struct ceph_mds_request *req;
+	int err;
 
-	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
-		       cfh->ino, inode);
-		iput(inode);
-		return dentry;
-	}
-	err = ceph_init_dentry(dentry);
-	if (err < 0) {
-		iput(inode);
-		return ERR_PTR(err);
+	mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
+				       USE_ANY_MDS);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	mutex_lock(&parent->d_inode->i_mutex);
+
+	req->r_inode = child->d_inode;
+	ihold(child->d_inode);
+	req->r_ino2 = ceph_vino(parent->d_inode);
+	req->r_locked_dir = parent->d_inode;
+	req->r_num_caps = 2;
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+
+	mutex_unlock(&parent->d_inode->i_mutex);
+
+	if (!err) {
+		struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+		memcpy(name, rinfo->dname, rinfo->dname_len);
+		name[rinfo->dname_len] = 0;
+		dout("get_name %p ino %llx.%llx name %s\n",
+		     child, ceph_vinop(child->d_inode), name);
+	} else {
+		dout("get_name %p ino %llx.%llx err %d\n",
+		     child, ceph_vinop(child->d_inode), err);
 	}
-	dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
-	return dentry;
+
+	ceph_mdsc_put_request(req);
+	return err;
 }
 
 const struct export_operations ceph_export_ops = {
 	.encode_fh = ceph_encode_fh,
 	.fh_to_dentry = ceph_fh_to_dentry,
 	.fh_to_parent = ceph_fh_to_parent,
+	.get_parent = ceph_get_parent,
+	.get_name = ceph_get_name,
 };
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 09c7afe32e49..66075a4ad979 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -210,7 +210,7 @@ int ceph_open(struct inode *inode, struct file *file)
 	ihold(inode);
 
 	req->r_num_caps = 1;
-	if (flags & (O_CREAT|O_TRUNC))
+	if (flags & O_CREAT)
 		parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
 	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 	iput(parent_inode);
@@ -291,8 +291,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 		err = finish_open(file, dentry, ceph_open, opened);
 	}
-
 out_err:
+	if (!req->r_err && req->r_target_inode)
+		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
 	ceph_mdsc_put_request(req);
 	dout("atomic_open result=%d\n", err);
 	return err;
@@ -970,6 +971,7 @@ retry_snap:
 			goto retry_snap;
 		}
 	} else {
+		loff_t old_size = inode->i_size;
 		/*
 		 * No need to acquire the i_truncate_mutex. Because
 		 * the MDS revokes Fwb caps before sending truncate
@@ -980,6 +982,8 @@ retry_snap:
 		written = generic_file_buffered_write(iocb, iov, nr_segs,
						      pos, &iocb->ki_pos,
						      count, 0);
+		if (inode->i_size > old_size)
+			ceph_fscache_update_objectsize(inode);
 		mutex_unlock(&inode->i_mutex);
 	}
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 32d519d8a2e2..0b0728e5be2d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -659,14 +659,6 @@ static int fill_inode(struct inode *inode,
659 le32_to_cpu(info->time_warp_seq), 659 le32_to_cpu(info->time_warp_seq),
660 &ctime, &mtime, &atime); 660 &ctime, &mtime, &atime);
661 661
662 /* only update max_size on auth cap */
663 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
664 ci->i_max_size != le64_to_cpu(info->max_size)) {
665 dout("max_size %lld -> %llu\n", ci->i_max_size,
666 le64_to_cpu(info->max_size));
667 ci->i_max_size = le64_to_cpu(info->max_size);
668 }
669
670 ci->i_layout = info->layout; 662 ci->i_layout = info->layout;
671 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 663 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
672 664
@@ -755,6 +747,14 @@ static int fill_inode(struct inode *inode,
755 ci->i_max_offset = 2; 747 ci->i_max_offset = 2;
756 } 748 }
757no_change: 749no_change:
750 /* only update max_size on auth cap */
751 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
752 ci->i_max_size != le64_to_cpu(info->max_size)) {
753 dout("max_size %lld -> %llu\n", ci->i_max_size,
754 le64_to_cpu(info->max_size));
755 ci->i_max_size = le64_to_cpu(info->max_size);
756 }
757
758 spin_unlock(&ci->i_ceph_lock); 758 spin_unlock(&ci->i_ceph_lock);
759 759
760 /* queue truncate if we saw i_size decrease */ 760 /* queue truncate if we saw i_size decrease */
@@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1044 session, req->r_request_started, -1, 1044 session, req->r_request_started, -1,
1045 &req->r_caps_reservation); 1045 &req->r_caps_reservation);
1046 if (err < 0) 1046 if (err < 0)
1047 return err; 1047 goto done;
1048 } else { 1048 } else {
1049 WARN_ON_ONCE(1); 1049 WARN_ON_ONCE(1);
1050 } 1050 }
1051
1052 if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) {
1053 struct qstr dname;
1054 struct dentry *dn, *parent;
1055
1056 BUG_ON(!rinfo->head->is_target);
1057 BUG_ON(req->r_dentry);
1058
1059 parent = d_find_any_alias(dir);
1060 BUG_ON(!parent);
1061
1062 dname.name = rinfo->dname;
1063 dname.len = rinfo->dname_len;
1064 dname.hash = full_name_hash(dname.name, dname.len);
1065 vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
1066 vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
1067retry_lookup:
1068 dn = d_lookup(parent, &dname);
1069 dout("d_lookup on parent=%p name=%.*s got %p\n",
1070 parent, dname.len, dname.name, dn);
1071
1072 if (!dn) {
1073 dn = d_alloc(parent, &dname);
1074 dout("d_alloc %p '%.*s' = %p\n", parent,
1075 dname.len, dname.name, dn);
1076 if (dn == NULL) {
1077 dput(parent);
1078 err = -ENOMEM;
1079 goto done;
1080 }
1081 err = ceph_init_dentry(dn);
1082 if (err < 0) {
1083 dput(dn);
1084 dput(parent);
1085 goto done;
1086 }
1087 } else if (dn->d_inode &&
1088 (ceph_ino(dn->d_inode) != vino.ino ||
1089 ceph_snap(dn->d_inode) != vino.snap)) {
1090 dout(" dn %p points to wrong inode %p\n",
1091 dn, dn->d_inode);
1092 d_delete(dn);
1093 dput(dn);
1094 goto retry_lookup;
1095 }
1096
1097 req->r_dentry = dn;
1098 dput(parent);
1099 }
1051 } 1100 }
1052 1101
1053 if (rinfo->head->is_target) { 1102 if (rinfo->head->is_target) {
@@ -1063,7 +1112,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1063 1112
1064 err = fill_inode(in, &rinfo->targeti, NULL, 1113 err = fill_inode(in, &rinfo->targeti, NULL,
1065 session, req->r_request_started, 1114 session, req->r_request_started,
1066 (le32_to_cpu(rinfo->head->result) == 0) ? 1115 (!req->r_aborted && rinfo->head->result == 0) ?
1067 req->r_fmode : -1, 1116 req->r_fmode : -1,
1068 &req->r_caps_reservation); 1117 &req->r_caps_reservation);
1069 if (err < 0) { 1118 if (err < 0) {
@@ -1616,8 +1665,6 @@ static const struct inode_operations ceph_symlink_iops = {
1616 .getxattr = ceph_getxattr, 1665 .getxattr = ceph_getxattr,
1617 .listxattr = ceph_listxattr, 1666 .listxattr = ceph_listxattr,
1618 .removexattr = ceph_removexattr, 1667 .removexattr = ceph_removexattr,
1619 .get_acl = ceph_get_acl,
1620 .set_acl = ceph_set_acl,
1621}; 1668};
1622 1669
1623/* 1670/*
@@ -1627,7 +1674,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1627{ 1674{
1628 struct inode *inode = dentry->d_inode; 1675 struct inode *inode = dentry->d_inode;
1629 struct ceph_inode_info *ci = ceph_inode(inode); 1676 struct ceph_inode_info *ci = ceph_inode(inode);
1630 struct inode *parent_inode;
1631 const unsigned int ia_valid = attr->ia_valid; 1677 const unsigned int ia_valid = attr->ia_valid;
1632 struct ceph_mds_request *req; 1678 struct ceph_mds_request *req;
1633 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; 1679 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@@ -1819,9 +1865,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1819 req->r_inode_drop = release; 1865 req->r_inode_drop = release;
1820 req->r_args.setattr.mask = cpu_to_le32(mask); 1866 req->r_args.setattr.mask = cpu_to_le32(mask);
1821 req->r_num_caps = 1; 1867 req->r_num_caps = 1;
1822 parent_inode = ceph_get_dentry_parent_inode(dentry); 1868 err = ceph_mdsc_do_request(mdsc, NULL, req);
1823 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
1824 iput(parent_inode);
1825 } 1869 }
1826 dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, 1870 dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
1827 ceph_cap_string(dirtied), mask); 1871 ceph_cap_string(dirtied), mask);
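The retry_lookup loop added to ceph_fill_trace() above follows a familiar pattern: look the name up in the cache, and if the cached entry points at the wrong target, invalidate it and try again with a freshly allocated entry. A minimal userspace sketch of that pattern follows; the tiny fixed-size cache is invented for the example and only stands in for the dcache.

/* Sketch of the lookup-validate-retry pattern; the cache below is invented. */
#include <stdio.h>
#include <string.h>

struct entry {
	char name[32];
	long ino;			/* 0 means the slot is unused */
};

static struct entry cache[8];

static struct entry *cache_lookup(const char *name)
{
	for (size_t i = 0; i < sizeof(cache) / sizeof(cache[0]); i++)
		if (cache[i].ino && strcmp(cache[i].name, name) == 0)
			return &cache[i];
	return NULL;
}

static struct entry *lookup_or_alloc(const char *name, long want_ino)
{
	struct entry *e;

retry:
	e = cache_lookup(name);
	if (!e) {
		e = &cache[0];		/* pretend slot 0 is the free one */
		snprintf(e->name, sizeof(e->name), "%s", name);
		e->ino = want_ino;
	} else if (e->ino != want_ino) {
		e->ino = 0;		/* stale entry: drop it and retry */
		goto retry;
	}
	return e;
}

int main(void)
{
	cache[0] = (struct entry){ .name = "foo", .ino = 41 };	/* stale target */
	struct entry *e = lookup_or_alloc("foo", 42);

	printf("%s -> %ld\n", e->name, e->ino);			/* prints "foo -> 42" */
	return 0;
}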
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index dc66c9e023e4..efbe08289292 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -64,7 +64,6 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
64static long ceph_ioctl_set_layout(struct file *file, void __user *arg) 64static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
65{ 65{
66 struct inode *inode = file_inode(file); 66 struct inode *inode = file_inode(file);
67 struct inode *parent_inode;
68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 67 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
69 struct ceph_mds_request *req; 68 struct ceph_mds_request *req;
70 struct ceph_ioctl_layout l; 69 struct ceph_ioctl_layout l;
@@ -121,9 +120,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
121 cpu_to_le32(l.object_size); 120 cpu_to_le32(l.object_size);
122 req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); 121 req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
123 122
124 parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); 123 err = ceph_mdsc_do_request(mdsc, NULL, req);
125 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
126 iput(parent_inode);
127 ceph_mdsc_put_request(req); 124 ceph_mdsc_put_request(req);
128 return err; 125 return err;
129} 126}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae6d14e82b0f..d94ba0df9f4d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -2,11 +2,31 @@
2 2
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/namei.h> 4#include <linux/namei.h>
5#include <linux/random.h>
5 6
6#include "super.h" 7#include "super.h"
7#include "mds_client.h" 8#include "mds_client.h"
8#include <linux/ceph/pagelist.h> 9#include <linux/ceph/pagelist.h>
9 10
11static u64 lock_secret;
12
13static inline u64 secure_addr(void *addr)
14{
15 u64 v = lock_secret ^ (u64)(unsigned long)addr;
16 /*
17 * Set the most significant bit, so that MDS knows the 'owner'
18 * is sufficient to identify the owner of lock. (old code uses
19 * both 'owner' and 'pid')
20 */
21 v |= (1ULL << 63);
22 return v;
23}
24
25void __init ceph_flock_init(void)
26{
27 get_random_bytes(&lock_secret, sizeof(lock_secret));
28}
29
10/** 30/**
11 * Implement fcntl and flock locking functions. 31 * Implement fcntl and flock locking functions.
12 */ 32 */
@@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 int cmd, u8 wait, struct file_lock *fl) 34 int cmd, u8 wait, struct file_lock *fl)
15{ 35{
16 struct inode *inode = file_inode(file); 36 struct inode *inode = file_inode(file);
17 struct ceph_mds_client *mdsc = 37 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
18 ceph_sb_to_client(inode->i_sb)->mdsc;
19 struct ceph_mds_request *req; 38 struct ceph_mds_request *req;
20 int err; 39 int err;
21 u64 length = 0; 40 u64 length = 0;
41 u64 owner;
22 42
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 43 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 44 if (IS_ERR(req))
@@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
32 else 52 else
33 length = fl->fl_end - fl->fl_start + 1; 53 length = fl->fl_end - fl->fl_start + 1;
34 54
35 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 55 if (lock_type == CEPH_LOCK_FCNTL)
36 "length: %llu, wait: %d, type: %d", (int)lock_type, 56 owner = secure_addr(fl->fl_owner);
37 (int)operation, (u64)fl->fl_pid, fl->fl_start, 57 else
38 length, wait, fl->fl_type); 58 owner = secure_addr(fl->fl_file);
59
60 dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
61 "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
62 (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
63 wait, fl->fl_type);
39 64
40 req->r_args.filelock_change.rule = lock_type; 65 req->r_args.filelock_change.rule = lock_type;
41 req->r_args.filelock_change.type = cmd; 66 req->r_args.filelock_change.type = cmd;
67 req->r_args.filelock_change.owner = cpu_to_le64(owner);
42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 68 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
43 /* This should be adjusted, but I'm not sure if
44 namespaces actually get id numbers*/
45 req->r_args.filelock_change.pid_namespace =
46 cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
47 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); 69 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
48 req->r_args.filelock_change.length = cpu_to_le64(length); 70 req->r_args.filelock_change.length = cpu_to_le64(length);
49 req->r_args.filelock_change.wait = wait; 71 req->r_args.filelock_change.wait = wait;
50 72
51 err = ceph_mdsc_do_request(mdsc, inode, req); 73 err = ceph_mdsc_do_request(mdsc, inode, req);
52 74
53 if ( operation == CEPH_MDS_OP_GETFILELOCK){ 75 if (operation == CEPH_MDS_OP_GETFILELOCK) {
54 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); 76 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
55 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) 77 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
56 fl->fl_type = F_RDLCK; 78 fl->fl_type = F_RDLCK;
@@ -87,14 +109,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
87 u8 wait = 0; 109 u8 wait = 0;
88 u16 op = CEPH_MDS_OP_SETFILELOCK; 110 u16 op = CEPH_MDS_OP_SETFILELOCK;
89 111
90 fl->fl_nspid = get_pid(task_tgid(current)); 112 if (!(fl->fl_flags & FL_POSIX))
91 dout("ceph_lock, fl_pid:%d", fl->fl_pid); 113 return -ENOLCK;
114 /* No mandatory locks */
115 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
116 return -ENOLCK;
117
118 dout("ceph_lock, fl_owner: %p", fl->fl_owner);
92 119
93 /* set wait bit as appropriate, then make command as Ceph expects it*/ 120 /* set wait bit as appropriate, then make command as Ceph expects it*/
94 if (F_SETLKW == cmd) 121 if (IS_GETLK(cmd))
95 wait = 1;
96 if (F_GETLK == cmd)
97 op = CEPH_MDS_OP_GETFILELOCK; 122 op = CEPH_MDS_OP_GETFILELOCK;
123 else if (IS_SETLKW(cmd))
124 wait = 1;
98 125
99 if (F_RDLCK == fl->fl_type) 126 if (F_RDLCK == fl->fl_type)
100 lock_cmd = CEPH_LOCK_SHARED; 127 lock_cmd = CEPH_LOCK_SHARED;
@@ -105,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
105 132
106 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); 133 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
107 if (!err) { 134 if (!err) {
108 if ( op != CEPH_MDS_OP_GETFILELOCK ){ 135 if (op != CEPH_MDS_OP_GETFILELOCK) {
109 dout("mds locked, locking locally"); 136 dout("mds locked, locking locally");
110 err = posix_lock_file(file, fl, NULL); 137 err = posix_lock_file(file, fl, NULL);
111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 138 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
@@ -131,20 +158,22 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
131{ 158{
132 u8 lock_cmd; 159 u8 lock_cmd;
133 int err; 160 int err;
134 u8 wait = 1; 161 u8 wait = 0;
135 162
136 fl->fl_nspid = get_pid(task_tgid(current)); 163 if (!(fl->fl_flags & FL_FLOCK))
137 dout("ceph_flock, fl_pid:%d", fl->fl_pid); 164 return -ENOLCK;
138 165 /* No mandatory locks */
139 /* set wait bit, then clear it out of cmd*/ 166 if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
140 if (cmd & LOCK_NB) 167 return -ENOLCK;
141 wait = 0; 168
142 cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); 169 dout("ceph_flock, fl_file: %p", fl->fl_file);
143 /* set command sequence that Ceph wants to see: 170
144 shared lock, exclusive lock, or unlock */ 171 if (IS_SETLKW(cmd))
145 if (LOCK_SH == cmd) 172 wait = 1;
173
174 if (F_RDLCK == fl->fl_type)
146 lock_cmd = CEPH_LOCK_SHARED; 175 lock_cmd = CEPH_LOCK_SHARED;
147 else if (LOCK_EX == cmd) 176 else if (F_WRLCK == fl->fl_type)
148 lock_cmd = CEPH_LOCK_EXCL; 177 lock_cmd = CEPH_LOCK_EXCL;
149 else 178 else
150 lock_cmd = CEPH_LOCK_UNLOCK; 179 lock_cmd = CEPH_LOCK_UNLOCK;
@@ -280,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock,
280 struct ceph_filelock *cephlock) 309 struct ceph_filelock *cephlock)
281{ 310{
282 int err = 0; 311 int err = 0;
283
284 cephlock->start = cpu_to_le64(lock->fl_start); 312 cephlock->start = cpu_to_le64(lock->fl_start);
285 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 313 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
286 cephlock->client = cpu_to_le64(0); 314 cephlock->client = cpu_to_le64(0);
287 cephlock->pid = cpu_to_le64(lock->fl_pid); 315 cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
288 cephlock->pid_namespace = 316 if (lock->fl_flags & FL_POSIX)
289 cpu_to_le64((u64)(unsigned long)lock->fl_nspid); 317 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
318 else
319 cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
290 320
291 switch (lock->fl_type) { 321 switch (lock->fl_type) {
292 case F_RDLCK: 322 case F_RDLCK:
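The owner field introduced in locks.c is computed by secure_addr(): XOR a per-boot random secret with the lock owner's kernel address and set the top bit, so the MDS can identify the holder from 'owner' alone. A standalone illustration of just that arithmetic follows; the rand()-seeded secret is only a stand-in for get_random_bytes().

/* Standalone illustration of the secure_addr() owner-token arithmetic. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static uint64_t lock_secret;

static uint64_t secure_addr(const void *addr)
{
	uint64_t v = lock_secret ^ (uint64_t)(uintptr_t)addr;

	return v | (1ULL << 63);	/* MSB set: 'owner' alone identifies the holder */
}

int main(void)
{
	int some_owner;

	srand((unsigned)time(NULL));
	lock_secret = ((uint64_t)rand() << 32) | (uint64_t)rand();
	printf("owner token: %#llx\n", (unsigned long long)secure_addr(&some_owner));
	return 0;
}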
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f4f050a69a48..2b4d093d0563 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3,6 +3,7 @@
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/wait.h> 4#include <linux/wait.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/gfp.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/debugfs.h> 8#include <linux/debugfs.h>
8#include <linux/seq_file.h> 9#include <linux/seq_file.h>
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end,
165 if (num == 0) 166 if (num == 0)
166 goto done; 167 goto done;
167 168
168 /* alloc large array */ 169 BUG_ON(!info->dir_in);
169 info->dir_nr = num;
170 info->dir_in = kcalloc(num, sizeof(*info->dir_in) +
171 sizeof(*info->dir_dname) +
172 sizeof(*info->dir_dname_len) +
173 sizeof(*info->dir_dlease),
174 GFP_NOFS);
175 if (info->dir_in == NULL) {
176 err = -ENOMEM;
177 goto out_bad;
178 }
179 info->dir_dname = (void *)(info->dir_in + num); 170 info->dir_dname = (void *)(info->dir_in + num);
180 info->dir_dname_len = (void *)(info->dir_dname + num); 171 info->dir_dname_len = (void *)(info->dir_dname + num);
181 info->dir_dlease = (void *)(info->dir_dname_len + num); 172 info->dir_dlease = (void *)(info->dir_dname_len + num);
173 if ((unsigned long)(info->dir_dlease + num) >
174 (unsigned long)info->dir_in + info->dir_buf_size) {
175 pr_err("dir contents are larger than expected\n");
176 WARN_ON(1);
177 goto bad;
178 }
182 179
180 info->dir_nr = num;
183 while (num) { 181 while (num) {
184 /* dentry */ 182 /* dentry */
185 ceph_decode_need(p, end, sizeof(u32)*2, bad); 183 ceph_decode_need(p, end, sizeof(u32)*2, bad);
@@ -327,7 +325,9 @@ out_bad:
327 325
328static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) 326static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
329{ 327{
330 kfree(info->dir_in); 328 if (!info->dir_in)
329 return;
330 free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size));
331} 331}
332 332
333 333
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref)
512 struct ceph_mds_request *req = container_of(kref, 512 struct ceph_mds_request *req = container_of(kref,
513 struct ceph_mds_request, 513 struct ceph_mds_request,
514 r_kref); 514 r_kref);
515 destroy_reply_info(&req->r_reply_info);
515 if (req->r_request) 516 if (req->r_request)
516 ceph_msg_put(req->r_request); 517 ceph_msg_put(req->r_request);
517 if (req->r_reply) { 518 if (req->r_reply)
518 ceph_msg_put(req->r_reply); 519 ceph_msg_put(req->r_reply);
519 destroy_reply_info(&req->r_reply_info);
520 }
521 if (req->r_inode) { 520 if (req->r_inode) {
522 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 521 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
523 iput(req->r_inode); 522 iput(req->r_inode);
@@ -528,7 +527,9 @@ void ceph_mdsc_release_request(struct kref *kref)
528 iput(req->r_target_inode); 527 iput(req->r_target_inode);
529 if (req->r_dentry) 528 if (req->r_dentry)
530 dput(req->r_dentry); 529 dput(req->r_dentry);
531 if (req->r_old_dentry) { 530 if (req->r_old_dentry)
531 dput(req->r_old_dentry);
532 if (req->r_old_dentry_dir) {
532 /* 533 /*
533 * track (and drop pins for) r_old_dentry_dir 534 * track (and drop pins for) r_old_dentry_dir
534 * separately, since r_old_dentry's d_parent may have 535 * separately, since r_old_dentry's d_parent may have
@@ -537,7 +538,6 @@ void ceph_mdsc_release_request(struct kref *kref)
537 */ 538 */
538 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), 539 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
539 CEPH_CAP_PIN); 540 CEPH_CAP_PIN);
540 dput(req->r_old_dentry);
541 iput(req->r_old_dentry_dir); 541 iput(req->r_old_dentry_dir);
542 } 542 }
543 kfree(req->r_path1); 543 kfree(req->r_path1);
@@ -1311,6 +1311,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
1311 trim_caps - session->s_trim_caps); 1311 trim_caps - session->s_trim_caps);
1312 session->s_trim_caps = 0; 1312 session->s_trim_caps = 0;
1313 } 1313 }
1314
1315 ceph_add_cap_releases(mdsc, session);
1316 ceph_send_cap_releases(mdsc, session);
1314 return 0; 1317 return 0;
1315} 1318}
1316 1319
@@ -1461,15 +1464,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1461 1464
1462 dout("discard_cap_releases mds%d\n", session->s_mds); 1465 dout("discard_cap_releases mds%d\n", session->s_mds);
1463 1466
1464 /* zero out the in-progress message */ 1467 if (!list_empty(&session->s_cap_releases)) {
1465 msg = list_first_entry(&session->s_cap_releases, 1468 /* zero out the in-progress message */
1466 struct ceph_msg, list_head); 1469 msg = list_first_entry(&session->s_cap_releases,
1467 head = msg->front.iov_base; 1470 struct ceph_msg, list_head);
1468 num = le32_to_cpu(head->num); 1471 head = msg->front.iov_base;
1469 dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); 1472 num = le32_to_cpu(head->num);
1470 head->num = cpu_to_le32(0); 1473 dout("discard_cap_releases mds%d %p %u\n",
1471 msg->front.iov_len = sizeof(*head); 1474 session->s_mds, msg, num);
1472 session->s_num_cap_releases += num; 1475 head->num = cpu_to_le32(0);
1476 msg->front.iov_len = sizeof(*head);
1477 session->s_num_cap_releases += num;
1478 }
1473 1479
1474 /* requeue completed messages */ 1480 /* requeue completed messages */
1475 while (!list_empty(&session->s_cap_releases_done)) { 1481 while (!list_empty(&session->s_cap_releases_done)) {
@@ -1492,6 +1498,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
1492 * requests 1498 * requests
1493 */ 1499 */
1494 1500
1501int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
1502 struct inode *dir)
1503{
1504 struct ceph_inode_info *ci = ceph_inode(dir);
1505 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
1506 struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
1507 size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) +
1508 sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease);
1509 int order, num_entries;
1510
1511 spin_lock(&ci->i_ceph_lock);
1512 num_entries = ci->i_files + ci->i_subdirs;
1513 spin_unlock(&ci->i_ceph_lock);
1514 num_entries = max(num_entries, 1);
1515 num_entries = min(num_entries, opt->max_readdir);
1516
1517 order = get_order(size * num_entries);
1518 while (order >= 0) {
1519 rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN,
1520 order);
1521 if (rinfo->dir_in)
1522 break;
1523 order--;
1524 }
1525 if (!rinfo->dir_in)
1526 return -ENOMEM;
1527
1528 num_entries = (PAGE_SIZE << order) / size;
1529 num_entries = min(num_entries, opt->max_readdir);
1530
1531 rinfo->dir_buf_size = PAGE_SIZE << order;
1532 req->r_num_caps = num_entries + 1;
1533 req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
1534 req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
1535 return 0;
1536}
1537
1495/* 1538/*
1496 * Create an mds request. 1539 * Create an mds request.
1497 */ 1540 */
@@ -2053,7 +2096,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
2053 ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 2096 ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
2054 if (req->r_locked_dir) 2097 if (req->r_locked_dir)
2055 ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); 2098 ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
2056 if (req->r_old_dentry) 2099 if (req->r_old_dentry_dir)
2057 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), 2100 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
2058 CEPH_CAP_PIN); 2101 CEPH_CAP_PIN);
2059 2102
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 68288917c737..e90cfccf93bd 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed {
67 /* for readdir results */ 67 /* for readdir results */
68 struct { 68 struct {
69 struct ceph_mds_reply_dirfrag *dir_dir; 69 struct ceph_mds_reply_dirfrag *dir_dir;
70 size_t dir_buf_size;
70 int dir_nr; 71 int dir_nr;
71 char **dir_dname; 72 char **dir_dname;
72 u32 *dir_dname_len; 73 u32 *dir_dname_len;
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
346 struct dentry *dn); 347 struct dentry *dn);
347 348
348extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); 349extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
349 350extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
351 struct inode *dir);
350extern struct ceph_mds_request * 352extern struct ceph_mds_request *
351ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); 353ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
352extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, 354extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 4440f447fd3f..51cc23e48111 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op)
54 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; 54 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
55 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; 55 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
56 case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; 56 case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
57 case CEPH_MDS_OP_LOOKUPNAME: return "lookupname";
57 case CEPH_MDS_OP_GETATTR: return "getattr"; 58 case CEPH_MDS_OP_GETATTR: return "getattr";
58 case CEPH_MDS_OP_SETXATTR: return "setxattr"; 59 case CEPH_MDS_OP_SETXATTR: return "setxattr";
59 case CEPH_MDS_OP_SETATTR: return "setattr"; 60 case CEPH_MDS_OP_SETATTR: return "setattr";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 10a4ccbf38da..06150fd745ac 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1026,6 +1026,7 @@ static int __init init_ceph(void)
1026 if (ret) 1026 if (ret)
1027 goto out; 1027 goto out;
1028 1028
1029 ceph_flock_init();
1029 ceph_xattr_init(); 1030 ceph_xattr_init();
1030 ret = register_filesystem(&ceph_fs_type); 1031 ret = register_filesystem(&ceph_fs_type);
1031 if (ret) 1032 if (ret)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d8801a95b685..7866cd05a6bb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -577,7 +577,7 @@ struct ceph_file_info {
577 577
578 /* readdir: position within a frag */ 578 /* readdir: position within a frag */
579 unsigned offset; /* offset of last chunk, adjusted for . and .. */ 579 unsigned offset; /* offset of last chunk, adjusted for . and .. */
580 u64 next_offset; /* offset of next chunk (last_name's + 1) */ 580 unsigned next_offset; /* offset of next chunk (last_name's + 1) */
581 char *last_name; /* last entry in previous chunk */ 581 char *last_name; /* last entry in previous chunk */
582 struct dentry *dentry; /* next dentry (for dcache readdir) */ 582 struct dentry *dentry; /* next dentry (for dcache readdir) */
583 int dir_release_count; 583 int dir_release_count;
@@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
871extern const struct export_operations ceph_export_ops; 871extern const struct export_operations ceph_export_ops;
872 872
873/* locks.c */ 873/* locks.c */
874extern __init void ceph_flock_init(void);
874extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); 875extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
875extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); 876extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
876extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); 877extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a55ec37378c6..c9c2b887381e 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -64,32 +64,48 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
64} 64}
65 65
66static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, 66static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
67 size_t size) 67 size_t size)
68{ 68{
69 int ret; 69 int ret;
70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); 70 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
71 struct ceph_osd_client *osdc = &fsc->client->osdc; 71 struct ceph_osd_client *osdc = &fsc->client->osdc;
72 s64 pool = ceph_file_layout_pg_pool(ci->i_layout); 72 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
73 const char *pool_name; 73 const char *pool_name;
74 char buf[128];
74 75
75 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); 76 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
76 down_read(&osdc->map_sem); 77 down_read(&osdc->map_sem);
77 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); 78 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
78 if (pool_name) 79 if (pool_name) {
79 ret = snprintf(val, size, 80 size_t len = strlen(pool_name);
80 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", 81 ret = snprintf(buf, sizeof(buf),
82 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
81 (unsigned long long)ceph_file_layout_su(ci->i_layout), 83 (unsigned long long)ceph_file_layout_su(ci->i_layout),
82 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 84 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
83 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 85 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
84 pool_name); 86 if (!size) {
85 else 87 ret += len;
86 ret = snprintf(val, size, 88 } else if (ret + len > size) {
89 ret = -ERANGE;
90 } else {
91 memcpy(val, buf, ret);
92 memcpy(val + ret, pool_name, len);
93 ret += len;
94 }
95 } else {
96 ret = snprintf(buf, sizeof(buf),
87 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", 97 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
88 (unsigned long long)ceph_file_layout_su(ci->i_layout), 98 (unsigned long long)ceph_file_layout_su(ci->i_layout),
89 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), 99 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
90 (unsigned long long)ceph_file_layout_object_size(ci->i_layout), 100 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
91 (unsigned long long)pool); 101 (unsigned long long)pool);
92 102 if (size) {
103 if (ret <= size)
104 memcpy(val, buf, ret);
105 else
106 ret = -ERANGE;
107 }
108 }
93 up_read(&osdc->map_sem); 109 up_read(&osdc->map_sem);
94 return ret; 110 return ret;
95} 111}
@@ -215,7 +231,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
215 .name_size = sizeof("ceph.dir.layout"), 231 .name_size = sizeof("ceph.dir.layout"),
216 .getxattr_cb = ceph_vxattrcb_layout, 232 .getxattr_cb = ceph_vxattrcb_layout,
217 .readonly = false, 233 .readonly = false,
218 .hidden = false, 234 .hidden = true,
219 .exists_cb = ceph_vxattrcb_layout_exists, 235 .exists_cb = ceph_vxattrcb_layout_exists,
220 }, 236 },
221 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), 237 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
@@ -242,7 +258,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
242 .name_size = sizeof("ceph.file.layout"), 258 .name_size = sizeof("ceph.file.layout"),
243 .getxattr_cb = ceph_vxattrcb_layout, 259 .getxattr_cb = ceph_vxattrcb_layout,
244 .readonly = false, 260 .readonly = false,
245 .hidden = false, 261 .hidden = true,
246 .exists_cb = ceph_vxattrcb_layout_exists, 262 .exists_cb = ceph_vxattrcb_layout_exists,
247 }, 263 },
248 XATTR_LAYOUT_FIELD(file, layout, stripe_unit), 264 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
@@ -842,7 +858,6 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
842 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 858 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
843 struct inode *inode = dentry->d_inode; 859 struct inode *inode = dentry->d_inode;
844 struct ceph_inode_info *ci = ceph_inode(inode); 860 struct ceph_inode_info *ci = ceph_inode(inode);
845 struct inode *parent_inode;
846 struct ceph_mds_request *req; 861 struct ceph_mds_request *req;
847 struct ceph_mds_client *mdsc = fsc->mdsc; 862 struct ceph_mds_client *mdsc = fsc->mdsc;
848 int err; 863 int err;
@@ -893,9 +908,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
893 req->r_data_len = size; 908 req->r_data_len = size;
894 909
895 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 910 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
896 parent_inode = ceph_get_dentry_parent_inode(dentry); 911 err = ceph_mdsc_do_request(mdsc, NULL, req);
897 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
898 iput(parent_inode);
899 ceph_mdsc_put_request(req); 912 ceph_mdsc_put_request(req);
900 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 913 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
901 914
@@ -1019,7 +1032,6 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1019 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 1032 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
1020 struct ceph_mds_client *mdsc = fsc->mdsc; 1033 struct ceph_mds_client *mdsc = fsc->mdsc;
1021 struct inode *inode = dentry->d_inode; 1034 struct inode *inode = dentry->d_inode;
1022 struct inode *parent_inode;
1023 struct ceph_mds_request *req; 1035 struct ceph_mds_request *req;
1024 int err; 1036 int err;
1025 1037
@@ -1033,9 +1045,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1033 req->r_num_caps = 1; 1045 req->r_num_caps = 1;
1034 req->r_path2 = kstrdup(name, GFP_NOFS); 1046 req->r_path2 = kstrdup(name, GFP_NOFS);
1035 1047
1036 parent_inode = ceph_get_dentry_parent_inode(dentry); 1048 err = ceph_mdsc_do_request(mdsc, NULL, req);
1037 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
1038 iput(parent_inode);
1039 ceph_mdsc_put_request(req); 1049 ceph_mdsc_put_request(req);
1040 return err; 1050 return err;
1041} 1051}
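The reworked ceph.*.layout getter above follows the usual getxattr contract: a zero size reports the length the caller needs, a buffer that is too small yields -ERANGE, and otherwise the value is copied out. A minimal userspace sketch of that contract; the value string and function name are invented for the example.

/* Minimal sketch of the getxattr-style size contract used by the layout vxattr. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

static ssize_t layout_get(char *val, size_t size)
{
	const char *s = "stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=rbd";
	size_t len = strlen(s);

	if (size == 0)
		return (ssize_t)len;	/* probe for the required size */
	if (len > size)
		return -ERANGE;		/* caller's buffer is too small */
	memcpy(val, s, len);
	return (ssize_t)len;
}

int main(void)
{
	char buf[128];
	ssize_t need = layout_get(NULL, 0);
	ssize_t got = layout_get(buf, sizeof(buf));

	printf("need=%zd got=%zd value=%.*s\n", need, got, (int)got, buf);
	return 0;
}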
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 834fce759d80..216d7e99f921 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3113,6 +3113,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3113 3113
3114static struct vm_operations_struct cifs_file_vm_ops = { 3114static struct vm_operations_struct cifs_file_vm_ops = {
3115 .fault = filemap_fault, 3115 .fault = filemap_fault,
3116 .map_pages = filemap_map_pages,
3116 .page_mkwrite = cifs_page_mkwrite, 3117 .page_mkwrite = cifs_page_mkwrite,
3117 .remap_pages = generic_file_remap_pages, 3118 .remap_pages = generic_file_remap_pages,
3118}; 3119};
diff --git a/fs/dcache.c b/fs/dcache.c
index 66cba5a8a346..40707d88a945 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3144,6 +3144,7 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
3144 end = ERR_PTR(-ENAMETOOLONG); 3144 end = ERR_PTR(-ENAMETOOLONG);
3145 return end; 3145 return end;
3146} 3146}
3147EXPORT_SYMBOL(simple_dname);
3147 3148
3148/* 3149/*
3149 * Write full pathname from the root of the filesystem into the buffer. 3150 * Write full pathname from the root of the filesystem into the buffer.
diff --git a/fs/exec.c b/fs/exec.c
index 25dfeba6d55f..9e81c630dfa7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/vmacache.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
30#include <linux/fcntl.h> 31#include <linux/fcntl.h>
31#include <linux/swap.h> 32#include <linux/swap.h>
@@ -822,7 +823,7 @@ EXPORT_SYMBOL(read_code);
822static int exec_mmap(struct mm_struct *mm) 823static int exec_mmap(struct mm_struct *mm)
823{ 824{
824 struct task_struct *tsk; 825 struct task_struct *tsk;
825 struct mm_struct * old_mm, *active_mm; 826 struct mm_struct *old_mm, *active_mm;
826 827
827 /* Notify parent that we're no longer interested in the old VM */ 828 /* Notify parent that we're no longer interested in the old VM */
828 tsk = current; 829 tsk = current;
@@ -848,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm)
848 tsk->mm = mm; 849 tsk->mm = mm;
849 tsk->active_mm = mm; 850 tsk->active_mm = mm;
850 activate_mm(active_mm, mm); 851 activate_mm(active_mm, mm);
852 tsk->mm->vmacache_seqnum = 0;
853 vmacache_flush(tsk);
851 task_unlock(tsk); 854 task_unlock(tsk);
852 if (old_mm) { 855 if (old_mm) {
853 up_read(&old_mm->mmap_sem); 856 up_read(&old_mm->mmap_sem);
@@ -1043,7 +1046,7 @@ EXPORT_SYMBOL_GPL(get_task_comm);
1043 * so that a new one can be started 1046 * so that a new one can be started
1044 */ 1047 */
1045 1048
1046void set_task_comm(struct task_struct *tsk, char *buf) 1049void set_task_comm(struct task_struct *tsk, const char *buf)
1047{ 1050{
1048 task_lock(tsk); 1051 task_lock(tsk);
1049 trace_task_rename(tsk, buf); 1052 trace_task_rename(tsk, buf);
@@ -1052,21 +1055,6 @@ void set_task_comm(struct task_struct *tsk, char *buf)
1052 perf_event_comm(tsk); 1055 perf_event_comm(tsk);
1053} 1056}
1054 1057
1055static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
1056{
1057 int i, ch;
1058
1059 /* Copies the binary name from after last slash */
1060 for (i = 0; (ch = *(fn++)) != '\0';) {
1061 if (ch == '/')
1062 i = 0; /* overwrite what we wrote */
1063 else
1064 if (i < len - 1)
1065 tcomm[i++] = ch;
1066 }
1067 tcomm[i] = '\0';
1068}
1069
1070int flush_old_exec(struct linux_binprm * bprm) 1058int flush_old_exec(struct linux_binprm * bprm)
1071{ 1059{
1072 int retval; 1060 int retval;
@@ -1080,8 +1068,6 @@ int flush_old_exec(struct linux_binprm * bprm)
1080 goto out; 1068 goto out;
1081 1069
1082 set_mm_exe_file(bprm->mm, bprm->file); 1070 set_mm_exe_file(bprm->mm, bprm->file);
1083
1084 filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
1085 /* 1071 /*
1086 * Release all of the old mmap stuff 1072 * Release all of the old mmap stuff
1087 */ 1073 */
@@ -1124,7 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1124 else 1110 else
1125 set_dumpable(current->mm, suid_dumpable); 1111 set_dumpable(current->mm, suid_dumpable);
1126 1112
1127 set_task_comm(current, bprm->tcomm); 1113 set_task_comm(current, kbasename(bprm->filename));
1128 1114
1129 /* Set the new mm task size. We have to do that late because it may 1115 /* Set the new mm task size. We have to do that late because it may
1130 * depend on TIF_32BIT which is only updated in flush_thread() on 1116 * depend on TIF_32BIT which is only updated in flush_thread() on
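The removed filename_to_taskname() helper in exec.c is replaced by a kbasename() call; the visible effect is simply "take everything after the last slash" before it is truncated into the task comm. A tiny strrchr-based userspace equivalent, purely illustrative:

/* Userspace equivalent of the kbasename() result that now feeds set_task_comm(). */
#include <stdio.h>
#include <string.h>

static const char *basename_of(const char *path)
{
	const char *slash = strrchr(path, '/');

	return slash ? slash + 1 : path;
}

int main(void)
{
	printf("%s\n", basename_of("/usr/bin/ls"));	/* prints "ls" */
	printf("%s\n", basename_of("ls"));		/* prints "ls" */
	return 0;
}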
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 1b8001bbe947..27695e6f4e46 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -4,7 +4,6 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include <linux/capability.h>
8#include <linux/init.h> 7#include <linux/init.h>
9#include <linux/sched.h> 8#include <linux/sched.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 7cadd823bb31..7d66fb0e4cca 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -284,7 +284,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
284 int best_ndir = inodes_per_group; 284 int best_ndir = inodes_per_group;
285 int best_group = -1; 285 int best_group = -1;
286 286
287 get_random_bytes(&group, sizeof(group)); 287 group = prandom_u32();
288 parent_group = (unsigned)group % ngroups; 288 parent_group = (unsigned)group % ngroups;
289 for (i = 0; i < ngroups; i++) { 289 for (i = 0; i < ngroups; i++) {
290 group = (parent_group + i) % ngroups; 290 group = (parent_group + i) % ngroups;
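The ext2 (and matching ext3) hunk only swaps get_random_bytes() for prandom_u32() when picking the starting group, but the surrounding Orlov scan is worth seeing in one piece: start at a random group, then probe groups modulo ngroups. A userspace sketch with rand() standing in for prandom_u32() and the per-group scoring omitted:

/* Sketch of the random-start, modulo-ngroups group scan around the changed line. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	unsigned int ngroups = 16;	/* assumed block-group count */
	unsigned int parent_group, group;

	srand((unsigned)time(NULL));
	parent_group = (unsigned)rand() % ngroups;

	for (unsigned int i = 0; i < ngroups; i++) {
		group = (parent_group + i) % ngroups;
		printf("probe group %u\n", group);	/* real code scores each group here */
	}
	return 0;
}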
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d260115c0350..3750031cfa2f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -192,7 +192,7 @@ static void init_once(void *foo)
192 inode_init_once(&ei->vfs_inode); 192 inode_init_once(&ei->vfs_inode);
193} 193}
194 194
195static int init_inodecache(void) 195static int __init init_inodecache(void)
196{ 196{
197 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", 197 ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
198 sizeof(struct ext2_inode_info), 198 sizeof(struct ext2_inode_info),
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index cfedb2cb0d8c..c0ebc4db8849 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -42,8 +42,8 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
42 value, size, flags); 42 value, size, flags);
43} 43}
44 44
45int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 45static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
46 void *fs_info) 46 void *fs_info)
47{ 47{
48 const struct xattr *xattr; 48 const struct xattr *xattr;
49 int err = 0; 49 int err = 0;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 22548f56197b..158b5d4ce067 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1727,10 +1727,7 @@ allocated:
1727 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1727 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1728 1728
1729 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); 1729 BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1730 err = ext3_journal_dirty_metadata(handle, gdp_bh); 1730 fatal = ext3_journal_dirty_metadata(handle, gdp_bh);
1731 if (!fatal)
1732 fatal = err;
1733
1734 if (fatal) 1731 if (fatal)
1735 goto out; 1732 goto out;
1736 1733
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index e66e4808719f..17742eed2c16 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -275,7 +275,7 @@ static inline loff_t ext3_get_htree_eof(struct file *filp)
275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) 275 * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
276 * will be invalid once the directory was converted into a dx directory 276 * will be invalid once the directory was converted into a dx directory
277 */ 277 */
278loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) 278static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
279{ 279{
280 struct inode *inode = file->f_mapping->host; 280 struct inode *inode = file->f_mapping->host;
281 int dx_dir = is_dx_dir(inode); 281 int dx_dir = is_dx_dir(inode);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 082afd78b107..a1b810230cc5 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -215,7 +215,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
215 int best_ndir = inodes_per_group; 215 int best_ndir = inodes_per_group;
216 int best_group = -1; 216 int best_group = -1;
217 217
218 get_random_bytes(&group, sizeof(group)); 218 group = prandom_u32();
219 parent_group = (unsigned)group % ngroups; 219 parent_group = (unsigned)group % ngroups;
220 for (i = 0; i < ngroups; i++) { 220 for (i = 0; i < ngroups; i++) {
221 group = (parent_group + i) % ngroups; 221 group = (parent_group + i) % ngroups;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index efce2bbfb5e5..f5157d0d1b43 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
1559} 1559}
1560 1560
1561/* 1561/*
1562 * Note that we always start a transaction even if we're not journalling 1562 * Note that whenever we need to map blocks we start a transaction even if
1563 * data. This is to preserve ordering: any hole instantiation within 1563 * we're not journalling data. This is to preserve ordering: any hole
1564 * __block_write_full_page -> ext3_get_block() should be journalled 1564 * instantiation within __block_write_full_page -> ext3_get_block() should be
1565 * along with the data so we don't crash and then get metadata which 1565 * journalled along with the data so we don't crash and then get metadata which
1566 * refers to old data. 1566 * refers to old data.
1567 * 1567 *
1568 * In all journalling modes block_write_full_page() will start the I/O. 1568 * In all journalling modes block_write_full_page() will start the I/O.
1569 * 1569 *
1570 * Problem:
1571 *
1572 * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
1573 * ext3_writepage()
1574 *
1575 * Similar for:
1576 *
1577 * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
1578 *
1579 * Same applies to ext3_get_block(). We will deadlock on various things like
1580 * lock_journal and i_truncate_mutex.
1581 *
1582 * Setting PF_MEMALLOC here doesn't work - too many internal memory
1583 * allocations fail.
1584 *
1585 * 16May01: If we're reentered then journal_current_handle() will be
1586 * non-zero. We simply *return*.
1587 *
1588 * 1 July 2001: @@@ FIXME:
1589 * In journalled data mode, a data buffer may be metadata against the
1590 * current transaction. But the same file is part of a shared mapping
1591 * and someone does a writepage() on it.
1592 *
1593 * We will move the buffer onto the async_data list, but *after* it has
1594 * been dirtied. So there's a small window where we have dirty data on
1595 * BJ_Metadata.
1596 *
1597 * Note that this only applies to the last partial page in the file. The
1598 * bit which block_write_full_page() uses prepare/commit for. (That's
1599 * broken code anyway: it's wrong for msync()).
1600 *
1601 * It's a rare case: affects the final partial page, for journalled data
1602 * where the file is subject to bith write() and writepage() in the same
1603 * transction. To fix it we'll need a custom block_write_full_page().
1604 * We'll probably need that anyway for journalling writepage() output.
1605 *
1606 * We don't honour synchronous mounts for writepage(). That would be 1570 * We don't honour synchronous mounts for writepage(). That would be
1607 * disastrous. Any write() or metadata operation will sync the fs for 1571 * disastrous. Any write() or metadata operation will sync the fs for
1608 * us. 1572 * us.
1609 *
1610 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
1611 * we don't need to open a transaction here.
1612 */ 1573 */
1613static int ext3_ordered_writepage(struct page *page, 1574static int ext3_ordered_writepage(struct page *page,
1614 struct writeback_control *wbc) 1575 struct writeback_control *wbc)
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
1673 * block_write_full_page() succeeded. Otherwise they are unmapped, 1634 * block_write_full_page() succeeded. Otherwise they are unmapped,
1674 * and generally junk. 1635 * and generally junk.
1675 */ 1636 */
1676 if (ret == 0) { 1637 if (ret == 0)
1677 err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, 1638 ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
1678 NULL, journal_dirty_data_fn); 1639 NULL, journal_dirty_data_fn);
1679 if (!ret)
1680 ret = err;
1681 }
1682 walk_page_buffers(handle, page_bufs, 0, 1640 walk_page_buffers(handle, page_bufs, 0,
1683 PAGE_CACHE_SIZE, NULL, bput_one); 1641 PAGE_CACHE_SIZE, NULL, bput_one);
1684 err = ext3_journal_stop(handle); 1642 err = ext3_journal_stop(handle);
@@ -1925,6 +1883,8 @@ retry:
1925 * and pretend the write failed... */ 1883 * and pretend the write failed... */
1926 ext3_truncate_failed_direct_write(inode); 1884 ext3_truncate_failed_direct_write(inode);
1927 ret = PTR_ERR(handle); 1885 ret = PTR_ERR(handle);
1886 if (inode->i_nlink)
1887 ext3_orphan_del(NULL, inode);
1928 goto out; 1888 goto out;
1929 } 1889 }
1930 if (inode->i_nlink) 1890 if (inode->i_nlink)
@@ -3212,21 +3172,20 @@ out_brelse:
3212 * 3172 *
3213 * We are called from a few places: 3173 * We are called from a few places:
3214 * 3174 *
3215 * - Within generic_file_write() for O_SYNC files. 3175 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
3216 * Here, there will be no transaction running. We wait for any running 3176 * Here, there will be no transaction running. We wait for any running
3217 * transaction to commit. 3177 * transaction to commit.
3218 * 3178 *
3219 * - Within sys_sync(), kupdate and such. 3179 * - Within flush work (for sys_sync(), kupdate and such).
3220 * We wait on commit, if tol to. 3180 * We wait on commit, if told to.
3221 * 3181 *
3222 * - Within prune_icache() (PF_MEMALLOC == true) 3182 * - Within iput_final() -> write_inode_now()
3223 * Here we simply return. We can't afford to block kswapd on the 3183 * We wait on commit, if told to.
3224 * journal commit.
3225 * 3184 *
3226 * In all cases it is actually safe for us to return without doing anything, 3185 * In all cases it is actually safe for us to return without doing anything,
3227 * because the inode has been copied into a raw inode buffer in 3186 * because the inode has been copied into a raw inode buffer in
3228 * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 3187 * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
3229 * knfsd. 3188 * writeback.
3230 * 3189 *
3231 * Note that we are absolutely dependent upon all inode dirtiers doing the 3190 * Note that we are absolutely dependent upon all inode dirtiers doing the
3232 * right thing: they *must* call mark_inode_dirty() after dirtying info in 3191 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -3238,13 +3197,13 @@ out_brelse:
3238 * stuff(); 3197 * stuff();
3239 * inode->i_size = expr; 3198 * inode->i_size = expr;
3240 * 3199 *
3241 * is in error because a kswapd-driven write_inode() could occur while 3200 * is in error because write_inode() could occur while `stuff()' is running,
3242 * `stuff()' is running, and the new i_size will be lost. Plus the inode 3201 * and the new i_size will be lost. Plus the inode will no longer be on the
3243 * will no longer be on the superblock's dirty inode list. 3202 * superblock's dirty inode list.
3244 */ 3203 */
3245int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) 3204int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3246{ 3205{
3247 if (current->flags & PF_MEMALLOC) 3206 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
3248 return 0; 3207 return 0;
3249 3208
3250 if (ext3_journal_current_handle()) { 3209 if (ext3_journal_current_handle()) {
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3253 return -EIO; 3212 return -EIO;
3254 } 3213 }
3255 3214
3256 if (wbc->sync_mode != WB_SYNC_ALL) 3215 /*
3216 * No need to force transaction in WB_SYNC_NONE mode. Also
3217 * ext3_sync_fs() will force the commit after everything is
3218 * written.
3219 */
3220 if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
3257 return 0; 3221 return 0;
3258 3222
3259 return ext3_force_commit(inode->i_sb); 3223 return ext3_force_commit(inode->i_sb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 95c6c5a6d0c5..08cdfe5461e3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -527,7 +527,7 @@ static void init_once(void *foo)
527 inode_init_once(&ei->vfs_inode); 527 inode_init_once(&ei->vfs_inode);
528} 528}
529 529
530static int init_inodecache(void) 530static int __init init_inodecache(void)
531{ 531{
532 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", 532 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
533 sizeof(struct ext3_inode_info), 533 sizeof(struct ext3_inode_info),
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 3387664ad70e..722c2bf9645d 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -43,8 +43,9 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
43 name, value, size, flags); 43 name, value, size, flags);
44} 44}
45 45
46int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array, 46static int ext3_initxattrs(struct inode *inode,
47 void *fs_info) 47 const struct xattr *xattr_array,
48 void *fs_info)
48{ 49{
49 const struct xattr *xattr; 50 const struct xattr *xattr;
50 handle_t *handle = fs_info; 51 handle_t *handle = fs_info;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6db7f7db7777..4e508fc83dcf 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,6 +200,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
200 200
201static const struct vm_operations_struct ext4_file_vm_ops = { 201static const struct vm_operations_struct ext4_file_vm_ops = {
202 .fault = filemap_fault, 202 .fault = filemap_fault,
203 .map_pages = filemap_map_pages,
203 .page_mkwrite = ext4_page_mkwrite, 204 .page_mkwrite = ext4_page_mkwrite,
204 .remap_pages = generic_file_remap_pages, 205 .remap_pages = generic_file_remap_pages,
205}; 206};
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa8da4cb8c4b..e93e4ec7d165 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
174 174
175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0); 175 retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
176 if (retval > 0) { 176 if (retval > 0) {
177 value = kmalloc(retval, GFP_KERNEL); 177 value = kmalloc(retval, GFP_F2FS_ZERO);
178 if (!value) 178 if (!value)
179 return ERR_PTR(-ENOMEM); 179 return ERR_PTR(-ENOMEM);
180 retval = f2fs_getxattr(inode, name_index, "", value, retval); 180 retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
206 switch (type) { 212 switch (type) {
207 case ACL_TYPE_ACCESS: 213 case ACL_TYPE_ACCESS:
208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 293d0486a40f..4aa521aa9bc3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
33 struct address_space *mapping = META_MAPPING(sbi); 33 struct address_space *mapping = META_MAPPING(sbi);
34 struct page *page = NULL; 34 struct page *page = NULL;
35repeat: 35repeat:
36 page = grab_cache_page(mapping, index); 36 page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
37 if (!page) { 37 if (!page) {
38 cond_resched(); 38 cond_resched();
39 goto repeat; 39 goto repeat;
40 } 40 }
41 41
42 /* We wait writeback only inside grab_meta_page() */
43 wait_on_page_writeback(page);
44 SetPageUptodate(page); 42 SetPageUptodate(page);
45 return page; 43 return page;
46} 44}
@@ -75,23 +73,102 @@ out:
75 return page; 73 return page;
76} 74}
77 75
76inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
77{
78 switch (type) {
79 case META_NAT:
80 return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
81 case META_SIT:
82 return SIT_BLK_CNT(sbi);
83 case META_SSA:
84 case META_CP:
85 return 0;
86 default:
87 BUG();
88 }
89}
90
91/*
92 * Readahead CP/NAT/SIT/SSA pages
93 */
94int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
95{
96 block_t prev_blk_addr = 0;
97 struct page *page;
98 int blkno = start;
99 int max_blks = get_max_meta_blks(sbi, type);
100
101 struct f2fs_io_info fio = {
102 .type = META,
103 .rw = READ_SYNC | REQ_META | REQ_PRIO
104 };
105
106 for (; nrpages-- > 0; blkno++) {
107 block_t blk_addr;
108
109 switch (type) {
110 case META_NAT:
111 /* get nat block addr */
112 if (unlikely(blkno >= max_blks))
113 blkno = 0;
114 blk_addr = current_nat_addr(sbi,
115 blkno * NAT_ENTRY_PER_BLOCK);
116 break;
117 case META_SIT:
118 /* get sit block addr */
119 if (unlikely(blkno >= max_blks))
120 goto out;
121 blk_addr = current_sit_addr(sbi,
122 blkno * SIT_ENTRY_PER_BLOCK);
123 if (blkno != start && prev_blk_addr + 1 != blk_addr)
124 goto out;
125 prev_blk_addr = blk_addr;
126 break;
127 case META_SSA:
128 case META_CP:
129 /* get ssa/cp block addr */
130 blk_addr = blkno;
131 break;
132 default:
133 BUG();
134 }
135
136 page = grab_cache_page(META_MAPPING(sbi), blk_addr);
137 if (!page)
138 continue;
139 if (PageUptodate(page)) {
140 mark_page_accessed(page);
141 f2fs_put_page(page, 1);
142 continue;
143 }
144
145 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
146 mark_page_accessed(page);
147 f2fs_put_page(page, 0);
148 }
149out:
150 f2fs_submit_merged_bio(sbi, META, READ);
151 return blkno - start;
152}
153
78static int f2fs_write_meta_page(struct page *page, 154static int f2fs_write_meta_page(struct page *page,
79 struct writeback_control *wbc) 155 struct writeback_control *wbc)
80{ 156{
81 struct inode *inode = page->mapping->host; 157 struct inode *inode = page->mapping->host;
82 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 158 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
83 159
84 /* Should not write any meta pages, if any IO error was occurred */ 160 if (unlikely(sbi->por_doing))
85 if (unlikely(sbi->por_doing ||
86 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
87 goto redirty_out; 161 goto redirty_out;
88
89 if (wbc->for_reclaim) 162 if (wbc->for_reclaim)
90 goto redirty_out; 163 goto redirty_out;
91 164
92 wait_on_page_writeback(page); 165 /* Should not write any meta pages, if any IO error was occurred */
166 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
167 goto no_write;
93 168
169 f2fs_wait_on_page_writeback(page, META);
94 write_meta_page(sbi, page); 170 write_meta_page(sbi, page);
171no_write:
95 dec_page_count(sbi, F2FS_DIRTY_META); 172 dec_page_count(sbi, F2FS_DIRTY_META);
96 unlock_page(page); 173 unlock_page(page);
97 return 0; 174 return 0;
@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page,
99redirty_out: 176redirty_out:
100 dec_page_count(sbi, F2FS_DIRTY_META); 177 dec_page_count(sbi, F2FS_DIRTY_META);
101 wbc->pages_skipped++; 178 wbc->pages_skipped++;
179 account_page_redirty(page);
102 set_page_dirty(page); 180 set_page_dirty(page);
103 return AOP_WRITEPAGE_ACTIVATE; 181 return AOP_WRITEPAGE_ACTIVATE;
104} 182}
@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
107 struct writeback_control *wbc) 185 struct writeback_control *wbc)
108{ 186{
109 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 187 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
110 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 188 long diff, written;
111 long written;
112
113 if (wbc->for_kupdate)
114 return 0;
115 189
116 /* collect a number of dirty meta pages and write together */ 190 /* collect a number of dirty meta pages and write together */
117 if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) 191 if (wbc->for_kupdate ||
118 return 0; 192 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
193 goto skip_write;
119 194
120 /* if mounting is failed, skip writing node pages */ 195 /* if mounting is failed, skip writing node pages */
121 mutex_lock(&sbi->cp_mutex); 196 mutex_lock(&sbi->cp_mutex);
122 written = sync_meta_pages(sbi, META, nrpages); 197 diff = nr_pages_to_write(sbi, META, wbc);
198 written = sync_meta_pages(sbi, META, wbc->nr_to_write);
123 mutex_unlock(&sbi->cp_mutex); 199 mutex_unlock(&sbi->cp_mutex);
124 wbc->nr_to_write -= written; 200 wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
201 return 0;
202
203skip_write:
204 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
125 return 0; 205 return 0;
126} 206}
127 207
@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
148 228
149 for (i = 0; i < nr_pages; i++) { 229 for (i = 0; i < nr_pages; i++) {
150 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
231
151 lock_page(page); 232 lock_page(page);
152 f2fs_bug_on(page->mapping != mapping); 233
153 f2fs_bug_on(!PageDirty(page)); 234 if (unlikely(page->mapping != mapping)) {
154 clear_page_dirty_for_io(page); 235continue_unlock:
236 unlock_page(page);
237 continue;
238 }
239 if (!PageDirty(page)) {
240 /* someone wrote it for us */
241 goto continue_unlock;
242 }
243
244 if (!clear_page_dirty_for_io(page))
245 goto continue_unlock;
246
155 if (f2fs_write_meta_page(page, &wbc)) { 247 if (f2fs_write_meta_page(page, &wbc)) {
156 unlock_page(page); 248 unlock_page(page);
157 break; 249 break;
@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
216 308
217void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 309void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
218{ 310{
219 struct list_head *head, *this; 311 struct list_head *head;
220 struct orphan_inode_entry *new = NULL, *orphan = NULL; 312 struct orphan_inode_entry *new, *orphan;
221 313
222 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); 314 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
223 new->ino = ino; 315 new->ino = ino;
224 316
225 spin_lock(&sbi->orphan_inode_lock); 317 spin_lock(&sbi->orphan_inode_lock);
226 head = &sbi->orphan_inode_list; 318 head = &sbi->orphan_inode_list;
227 list_for_each(this, head) { 319 list_for_each_entry(orphan, head, list) {
228 orphan = list_entry(this, struct orphan_inode_entry, list);
229 if (orphan->ino == ino) { 320 if (orphan->ino == ino) {
230 spin_unlock(&sbi->orphan_inode_lock); 321 spin_unlock(&sbi->orphan_inode_lock);
231 kmem_cache_free(orphan_entry_slab, new); 322 kmem_cache_free(orphan_entry_slab, new);
@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
234 325
235 if (orphan->ino > ino) 326 if (orphan->ino > ino)
236 break; 327 break;
237 orphan = NULL;
238 } 328 }
239 329
240 /* add new_oentry into list which is sorted by inode number */ 330 /* add new orphan entry into list which is sorted by inode number */
241 if (orphan) 331 list_add_tail(&new->list, &orphan->list);
242 list_add(&new->list, this->prev);
243 else
244 list_add_tail(&new->list, head);
245 spin_unlock(&sbi->orphan_inode_lock); 332 spin_unlock(&sbi->orphan_inode_lock);
246} 333}
247 334
@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
255 list_for_each_entry(orphan, head, list) { 342 list_for_each_entry(orphan, head, list) {
256 if (orphan->ino == ino) { 343 if (orphan->ino == ino) {
257 list_del(&orphan->list); 344 list_del(&orphan->list);
258 kmem_cache_free(orphan_entry_slab, orphan);
259 f2fs_bug_on(sbi->n_orphans == 0); 345 f2fs_bug_on(sbi->n_orphans == 0);
260 sbi->n_orphans--; 346 sbi->n_orphans--;
261 break; 347 spin_unlock(&sbi->orphan_inode_lock);
348 kmem_cache_free(orphan_entry_slab, orphan);
349 return;
262 } 350 }
263 } 351 }
264 spin_unlock(&sbi->orphan_inode_lock); 352 spin_unlock(&sbi->orphan_inode_lock);
@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
285 start_blk = __start_cp_addr(sbi) + 1; 373 start_blk = __start_cp_addr(sbi) + 1;
286 orphan_blkaddr = __start_sum_addr(sbi) - 1; 374 orphan_blkaddr = __start_sum_addr(sbi) - 1;
287 375
376 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
377
288 for (i = 0; i < orphan_blkaddr; i++) { 378 for (i = 0; i < orphan_blkaddr; i++) {
289 struct page *page = get_meta_page(sbi, start_blk + i); 379 struct page *page = get_meta_page(sbi, start_blk + i);
290 struct f2fs_orphan_block *orphan_blk; 380 struct f2fs_orphan_block *orphan_blk;
@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
466{ 556{
467 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
468 struct list_head *head = &sbi->dir_inode_list; 558 struct list_head *head = &sbi->dir_inode_list;
469 struct list_head *this; 559 struct dir_inode_entry *entry;
470 560
471 list_for_each(this, head) { 561 list_for_each_entry(entry, head, list)
472 struct dir_inode_entry *entry;
473 entry = list_entry(this, struct dir_inode_entry, list);
474 if (unlikely(entry->inode == inode)) 562 if (unlikely(entry->inode == inode))
475 return -EEXIST; 563 return -EEXIST;
476 } 564
477 list_add_tail(&new->list, head); 565 list_add_tail(&new->list, head);
478 stat_inc_dirty_dir(sbi); 566 stat_inc_dirty_dir(sbi);
479 return 0; 567 return 0;
@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
483{ 571{
484 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 572 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
485 struct dir_inode_entry *new; 573 struct dir_inode_entry *new;
574 int ret = 0;
486 575
487 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
488 return; 577 return;
@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
492 INIT_LIST_HEAD(&new->list); 581 INIT_LIST_HEAD(&new->list);
493 582
494 spin_lock(&sbi->dir_inode_lock); 583 spin_lock(&sbi->dir_inode_lock);
495 if (__add_dirty_inode(inode, new)) 584 ret = __add_dirty_inode(inode, new);
496 kmem_cache_free(inode_entry_slab, new);
497
498 inc_page_count(sbi, F2FS_DIRTY_DENTS);
499 inode_inc_dirty_dents(inode); 585 inode_inc_dirty_dents(inode);
500 SetPagePrivate(page); 586 SetPagePrivate(page);
501 spin_unlock(&sbi->dir_inode_lock); 587 spin_unlock(&sbi->dir_inode_lock);
588
589 if (ret)
590 kmem_cache_free(inode_entry_slab, new);
502} 591}
503 592
504void add_dirty_dir_inode(struct inode *inode) 593void add_dirty_dir_inode(struct inode *inode)
@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode)
506 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 595 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
507 struct dir_inode_entry *new = 596 struct dir_inode_entry *new =
508 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 597 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
598 int ret = 0;
509 599
510 new->inode = inode; 600 new->inode = inode;
511 INIT_LIST_HEAD(&new->list); 601 INIT_LIST_HEAD(&new->list);
512 602
513 spin_lock(&sbi->dir_inode_lock); 603 spin_lock(&sbi->dir_inode_lock);
514 if (__add_dirty_inode(inode, new)) 604 ret = __add_dirty_inode(inode, new);
515 kmem_cache_free(inode_entry_slab, new);
516 spin_unlock(&sbi->dir_inode_lock); 605 spin_unlock(&sbi->dir_inode_lock);
606
607 if (ret)
608 kmem_cache_free(inode_entry_slab, new);
517} 609}
518 610
519void remove_dirty_dir_inode(struct inode *inode) 611void remove_dirty_dir_inode(struct inode *inode)
520{ 612{
521 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 613 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
522 614 struct list_head *head;
523 struct list_head *this, *head; 615 struct dir_inode_entry *entry;
524 616
525 if (!S_ISDIR(inode->i_mode)) 617 if (!S_ISDIR(inode->i_mode))
526 return; 618 return;
527 619
528 spin_lock(&sbi->dir_inode_lock); 620 spin_lock(&sbi->dir_inode_lock);
529 if (atomic_read(&F2FS_I(inode)->dirty_dents)) { 621 if (get_dirty_dents(inode)) {
530 spin_unlock(&sbi->dir_inode_lock); 622 spin_unlock(&sbi->dir_inode_lock);
531 return; 623 return;
532 } 624 }
533 625
534 head = &sbi->dir_inode_list; 626 head = &sbi->dir_inode_list;
535 list_for_each(this, head) { 627 list_for_each_entry(entry, head, list) {
536 struct dir_inode_entry *entry;
537 entry = list_entry(this, struct dir_inode_entry, list);
538 if (entry->inode == inode) { 628 if (entry->inode == inode) {
539 list_del(&entry->list); 629 list_del(&entry->list);
540 kmem_cache_free(inode_entry_slab, entry);
541 stat_dec_dirty_dir(sbi); 630 stat_dec_dirty_dir(sbi);
542 break; 631 spin_unlock(&sbi->dir_inode_lock);
632 kmem_cache_free(inode_entry_slab, entry);
633 goto done;
543 } 634 }
544 } 635 }
545 spin_unlock(&sbi->dir_inode_lock); 636 spin_unlock(&sbi->dir_inode_lock);
546 637
638done:
547 /* Only from the recovery routine */ 639 /* Only from the recovery routine */
548 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { 640 if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
549 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); 641 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode)
554struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) 646struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
555{ 647{
556 648
557 struct list_head *this, *head; 649 struct list_head *head;
558 struct inode *inode = NULL; 650 struct inode *inode = NULL;
651 struct dir_inode_entry *entry;
559 652
560 spin_lock(&sbi->dir_inode_lock); 653 spin_lock(&sbi->dir_inode_lock);
561 654
562 head = &sbi->dir_inode_list; 655 head = &sbi->dir_inode_list;
563 list_for_each(this, head) { 656 list_for_each_entry(entry, head, list) {
564 struct dir_inode_entry *entry;
565 entry = list_entry(this, struct dir_inode_entry, list);
566 if (entry->inode->i_ino == ino) { 657 if (entry->inode->i_ino == ino) {
567 inode = entry->inode; 658 inode = entry->inode;
568 break; 659 break;
@@ -589,7 +680,7 @@ retry:
589 inode = igrab(entry->inode); 680 inode = igrab(entry->inode);
590 spin_unlock(&sbi->dir_inode_lock); 681 spin_unlock(&sbi->dir_inode_lock);
591 if (inode) { 682 if (inode) {
592 filemap_flush(inode->i_mapping); 683 filemap_fdatawrite(inode->i_mapping);
593 iput(inode); 684 iput(inode);
594 } else { 685 } else {
595 /* 686 /*
@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
824 unblock_operations(sbi); 915 unblock_operations(sbi);
825 mutex_unlock(&sbi->cp_mutex); 916 mutex_unlock(&sbi->cp_mutex);
826 917
918 stat_inc_cp_count(sbi->stat_info);
827 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 919 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
828} 920}
829 921
@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
845int __init create_checkpoint_caches(void) 937int __init create_checkpoint_caches(void)
846{ 938{
847 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 939 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
848 sizeof(struct orphan_inode_entry), NULL); 940 sizeof(struct orphan_inode_entry));
849 if (!orphan_entry_slab) 941 if (!orphan_entry_slab)
850 return -ENOMEM; 942 return -ENOMEM;
851 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 943 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
852 sizeof(struct dir_inode_entry), NULL); 944 sizeof(struct dir_inode_entry));
853 if (!inode_entry_slab) { 945 if (!inode_entry_slab) {
854 kmem_cache_destroy(orphan_entry_slab); 946 kmem_cache_destroy(orphan_entry_slab);
855 return -ENOMEM; 947 return -ENOMEM;
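Most of the checkpoint.c changes above swap open-coded list_for_each() walks for list_for_each_entry() and keep the orphan list sorted by inode number; when the walk falls off the end without hitting the break, &orphan->list aliases the list head, so the single list_add_tail() call covers both the insert-before-a-bigger-ino case and the append-at-the-tail case. A minimal userspace sketch of the same sorted-insert-with-duplicate-check idea, using a plain singly linked list instead of <linux/list.h> (types and names here are illustrative only):

#include <stdio.h>
#include <stdlib.h>

struct orphan {
	unsigned int ino;
	struct orphan *next;
};

/* Insert ino in ascending order; silently drop duplicates. */
static void orphan_add(struct orphan **head, unsigned int ino)
{
	struct orphan **pp = head;
	struct orphan *new;

	while (*pp && (*pp)->ino < ino)
		pp = &(*pp)->next;
	if (*pp && (*pp)->ino == ino)
		return;				/* already recorded */

	new = malloc(sizeof(*new));
	if (!new)
		return;
	new->ino = ino;
	new->next = *pp;
	*pp = new;
}

int main(void)
{
	struct orphan *head = NULL, *p;

	orphan_add(&head, 7);
	orphan_add(&head, 3);
	orphan_add(&head, 9);
	orphan_add(&head, 3);			/* duplicate is dropped */

	for (p = head; p; p = p->next)
		printf("%u ", p->ino);		/* prints: 3 7 9 */
	printf("\n");

	while (head) {
		p = head;
		head = head->next;
		free(p);
	}
	return 0;
}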
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2261ccdd0b5f..45abd60e2bff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)
45 45
46static void f2fs_write_end_io(struct bio *bio, int err) 46static void f2fs_write_end_io(struct bio *bio, int err)
47{ 47{
48 struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb); 48 struct f2fs_sb_info *sbi = bio->bi_private;
49 struct bio_vec *bvec; 49 struct bio_vec *bvec;
50 int i; 50 int i;
51 51
@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err)
55 if (unlikely(err)) { 55 if (unlikely(err)) {
56 SetPageError(page); 56 SetPageError(page);
57 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
58 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 58 f2fs_stop_checkpoint(sbi);
59 sbi->sb->s_flags |= MS_RDONLY;
60 } 59 }
61 end_page_writeback(page); 60 end_page_writeback(page);
62 dec_page_count(sbi, F2FS_WRITEBACK); 61 dec_page_count(sbi, F2FS_WRITEBACK);
63 } 62 }
64 63
65 if (bio->bi_private) 64 if (sbi->wait_io) {
66 complete(bio->bi_private); 65 complete(sbi->wait_io);
66 sbi->wait_io = NULL;
67 }
67 68
68 if (!get_pages(sbi, F2FS_WRITEBACK) && 69 if (!get_pages(sbi, F2FS_WRITEBACK) &&
69 !list_empty(&sbi->cp_wait.task_list)) 70 !list_empty(&sbi->cp_wait.task_list))
@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
86 bio->bi_bdev = sbi->sb->s_bdev; 87 bio->bi_bdev = sbi->sb->s_bdev;
87 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); 88 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
88 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; 89 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 bio->bi_private = sbi;
89 91
90 return bio; 92 return bio;
91} 93}
@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
113 */ 115 */
114 if (fio->type == META_FLUSH) { 116 if (fio->type == META_FLUSH) {
115 DECLARE_COMPLETION_ONSTACK(wait); 117 DECLARE_COMPLETION_ONSTACK(wait);
116 io->bio->bi_private = &wait; 118 io->sbi->wait_io = &wait;
117 submit_bio(rw, io->bio); 119 submit_bio(rw, io->bio);
118 wait_for_completion(&wait); 120 wait_for_completion(&wait);
119 } else { 121 } else {
@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
132 134
133 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; 135 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
134 136
135 mutex_lock(&io->io_mutex); 137 down_write(&io->io_rwsem);
136 138
137 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
138 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
140 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
141 } 143 }
142 __submit_merged_bio(io); 144 __submit_merged_bio(io);
143 mutex_unlock(&io->io_mutex); 145 up_write(&io->io_rwsem);
144} 146}
145 147
146/* 148/*
@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
178 180
179 verify_block_addr(sbi, blk_addr); 181 verify_block_addr(sbi, blk_addr);
180 182
181 mutex_lock(&io->io_mutex); 183 down_write(&io->io_rwsem);
182 184
183 if (!is_read) 185 if (!is_read)
184 inc_page_count(sbi, F2FS_WRITEBACK); 186 inc_page_count(sbi, F2FS_WRITEBACK);
@@ -202,7 +204,7 @@ alloc_new:
202 204
203 io->last_block_in_bio = blk_addr; 205 io->last_block_in_bio = blk_addr;
204 206
205 mutex_unlock(&io->io_mutex); 207 up_write(&io->io_rwsem);
206 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); 208 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
207} 209}
208 210
@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page,
797 */ 799 */
798 offset = i_size & (PAGE_CACHE_SIZE - 1); 800 offset = i_size & (PAGE_CACHE_SIZE - 1);
799 if ((page->index >= end_index + 1) || !offset) { 801 if ((page->index >= end_index + 1) || !offset) {
800 if (S_ISDIR(inode->i_mode)) { 802 inode_dec_dirty_dents(inode);
801 dec_page_count(sbi, F2FS_DIRTY_DENTS);
802 inode_dec_dirty_dents(inode);
803 }
804 goto out; 803 goto out;
805 } 804 }
806 805
807 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 806 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
808write: 807write:
809 if (unlikely(sbi->por_doing)) { 808 if (unlikely(sbi->por_doing))
810 err = AOP_WRITEPAGE_ACTIVATE;
811 goto redirty_out; 809 goto redirty_out;
812 }
813 810
814 /* Dentry blocks are controlled by checkpoint */ 811 /* Dentry blocks are controlled by checkpoint */
815 if (S_ISDIR(inode->i_mode)) { 812 if (S_ISDIR(inode->i_mode)) {
816 dec_page_count(sbi, F2FS_DIRTY_DENTS);
817 inode_dec_dirty_dents(inode); 813 inode_dec_dirty_dents(inode);
818 err = do_write_data_page(page, &fio); 814 err = do_write_data_page(page, &fio);
819 } else { 815 goto done;
820 f2fs_lock_op(sbi); 816 }
821
822 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
823 err = f2fs_write_inline_data(inode, page, offset);
824 f2fs_unlock_op(sbi);
825 goto out;
826 } else {
827 err = do_write_data_page(page, &fio);
828 }
829 817
830 f2fs_unlock_op(sbi); 818 if (!wbc->for_reclaim)
831 need_balance_fs = true; 819 need_balance_fs = true;
832 } 820 else if (has_not_enough_free_secs(sbi, 0))
833 if (err == -ENOENT)
834 goto out;
835 else if (err)
836 goto redirty_out; 821 goto redirty_out;
837 822
838 if (wbc->for_reclaim) { 823 f2fs_lock_op(sbi);
839 f2fs_submit_merged_bio(sbi, DATA, WRITE); 824 if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
840 need_balance_fs = false; 825 err = f2fs_write_inline_data(inode, page, offset);
841 } 826 else
827 err = do_write_data_page(page, &fio);
828 f2fs_unlock_op(sbi);
829done:
830 if (err && err != -ENOENT)
831 goto redirty_out;
842 832
843 clear_cold_data(page); 833 clear_cold_data(page);
844out: 834out:
@@ -849,12 +839,11 @@ out:
849 839
850redirty_out: 840redirty_out:
851 wbc->pages_skipped++; 841 wbc->pages_skipped++;
842 account_page_redirty(page);
852 set_page_dirty(page); 843 set_page_dirty(page);
853 return err; 844 return AOP_WRITEPAGE_ACTIVATE;
854} 845}
855 846
856#define MAX_DESIRED_PAGES_WP 4096
857
858static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, 847static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
859 void *data) 848 void *data)
860{ 849{
@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
871 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 860 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
872 bool locked = false; 861 bool locked = false;
873 int ret; 862 int ret;
874 long excess_nrtw = 0, desired_nrtw; 863 long diff;
875 864
876 /* deal with chardevs and other special file */ 865 /* deal with chardevs and other special file */
877 if (!mapping->a_ops->writepage) 866 if (!mapping->a_ops->writepage)
878 return 0; 867 return 0;
879 868
880 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { 869 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
881 desired_nrtw = MAX_DESIRED_PAGES_WP; 870 get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
882 excess_nrtw = desired_nrtw - wbc->nr_to_write; 871 goto skip_write;
883 wbc->nr_to_write = desired_nrtw; 872
884 } 873 diff = nr_pages_to_write(sbi, DATA, wbc);
885 874
886 if (!S_ISDIR(inode->i_mode)) { 875 if (!S_ISDIR(inode->i_mode)) {
887 mutex_lock(&sbi->writepages); 876 mutex_lock(&sbi->writepages);
@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping,
895 884
896 remove_dirty_dir_inode(inode); 885 remove_dirty_dir_inode(inode);
897 886
898 wbc->nr_to_write -= excess_nrtw; 887 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
899 return ret; 888 return ret;
889
890skip_write:
891 wbc->pages_skipped += get_dirty_dents(inode);
892 return 0;
900} 893}
901 894
902static int f2fs_write_begin(struct file *file, struct address_space *mapping, 895static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -949,13 +942,19 @@ inline_data:
949 if (dn.data_blkaddr == NEW_ADDR) { 942 if (dn.data_blkaddr == NEW_ADDR) {
950 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 943 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
951 } else { 944 } else {
952 if (f2fs_has_inline_data(inode)) 945 if (f2fs_has_inline_data(inode)) {
953 err = f2fs_read_inline_data(inode, page); 946 err = f2fs_read_inline_data(inode, page);
954 else 947 if (err) {
948 page_cache_release(page);
949 return err;
950 }
951 } else {
955 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 952 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
956 READ_SYNC); 953 READ_SYNC);
957 if (err) 954 if (err)
958 return err; 955 return err;
956 }
957
959 lock_page(page); 958 lock_page(page);
960 if (unlikely(!PageUptodate(page))) { 959 if (unlikely(!PageUptodate(page))) {
961 f2fs_put_page(page, 1); 960 f2fs_put_page(page, 1);
@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1031 unsigned int length) 1030 unsigned int length)
1032{ 1031{
1033 struct inode *inode = page->mapping->host; 1032 struct inode *inode = page->mapping->host;
1034 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1033 if (PageDirty(page))
1035 if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
1036 dec_page_count(sbi, F2FS_DIRTY_DENTS);
1037 inode_dec_dirty_dents(inode); 1034 inode_dec_dirty_dents(inode);
1038 }
1039 ClearPagePrivate(page); 1035 ClearPagePrivate(page);
1040} 1036}
1041 1037
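In data.c the write end_io handler now recovers the superblock from bio->bi_private and, for META_FLUSH writes, signals a completion parked in sbi->wait_io rather than one hung off the bio. The underlying pattern — publish a pointer to an on-stack completion, submit, wait, and let the asynchronous side complete it exactly once — can be sketched in userspace roughly as below (pthread-based analogue, not kernel code; build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static struct completion *wait_io;	/* stands in for sbi->wait_io */

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *end_io(void *arg)
{
	(void)arg;
	/* ...pretend the queued write-back just finished... */
	if (wait_io) {
		complete(wait_io);
		wait_io = NULL;
	}
	return NULL;
}

int main(void)
{
	struct completion wait = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.done = 0,
	};
	pthread_t t;

	wait_io = &wait;		/* publish before "submitting" the bio */
	pthread_create(&t, NULL, end_io, NULL);
	wait_for_completion(&wait);
	pthread_join(t, NULL);
	printf("flush completed\n");
	return 0;
}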
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3de9d20d0c14..b52c12cf5873 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
86{ 86{
87 struct f2fs_stat_info *si = F2FS_STAT(sbi); 87 struct f2fs_stat_info *si = F2FS_STAT(sbi);
88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; 88 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
89 struct sit_info *sit_i = SIT_I(sbi);
90 unsigned int segno, vblocks; 89 unsigned int segno, vblocks;
91 int ndirty = 0; 90 int ndirty = 0;
92 91
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
94 total_vblocks = 0; 93 total_vblocks = 0;
95 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); 94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
96 hblks_per_sec = blks_per_sec / 2; 95 hblks_per_sec = blks_per_sec / 2;
97 mutex_lock(&sit_i->sentry_lock);
98 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { 96 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
99 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); 97 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
100 dist = abs(vblocks - hblks_per_sec); 98 dist = abs(vblocks - hblks_per_sec);
@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
105 ndirty++; 103 ndirty++;
106 } 104 }
107 } 105 }
108 mutex_unlock(&sit_i->sentry_lock);
109 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; 106 dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
110 si->bimodal = bimodal / dist; 107 si->bimodal = bimodal / dist;
111 if (si->dirty_count) 108 if (si->dirty_count)
@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
236 si->dirty_count); 233 si->dirty_count);
237 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", 234 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
238 si->prefree_count, si->free_segs, si->free_secs); 235 si->prefree_count, si->free_segs, si->free_secs);
236 seq_printf(s, "CP calls: %d\n", si->cp_count);
239 seq_printf(s, "GC calls: %d (BG: %d)\n", 237 seq_printf(s, "GC calls: %d (BG: %d)\n",
240 si->call_count, si->bg_gc); 238 si->call_count, si->bg_gc);
241 seq_printf(s, " - data segments : %d\n", si->data_segs); 239 seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v)
252 si->ndirty_dent, si->ndirty_dirs); 250 si->ndirty_dent, si->ndirty_dirs);
253 seq_printf(s, " - meta: %4d in %4d\n", 251 seq_printf(s, " - meta: %4d in %4d\n",
254 si->ndirty_meta, si->meta_pages); 252 si->ndirty_meta, si->meta_pages);
255 seq_printf(s, " - NATs: %5d > %lu\n", 253 seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
256 si->nats, NM_WOUT_THRESHOLD); 254 si->nats, si->sits);
257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", 255 seq_printf(s, " - free_nids: %9d\n",
258 si->sits, si->fnids); 256 si->fnids);
259 seq_puts(s, "\nDistribution of User Blocks:"); 257 seq_puts(s, "\nDistribution of User Blocks:");
260 seq_puts(s, " [ valid | invalid | free ]\n"); 258 seq_puts(s, " [ valid | invalid | free ]\n");
261 seq_puts(s, " ["); 259 seq_puts(s, " [");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 2b7c255bcbdf..972fd0ef230f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
21 >> PAGE_CACHE_SHIFT; 21 >> PAGE_CACHE_SHIFT;
22} 22}
23 23
24static unsigned int dir_buckets(unsigned int level) 24static unsigned int dir_buckets(unsigned int level, int dir_level)
25{ 25{
26 if (level < MAX_DIR_HASH_DEPTH / 2) 26 if (level < MAX_DIR_HASH_DEPTH / 2)
27 return 1 << level; 27 return 1 << (level + dir_level);
28 else 28 else
29 return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); 29 return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
30} 30}
31 31
32static unsigned int bucket_blocks(unsigned int level) 32static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 66}
67 67
68static unsigned long dir_block_index(unsigned int level, unsigned int idx) 68static unsigned long dir_block_index(unsigned int level,
69 int dir_level, unsigned int idx)
69{ 70{
70 unsigned long i; 71 unsigned long i;
71 unsigned long bidx = 0; 72 unsigned long bidx = 0;
72 73
73 for (i = 0; i < level; i++) 74 for (i = 0; i < level; i++)
74 bidx += dir_buckets(i) * bucket_blocks(i); 75 bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
75 bidx += idx * bucket_blocks(level); 76 bidx += idx * bucket_blocks(level);
76 return bidx; 77 return bidx;
77} 78}
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
94{ 95{
95 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
96 unsigned long bit_pos, end_pos, next_pos; 97 unsigned long bit_pos = 0;
97 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); 98 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
98 int slots; 99 const void *dentry_bits = &dentry_blk->dentry_bitmap;
100 int max_len = 0;
99 101
100 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
101 NR_DENTRY_IN_BLOCK, 0);
102 while (bit_pos < NR_DENTRY_IN_BLOCK) { 102 while (bit_pos < NR_DENTRY_IN_BLOCK) {
103 if (!test_bit_le(bit_pos, dentry_bits)) {
104 if (bit_pos == 0)
105 max_len = 1;
106 else if (!test_bit_le(bit_pos - 1, dentry_bits))
107 max_len++;
108 bit_pos++;
109 continue;
110 }
103 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
104 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
105
106 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name, namelen, namehash, de)) {
107 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
108 name, namelen)) { 114 name, namelen)) {
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
110 goto found; 116 goto found;
111 } 117 }
112 } 118 }
113 next_pos = bit_pos + slots; 119 if (max_len > *max_slots) {
114 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 120 *max_slots = max_len;
115 NR_DENTRY_IN_BLOCK, next_pos); 121 max_len = 0;
116 if (bit_pos >= NR_DENTRY_IN_BLOCK) 122 }
117 end_pos = NR_DENTRY_IN_BLOCK; 123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
118 else
119 end_pos = bit_pos;
120 if (*max_slots < end_pos - next_pos)
121 *max_slots = end_pos - next_pos;
122 } 124 }
123 125
124 de = NULL; 126 de = NULL;
125 kunmap(dentry_page); 127 kunmap(dentry_page);
126found: 128found:
129 if (max_len > *max_slots)
130 *max_slots = max_len;
127 return de; 131 return de;
128} 132}
129 133
@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
141 145
142 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); 146 f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
143 147
144 nbucket = dir_buckets(level); 148 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
145 nblock = bucket_blocks(level); 149 nblock = bucket_blocks(level);
146 150
147 bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); 151 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
152 le32_to_cpu(namehash) % nbucket);
148 end_block = bidx + nblock; 153 end_block = bidx + nblock;
149 154
150 for (; bidx < end_block; bidx++) { 155 for (; bidx < end_block; bidx++) {
@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
248 struct page *page, struct inode *inode) 253 struct page *page, struct inode *inode)
249{ 254{
250 lock_page(page); 255 lock_page(page);
251 wait_on_page_writeback(page); 256 f2fs_wait_on_page_writeback(page, DATA);
252 de->ino = cpu_to_le32(inode->i_ino); 257 de->ino = cpu_to_le32(inode->i_ino);
253 set_de_type(de, inode); 258 set_de_type(de, inode);
254 kunmap(page); 259 kunmap(page);
@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode,
347 err = f2fs_init_security(inode, dir, name, page); 352 err = f2fs_init_security(inode, dir, name, page);
348 if (err) 353 if (err)
349 goto put_error; 354 goto put_error;
350
351 wait_on_page_writeback(page);
352 } else { 355 } else {
353 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 356 page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
354 if (IS_ERR(page)) 357 if (IS_ERR(page))
355 return page; 358 return page;
356 359
357 wait_on_page_writeback(page);
358 set_cold_node(inode, page); 360 set_cold_node(inode, page);
359 } 361 }
360 362
@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode,
372 374
373put_error: 375put_error:
374 f2fs_put_page(page, 1); 376 f2fs_put_page(page, 1);
377 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
378 truncate_inode_pages(&inode->i_data, 0);
379 truncate_blocks(inode, 0);
380 remove_dirty_dir_inode(inode);
375error: 381error:
376 remove_inode_page(inode); 382 remove_inode_page(inode);
377 return ERR_PTR(err); 383 return ERR_PTR(err);
@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
395 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 401 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
396 } 402 }
397 403
398 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
399 update_inode_page(dir);
400
401 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 404 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
402 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 405 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
403} 406}
@@ -464,10 +467,11 @@ start:
464 if (level == current_depth) 467 if (level == current_depth)
465 ++current_depth; 468 ++current_depth;
466 469
467 nbucket = dir_buckets(level); 470 nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
468 nblock = bucket_blocks(level); 471 nblock = bucket_blocks(level);
469 472
470 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); 473 bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
474 (le32_to_cpu(dentry_hash) % nbucket));
471 475
472 for (block = bidx; block <= (bidx + nblock - 1); block++) { 476 for (block = bidx; block <= (bidx + nblock - 1); block++) {
473 dentry_page = get_new_data_page(dir, NULL, block, true); 477 dentry_page = get_new_data_page(dir, NULL, block, true);
@@ -487,8 +491,9 @@ start:
487 ++level; 491 ++level;
488 goto start; 492 goto start;
489add_dentry: 493add_dentry:
490 wait_on_page_writeback(dentry_page); 494 f2fs_wait_on_page_writeback(dentry_page, DATA);
491 495
496 down_write(&F2FS_I(inode)->i_sem);
492 page = init_inode_metadata(inode, dir, name); 497 page = init_inode_metadata(inode, dir, name);
493 if (IS_ERR(page)) { 498 if (IS_ERR(page)) {
494 err = PTR_ERR(page); 499 err = PTR_ERR(page);
@@ -511,7 +516,12 @@ add_dentry:
511 516
512 update_parent_metadata(dir, inode, current_depth); 517 update_parent_metadata(dir, inode, current_depth);
513fail: 518fail:
514 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 519 up_write(&F2FS_I(inode)->i_sem);
520
521 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
522 update_inode_page(dir);
523 clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
524 }
515 kunmap(dentry_page); 525 kunmap(dentry_page);
516 f2fs_put_page(dentry_page, 1); 526 f2fs_put_page(dentry_page, 1);
517 return err; 527 return err;
@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
528 unsigned int bit_pos; 538 unsigned int bit_pos;
529 struct address_space *mapping = page->mapping; 539 struct address_space *mapping = page->mapping;
530 struct inode *dir = mapping->host; 540 struct inode *dir = mapping->host;
531 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
532 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 541 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
533 void *kaddr = page_address(page); 542 void *kaddr = page_address(page);
534 int i; 543 int i;
535 544
536 lock_page(page); 545 lock_page(page);
537 wait_on_page_writeback(page); 546 f2fs_wait_on_page_writeback(page, DATA);
538 547
539 dentry_blk = (struct f2fs_dentry_block *)kaddr; 548 dentry_blk = (struct f2fs_dentry_block *)kaddr;
540 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 549 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
551 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 560 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
552 561
553 if (inode) { 562 if (inode) {
563 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
564
565 down_write(&F2FS_I(inode)->i_sem);
566
554 if (S_ISDIR(inode->i_mode)) { 567 if (S_ISDIR(inode->i_mode)) {
555 drop_nlink(dir); 568 drop_nlink(dir);
556 update_inode_page(dir); 569 update_inode_page(dir);
@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
561 drop_nlink(inode); 574 drop_nlink(inode);
562 i_size_write(inode, 0); 575 i_size_write(inode, 0);
563 } 576 }
577 up_write(&F2FS_I(inode)->i_sem);
564 update_inode_page(inode); 578 update_inode_page(inode);
565 579
566 if (inode->i_nlink == 0) 580 if (inode->i_nlink == 0)
@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
573 truncate_hole(dir, page->index, page->index + 1); 587 truncate_hole(dir, page->index, page->index + 1);
574 clear_page_dirty_for_io(page); 588 clear_page_dirty_for_io(page);
575 ClearPageUptodate(page); 589 ClearPageUptodate(page);
576 dec_page_count(sbi, F2FS_DIRTY_DENTS);
577 inode_dec_dirty_dents(dir); 590 inode_dec_dirty_dents(dir);
578 } 591 }
579 f2fs_put_page(page, 1); 592 f2fs_put_page(page, 1);
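The reworked find_in_block() above walks the dentry bitmap once, growing a counter across free slots and remembering the longest run seen, so the caller learns whether the block still has room for a new multi-slot entry even when the name lookup misses. A simplified userspace model of that single pass (slot count and bitmap contents are invented for the example):

#include <stdio.h>

#define NR_SLOTS 16

/* Return the longest run of clear (free) bits in a little-endian bitmap. */
static int max_free_run(const unsigned char *bitmap)
{
	int i, run = 0, max = 0;

	for (i = 0; i < NR_SLOTS; i++) {
		int used = bitmap[i / 8] >> (i % 8) & 1;

		if (!used) {
			run++;			/* free slot: grow the current run */
		} else {
			if (run > max)
				max = run;	/* used slot: close the run */
			run = 0;
		}
	}
	return run > max ? run : max;
}

int main(void)
{
	/* slots 0,1,5,6,7,12 are used -> longest free run is slots 8..11 */
	unsigned char bitmap[2] = { 0xe3, 0x10 };

	printf("longest free run: %d slots\n", max_free_run(bitmap));
	return 0;
}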
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fc3c558cb4f3..2ecac8312359 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -40,6 +40,7 @@
40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
43 44
44#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
45#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -88,6 +89,16 @@ enum {
88 SIT_BITMAP 89 SIT_BITMAP
89}; 90};
90 91
92/*
93 * For CP/NAT/SIT/SSA readahead
94 */
95enum {
96 META_CP,
97 META_NAT,
98 META_SIT,
99 META_SSA
100};
101
91/* for the list of orphan inodes */ 102/* for the list of orphan inodes */
92struct orphan_inode_entry { 103struct orphan_inode_entry {
93 struct list_head list; /* list head */ 104 struct list_head list; /* list head */
@@ -187,16 +198,20 @@ struct extent_info {
187#define FADVISE_COLD_BIT 0x01 198#define FADVISE_COLD_BIT 0x01
188#define FADVISE_LOST_PINO_BIT 0x02 199#define FADVISE_LOST_PINO_BIT 0x02
189 200
201#define DEF_DIR_LEVEL 0
202
190struct f2fs_inode_info { 203struct f2fs_inode_info {
191 struct inode vfs_inode; /* serve a vfs inode */ 204 struct inode vfs_inode; /* serve a vfs inode */
192 unsigned long i_flags; /* keep an inode flags for ioctl */ 205 unsigned long i_flags; /* keep an inode flags for ioctl */
193 unsigned char i_advise; /* use to give file attribute hints */ 206 unsigned char i_advise; /* use to give file attribute hints */
207 unsigned char i_dir_level; /* use for dentry level for large dir */
194 unsigned int i_current_depth; /* use only in directory structure */ 208 unsigned int i_current_depth; /* use only in directory structure */
195 unsigned int i_pino; /* parent inode number */ 209 unsigned int i_pino; /* parent inode number */
196 umode_t i_acl_mode; /* keep file acl mode temporarily */ 210 umode_t i_acl_mode; /* keep file acl mode temporarily */
197 211
198 /* Use below internally in f2fs*/ 212 /* Use below internally in f2fs*/
199 unsigned long flags; /* use to pass per-file flags */ 213 unsigned long flags; /* use to pass per-file flags */
214 struct rw_semaphore i_sem; /* protect fi info */
200 atomic_t dirty_dents; /* # of dirty dentry pages */ 215 atomic_t dirty_dents; /* # of dirty dentry pages */
201 f2fs_hash_t chash; /* hash value of given file name */ 216 f2fs_hash_t chash; /* hash value of given file name */
202 unsigned int clevel; /* maximum level of given file name */ 217 unsigned int clevel; /* maximum level of given file name */
@@ -229,6 +244,7 @@ struct f2fs_nm_info {
229 block_t nat_blkaddr; /* base disk address of NAT */ 244 block_t nat_blkaddr; /* base disk address of NAT */
230 nid_t max_nid; /* maximum possible node ids */ 245 nid_t max_nid; /* maximum possible node ids */
231 nid_t next_scan_nid; /* the next nid to be scanned */ 246 nid_t next_scan_nid; /* the next nid to be scanned */
247 unsigned int ram_thresh; /* control the memory footprint */
232 248
233 /* NAT cache management */ 249 /* NAT cache management */
234 struct radix_tree_root nat_root;/* root of the nat entry cache */ 250 struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -238,6 +254,7 @@ struct f2fs_nm_info {
238 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 254 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
239 255
240 /* free node ids management */ 256 /* free node ids management */
257 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
241 struct list_head free_nid_list; /* a list for free nids */ 258 struct list_head free_nid_list; /* a list for free nids */
242 spinlock_t free_nid_list_lock; /* protect free nid list */ 259 spinlock_t free_nid_list_lock; /* protect free nid list */
243 unsigned int fcnt; /* the number of free node id */ 260 unsigned int fcnt; /* the number of free node id */
@@ -300,6 +317,12 @@ enum {
300 NO_CHECK_TYPE 317 NO_CHECK_TYPE
301}; 318};
302 319
320struct flush_cmd {
321 struct flush_cmd *next;
322 struct completion wait;
323 int ret;
324};
325
303struct f2fs_sm_info { 326struct f2fs_sm_info {
304 struct sit_info *sit_info; /* whole segment information */ 327 struct sit_info *sit_info; /* whole segment information */
305 struct free_segmap_info *free_info; /* free segment information */ 328 struct free_segmap_info *free_info; /* free segment information */
@@ -328,6 +351,14 @@ struct f2fs_sm_info {
328 351
329 unsigned int ipu_policy; /* in-place-update policy */ 352 unsigned int ipu_policy; /* in-place-update policy */
330 unsigned int min_ipu_util; /* in-place-update threshold */ 353 unsigned int min_ipu_util; /* in-place-update threshold */
354
355 /* for flush command control */
356 struct task_struct *f2fs_issue_flush; /* flush thread */
357 wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
358 struct flush_cmd *issue_list; /* list for command issue */
359 struct flush_cmd *dispatch_list; /* list for command dispatch */
360 spinlock_t issue_lock; /* for issue list lock */
361 struct flush_cmd *issue_tail; /* list tail of issue list */
331}; 362};
332 363
333/* 364/*
@@ -378,7 +409,7 @@ struct f2fs_bio_info {
378 struct bio *bio; /* bios to merge */ 409 struct bio *bio; /* bios to merge */
379 sector_t last_block_in_bio; /* last block number */ 410 sector_t last_block_in_bio; /* last block number */
380 struct f2fs_io_info fio; /* store buffered io info. */ 411 struct f2fs_io_info fio; /* store buffered io info. */
381 struct mutex io_mutex; /* mutex for bio */ 412 struct rw_semaphore io_rwsem; /* blocking op for bio */
382}; 413};
383 414
384struct f2fs_sb_info { 415struct f2fs_sb_info {
@@ -398,6 +429,7 @@ struct f2fs_sb_info {
398 /* for bio operations */ 429 /* for bio operations */
399 struct f2fs_bio_info read_io; /* for read bios */ 430 struct f2fs_bio_info read_io; /* for read bios */
400 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ 431 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
432 struct completion *wait_io; /* for completion bios */
401 433
402 /* for checkpoint */ 434 /* for checkpoint */
403 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 435 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -407,7 +439,6 @@ struct f2fs_sb_info {
407 struct mutex node_write; /* locking node writes */ 439 struct mutex node_write; /* locking node writes */
408 struct mutex writepages; /* mutex for writepages() */ 440 struct mutex writepages; /* mutex for writepages() */
409 bool por_doing; /* recovery is doing or not */ 441 bool por_doing; /* recovery is doing or not */
410 bool on_build_free_nids; /* build_free_nids is doing */
411 wait_queue_head_t cp_wait; 442 wait_queue_head_t cp_wait;
412 443
413 /* for orphan inode management */ 444 /* for orphan inode management */
@@ -436,6 +467,7 @@ struct f2fs_sb_info {
436 unsigned int total_valid_node_count; /* valid node block count */ 467 unsigned int total_valid_node_count; /* valid node block count */
437 unsigned int total_valid_inode_count; /* valid inode count */ 468 unsigned int total_valid_inode_count; /* valid inode count */
438 int active_logs; /* # of active logs */ 469 int active_logs; /* # of active logs */
470 int dir_level; /* directory level */
439 471
440 block_t user_block_count; /* # of user blocks */ 472 block_t user_block_count; /* # of user blocks */
441 block_t total_valid_block_count; /* # of valid blocks */ 473 block_t total_valid_block_count; /* # of valid blocks */
@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
622 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; 654 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
623} 655}
624 656
657static inline bool f2fs_has_xattr_block(unsigned int ofs)
658{
659 return ofs == XATTR_NODE_OFFSET;
660}
661
625static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 662static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
626 struct inode *inode, blkcnt_t count) 663 struct inode *inode, blkcnt_t count)
627{ 664{
@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
661 698
662static inline void inode_inc_dirty_dents(struct inode *inode) 699static inline void inode_inc_dirty_dents(struct inode *inode)
663{ 700{
701 inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
664 atomic_inc(&F2FS_I(inode)->dirty_dents); 702 atomic_inc(&F2FS_I(inode)->dirty_dents);
665} 703}
666 704
@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
671 709
672static inline void inode_dec_dirty_dents(struct inode *inode) 710static inline void inode_dec_dirty_dents(struct inode *inode)
673{ 711{
712 if (!S_ISDIR(inode->i_mode))
713 return;
714
715 dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
674 atomic_dec(&F2FS_I(inode)->dirty_dents); 716 atomic_dec(&F2FS_I(inode)->dirty_dents);
675} 717}
676 718
@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
679 return atomic_read(&sbi->nr_pages[count_type]); 721 return atomic_read(&sbi->nr_pages[count_type]);
680} 722}
681 723
724static inline int get_dirty_dents(struct inode *inode)
725{
726 return atomic_read(&F2FS_I(inode)->dirty_dents);
727}
728
682static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 729static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
683{ 730{
684 unsigned int pages_per_sec = sbi->segs_per_sec * 731 unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
689 736
690static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 737static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
691{ 738{
692 block_t ret; 739 return sbi->total_valid_block_count;
693 spin_lock(&sbi->stat_lock);
694 ret = sbi->total_valid_block_count;
695 spin_unlock(&sbi->stat_lock);
696 return ret;
697} 740}
698 741
699static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) 742static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
789 832
790static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) 833static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
791{ 834{
792 unsigned int ret; 835 return sbi->total_valid_node_count;
793 spin_lock(&sbi->stat_lock);
794 ret = sbi->total_valid_node_count;
795 spin_unlock(&sbi->stat_lock);
796 return ret;
797} 836}
798 837
799static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 838static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
814 853
815static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 854static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
816{ 855{
817 unsigned int ret; 856 return sbi->total_valid_inode_count;
818 spin_lock(&sbi->stat_lock);
819 ret = sbi->total_valid_inode_count;
820 spin_unlock(&sbi->stat_lock);
821 return ret;
822} 857}
823 858
824static inline void f2fs_put_page(struct page *page, int unlock) 859static inline void f2fs_put_page(struct page *page, int unlock)
@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
844} 879}
845 880
846static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, 881static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
847 size_t size, void (*ctor)(void *)) 882 size_t size)
848{ 883{
849 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); 884 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
850} 885}
851 886
852static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, 887static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
983 ri->i_inline |= F2FS_INLINE_DATA; 1018 ri->i_inline |= F2FS_INLINE_DATA;
984} 1019}
985 1020
1021static inline int f2fs_has_inline_xattr(struct inode *inode)
1022{
1023 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
1024}
1025
986static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) 1026static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
987{ 1027{
988 if (is_inode_flag_set(fi, FI_INLINE_XATTR)) 1028 if (f2fs_has_inline_xattr(&fi->vfs_inode))
989 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; 1029 return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
990 return DEF_ADDRS_PER_INODE; 1030 return DEF_ADDRS_PER_INODE;
991} 1031}
992 1032
993static inline void *inline_xattr_addr(struct page *page) 1033static inline void *inline_xattr_addr(struct page *page)
994{ 1034{
995 struct f2fs_inode *ri; 1035 struct f2fs_inode *ri = F2FS_INODE(page);
996 ri = (struct f2fs_inode *)page_address(page);
997 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - 1036 return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
998 F2FS_INLINE_XATTR_ADDRS]); 1037 F2FS_INLINE_XATTR_ADDRS]);
999} 1038}
1000 1039
1001static inline int inline_xattr_size(struct inode *inode) 1040static inline int inline_xattr_size(struct inode *inode)
1002{ 1041{
1003 if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) 1042 if (f2fs_has_inline_xattr(inode))
1004 return F2FS_INLINE_XATTR_ADDRS << 2; 1043 return F2FS_INLINE_XATTR_ADDRS << 2;
1005 else 1044 else
1006 return 0; 1045 return 0;
@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1013 1052
1014static inline void *inline_data_addr(struct page *page) 1053static inline void *inline_data_addr(struct page *page)
1015{ 1054{
1016 struct f2fs_inode *ri; 1055 struct f2fs_inode *ri = F2FS_INODE(page);
1017 ri = (struct f2fs_inode *)page_address(page);
1018 return (void *)&(ri->i_addr[1]); 1056 return (void *)&(ri->i_addr[1]);
1019} 1057}
1020 1058
@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb)
1023 return sb->s_flags & MS_RDONLY; 1061 return sb->s_flags & MS_RDONLY;
1024} 1062}
1025 1063
1064static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1065{
1066 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1067 sbi->sb->s_flags |= MS_RDONLY;
1068}
1069
1026#define get_inode_mode(i) \ 1070#define get_inode_mode(i) \
1027 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ 1071 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
1028 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 1072 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *);
1048struct inode *f2fs_iget(struct super_block *, unsigned long); 1092struct inode *f2fs_iget(struct super_block *, unsigned long);
1049int try_to_free_nats(struct f2fs_sb_info *, int); 1093int try_to_free_nats(struct f2fs_sb_info *, int);
1050void update_inode(struct inode *, struct page *); 1094void update_inode(struct inode *, struct page *);
1051int update_inode_page(struct inode *); 1095void update_inode_page(struct inode *);
1052int f2fs_write_inode(struct inode *, struct writeback_control *); 1096int f2fs_write_inode(struct inode *, struct writeback_control *);
1053void f2fs_evict_inode(struct inode *); 1097void f2fs_evict_inode(struct inode *);
1054 1098
@@ -1097,6 +1141,7 @@ struct dnode_of_data;
1097struct node_info; 1141struct node_info;
1098 1142
1099int is_checkpointed_node(struct f2fs_sb_info *, nid_t); 1143int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
1144bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
1100void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); 1145void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
1101int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 1146int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1102int truncate_inode_blocks(struct inode *, pgoff_t); 1147int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1115void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1160void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1116void recover_node_page(struct f2fs_sb_info *, struct page *, 1161void recover_node_page(struct f2fs_sb_info *, struct page *,
1117 struct f2fs_summary *, struct node_info *, block_t); 1162 struct f2fs_summary *, struct node_info *, block_t);
1163bool recover_xattr_data(struct inode *, struct page *, block_t);
1118int recover_inode_page(struct f2fs_sb_info *, struct page *); 1164int recover_inode_page(struct f2fs_sb_info *, struct page *);
1119int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1165int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1120 struct f2fs_summary_block *); 1166 struct f2fs_summary_block *);
@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void);
1129 */ 1175 */
1130void f2fs_balance_fs(struct f2fs_sb_info *); 1176void f2fs_balance_fs(struct f2fs_sb_info *);
1131void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1177void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1178int f2fs_issue_flush(struct f2fs_sb_info *);
1132void invalidate_blocks(struct f2fs_sb_info *, block_t); 1179void invalidate_blocks(struct f2fs_sb_info *, block_t);
1180void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1133void clear_prefree_segments(struct f2fs_sb_info *); 1181void clear_prefree_segments(struct f2fs_sb_info *);
1134int npages_for_summary_flush(struct f2fs_sb_info *); 1182int npages_for_summary_flush(struct f2fs_sb_info *);
1135void allocate_new_segments(struct f2fs_sb_info *); 1183void allocate_new_segments(struct f2fs_sb_info *);
@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void);
1162 */ 1210 */
1163struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1211struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1164struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1212struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1213int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1165long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1214long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1166int acquire_orphan_inode(struct f2fs_sb_info *); 1215int acquire_orphan_inode(struct f2fs_sb_info *);
1167void release_orphan_inode(struct f2fs_sb_info *); 1216void release_orphan_inode(struct f2fs_sb_info *);
@@ -1231,7 +1280,7 @@ struct f2fs_stat_info {
1231 int util_free, util_valid, util_invalid; 1280 int util_free, util_valid, util_invalid;
1232 int rsvd_segs, overp_segs; 1281 int rsvd_segs, overp_segs;
1233 int dirty_count, node_pages, meta_pages; 1282 int dirty_count, node_pages, meta_pages;
1234 int prefree_count, call_count; 1283 int prefree_count, call_count, cp_count;
1235 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1284 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1236 int tot_blks, data_blks, node_blks; 1285 int tot_blks, data_blks, node_blks;
1237 int curseg[NR_CURSEG_TYPE]; 1286 int curseg[NR_CURSEG_TYPE];
@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1248 return (struct f2fs_stat_info *)sbi->stat_info; 1297 return (struct f2fs_stat_info *)sbi->stat_info;
1249} 1298}
1250 1299
1300#define stat_inc_cp_count(si) ((si)->cp_count++)
1251#define stat_inc_call_count(si) ((si)->call_count++) 1301#define stat_inc_call_count(si) ((si)->call_count++)
1252#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1302#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1253#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1303#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
1302void __init f2fs_create_root_stats(void); 1352void __init f2fs_create_root_stats(void);
1303void f2fs_destroy_root_stats(void); 1353void f2fs_destroy_root_stats(void);
1304#else 1354#else
1355#define stat_inc_cp_count(si)
1305#define stat_inc_call_count(si) 1356#define stat_inc_call_count(si)
1306#define stat_inc_bggc_count(si) 1357#define stat_inc_bggc_count(si)
1307#define stat_inc_dirty_dir(sbi) 1358#define stat_inc_dirty_dir(sbi)
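The flush_cmd plumbing added to f2fs_sm_info above backs the new FLUSH_MERGE option: fsync callers queue a command and sleep on it, while a single issuing thread drains the whole list and satisfies every waiter with one device cache flush. A compact userspace sketch of that merge pattern (pthread-based; flush_cmd and issue_flush are reused as names for readability, everything else is invented; build with -pthread):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct flush_cmd {
	struct flush_cmd *next;
	int done;
	int ret;
};

static struct flush_cmd *issue_list;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t queued = PTHREAD_COND_INITIALIZER;
static pthread_cond_t completed = PTHREAD_COND_INITIALIZER;

static int do_expensive_flush(void)
{
	usleep(1000);			/* stand-in for a device cache flush */
	return 0;
}

static void *flusher(void *arg)
{
	(void)arg;
	for (;;) {
		struct flush_cmd *head, *cmd;
		int ret;

		pthread_mutex_lock(&lock);
		while (!issue_list)
			pthread_cond_wait(&queued, &lock);
		head = issue_list;	/* grab every queued command at once */
		issue_list = NULL;
		pthread_mutex_unlock(&lock);

		ret = do_expensive_flush();	/* one flush serves them all */

		pthread_mutex_lock(&lock);
		for (cmd = head; cmd; cmd = cmd->next) {
			cmd->ret = ret;
			cmd->done = 1;
		}
		pthread_cond_broadcast(&completed);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

static int issue_flush(void)
{
	struct flush_cmd cmd = { .next = NULL, .done = 0, .ret = 0 };

	pthread_mutex_lock(&lock);
	cmd.next = issue_list;		/* LIFO: completion order does not matter */
	issue_list = &cmd;
	pthread_cond_signal(&queued);
	while (!cmd.done)
		pthread_cond_wait(&completed, &lock);
	pthread_mutex_unlock(&lock);
	return cmd.ret;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, flusher, NULL);
	printf("merged flush returned %d\n", issue_flush());
	return 0;
}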
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0dfcef53a6ed..60e7d5448a1d 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
76 trace_f2fs_vm_page_mkwrite(page, DATA); 76 trace_f2fs_vm_page_mkwrite(page, DATA);
77mapped: 77mapped:
78 /* fill the page */ 78 /* fill the page */
79 wait_on_page_writeback(page); 79 f2fs_wait_on_page_writeback(page, DATA);
80out: 80out:
81 sb_end_pagefault(inode->i_sb); 81 sb_end_pagefault(inode->i_sb);
82 return block_page_mkwrite_return(err); 82 return block_page_mkwrite_return(err);
@@ -84,6 +84,7 @@ out:
84 84
85static const struct vm_operations_struct f2fs_file_vm_ops = { 85static const struct vm_operations_struct f2fs_file_vm_ops = {
86 .fault = filemap_fault, 86 .fault = filemap_fault,
87 .map_pages = filemap_map_pages,
87 .page_mkwrite = f2fs_vm_page_mkwrite, 88 .page_mkwrite = f2fs_vm_page_mkwrite,
88 .remap_pages = generic_file_remap_pages, 89 .remap_pages = generic_file_remap_pages,
89}; 90};
@@ -111,11 +112,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
111int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 112int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
112{ 113{
113 struct inode *inode = file->f_mapping->host; 114 struct inode *inode = file->f_mapping->host;
115 struct f2fs_inode_info *fi = F2FS_I(inode);
114 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 116 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
115 int ret = 0; 117 int ret = 0;
116 bool need_cp = false; 118 bool need_cp = false;
117 struct writeback_control wbc = { 119 struct writeback_control wbc = {
118 .sync_mode = WB_SYNC_NONE, 120 .sync_mode = WB_SYNC_ALL,
119 .nr_to_write = LONG_MAX, 121 .nr_to_write = LONG_MAX,
120 .for_reclaim = 0, 122 .for_reclaim = 0,
121 }; 123 };
@@ -133,7 +135,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
133 /* guarantee free sections for fsync */ 135 /* guarantee free sections for fsync */
134 f2fs_balance_fs(sbi); 136 f2fs_balance_fs(sbi);
135 137
136 mutex_lock(&inode->i_mutex); 138 down_read(&fi->i_sem);
137 139
138 /* 140 /*
139 * Both of fdatasync() and fsync() are able to be recovered from 141 * Both of fdatasync() and fsync() are able to be recovered from
@@ -150,25 +152,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
150 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) 152 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
151 need_cp = true; 153 need_cp = true;
152 154
155 up_read(&fi->i_sem);
156
153 if (need_cp) { 157 if (need_cp) {
154 nid_t pino; 158 nid_t pino;
155 159
156 F2FS_I(inode)->xattr_ver = 0;
157
158 /* all the dirty node pages should be flushed for POR */ 160 /* all the dirty node pages should be flushed for POR */
159 ret = f2fs_sync_fs(inode->i_sb, 1); 161 ret = f2fs_sync_fs(inode->i_sb, 1);
162
163 down_write(&fi->i_sem);
164 F2FS_I(inode)->xattr_ver = 0;
160 if (file_wrong_pino(inode) && inode->i_nlink == 1 && 165 if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
161 get_parent_ino(inode, &pino)) { 166 get_parent_ino(inode, &pino)) {
162 F2FS_I(inode)->i_pino = pino; 167 F2FS_I(inode)->i_pino = pino;
163 file_got_pino(inode); 168 file_got_pino(inode);
169 up_write(&fi->i_sem);
164 mark_inode_dirty_sync(inode); 170 mark_inode_dirty_sync(inode);
165 ret = f2fs_write_inode(inode, NULL); 171 ret = f2fs_write_inode(inode, NULL);
166 if (ret) 172 if (ret)
167 goto out; 173 goto out;
174 } else {
175 up_write(&fi->i_sem);
168 } 176 }
169 } else { 177 } else {
170 /* if there is no written node page, write its inode page */ 178 /* if there is no written node page, write its inode page */
171 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { 179 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
180 if (fsync_mark_done(sbi, inode->i_ino))
181 goto out;
172 mark_inode_dirty_sync(inode); 182 mark_inode_dirty_sync(inode);
173 ret = f2fs_write_inode(inode, NULL); 183 ret = f2fs_write_inode(inode, NULL);
174 if (ret) 184 if (ret)
@@ -177,10 +187,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
177 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 187 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
178 if (ret) 188 if (ret)
179 goto out; 189 goto out;
180 ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 190 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
181 } 191 }
182out: 192out:
183 mutex_unlock(&inode->i_mutex);
184 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 193 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
185 return ret; 194 return ret;
186} 195}
@@ -245,7 +254,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
245 f2fs_put_page(page, 1); 254 f2fs_put_page(page, 1);
246 return; 255 return;
247 } 256 }
248 wait_on_page_writeback(page); 257 f2fs_wait_on_page_writeback(page, DATA);
249 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 258 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
250 set_page_dirty(page); 259 set_page_dirty(page);
251 f2fs_put_page(page, 1); 260 f2fs_put_page(page, 1);
@@ -422,7 +431,7 @@ static void fill_zero(struct inode *inode, pgoff_t index,
422 f2fs_unlock_op(sbi); 431 f2fs_unlock_op(sbi);
423 432
424 if (!IS_ERR(page)) { 433 if (!IS_ERR(page)) {
425 wait_on_page_writeback(page); 434 f2fs_wait_on_page_writeback(page, DATA);
426 zero_user(page, start, len); 435 zero_user(page, start, len);
427 set_page_dirty(page); 436 set_page_dirty(page);
428 f2fs_put_page(page, 1); 437 f2fs_put_page(page, 1);
@@ -560,6 +569,8 @@ static long f2fs_fallocate(struct file *file, int mode,
560 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 569 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
561 return -EOPNOTSUPP; 570 return -EOPNOTSUPP;
562 571
572 mutex_lock(&inode->i_mutex);
573
563 if (mode & FALLOC_FL_PUNCH_HOLE) 574 if (mode & FALLOC_FL_PUNCH_HOLE)
564 ret = punch_hole(inode, offset, len); 575 ret = punch_hole(inode, offset, len);
565 else 576 else
@@ -569,6 +580,9 @@ static long f2fs_fallocate(struct file *file, int mode,
569 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 580 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
570 mark_inode_dirty(inode); 581 mark_inode_dirty(inode);
571 } 582 }
583
584 mutex_unlock(&inode->i_mutex);
585
572 trace_f2fs_fallocate(inode, mode, offset, len, ret); 586 trace_f2fs_fallocate(inode, mode, offset, len, ret);
573 return ret; 587 return ret;
574} 588}
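
The f2fs_fallocate() hunk above now takes i_mutex around the whole operation and still accepts only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE, while f2fs_sync_file() switches from i_mutex to fi->i_sem and ends its no-checkpoint path with f2fs_issue_flush(). A small userspace program that exercises exactly these entry points follows; the mount point and the sizes are assumptions made for the example.

/* Userspace exercise of the paths changed above: fallocate() with the two
 * modes f2fs_fallocate() accepts, followed by fsync(), which lands in
 * f2fs_sync_file().  Path and sizes are assumptions for this example only. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/f2fs/example", O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* preallocate 1 MiB without changing i_size */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		perror("fallocate(KEEP_SIZE)");

	/* punch a 64 KiB hole; PUNCH_HOLE must be combined with KEEP_SIZE */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0, 64 << 10) < 0)
		perror("fallocate(PUNCH_HOLE)");

	if (fsync(fd) < 0)		/* triggers f2fs_sync_file() */
		perror("fsync");

	close(fd);
	return 0;
}
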
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ea0371e854b4..b90dbe55403a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
531 set_page_dirty(page); 531 set_page_dirty(page);
532 set_cold_data(page); 532 set_cold_data(page);
533 } else { 533 } else {
534 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
535
536 f2fs_wait_on_page_writeback(page, DATA); 534 f2fs_wait_on_page_writeback(page, DATA);
537 535
538 if (clear_page_dirty_for_io(page) && 536 if (clear_page_dirty_for_io(page))
539 S_ISDIR(inode->i_mode)) {
540 dec_page_count(sbi, F2FS_DIRTY_DENTS);
541 inode_dec_dirty_dents(inode); 537 inode_dec_dirty_dents(inode);
542 }
543 set_cold_data(page); 538 set_cold_data(page);
544 do_write_data_page(page, &fio); 539 do_write_data_page(page, &fio);
545 clear_cold_data(page); 540 clear_cold_data(page);
@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
701gc_more: 696gc_more:
702 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 697 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
703 goto stop; 698 goto stop;
699 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
700 goto stop;
704 701
705 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 702 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
706 gc_type = FG_GC; 703 gc_type = FG_GC;
@@ -711,6 +708,11 @@ gc_more:
711 goto stop; 708 goto stop;
712 ret = 0; 709 ret = 0;
713 710
711 /* readahead multi ssa blocks those have contiguous address */
712 if (sbi->segs_per_sec > 1)
713 ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
714 META_SSA);
715
714 for (i = 0; i < sbi->segs_per_sec; i++) 716 for (i = 0; i < sbi->segs_per_sec; i++)
715 do_garbage_collect(sbi, segno + i, &ilist, gc_type); 717 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
716 718
@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
740int __init create_gc_caches(void) 742int __init create_gc_caches(void)
741{ 743{
742 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 744 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
743 sizeof(struct inode_entry), NULL); 745 sizeof(struct inode_entry));
744 if (!winode_slab) 746 if (!winode_slab)
745 return -ENOMEM; 747 return -ENOMEM;
746 return 0; 748 return 0;
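
f2fs_gc() above now bails out when the checkpoint carries CP_ERROR_FLAG and, for multi-segment sections, reads ahead the segs_per_sec summary (SSA) blocks in one request before do_garbage_collect() walks the segments, relying on their contiguous meta addresses. A userspace analogy of that readahead, with posix_fadvise(POSIX_FADV_WILLNEED) standing in for ra_meta_pages(); the file, offsets and section geometry are made up for the illustration.

/* Analogy only: prefetch one contiguous range covering everything about to
 * be read, instead of faulting blocks in one by one. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/f2fs/some_big_file", O_RDONLY);
	unsigned int segs_per_sec = 4;	/* segments per section (assumed) */
	off_t first_blk = 5000;		/* first summary block index (made up) */
	size_t blksz = 4096;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* one request for segs_per_sec contiguous 4 KiB blocks */
	posix_fadvise(fd, first_blk * blksz, segs_per_sec * blksz,
		      POSIX_FADV_WILLNEED);

	close(fd);
	return 0;
}
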
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 31ee5b164ff9..383db1fabcf4 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
45 } 45 }
46 46
47 ipage = get_node_page(sbi, inode->i_ino); 47 ipage = get_node_page(sbi, inode->i_ino);
48 if (IS_ERR(ipage)) 48 if (IS_ERR(ipage)) {
49 unlock_page(page);
49 return PTR_ERR(ipage); 50 return PTR_ERR(ipage);
51 }
50 52
51 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); 53 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
52 54
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 28cea76d78c6..ee829d360468 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
107 fi->flags = 0; 107 fi->flags = 0;
108 fi->i_advise = ri->i_advise; 108 fi->i_advise = ri->i_advise;
109 fi->i_pino = le32_to_cpu(ri->i_pino); 109 fi->i_pino = le32_to_cpu(ri->i_pino);
110 fi->i_dir_level = ri->i_dir_level;
110 111
111 get_extent_info(&fi->ext, ri->i_ext); 112 get_extent_info(&fi->ext, ri->i_ext);
112 get_inline_info(fi, ri); 113 get_inline_info(fi, ri);
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
204 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 205 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
205 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 206 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
206 ri->i_generation = cpu_to_le32(inode->i_generation); 207 ri->i_generation = cpu_to_le32(inode->i_generation);
208 ri->i_dir_level = F2FS_I(inode)->i_dir_level;
207 209
208 __set_inode_rdev(inode, ri); 210 __set_inode_rdev(inode, ri);
209 set_cold_node(inode, node_page); 211 set_cold_node(inode, node_page);
@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page)
212 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); 214 clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
213} 215}
214 216
215int update_inode_page(struct inode *inode) 217void update_inode_page(struct inode *inode)
216{ 218{
217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 219 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
218 struct page *node_page; 220 struct page *node_page;
219 221retry:
220 node_page = get_node_page(sbi, inode->i_ino); 222 node_page = get_node_page(sbi, inode->i_ino);
221 if (IS_ERR(node_page)) 223 if (IS_ERR(node_page)) {
222 return PTR_ERR(node_page); 224 int err = PTR_ERR(node_page);
223 225 if (err == -ENOMEM) {
226 cond_resched();
227 goto retry;
228 } else if (err != -ENOENT) {
229 f2fs_stop_checkpoint(sbi);
230 }
231 return;
232 }
224 update_inode(inode, node_page); 233 update_inode(inode, node_page);
225 f2fs_put_page(node_page, 1); 234 f2fs_put_page(node_page, 1);
226 return 0;
227} 235}
228 236
229int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) 237int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
230{ 238{
231 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 239 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
232 int ret;
233 240
234 if (inode->i_ino == F2FS_NODE_INO(sbi) || 241 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
235 inode->i_ino == F2FS_META_INO(sbi)) 242 inode->i_ino == F2FS_META_INO(sbi))
@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
243 * during the urgent cleaning time when runing out of free sections. 250 * during the urgent cleaning time when runing out of free sections.
244 */ 251 */
245 f2fs_lock_op(sbi); 252 f2fs_lock_op(sbi);
246 ret = update_inode_page(inode); 253 update_inode_page(inode);
247 f2fs_unlock_op(sbi); 254 f2fs_unlock_op(sbi);
248 255
249 if (wbc) 256 if (wbc)
250 f2fs_balance_fs(sbi); 257 f2fs_balance_fs(sbi);
251 258
252 return ret; 259 return 0;
253} 260}
254 261
255/* 262/*
@@ -266,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
266 inode->i_ino == F2FS_META_INO(sbi)) 273 inode->i_ino == F2FS_META_INO(sbi))
267 goto no_delete; 274 goto no_delete;
268 275
269 f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); 276 f2fs_bug_on(get_dirty_dents(inode));
270 remove_dirty_dir_inode(inode); 277 remove_dirty_dir_inode(inode);
271 278
272 if (inode->i_nlink || is_bad_inode(inode)) 279 if (inode->i_nlink || is_bad_inode(inode))
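
update_inode_page() above becomes void: a get_node_page() failure with -ENOMEM is treated as transient and retried after cond_resched(), while any other error except -ENOENT stops checkpointing via f2fs_stop_checkpoint(). A standalone sketch of that retry shape; try_get() and its canned failures are inventions for the example, only the control flow mirrors the change.

/* Standalone illustration of the retry-on-ENOMEM pattern adopted above. */
#include <errno.h>
#include <sched.h>
#include <stdio.h>

static int try_get(void)
{
	static int calls;
	if (++calls < 3)
		return -ENOMEM;		/* pretend allocation failed twice */
	return 42;			/* "page" obtained */
}

int main(void)
{
	int ret;

retry:
	ret = try_get();
	if (ret < 0) {
		if (ret == -ENOMEM) {
			sched_yield();	/* rough analogue of cond_resched() */
			goto retry;	/* transient failure: try again */
		}
		return 1;		/* anything else would be fatal here */
	}
	printf("got %d after retries\n", ret);
	return 0;
}
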
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 397d459e97bf..a9409d19dfd4 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 inode = f2fs_iget(dir->i_sb, ino); 207 inode = f2fs_iget(dir->i_sb, ino);
208 if (IS_ERR(inode)) 208 if (IS_ERR(inode))
209 return ERR_CAST(inode); 209 return ERR_CAST(inode);
210
211 stat_inc_inline_inode(inode);
210 } 212 }
211 213
212 return d_splice_alias(inode, dentry); 214 return d_splice_alias(inode, dentry);
@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
424 } 426 }
425 427
426 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 428 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
429 down_write(&F2FS_I(old_inode)->i_sem);
427 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 430 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
431 up_write(&F2FS_I(old_inode)->i_sem);
428 432
429 new_inode->i_ctime = CURRENT_TIME; 433 new_inode->i_ctime = CURRENT_TIME;
434 down_write(&F2FS_I(new_inode)->i_sem);
430 if (old_dir_entry) 435 if (old_dir_entry)
431 drop_nlink(new_inode); 436 drop_nlink(new_inode);
432 drop_nlink(new_inode); 437 drop_nlink(new_inode);
438 up_write(&F2FS_I(new_inode)->i_sem);
439
433 mark_inode_dirty(new_inode); 440 mark_inode_dirty(new_inode);
434 441
435 if (!new_inode->i_nlink) 442 if (!new_inode->i_nlink)
@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
459 if (old_dir != new_dir) { 466 if (old_dir != new_dir) {
460 f2fs_set_link(old_inode, old_dir_entry, 467 f2fs_set_link(old_inode, old_dir_entry,
461 old_dir_page, new_dir); 468 old_dir_page, new_dir);
469 down_write(&F2FS_I(old_inode)->i_sem);
462 F2FS_I(old_inode)->i_pino = new_dir->i_ino; 470 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
471 up_write(&F2FS_I(old_inode)->i_sem);
463 update_inode_page(old_inode); 472 update_inode_page(old_inode);
464 } else { 473 } else {
465 kunmap(old_dir_page); 474 kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b0649b76eb4f..a161e955c4c8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -21,9 +21,27 @@
21#include "segment.h" 21#include "segment.h"
22#include <trace/events/f2fs.h> 22#include <trace/events/f2fs.h>
23 23
24#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
25
24static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
25static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
26 28
29static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
30{
31 struct sysinfo val;
32 unsigned long mem_size = 0;
33
34 si_meminfo(&val);
35 if (type == FREE_NIDS)
36 mem_size = nm_i->fcnt * sizeof(struct free_nid);
37 else if (type == NAT_ENTRIES)
38 mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
39 mem_size >>= 12;
40
41 /* give 50:50 memory for free nids and nat caches respectively */
42 return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
43}
44
27static void clear_node_page_dirty(struct page *page) 45static void clear_node_page_dirty(struct page *page)
28{ 46{
29 struct address_space *mapping = page->mapping; 47 struct address_space *mapping = page->mapping;
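
available_free_memory() above converts the byte footprint of the cached objects into 4 KiB pages (mem_size >>= 12) and allows each of the two caches up to (totalram * ram_thresh) >> 11 pages. With the default threshold of 10 that is roughly 0.5% of RAM per cache, which matches the "10MB per 1GB ram" note added to node.h further down. A standalone check of the arithmetic; the totalram figure is an assumption for a machine with 1 GiB of 4 KiB pages.

/* Standalone check of the threshold arithmetic in available_free_memory(). */
#include <stdio.h>

int main(void)
{
	unsigned long totalram = 262144;	/* 1 GiB / 4 KiB pages (assumed) */
	unsigned long ram_thresh = 10;		/* DEF_RAM_THRESHOLD */
	unsigned long limit = (totalram * ram_thresh) >> 11;	/* per-cache cap, pages */

	/* 262144 * 10 / 2048 = 1280 pages = 5 MiB per cache, ~10 MiB combined */
	printf("per-cache cap: %lu pages (~%lu MiB), both caches: ~%lu MiB\n",
	       limit, limit * 4 / 1024, 2 * limit * 4 / 1024);
	return 0;
}
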
@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
82 return dst_page; 100 return dst_page;
83} 101}
84 102
85/*
86 * Readahead NAT pages
87 */
88static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
89{
90 struct address_space *mapping = META_MAPPING(sbi);
91 struct f2fs_nm_info *nm_i = NM_I(sbi);
92 struct page *page;
93 pgoff_t index;
94 int i;
95 struct f2fs_io_info fio = {
96 .type = META,
97 .rw = READ_SYNC | REQ_META | REQ_PRIO
98 };
99
100
101 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
102 if (unlikely(nid >= nm_i->max_nid))
103 nid = 0;
104 index = current_nat_addr(sbi, nid);
105
106 page = grab_cache_page(mapping, index);
107 if (!page)
108 continue;
109 if (PageUptodate(page)) {
110 mark_page_accessed(page);
111 f2fs_put_page(page, 1);
112 continue;
113 }
114 f2fs_submit_page_mbio(sbi, page, index, &fio);
115 mark_page_accessed(page);
116 f2fs_put_page(page, 0);
117 }
118 f2fs_submit_merged_bio(sbi, META, READ);
119}
120
121static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 103static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
122{ 104{
123 return radix_tree_lookup(&nm_i->nat_root, n); 105 return radix_tree_lookup(&nm_i->nat_root, n);
@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
151 return is_cp; 133 return is_cp;
152} 134}
153 135
136bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
137{
138 struct f2fs_nm_info *nm_i = NM_I(sbi);
139 struct nat_entry *e;
140 bool fsync_done = false;
141
142 read_lock(&nm_i->nat_tree_lock);
143 e = __lookup_nat_cache(nm_i, nid);
144 if (e)
145 fsync_done = e->fsync_done;
146 read_unlock(&nm_i->nat_tree_lock);
147 return fsync_done;
148}
149
154static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) 150static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
155{ 151{
156 struct nat_entry *new; 152 struct nat_entry *new;
@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
164 } 160 }
165 memset(new, 0, sizeof(struct nat_entry)); 161 memset(new, 0, sizeof(struct nat_entry));
166 nat_set_nid(new, nid); 162 nat_set_nid(new, nid);
163 new->checkpointed = true;
167 list_add_tail(&new->list, &nm_i->nat_entries); 164 list_add_tail(&new->list, &nm_i->nat_entries);
168 nm_i->nat_cnt++; 165 nm_i->nat_cnt++;
169 return new; 166 return new;
@@ -185,13 +182,12 @@ retry:
185 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); 182 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
186 nat_set_ino(e, le32_to_cpu(ne->ino)); 183 nat_set_ino(e, le32_to_cpu(ne->ino));
187 nat_set_version(e, ne->version); 184 nat_set_version(e, ne->version);
188 e->checkpointed = true;
189 } 185 }
190 write_unlock(&nm_i->nat_tree_lock); 186 write_unlock(&nm_i->nat_tree_lock);
191} 187}
192 188
193static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, 189static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
194 block_t new_blkaddr) 190 block_t new_blkaddr, bool fsync_done)
195{ 191{
196 struct f2fs_nm_info *nm_i = NM_I(sbi); 192 struct f2fs_nm_info *nm_i = NM_I(sbi);
197 struct nat_entry *e; 193 struct nat_entry *e;
@@ -205,7 +201,6 @@ retry:
205 goto retry; 201 goto retry;
206 } 202 }
207 e->ni = *ni; 203 e->ni = *ni;
208 e->checkpointed = true;
209 f2fs_bug_on(ni->blk_addr == NEW_ADDR); 204 f2fs_bug_on(ni->blk_addr == NEW_ADDR);
210 } else if (new_blkaddr == NEW_ADDR) { 205 } else if (new_blkaddr == NEW_ADDR) {
211 /* 206 /*
@@ -217,9 +212,6 @@ retry:
217 f2fs_bug_on(ni->blk_addr != NULL_ADDR); 212 f2fs_bug_on(ni->blk_addr != NULL_ADDR);
218 } 213 }
219 214
220 if (new_blkaddr == NEW_ADDR)
221 e->checkpointed = false;
222
223 /* sanity check */ 215 /* sanity check */
224 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); 216 f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
225 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && 217 f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
@@ -239,6 +231,11 @@ retry:
239 /* change address */ 231 /* change address */
240 nat_set_blkaddr(e, new_blkaddr); 232 nat_set_blkaddr(e, new_blkaddr);
241 __set_nat_cache_dirty(nm_i, e); 233 __set_nat_cache_dirty(nm_i, e);
234
235 /* update fsync_mark if its inode nat entry is still alive */
236 e = __lookup_nat_cache(nm_i, ni->ino);
237 if (e)
238 e->fsync_done = fsync_done;
242 write_unlock(&nm_i->nat_tree_lock); 239 write_unlock(&nm_i->nat_tree_lock);
243} 240}
244 241
@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
246{ 243{
247 struct f2fs_nm_info *nm_i = NM_I(sbi); 244 struct f2fs_nm_info *nm_i = NM_I(sbi);
248 245
249 if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) 246 if (available_free_memory(nm_i, NAT_ENTRIES))
250 return 0; 247 return 0;
251 248
252 write_lock(&nm_i->nat_tree_lock); 249 write_lock(&nm_i->nat_tree_lock);
@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
505 /* Deallocate node address */ 502 /* Deallocate node address */
506 invalidate_blocks(sbi, ni.blk_addr); 503 invalidate_blocks(sbi, ni.blk_addr);
507 dec_valid_node_count(sbi, dn->inode); 504 dec_valid_node_count(sbi, dn->inode);
508 set_node_addr(sbi, &ni, NULL_ADDR); 505 set_node_addr(sbi, &ni, NULL_ADDR, false);
509 506
510 if (dn->nid == dn->inode->i_ino) { 507 if (dn->nid == dn->inode->i_ino) {
511 remove_orphan_inode(sbi, dn->nid); 508 remove_orphan_inode(sbi, dn->nid);
@@ -763,7 +760,7 @@ skip_partial:
763 f2fs_put_page(page, 1); 760 f2fs_put_page(page, 1);
764 goto restart; 761 goto restart;
765 } 762 }
766 wait_on_page_writeback(page); 763 f2fs_wait_on_page_writeback(page, NODE);
767 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 764 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
768 set_page_dirty(page); 765 set_page_dirty(page);
769 unlock_page(page); 766 unlock_page(page);
@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
852 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 849 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
853 return ERR_PTR(-EPERM); 850 return ERR_PTR(-EPERM);
854 851
855 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 852 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
853 dn->nid, AOP_FLAG_NOFS);
856 if (!page) 854 if (!page)
857 return ERR_PTR(-ENOMEM); 855 return ERR_PTR(-ENOMEM);
858 856
@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
867 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); 865 f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
868 new_ni = old_ni; 866 new_ni = old_ni;
869 new_ni.ino = dn->inode->i_ino; 867 new_ni.ino = dn->inode->i_ino;
870 set_node_addr(sbi, &new_ni, NEW_ADDR); 868 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
871 869
872 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); 870 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
873 set_cold_node(dn->inode, page); 871 set_cold_node(dn->inode, page);
874 SetPageUptodate(page); 872 SetPageUptodate(page);
875 set_page_dirty(page); 873 set_page_dirty(page);
876 874
877 if (ofs == XATTR_NODE_OFFSET) 875 if (f2fs_has_xattr_block(ofs))
878 F2FS_I(dn->inode)->i_xattr_nid = dn->nid; 876 F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
879 877
880 dn->node_page = page; 878 dn->node_page = page;
@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
948 struct page *page; 946 struct page *page;
949 int err; 947 int err;
950repeat: 948repeat:
951 page = grab_cache_page(NODE_MAPPING(sbi), nid); 949 page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
950 nid, AOP_FLAG_NOFS);
952 if (!page) 951 if (!page)
953 return ERR_PTR(-ENOMEM); 952 return ERR_PTR(-ENOMEM);
954 953
@@ -959,7 +958,7 @@ repeat:
959 goto got_it; 958 goto got_it;
960 959
961 lock_page(page); 960 lock_page(page);
962 if (unlikely(!PageUptodate(page))) { 961 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
963 f2fs_put_page(page, 1); 962 f2fs_put_page(page, 1);
964 return ERR_PTR(-EIO); 963 return ERR_PTR(-EIO);
965 } 964 }
@@ -968,7 +967,6 @@ repeat:
968 goto repeat; 967 goto repeat;
969 } 968 }
970got_it: 969got_it:
971 f2fs_bug_on(nid != nid_of_node(page));
972 mark_page_accessed(page); 970 mark_page_accessed(page);
973 return page; 971 return page;
974} 972}
@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1168 continue; 1166 continue;
1169 1167
1170 if (ino && ino_of_node(page) == ino) { 1168 if (ino && ino_of_node(page) == ino) {
1171 wait_on_page_writeback(page); 1169 f2fs_wait_on_page_writeback(page, NODE);
1172 if (TestClearPageError(page)) 1170 if (TestClearPageError(page))
1173 ret = -EIO; 1171 ret = -EIO;
1174 } 1172 }
@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page,
1201 if (unlikely(sbi->por_doing)) 1199 if (unlikely(sbi->por_doing))
1202 goto redirty_out; 1200 goto redirty_out;
1203 1201
1204 wait_on_page_writeback(page); 1202 f2fs_wait_on_page_writeback(page, NODE);
1205 1203
1206 /* get old block addr of this node page */ 1204 /* get old block addr of this node page */
1207 nid = nid_of_node(page); 1205 nid = nid_of_node(page);
@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page,
1222 mutex_lock(&sbi->node_write); 1220 mutex_lock(&sbi->node_write);
1223 set_page_writeback(page); 1221 set_page_writeback(page);
1224 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1222 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1225 set_node_addr(sbi, &ni, new_addr); 1223 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1226 dec_page_count(sbi, F2FS_DIRTY_NODES); 1224 dec_page_count(sbi, F2FS_DIRTY_NODES);
1227 mutex_unlock(&sbi->node_write); 1225 mutex_unlock(&sbi->node_write);
1228 unlock_page(page); 1226 unlock_page(page);
@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page,
1231redirty_out: 1229redirty_out:
1232 dec_page_count(sbi, F2FS_DIRTY_NODES); 1230 dec_page_count(sbi, F2FS_DIRTY_NODES);
1233 wbc->pages_skipped++; 1231 wbc->pages_skipped++;
1232 account_page_redirty(page);
1234 set_page_dirty(page); 1233 set_page_dirty(page);
1235 return AOP_WRITEPAGE_ACTIVATE; 1234 return AOP_WRITEPAGE_ACTIVATE;
1236} 1235}
1237 1236
1238/*
1239 * It is very important to gather dirty pages and write at once, so that we can
1240 * submit a big bio without interfering other data writes.
1241 * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
1242 */
1243#define COLLECT_DIRTY_NODES 1536
1244static int f2fs_write_node_pages(struct address_space *mapping, 1237static int f2fs_write_node_pages(struct address_space *mapping,
1245 struct writeback_control *wbc) 1238 struct writeback_control *wbc)
1246{ 1239{
1247 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); 1240 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1248 long nr_to_write = wbc->nr_to_write; 1241 long diff;
1249 1242
1250 /* balancing f2fs's metadata in background */ 1243 /* balancing f2fs's metadata in background */
1251 f2fs_balance_fs_bg(sbi); 1244 f2fs_balance_fs_bg(sbi);
1252 1245
1253 /* collect a number of dirty node pages and write together */ 1246 /* collect a number of dirty node pages and write together */
1254 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) 1247 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1255 return 0; 1248 goto skip_write;
1256 1249
1257 /* if mounting is failed, skip writing node pages */ 1250 diff = nr_pages_to_write(sbi, NODE, wbc);
1258 wbc->nr_to_write = 3 * max_hw_blocks(sbi);
1259 wbc->sync_mode = WB_SYNC_NONE; 1251 wbc->sync_mode = WB_SYNC_NONE;
1260 sync_node_pages(sbi, 0, wbc); 1252 sync_node_pages(sbi, 0, wbc);
1261 wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - 1253 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1262 wbc->nr_to_write); 1254 return 0;
1255
1256skip_write:
1257 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
1263 return 0; 1258 return 0;
1264} 1259}
1265 1260
@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = {
1307 .releasepage = f2fs_release_node_page, 1302 .releasepage = f2fs_release_node_page,
1308}; 1303};
1309 1304
1310static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) 1305static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1306 nid_t n)
1311{ 1307{
1312 struct list_head *this; 1308 return radix_tree_lookup(&nm_i->free_nid_root, n);
1313 struct free_nid *i;
1314 list_for_each(this, head) {
1315 i = list_entry(this, struct free_nid, list);
1316 if (i->nid == n)
1317 return i;
1318 }
1319 return NULL;
1320} 1309}
1321 1310
1322static void __del_from_free_nid_list(struct free_nid *i) 1311static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1312 struct free_nid *i)
1323{ 1313{
1324 list_del(&i->list); 1314 list_del(&i->list);
1325 kmem_cache_free(free_nid_slab, i); 1315 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1326} 1316}
1327 1317
1328static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) 1318static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1331 struct nat_entry *ne; 1321 struct nat_entry *ne;
1332 bool allocated = false; 1322 bool allocated = false;
1333 1323
1334 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) 1324 if (!available_free_memory(nm_i, FREE_NIDS))
1335 return -1; 1325 return -1;
1336 1326
1337 /* 0 nid should not be used */ 1327 /* 0 nid should not be used */
@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1342 /* do not add allocated nids */ 1332 /* do not add allocated nids */
1343 read_lock(&nm_i->nat_tree_lock); 1333 read_lock(&nm_i->nat_tree_lock);
1344 ne = __lookup_nat_cache(nm_i, nid); 1334 ne = __lookup_nat_cache(nm_i, nid);
1345 if (ne && nat_get_blkaddr(ne) != NULL_ADDR) 1335 if (ne &&
1336 (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
1346 allocated = true; 1337 allocated = true;
1347 read_unlock(&nm_i->nat_tree_lock); 1338 read_unlock(&nm_i->nat_tree_lock);
1348 if (allocated) 1339 if (allocated)
@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1354 i->state = NID_NEW; 1345 i->state = NID_NEW;
1355 1346
1356 spin_lock(&nm_i->free_nid_list_lock); 1347 spin_lock(&nm_i->free_nid_list_lock);
1357 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { 1348 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1358 spin_unlock(&nm_i->free_nid_list_lock); 1349 spin_unlock(&nm_i->free_nid_list_lock);
1359 kmem_cache_free(free_nid_slab, i); 1350 kmem_cache_free(free_nid_slab, i);
1360 return 0; 1351 return 0;
@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1368static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) 1359static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1369{ 1360{
1370 struct free_nid *i; 1361 struct free_nid *i;
1362 bool need_free = false;
1363
1371 spin_lock(&nm_i->free_nid_list_lock); 1364 spin_lock(&nm_i->free_nid_list_lock);
1372 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1365 i = __lookup_free_nid_list(nm_i, nid);
1373 if (i && i->state == NID_NEW) { 1366 if (i && i->state == NID_NEW) {
1374 __del_from_free_nid_list(i); 1367 __del_from_free_nid_list(nm_i, i);
1375 nm_i->fcnt--; 1368 nm_i->fcnt--;
1369 need_free = true;
1376 } 1370 }
1377 spin_unlock(&nm_i->free_nid_list_lock); 1371 spin_unlock(&nm_i->free_nid_list_lock);
1372
1373 if (need_free)
1374 kmem_cache_free(free_nid_slab, i);
1378} 1375}
1379 1376
1380static void scan_nat_page(struct f2fs_nm_info *nm_i, 1377static void scan_nat_page(struct f2fs_nm_info *nm_i,
@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1413 return; 1410 return;
1414 1411
1415 /* readahead nat pages to be scanned */ 1412 /* readahead nat pages to be scanned */
1416 ra_nat_pages(sbi, nid); 1413 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
1417 1414
1418 while (1) { 1415 while (1) {
1419 struct page *page = get_current_nat_page(sbi, nid); 1416 struct page *page = get_current_nat_page(sbi, nid);
@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1454{ 1451{
1455 struct f2fs_nm_info *nm_i = NM_I(sbi); 1452 struct f2fs_nm_info *nm_i = NM_I(sbi);
1456 struct free_nid *i = NULL; 1453 struct free_nid *i = NULL;
1457 struct list_head *this;
1458retry: 1454retry:
1459 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) 1455 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
1460 return false; 1456 return false;
@@ -1462,13 +1458,11 @@ retry:
1462 spin_lock(&nm_i->free_nid_list_lock); 1458 spin_lock(&nm_i->free_nid_list_lock);
1463 1459
1464 /* We should not use stale free nids created by build_free_nids */ 1460 /* We should not use stale free nids created by build_free_nids */
1465 if (nm_i->fcnt && !sbi->on_build_free_nids) { 1461 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1466 f2fs_bug_on(list_empty(&nm_i->free_nid_list)); 1462 f2fs_bug_on(list_empty(&nm_i->free_nid_list));
1467 list_for_each(this, &nm_i->free_nid_list) { 1463 list_for_each_entry(i, &nm_i->free_nid_list, list)
1468 i = list_entry(this, struct free_nid, list);
1469 if (i->state == NID_NEW) 1464 if (i->state == NID_NEW)
1470 break; 1465 break;
1471 }
1472 1466
1473 f2fs_bug_on(i->state != NID_NEW); 1467 f2fs_bug_on(i->state != NID_NEW);
1474 *nid = i->nid; 1468 *nid = i->nid;
@@ -1481,9 +1475,7 @@ retry:
1481 1475
1482 /* Let's scan nat pages and its caches to get free nids */ 1476 /* Let's scan nat pages and its caches to get free nids */
1483 mutex_lock(&nm_i->build_lock); 1477 mutex_lock(&nm_i->build_lock);
1484 sbi->on_build_free_nids = true;
1485 build_free_nids(sbi); 1478 build_free_nids(sbi);
1486 sbi->on_build_free_nids = false;
1487 mutex_unlock(&nm_i->build_lock); 1479 mutex_unlock(&nm_i->build_lock);
1488 goto retry; 1480 goto retry;
1489} 1481}
@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1497 struct free_nid *i; 1489 struct free_nid *i;
1498 1490
1499 spin_lock(&nm_i->free_nid_list_lock); 1491 spin_lock(&nm_i->free_nid_list_lock);
1500 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1492 i = __lookup_free_nid_list(nm_i, nid);
1501 f2fs_bug_on(!i || i->state != NID_ALLOC); 1493 f2fs_bug_on(!i || i->state != NID_ALLOC);
1502 __del_from_free_nid_list(i); 1494 __del_from_free_nid_list(nm_i, i);
1503 spin_unlock(&nm_i->free_nid_list_lock); 1495 spin_unlock(&nm_i->free_nid_list_lock);
1496
1497 kmem_cache_free(free_nid_slab, i);
1504} 1498}
1505 1499
1506/* 1500/*
@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1510{ 1504{
1511 struct f2fs_nm_info *nm_i = NM_I(sbi); 1505 struct f2fs_nm_info *nm_i = NM_I(sbi);
1512 struct free_nid *i; 1506 struct free_nid *i;
1507 bool need_free = false;
1513 1508
1514 if (!nid) 1509 if (!nid)
1515 return; 1510 return;
1516 1511
1517 spin_lock(&nm_i->free_nid_list_lock); 1512 spin_lock(&nm_i->free_nid_list_lock);
1518 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); 1513 i = __lookup_free_nid_list(nm_i, nid);
1519 f2fs_bug_on(!i || i->state != NID_ALLOC); 1514 f2fs_bug_on(!i || i->state != NID_ALLOC);
1520 if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { 1515 if (!available_free_memory(nm_i, FREE_NIDS)) {
1521 __del_from_free_nid_list(i); 1516 __del_from_free_nid_list(nm_i, i);
1517 need_free = true;
1522 } else { 1518 } else {
1523 i->state = NID_NEW; 1519 i->state = NID_NEW;
1524 nm_i->fcnt++; 1520 nm_i->fcnt++;
1525 } 1521 }
1526 spin_unlock(&nm_i->free_nid_list_lock); 1522 spin_unlock(&nm_i->free_nid_list_lock);
1523
1524 if (need_free)
1525 kmem_cache_free(free_nid_slab, i);
1527} 1526}
1528 1527
1529void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, 1528void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1531 block_t new_blkaddr) 1530 block_t new_blkaddr)
1532{ 1531{
1533 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); 1532 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1534 set_node_addr(sbi, ni, new_blkaddr); 1533 set_node_addr(sbi, ni, new_blkaddr, false);
1535 clear_node_page_dirty(page); 1534 clear_node_page_dirty(page);
1536} 1535}
1537 1536
1537void recover_inline_xattr(struct inode *inode, struct page *page)
1538{
1539 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1540 void *src_addr, *dst_addr;
1541 size_t inline_size;
1542 struct page *ipage;
1543 struct f2fs_inode *ri;
1544
1545 if (!f2fs_has_inline_xattr(inode))
1546 return;
1547
1548 if (!IS_INODE(page))
1549 return;
1550
1551 ri = F2FS_INODE(page);
1552 if (!(ri->i_inline & F2FS_INLINE_XATTR))
1553 return;
1554
1555 ipage = get_node_page(sbi, inode->i_ino);
1556 f2fs_bug_on(IS_ERR(ipage));
1557
1558 dst_addr = inline_xattr_addr(ipage);
1559 src_addr = inline_xattr_addr(page);
1560 inline_size = inline_xattr_size(inode);
1561
1562 memcpy(dst_addr, src_addr, inline_size);
1563
1564 update_inode(inode, ipage);
1565 f2fs_put_page(ipage, 1);
1566}
1567
1568bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1569{
1570 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1571 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1572 nid_t new_xnid = nid_of_node(page);
1573 struct node_info ni;
1574
1575 recover_inline_xattr(inode, page);
1576
1577 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1578 return false;
1579
1580 /* 1: invalidate the previous xattr nid */
1581 if (!prev_xnid)
1582 goto recover_xnid;
1583
1584 /* Deallocate node address */
1585 get_node_info(sbi, prev_xnid, &ni);
1586 f2fs_bug_on(ni.blk_addr == NULL_ADDR);
1587 invalidate_blocks(sbi, ni.blk_addr);
1588 dec_valid_node_count(sbi, inode);
1589 set_node_addr(sbi, &ni, NULL_ADDR, false);
1590
1591recover_xnid:
1592 /* 2: allocate new xattr nid */
1593 if (unlikely(!inc_valid_node_count(sbi, inode)))
1594 f2fs_bug_on(1);
1595
1596 remove_free_nid(NM_I(sbi), new_xnid);
1597 get_node_info(sbi, new_xnid, &ni);
1598 ni.ino = inode->i_ino;
1599 set_node_addr(sbi, &ni, NEW_ADDR, false);
1600 F2FS_I(inode)->i_xattr_nid = new_xnid;
1601
1602 /* 3: update xattr blkaddr */
1603 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
1604 set_node_addr(sbi, &ni, blkaddr, false);
1605
1606 update_inode_page(inode);
1607 return true;
1608}
1609
1538int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1610int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1539{ 1611{
1540 struct f2fs_inode *src, *dst; 1612 struct f2fs_inode *src, *dst;
@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1567 1639
1568 if (unlikely(!inc_valid_node_count(sbi, NULL))) 1640 if (unlikely(!inc_valid_node_count(sbi, NULL)))
1569 WARN_ON(1); 1641 WARN_ON(1);
1570 set_node_addr(sbi, &new_ni, NEW_ADDR); 1642 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1571 inc_valid_inode_count(sbi); 1643 inc_valid_inode_count(sbi);
1572 f2fs_put_page(ipage, 1); 1644 f2fs_put_page(ipage, 1);
1573 return 0; 1645 return 0;
@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1590 for (; page_idx < start + nrpages; page_idx++) { 1662 for (; page_idx < start + nrpages; page_idx++) {
1591 /* alloc temporal page for read node summary info*/ 1663 /* alloc temporal page for read node summary info*/
1592 page = alloc_page(GFP_F2FS_ZERO); 1664 page = alloc_page(GFP_F2FS_ZERO);
1593 if (!page) { 1665 if (!page)
1594 struct page *tmp; 1666 break;
1595 list_for_each_entry_safe(page, tmp, pages, lru) {
1596 list_del(&page->lru);
1597 unlock_page(page);
1598 __free_pages(page, 0);
1599 }
1600 return -ENOMEM;
1601 }
1602 1667
1603 lock_page(page); 1668 lock_page(page);
1604 page->index = page_idx; 1669 page->index = page_idx;
@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1609 f2fs_submit_page_mbio(sbi, page, page->index, &fio); 1674 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1610 1675
1611 f2fs_submit_merged_bio(sbi, META, READ); 1676 f2fs_submit_merged_bio(sbi, META, READ);
1612 return 0; 1677
1678 return page_idx - start;
1613} 1679}
1614 1680
1615int restore_node_summary(struct f2fs_sb_info *sbi, 1681int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1628 addr = START_BLOCK(sbi, segno); 1694 addr = START_BLOCK(sbi, segno);
1629 sum_entry = &sum->entries[0]; 1695 sum_entry = &sum->entries[0];
1630 1696
1631 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { 1697 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1632 nrpages = min(last_offset - i, bio_blocks); 1698 nrpages = min(last_offset - i, bio_blocks);
1633 1699
1634 /* read ahead node pages */ 1700 /* read ahead node pages */
1635 err = ra_sum_pages(sbi, &page_list, addr, nrpages); 1701 nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
1636 if (err) 1702 if (!nrpages)
1637 return err; 1703 return -ENOMEM;
1638 1704
1639 list_for_each_entry_safe(page, tmp, &page_list, lru) { 1705 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1706 if (err)
1707 goto skip;
1640 1708
1641 lock_page(page); 1709 lock_page(page);
1642 if (unlikely(!PageUptodate(page))) { 1710 if (unlikely(!PageUptodate(page))) {
@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1648 sum_entry->ofs_in_node = 0; 1716 sum_entry->ofs_in_node = 0;
1649 sum_entry++; 1717 sum_entry++;
1650 } 1718 }
1651
1652 list_del(&page->lru);
1653 unlock_page(page); 1719 unlock_page(page);
1720skip:
1721 list_del(&page->lru);
1654 __free_pages(page, 0); 1722 __free_pages(page, 0);
1655 } 1723 }
1656 } 1724 }
@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1709 struct f2fs_nm_info *nm_i = NM_I(sbi); 1777 struct f2fs_nm_info *nm_i = NM_I(sbi);
1710 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1778 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1711 struct f2fs_summary_block *sum = curseg->sum_blk; 1779 struct f2fs_summary_block *sum = curseg->sum_blk;
1712 struct list_head *cur, *n; 1780 struct nat_entry *ne, *cur;
1713 struct page *page = NULL; 1781 struct page *page = NULL;
1714 struct f2fs_nat_block *nat_blk = NULL; 1782 struct f2fs_nat_block *nat_blk = NULL;
1715 nid_t start_nid = 0, end_nid = 0; 1783 nid_t start_nid = 0, end_nid = 0;
@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1721 mutex_lock(&curseg->curseg_mutex); 1789 mutex_lock(&curseg->curseg_mutex);
1722 1790
1723 /* 1) flush dirty nat caches */ 1791 /* 1) flush dirty nat caches */
1724 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { 1792 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1725 struct nat_entry *ne;
1726 nid_t nid; 1793 nid_t nid;
1727 struct f2fs_nat_entry raw_ne; 1794 struct f2fs_nat_entry raw_ne;
1728 int offset = -1; 1795 int offset = -1;
1729 block_t new_blkaddr; 1796 block_t new_blkaddr;
1730 1797
1731 ne = list_entry(cur, struct nat_entry, list);
1732 nid = nat_get_nid(ne);
1733
1734 if (nat_get_blkaddr(ne) == NEW_ADDR) 1798 if (nat_get_blkaddr(ne) == NEW_ADDR)
1735 continue; 1799 continue;
1800
1801 nid = nat_get_nid(ne);
1802
1736 if (flushed) 1803 if (flushed)
1737 goto to_nat_page; 1804 goto to_nat_page;
1738 1805
@@ -1783,16 +1850,12 @@ flush_now:
1783 } else { 1850 } else {
1784 write_lock(&nm_i->nat_tree_lock); 1851 write_lock(&nm_i->nat_tree_lock);
1785 __clear_nat_cache_dirty(nm_i, ne); 1852 __clear_nat_cache_dirty(nm_i, ne);
1786 ne->checkpointed = true;
1787 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1788 } 1854 }
1789 } 1855 }
1790 if (!flushed) 1856 if (!flushed)
1791 mutex_unlock(&curseg->curseg_mutex); 1857 mutex_unlock(&curseg->curseg_mutex);
1792 f2fs_put_page(page, 1); 1858 f2fs_put_page(page, 1);
1793
1794 /* 2) shrink nat caches if necessary */
1795 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1796} 1859}
1797 1860
1798static int init_node_manager(struct f2fs_sb_info *sbi) 1861static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1807 /* segment_count_nat includes pair segment so divide to 2. */ 1870 /* segment_count_nat includes pair segment so divide to 2. */
1808 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; 1871 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1809 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); 1872 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1810 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 1873
1874 /* not used nids: 0, node, meta, (and root counted as valid node) */
1875 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
1811 nm_i->fcnt = 0; 1876 nm_i->fcnt = 0;
1812 nm_i->nat_cnt = 0; 1877 nm_i->nat_cnt = 0;
1878 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
1813 1879
1880 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1814 INIT_LIST_HEAD(&nm_i->free_nid_list); 1881 INIT_LIST_HEAD(&nm_i->free_nid_list);
1815 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1882 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1816 INIT_LIST_HEAD(&nm_i->nat_entries); 1883 INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1864 spin_lock(&nm_i->free_nid_list_lock); 1931 spin_lock(&nm_i->free_nid_list_lock);
1865 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { 1932 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1866 f2fs_bug_on(i->state == NID_ALLOC); 1933 f2fs_bug_on(i->state == NID_ALLOC);
1867 __del_from_free_nid_list(i); 1934 __del_from_free_nid_list(nm_i, i);
1868 nm_i->fcnt--; 1935 nm_i->fcnt--;
1936 spin_unlock(&nm_i->free_nid_list_lock);
1937 kmem_cache_free(free_nid_slab, i);
1938 spin_lock(&nm_i->free_nid_list_lock);
1869 } 1939 }
1870 f2fs_bug_on(nm_i->fcnt); 1940 f2fs_bug_on(nm_i->fcnt);
1871 spin_unlock(&nm_i->free_nid_list_lock); 1941 spin_unlock(&nm_i->free_nid_list_lock);
@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1875 while ((found = __gang_lookup_nat_cache(nm_i, 1945 while ((found = __gang_lookup_nat_cache(nm_i,
1876 nid, NATVEC_SIZE, natvec))) { 1946 nid, NATVEC_SIZE, natvec))) {
1877 unsigned idx; 1947 unsigned idx;
1878 for (idx = 0; idx < found; idx++) { 1948 nid = nat_get_nid(natvec[found - 1]) + 1;
1879 struct nat_entry *e = natvec[idx]; 1949 for (idx = 0; idx < found; idx++)
1880 nid = nat_get_nid(e) + 1; 1950 __del_from_nat_cache(nm_i, natvec[idx]);
1881 __del_from_nat_cache(nm_i, e);
1882 }
1883 } 1951 }
1884 f2fs_bug_on(nm_i->nat_cnt); 1952 f2fs_bug_on(nm_i->nat_cnt);
1885 write_unlock(&nm_i->nat_tree_lock); 1953 write_unlock(&nm_i->nat_tree_lock);
@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1892int __init create_node_manager_caches(void) 1960int __init create_node_manager_caches(void)
1893{ 1961{
1894 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1962 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1895 sizeof(struct nat_entry), NULL); 1963 sizeof(struct nat_entry));
1896 if (!nat_entry_slab) 1964 if (!nat_entry_slab)
1897 return -ENOMEM; 1965 return -ENOMEM;
1898 1966
1899 free_nid_slab = f2fs_kmem_cache_create("free_nid", 1967 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1900 sizeof(struct free_nid), NULL); 1968 sizeof(struct free_nid));
1901 if (!free_nid_slab) { 1969 if (!free_nid_slab) {
1902 kmem_cache_destroy(nat_entry_slab); 1970 kmem_cache_destroy(nat_entry_slab);
1903 return -ENOMEM; 1971 return -ENOMEM;
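
Two structural changes run through the free-nid code above: lookups now go through a radix tree keyed by nid (radix_tree_insert/lookup/delete on free_nid_root) while the list is kept for scanning, and every kmem_cache_free() of a free_nid happens only after free_nid_list_lock has been dropped, with a need_free flag remembering the decision made under the lock. A generic userspace illustration of that unlink-under-the-lock, free-after-unlock pattern; the types and the lock below are invented for the example.

/* Decide and unlink while holding the lock, release memory afterwards. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { int nid; struct entry *next; };

static pthread_spinlock_t list_lock;
static struct entry *free_list;

static void remove_entry(int nid)
{
	struct entry **pp, *victim = NULL;

	pthread_spin_lock(&list_lock);
	for (pp = &free_list; *pp; pp = &(*pp)->next)
		if ((*pp)->nid == nid) {
			victim = *pp;
			*pp = victim->next;	/* unlink under the lock */
			break;
		}
	pthread_spin_unlock(&list_lock);

	free(victim);		/* free only after dropping the lock */
}

int main(void)
{
	struct entry *e = malloc(sizeof(*e));

	pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
	e->nid = 7;
	e->next = NULL;
	free_list = e;

	remove_entry(7);
	printf("free_list is now %s\n", free_list ? "non-empty" : "empty");
	return 0;
}
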
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c4c79885c993..5decc1a375f0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,14 +17,11 @@
17/* # of pages to perform readahead before building free nids */ 17/* # of pages to perform readahead before building free nids */
18#define FREE_NID_PAGES 4 18#define FREE_NID_PAGES 4
19 19
20/* maximum # of free node ids to produce during build_free_nids */
21#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
22
23/* maximum readahead size for node during getting data blocks */ 20/* maximum readahead size for node during getting data blocks */
24#define MAX_RA_NODE 128 21#define MAX_RA_NODE 128
25 22
26/* maximum cached nat entries to manage memory footprint */ 23/* control the memory footprint threshold (10MB per 1GB ram) */
27#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) 24#define DEF_RAM_THRESHOLD 10
28 25
29/* vector size for gang look-up from nat cache that consists of radix tree */ 26/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64 27#define NATVEC_SIZE 64
@@ -45,6 +42,7 @@ struct node_info {
45struct nat_entry { 42struct nat_entry {
46 struct list_head list; /* for clean or dirty nat list */ 43 struct list_head list; /* for clean or dirty nat list */
47 bool checkpointed; /* whether it is checkpointed or not */ 44 bool checkpointed; /* whether it is checkpointed or not */
45 bool fsync_done; /* whether the latest node has fsync mark */
48 struct node_info ni; /* in-memory node information */ 46 struct node_info ni; /* in-memory node information */
49}; 47};
50 48
@@ -58,9 +56,15 @@ struct nat_entry {
58#define nat_set_version(nat, v) (nat->ni.version = v) 56#define nat_set_version(nat, v) (nat->ni.version = v)
59 57
60#define __set_nat_cache_dirty(nm_i, ne) \ 58#define __set_nat_cache_dirty(nm_i, ne) \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); 59 do { \
60 ne->checkpointed = false; \
61 list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
62 } while (0);
62#define __clear_nat_cache_dirty(nm_i, ne) \ 63#define __clear_nat_cache_dirty(nm_i, ne) \
63 list_move_tail(&ne->list, &nm_i->nat_entries); 64 do { \
65 ne->checkpointed = true; \
66 list_move_tail(&ne->list, &nm_i->nat_entries); \
67 } while (0);
64#define inc_node_version(version) (++version) 68#define inc_node_version(version) (++version)
65 69
66static inline void node_info_from_raw_nat(struct node_info *ni, 70static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
71 ni->version = raw_ne->version; 75 ni->version = raw_ne->version;
72} 76}
73 77
78enum nid_type {
79 FREE_NIDS, /* indicates the free nid list */
80 NAT_ENTRIES /* indicates the cached nat entry */
81};
82
74/* 83/*
75 * For free nid mangement 84 * For free nid mangement
76 */ 85 */
@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page)
236{ 245{
237 unsigned int ofs = ofs_of_node(node_page); 246 unsigned int ofs = ofs_of_node(node_page);
238 247
239 if (ofs == XATTR_NODE_OFFSET) 248 if (f2fs_has_xattr_block(ofs))
240 return false; 249 return false;
241 250
242 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || 251 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
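
__set_nat_cache_dirty() and __clear_nat_cache_dirty() above grow a second statement (toggling ne->checkpointed), so they are wrapped in do { ... } while (0). The point of that wrapper is that a multi-statement macro then behaves as a single statement at the call site, in particular inside an unbraced if/else. The sketch below shows that case with simplified stand-in macros, not the real f2fs ones; the stand-ins omit the trailing semicolon so that the caller supplies it.

/* Why multi-statement macros are wrapped in do { ... } while (0). */
#include <stdio.h>

#define MARK_DIRTY_BAD(x)	{ (x)->flag = 1; (x)->count++; }
#define MARK_DIRTY_GOOD(x)	do { (x)->flag = 1; (x)->count++; } while (0)

struct obj { int flag; int count; };

int main(void)
{
	struct obj o = { 0, 0 };
	int dirty = 1;

	/* With MARK_DIRTY_BAD(&o); here, the ';' after the expanded block
	 * would end the if statement and the else below would not compile. */
	if (dirty)
		MARK_DIRTY_GOOD(&o);
	else
		printf("clean\n");

	printf("flag=%d count=%d\n", o.flag, o.count);
	return 0;
}
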
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 976a7a934db5..b1ae89f0f44e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, 27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
28 nid_t ino) 28 nid_t ino)
29{ 29{
30 struct list_head *this;
31 struct fsync_inode_entry *entry; 30 struct fsync_inode_entry *entry;
32 31
33 list_for_each(this, head) { 32 list_for_each_entry(entry, head, list)
34 entry = list_entry(this, struct fsync_inode_entry, list);
35 if (entry->inode->i_ino == ino) 33 if (entry->inode->i_ino == ino)
36 return entry; 34 return entry;
37 } 35
38 return NULL; 36 return NULL;
39} 37}
40 38
@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
136 134
137 /* get node pages in the current segment */ 135 /* get node pages in the current segment */
138 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 136 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
139 blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; 137 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
140 138
141 /* read node page */ 139 /* read node page */
142 page = alloc_page(GFP_F2FS_ZERO); 140 page = alloc_page(GFP_F2FS_ZERO);
@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
218{ 216{
219 struct seg_entry *sentry; 217 struct seg_entry *sentry;
220 unsigned int segno = GET_SEGNO(sbi, blkaddr); 218 unsigned int segno = GET_SEGNO(sbi, blkaddr);
221 unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & 219 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
222 (sbi->blocks_per_seg - 1); 220 struct f2fs_summary_block *sum_node;
223 struct f2fs_summary sum; 221 struct f2fs_summary sum;
222 struct page *sum_page, *node_page;
224 nid_t ino, nid; 223 nid_t ino, nid;
225 void *kaddr;
226 struct inode *inode; 224 struct inode *inode;
227 struct page *node_page;
228 unsigned int offset; 225 unsigned int offset;
229 block_t bidx; 226 block_t bidx;
230 int i; 227 int i;
@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
238 struct curseg_info *curseg = CURSEG_I(sbi, i); 235 struct curseg_info *curseg = CURSEG_I(sbi, i);
239 if (curseg->segno == segno) { 236 if (curseg->segno == segno) {
240 sum = curseg->sum_blk->entries[blkoff]; 237 sum = curseg->sum_blk->entries[blkoff];
241 break; 238 goto got_it;
242 } 239 }
243 } 240 }
244 if (i > CURSEG_COLD_DATA) {
245 struct page *sum_page = get_sum_page(sbi, segno);
246 struct f2fs_summary_block *sum_node;
247 kaddr = page_address(sum_page);
248 sum_node = (struct f2fs_summary_block *)kaddr;
249 sum = sum_node->entries[blkoff];
250 f2fs_put_page(sum_page, 1);
251 }
252 241
242 sum_page = get_sum_page(sbi, segno);
243 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
244 sum = sum_node->entries[blkoff];
245 f2fs_put_page(sum_page, 1);
246got_it:
253 /* Use the locked dnode page and inode */ 247 /* Use the locked dnode page and inode */
254 nid = le32_to_cpu(sum.nid); 248 nid = le32_to_cpu(sum.nid);
255 if (dn->inode->i_ino == nid) { 249 if (dn->inode->i_ino == nid) {
@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
301 if (recover_inline_data(inode, page)) 295 if (recover_inline_data(inode, page))
302 goto out; 296 goto out;
303 297
298 if (recover_xattr_data(inode, page, blkaddr))
299 goto out;
300
304 start = start_bidx_of_node(ofs_of_node(page), fi); 301 start = start_bidx_of_node(ofs_of_node(page), fi);
305 if (IS_INODE(page)) 302 if (IS_INODE(page))
306 end = start + ADDRS_PER_INODE(fi); 303 end = start + ADDRS_PER_INODE(fi);
@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
317 goto out; 314 goto out;
318 } 315 }
319 316
320 wait_on_page_writeback(dn.node_page); 317 f2fs_wait_on_page_writeback(dn.node_page, NODE);
321 318
322 get_node_info(sbi, dn.nid, &ni); 319 get_node_info(sbi, dn.nid, &ni);
323 f2fs_bug_on(ni.ino != ino_of_node(page)); 320 f2fs_bug_on(ni.ino != ino_of_node(page));
@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
437 bool need_writecp = false; 434 bool need_writecp = false;
438 435
439 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 436 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
440 sizeof(struct fsync_inode_entry), NULL); 437 sizeof(struct fsync_inode_entry));
441 if (!fsync_entry_slab) 438 if (!fsync_entry_slab)
442 return -ENOMEM; 439 return -ENOMEM;
443 440
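
get_fsync_inode() above, like several loops later in this series, switches from list_for_each() plus list_entry() to list_for_each_entry(), which folds the container_of() step into the iterator itself. A minimal userspace rendition of what that iterator does; the list plumbing below re-implements just enough of list.h to compile (and relies on the GCC typeof extension), it is an illustration rather than the kernel header.

/* Minimal rendition of list_for_each_entry(): the container_of() step the
 * open-coded loops used to spell out is hidden inside the iterator. */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry(pos, head, member)				\
	for (pos = container_of((head)->next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = container_of(pos->member.next, typeof(*pos), member))

struct fsync_inode_entry { unsigned int ino; struct list_head list; };

int main(void)
{
	struct list_head head = { &head, &head };
	struct fsync_inode_entry a = { .ino = 3 }, b = { .ino = 5 };
	struct fsync_inode_entry *entry;

	/* hand-rolled list_add_tail() calls, just enough for the demo */
	a.list.prev = &head;	a.list.next = &b.list;
	b.list.prev = &a.list;	b.list.next = &head;
	head.next = &a.list;	head.prev = &b.list;

	list_for_each_entry(entry, &head, list)
		if (entry->ino == 5)
			printf("found ino %u\n", entry->ino);
	return 0;
}
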
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7caac5f2ca9e..085f548be7a3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h> 15#include <linux/prefetch.h>
16#include <linux/kthread.h>
16#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
17#include <linux/swap.h> 18#include <linux/swap.h>
18 19
@@ -24,6 +25,7 @@
24#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
25 26
26static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *flush_cmd_slab;
27 29
28/* 30/*
29 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
195 f2fs_sync_fs(sbi->sb, true); 197 f2fs_sync_fs(sbi->sb, true);
196} 198}
197 199
200static int issue_flush_thread(void *data)
201{
202 struct f2fs_sb_info *sbi = data;
203 struct f2fs_sm_info *sm_i = SM_I(sbi);
204 wait_queue_head_t *q = &sm_i->flush_wait_queue;
205repeat:
206 if (kthread_should_stop())
207 return 0;
208
209 spin_lock(&sm_i->issue_lock);
210 if (sm_i->issue_list) {
211 sm_i->dispatch_list = sm_i->issue_list;
212 sm_i->issue_list = sm_i->issue_tail = NULL;
213 }
214 spin_unlock(&sm_i->issue_lock);
215
216 if (sm_i->dispatch_list) {
217 struct bio *bio = bio_alloc(GFP_NOIO, 0);
218 struct flush_cmd *cmd, *next;
219 int ret;
220
221 bio->bi_bdev = sbi->sb->s_bdev;
222 ret = submit_bio_wait(WRITE_FLUSH, bio);
223
224 for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
225 cmd->ret = ret;
226 next = cmd->next;
227 complete(&cmd->wait);
228 }
229 sm_i->dispatch_list = NULL;
230 }
231
232 wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
233 goto repeat;
234}
235
236int f2fs_issue_flush(struct f2fs_sb_info *sbi)
237{
238 struct f2fs_sm_info *sm_i = SM_I(sbi);
239 struct flush_cmd *cmd;
240 int ret;
241
242 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244
245 cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
246 cmd->next = NULL;
247 cmd->ret = 0;
248 init_completion(&cmd->wait);
249
250 spin_lock(&sm_i->issue_lock);
251 if (sm_i->issue_list)
252 sm_i->issue_tail->next = cmd;
253 else
254 sm_i->issue_list = cmd;
255 sm_i->issue_tail = cmd;
256 spin_unlock(&sm_i->issue_lock);
257
258 if (!sm_i->dispatch_list)
259 wake_up(&sm_i->flush_wait_queue);
260
261 wait_for_completion(&cmd->wait);
262 ret = cmd->ret;
263 kmem_cache_free(flush_cmd_slab, cmd);
264 return ret;
265}
266
198static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 267static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
199 enum dirty_type dirty_type) 268 enum dirty_type dirty_type)
200{ 269{
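
issue_flush_thread() and f2fs_issue_flush() above let concurrent fsync callers share one cache flush: each caller queues a flush_cmd and sleeps on a completion, and a single kernel thread drains the whole queue and issues one WRITE_FLUSH bio for the batch; when the merge option is off, f2fs_issue_flush() simply falls back to blkdev_issue_flush(). A pthreads sketch of the same batching idea; every name in it, and the use of sync(), is an analogy invented for the example, not the kernel implementation.

/* Callers enqueue a command and wait; one worker drains the queue and
 * issues a single flush on behalf of the whole batch. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct cmd { struct cmd *next; int done; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t need_flush = PTHREAD_COND_INITIALIZER;
static pthread_cond_t flushed = PTHREAD_COND_INITIALIZER;
static struct cmd *issue_list;

static void *flush_worker(void *unused)
{
	(void)unused;
	for (;;) {
		struct cmd *batch, *c;

		pthread_mutex_lock(&lock);
		while (!issue_list)
			pthread_cond_wait(&need_flush, &lock);
		batch = issue_list;		/* take every queued command */
		issue_list = NULL;
		pthread_mutex_unlock(&lock);

		sync();				/* one flush serves the batch */

		pthread_mutex_lock(&lock);
		for (c = batch; c; c = c->next)
			c->done = 1;
		pthread_cond_broadcast(&flushed);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

static void issue_flush(void)
{
	struct cmd c = { .next = NULL, .done = 0 };

	pthread_mutex_lock(&lock);
	c.next = issue_list;			/* enqueue */
	issue_list = &c;
	pthread_cond_signal(&need_flush);
	while (!c.done)				/* like wait_for_completion() */
		pthread_cond_wait(&flushed, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, flush_worker, NULL);
	issue_flush();
	puts("flush completed");
	return 0;
}
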
@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
340void clear_prefree_segments(struct f2fs_sb_info *sbi) 409void clear_prefree_segments(struct f2fs_sb_info *sbi)
341{ 410{
342 struct list_head *head = &(SM_I(sbi)->discard_list); 411 struct list_head *head = &(SM_I(sbi)->discard_list);
343 struct list_head *this, *next; 412 struct discard_entry *entry, *this;
344 struct discard_entry *entry;
345 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 413 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
346 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 414 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
347 unsigned int total_segs = TOTAL_SEGS(sbi); 415 unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
370 mutex_unlock(&dirty_i->seglist_lock); 438 mutex_unlock(&dirty_i->seglist_lock);
371 439
372 /* send small discards */ 440 /* send small discards */
373 list_for_each_safe(this, next, head) { 441 list_for_each_entry_safe(entry, this, head, list) {
374 entry = list_entry(this, struct discard_entry, list);
375 f2fs_issue_discard(sbi, entry->blkaddr, entry->len); 442 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
376 list_del(&entry->list); 443 list_del(&entry->list);
377 SM_I(sbi)->nr_discards -= entry->len; 444 SM_I(sbi)->nr_discards -= entry->len;
@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
405 472
406 se = get_seg_entry(sbi, segno); 473 se = get_seg_entry(sbi, segno);
407 new_vblocks = se->valid_blocks + del; 474 new_vblocks = se->valid_blocks + del;
408 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); 475 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
409 476
410 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || 477 f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
411 (new_vblocks > sbi->blocks_per_seg))); 478 (new_vblocks > sbi->blocks_per_seg)));
@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
434 get_sec_entry(sbi, segno)->valid_blocks += del; 501 get_sec_entry(sbi, segno)->valid_blocks += del;
435} 502}
436 503
437static void refresh_sit_entry(struct f2fs_sb_info *sbi, 504void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
438 block_t old_blkaddr, block_t new_blkaddr)
439{ 505{
440 update_sit_entry(sbi, new_blkaddr, 1); 506 update_sit_entry(sbi, new, 1);
441 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) 507 if (GET_SEGNO(sbi, old) != NULL_SEGNO)
442 update_sit_entry(sbi, old_blkaddr, -1); 508 update_sit_entry(sbi, old, -1);
509
510 locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
511 locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
443} 512}
444 513
445void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) 514void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
881 950
882 stat_inc_block_count(sbi, curseg); 951 stat_inc_block_count(sbi, curseg);
883 952
953 if (!__has_curseg_space(sbi, type))
954 sit_i->s_ops->allocate_segment(sbi, type, false);
884 /* 955 /*
885 * SIT information should be updated before segment allocation, 956 * SIT information should be updated before segment allocation,
886 * since SSR needs latest valid block information. 957 * since SSR needs latest valid block information.
887 */ 958 */
888 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 959 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
889
890 if (!__has_curseg_space(sbi, type))
891 sit_i->s_ops->allocate_segment(sbi, type, false);
892
893 locate_dirty_segment(sbi, old_cursegno); 960 locate_dirty_segment(sbi, old_cursegno);
894 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 961
895 mutex_unlock(&sit_i->sentry_lock); 962 mutex_unlock(&sit_i->sentry_lock);
896 963
897 if (page && IS_NODESEG(type)) 964 if (page && IS_NODESEG(type))
@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
987 change_curseg(sbi, type, true); 1054 change_curseg(sbi, type, true);
988 } 1055 }
989 1056
990 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1057 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
991 (sbi->blocks_per_seg - 1);
992 __add_sum_entry(sbi, type, sum); 1058 __add_sum_entry(sbi, type, sum);
993 1059
994 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1060 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
995
996 locate_dirty_segment(sbi, old_cursegno); 1061 locate_dirty_segment(sbi, old_cursegno);
997 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
998 1062
999 mutex_unlock(&sit_i->sentry_lock); 1063 mutex_unlock(&sit_i->sentry_lock);
1000 mutex_unlock(&curseg->curseg_mutex); 1064 mutex_unlock(&curseg->curseg_mutex);
@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1028 curseg->next_segno = segno; 1092 curseg->next_segno = segno;
1029 change_curseg(sbi, type, true); 1093 change_curseg(sbi, type, true);
1030 } 1094 }
1031 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & 1095 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1032 (sbi->blocks_per_seg - 1);
1033 __add_sum_entry(sbi, type, sum); 1096 __add_sum_entry(sbi, type, sum);
1034 1097
1035 /* change the current log to the next block addr in advance */ 1098 /* change the current log to the next block addr in advance */
@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
1037 curseg->next_segno = next_segno; 1100 curseg->next_segno = next_segno;
1038 change_curseg(sbi, type, true); 1101 change_curseg(sbi, type, true);
1039 } 1102 }
1040 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & 1103 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1041 (sbi->blocks_per_seg - 1);
1042 1104
1043 /* rewrite node page */ 1105 /* rewrite node page */
1044 set_page_writeback(page); 1106 set_page_writeback(page);
1045 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); 1107 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1046 f2fs_submit_merged_bio(sbi, NODE, WRITE); 1108 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1047 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1109 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1048
1049 locate_dirty_segment(sbi, old_cursegno); 1110 locate_dirty_segment(sbi, old_cursegno);
1050 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
1051 1111
1052 mutex_unlock(&sit_i->sentry_lock); 1112 mutex_unlock(&sit_i->sentry_lock);
1053 mutex_unlock(&curseg->curseg_mutex); 1113 mutex_unlock(&curseg->curseg_mutex);
1054} 1114}
1055 1115
1116static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1117 struct page *page, enum page_type type)
1118{
1119 enum page_type btype = PAGE_TYPE_OF_BIO(type);
1120 struct f2fs_bio_info *io = &sbi->write_io[btype];
1121 struct bio_vec *bvec;
1122 int i;
1123
1124 down_read(&io->io_rwsem);
1125 if (!io->bio)
1126 goto out;
1127
1128 bio_for_each_segment_all(bvec, io->bio, i) {
1129 if (page == bvec->bv_page) {
1130 up_read(&io->io_rwsem);
1131 return true;
1132 }
1133 }
1134
1135out:
1136 up_read(&io->io_rwsem);
1137 return false;
1138}
1139
1056void f2fs_wait_on_page_writeback(struct page *page, 1140void f2fs_wait_on_page_writeback(struct page *page,
1057 enum page_type type) 1141 enum page_type type)
1058{ 1142{
1059 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1143 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1060 if (PageWriteback(page)) { 1144 if (PageWriteback(page)) {
1061 f2fs_submit_merged_bio(sbi, type, WRITE); 1145 if (is_merged_page(sbi, page, type))
1146 f2fs_submit_merged_bio(sbi, type, WRITE);
1062 wait_on_page_writeback(page); 1147 wait_on_page_writeback(page);
1063 } 1148 }
1064} 1149}
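Previously f2fs_wait_on_page_writeback() forced the merged write bio out unconditionally before waiting; with is_merged_page() it submits the pending bio only when the page being waited on is actually part of it, so unrelated waits no longer break up a batch. A small user-space analogue of that "flush only if the item is in the pending batch" rule follows; the batch structure and names are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

#define BATCH_MAX 8

struct write_batch {
	const void *pages[BATCH_MAX];
	int nr;
};

/* analogue of is_merged_page(): is this page queued in the pending batch? */
static bool batch_contains(const struct write_batch *b, const void *page)
{
	for (int i = 0; i < b->nr; i++)
		if (b->pages[i] == page)
			return true;
	return false;
}

static void submit_batch(struct write_batch *b)
{
	printf("submitting %d queued pages\n", b->nr);
	b->nr = 0;
}

/*
 * Analogue of f2fs_wait_on_page_writeback(): only force-submit the pending
 * batch when the page we are about to wait on is actually part of it.
 */
static void wait_on_page(struct write_batch *b, const void *page)
{
	if (batch_contains(b, page))
		submit_batch(b);
	/* ... then wait for writeback on 'page' to finish (omitted) ... */
}

int main(void)
{
	int a, c;
	struct write_batch b = { .pages = { &a }, .nr = 1 };

	wait_on_page(&b, &c);	/* not queued: no needless submit */
	wait_on_page(&b, &a);	/* queued: submit so the wait can finish */
	return 0;
}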
@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1167 ns->ofs_in_node = 0; 1252 ns->ofs_in_node = 0;
1168 } 1253 }
1169 } else { 1254 } else {
1170 if (restore_node_summary(sbi, segno, sum)) { 1255 int err;
1256
1257 err = restore_node_summary(sbi, segno, sum);
1258 if (err) {
1171 f2fs_put_page(new, 1); 1259 f2fs_put_page(new, 1);
1172 return -EINVAL; 1260 return err;
1173 } 1261 }
1174 } 1262 }
1175 } 1263 }
@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1190static int restore_curseg_summaries(struct f2fs_sb_info *sbi) 1278static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1191{ 1279{
1192 int type = CURSEG_HOT_DATA; 1280 int type = CURSEG_HOT_DATA;
1281 int err;
1193 1282
1194 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { 1283 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1195 /* restore for compacted data summary */ 1284 /* restore for compacted data summary */
@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1198 type = CURSEG_HOT_NODE; 1287 type = CURSEG_HOT_NODE;
1199 } 1288 }
1200 1289
1201 for (; type <= CURSEG_COLD_NODE; type++) 1290 for (; type <= CURSEG_COLD_NODE; type++) {
1202 if (read_normal_summaries(sbi, type)) 1291 err = read_normal_summaries(sbi, type);
1203 return -EINVAL; 1292 if (err)
1293 return err;
1294 }
1295
1204 return 0; 1296 return 0;
1205} 1297}
1206 1298
@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1583 return restore_curseg_summaries(sbi); 1675 return restore_curseg_summaries(sbi);
1584} 1676}
1585 1677
1586static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
1587{
1588 struct address_space *mapping = META_MAPPING(sbi);
1589 struct page *page;
1590 block_t blk_addr, prev_blk_addr = 0;
1591 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1592 int blkno = start;
1593 struct f2fs_io_info fio = {
1594 .type = META,
1595 .rw = READ_SYNC | REQ_META | REQ_PRIO
1596 };
1597
1598 for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
1599
1600 blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
1601
1602 if (blkno != start && prev_blk_addr + 1 != blk_addr)
1603 break;
1604 prev_blk_addr = blk_addr;
1605repeat:
1606 page = grab_cache_page(mapping, blk_addr);
1607 if (!page) {
1608 cond_resched();
1609 goto repeat;
1610 }
1611 if (PageUptodate(page)) {
1612 mark_page_accessed(page);
1613 f2fs_put_page(page, 1);
1614 continue;
1615 }
1616
1617 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
1618
1619 mark_page_accessed(page);
1620 f2fs_put_page(page, 0);
1621 }
1622
1623 f2fs_submit_merged_bio(sbi, META, READ);
1624 return blkno - start;
1625}
1626
1627static void build_sit_entries(struct f2fs_sb_info *sbi) 1678static void build_sit_entries(struct f2fs_sb_info *sbi)
1628{ 1679{
1629 struct sit_info *sit_i = SIT_I(sbi); 1680 struct sit_info *sit_i = SIT_I(sbi);
@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
1635 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); 1686 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1636 1687
1637 do { 1688 do {
1638 readed = ra_sit_pages(sbi, start_blk, nrpages); 1689 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1639 1690
1640 start = start_blk * sit_i->sents_per_block; 1691 start = start_blk * sit_i->sents_per_block;
1641 end = (start_blk + readed) * sit_i->sents_per_block; 1692 end = (start_blk + readed) * sit_i->sents_per_block;
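ra_sit_pages() is removed in favour of the shared ra_meta_pages() helper (apparently introduced elsewhere in this series, in checkpoint.c), so SIT, NAT and checkpoint readahead go through one path. The stopping rule stays the same: keep reading ahead while the translated block addresses remain contiguous. A stand-alone sketch of that rule, with a made-up address map:

#include <stdio.h>

/*
 * Fake "logical meta block -> device block" mapping; the jump in the device
 * address between index 2 and 3 should stop the readahead batch early.
 */
static unsigned int blk_of(unsigned int index)
{
	static const unsigned int map[] = { 100, 101, 102, 200, 201 };
	return map[index];
}

/*
 * Read ahead up to nrpages meta blocks starting at 'start', but stop as soon
 * as the device addresses stop being contiguous (the rule ra_sit_pages used).
 */
static int ra_contiguous(unsigned int start, unsigned int nrpages, unsigned int total)
{
	unsigned int prev = 0, blkno;

	for (blkno = start; blkno < start + nrpages && blkno < total; blkno++) {
		unsigned int addr = blk_of(blkno);

		if (blkno != start && prev + 1 != addr)
			break;
		prev = addr;
		printf("queue read of device block %u\n", addr);
	}
	return blkno - start;	/* how many were actually issued */
}

int main(void)
{
	printf("read ahead %d pages\n", ra_contiguous(0, 5, 5));	/* stops after 3 */
	return 0;
}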
@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1781{ 1832{
1782 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1833 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1783 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1834 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1835 dev_t dev = sbi->sb->s_bdev->bd_dev;
1784 struct f2fs_sm_info *sm_info; 1836 struct f2fs_sm_info *sm_info;
1785 int err; 1837 int err;
1786 1838
@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1799 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); 1851 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1800 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1852 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1801 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1853 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1802 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; 1854 sm_info->rec_prefree_segments = sm_info->main_segments *
1855 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
1803 sm_info->ipu_policy = F2FS_IPU_DISABLE; 1856 sm_info->ipu_policy = F2FS_IPU_DISABLE;
1804 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; 1857 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
1805 1858
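rec_prefree_segments switches from a fixed count (100 segments) to a percentage of the main area, with DEF_RECLAIM_PREFREE_SEGMENTS redefined to 5 in segment.h further down. The computation is plain integer percentage arithmetic; a quick check with an illustrative volume size (2MB segments assumed):

#include <stdio.h>

#define DEF_RECLAIM_PREFREE_SEGMENTS 5	/* 5% over total segments */

int main(void)
{
	unsigned int main_segments = 2048;	/* illustrative: ~4GB with 2MB segments */
	unsigned int rec_prefree =
		main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100;

	printf("reclaim prefree threshold: %u segments\n", rec_prefree);	/* 102 */
	return 0;
}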
@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1807 sm_info->nr_discards = 0; 1860 sm_info->nr_discards = 0;
1808 sm_info->max_discards = 0; 1861 sm_info->max_discards = 0;
1809 1862
1863 if (test_opt(sbi, FLUSH_MERGE)) {
1864 spin_lock_init(&sm_info->issue_lock);
1865 init_waitqueue_head(&sm_info->flush_wait_queue);
1866
1867 sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
1868 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
1869 if (IS_ERR(sm_info->f2fs_issue_flush))
1870 return PTR_ERR(sm_info->f2fs_issue_flush);
1871 }
1872
1810 err = build_sit_info(sbi); 1873 err = build_sit_info(sbi);
1811 if (err) 1874 if (err)
1812 return err; 1875 return err;
@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1915 struct f2fs_sm_info *sm_info = SM_I(sbi); 1978 struct f2fs_sm_info *sm_info = SM_I(sbi);
1916 if (!sm_info) 1979 if (!sm_info)
1917 return; 1980 return;
1981 if (sm_info->f2fs_issue_flush)
1982 kthread_stop(sm_info->f2fs_issue_flush);
1918 destroy_dirty_segmap(sbi); 1983 destroy_dirty_segmap(sbi);
1919 destroy_curseg(sbi); 1984 destroy_curseg(sbi);
1920 destroy_free_segmap(sbi); 1985 destroy_free_segmap(sbi);
@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1926int __init create_segment_manager_caches(void) 1991int __init create_segment_manager_caches(void)
1927{ 1992{
1928 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 1993 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1929 sizeof(struct discard_entry), NULL); 1994 sizeof(struct discard_entry));
1930 if (!discard_entry_slab) 1995 if (!discard_entry_slab)
1931 return -ENOMEM; 1996 return -ENOMEM;
1997 flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
1998 sizeof(struct flush_cmd));
1999 if (!flush_cmd_slab) {
2000 kmem_cache_destroy(discard_entry_slab);
2001 return -ENOMEM;
2002 }
1932 return 0; 2003 return 0;
1933} 2004}
1934 2005
1935void destroy_segment_manager_caches(void) 2006void destroy_segment_manager_caches(void)
1936{ 2007{
1937 kmem_cache_destroy(discard_entry_slab); 2008 kmem_cache_destroy(discard_entry_slab);
2009 kmem_cache_destroy(flush_cmd_slab);
1938} 2010}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5731682d7516..7091204680f4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,7 +14,7 @@
14#define NULL_SEGNO ((unsigned int)(~0)) 14#define NULL_SEGNO ((unsigned int)(~0))
15#define NULL_SECNO ((unsigned int)(~0)) 15#define NULL_SECNO ((unsigned int)(~0))
16 16
17#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ 17#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
18 18
19/* L: Logical segment # in volume, R: Relative segment # in main area */ 19/* L: Logical segment # in volume, R: Relative segment # in main area */
20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
@@ -57,6 +57,9 @@
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr) 57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ 58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) 59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
61 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
62
60#define GET_SEGNO(sbi, blk_addr) \ 63#define GET_SEGNO(sbi, blk_addr) \
61 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ 64 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
62 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ 65 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
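GET_BLKOFF_FROM_SEG0 centralizes the "& (blocks_per_seg - 1)" masking that update_sit_entry(), recover_data_page() and rewrite_node_page() used to open-code. The mask is equivalent to a modulo only because blocks_per_seg is a power of two (512 in f2fs); a tiny stand-alone check of that identity with illustrative addresses:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int blocks_per_seg = 512;		/* power of two, as in f2fs */
	unsigned int seg0_blkaddr = 1024;		/* illustrative base address */
	unsigned int blk_addr = 1024 + 3 * 512 + 37;	/* 3 segments + 37 blocks in */

	unsigned int off_in_seg0 = blk_addr - seg0_blkaddr;
	unsigned int segno  = off_in_seg0 >> 9;			  /* log2(512) */
	unsigned int blkoff = off_in_seg0 & (blocks_per_seg - 1); /* GET_BLKOFF_FROM_SEG0 */

	assert(blkoff == off_in_seg0 % blocks_per_seg);
	printf("segment %u, block offset %u\n", segno, blkoff);	/* 3, 37 */
	return 0;
}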
@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
377 380
378static inline block_t written_block_count(struct f2fs_sb_info *sbi) 381static inline block_t written_block_count(struct f2fs_sb_info *sbi)
379{ 382{
380 struct sit_info *sit_i = SIT_I(sbi); 383 return SIT_I(sbi)->written_valid_blocks;
381 block_t vblocks;
382
383 mutex_lock(&sit_i->sentry_lock);
384 vblocks = sit_i->written_valid_blocks;
385 mutex_unlock(&sit_i->sentry_lock);
386
387 return vblocks;
388} 384}
389 385
390static inline unsigned int free_segments(struct f2fs_sb_info *sbi) 386static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
391{ 387{
392 struct free_segmap_info *free_i = FREE_I(sbi); 388 return FREE_I(sbi)->free_segments;
393 unsigned int free_segs;
394
395 read_lock(&free_i->segmap_lock);
396 free_segs = free_i->free_segments;
397 read_unlock(&free_i->segmap_lock);
398
399 return free_segs;
400} 389}
401 390
402static inline int reserved_segments(struct f2fs_sb_info *sbi) 391static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
406 395
407static inline unsigned int free_sections(struct f2fs_sb_info *sbi) 396static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
408{ 397{
409 struct free_segmap_info *free_i = FREE_I(sbi); 398 return FREE_I(sbi)->free_sections;
410 unsigned int free_secs;
411
412 read_lock(&free_i->segmap_lock);
413 free_secs = free_i->free_sections;
414 read_unlock(&free_i->segmap_lock);
415
416 return free_secs;
417} 399}
418 400
419static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) 401static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
682 struct request_queue *q = bdev_get_queue(bdev); 664 struct request_queue *q = bdev_get_queue(bdev);
683 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); 665 return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
684} 666}
667
668/*
669 * It is very important to gather dirty pages and write at once, so that we can
670 * submit a big bio without interfering other data writes.
671 * By default, 512 pages for directory data,
672 * 512 pages (2MB) * 3 for three types of nodes, and
673 * max_bio_blocks for meta are set.
674 */
675static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
676{
677 if (type == DATA)
678 return sbi->blocks_per_seg;
679 else if (type == NODE)
680 return 3 * sbi->blocks_per_seg;
681 else if (type == META)
682 return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
683 else
684 return 0;
685}
686
687/*
688 * When writing pages, it'd better align nr_to_write for segment size.
689 */
690static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
691 struct writeback_control *wbc)
692{
693 long nr_to_write, desired;
694
695 if (wbc->sync_mode != WB_SYNC_NONE)
696 return 0;
697
698 nr_to_write = wbc->nr_to_write;
699
700 if (type == DATA)
701 desired = 4096;
702 else if (type == NODE)
703 desired = 3 * max_hw_blocks(sbi);
704 else
705 desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
706
707 wbc->nr_to_write = desired;
708 return desired - nr_to_write;
709}
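nr_pages_to_write() inflates wbc->nr_to_write to a per-type batch for background (WB_SYNC_NONE) writeback and returns the padding it added, so the caller can subtract that padding again once the pages are out. A minimal user-space model of the adjust-and-repay pattern; the batch size and request size are illustrative.

#include <stdbool.h>
#include <stdio.h>

struct wbc_model {
	long nr_to_write;	/* pages the flusher asked for */
	bool sync;		/* WB_SYNC_ALL analogue */
};

/*
 * Inflate nr_to_write to a full batch for background writeback and return
 * the padding that was added (0 for sync writeback).
 */
static long pages_to_write(struct wbc_model *wbc, long desired_batch)
{
	long requested;

	if (wbc->sync)
		return 0;

	requested = wbc->nr_to_write;
	wbc->nr_to_write = desired_batch;
	return desired_batch - requested;
}

int main(void)
{
	struct wbc_model wbc = { .nr_to_write = 1000, .sync = false };
	long padding = pages_to_write(&wbc, 4096);	/* e.g. a DATA-sized batch */

	/* ... write up to wbc.nr_to_write pages here ... */

	wbc.nr_to_write -= padding;	/* repay the padding afterwards */
	printf("budget inflated by %ld pages; %ld left after repaying it\n",
	       padding, wbc.nr_to_write);
	return 0;
}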
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 856bdf994c0a..c756923a7302 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,6 +51,7 @@ enum {
51 Opt_disable_ext_identify, 51 Opt_disable_ext_identify,
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge,
54 Opt_err, 55 Opt_err,
55}; 56};
56 57
@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = {
67 {Opt_disable_ext_identify, "disable_ext_identify"}, 68 {Opt_disable_ext_identify, "disable_ext_identify"},
68 {Opt_inline_xattr, "inline_xattr"}, 69 {Opt_inline_xattr, "inline_xattr"},
69 {Opt_inline_data, "inline_data"}, 70 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"},
70 {Opt_err, NULL}, 72 {Opt_err, NULL},
71}; 73};
72 74
@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = {
74enum { 76enum {
75 GC_THREAD, /* struct f2fs_gc_thread */ 77 GC_THREAD, /* struct f2fs_gc_thread */
76 SM_INFO, /* struct f2fs_sm_info */ 78 SM_INFO, /* struct f2fs_sm_info */
79 NM_INFO, /* struct f2fs_nm_info */
77 F2FS_SBI, /* struct f2fs_sb_info */ 80 F2FS_SBI, /* struct f2fs_sb_info */
78}; 81};
79 82
@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
92 return (unsigned char *)sbi->gc_thread; 95 return (unsigned char *)sbi->gc_thread;
93 else if (struct_type == SM_INFO) 96 else if (struct_type == SM_INFO)
94 return (unsigned char *)SM_I(sbi); 97 return (unsigned char *)SM_I(sbi);
98 else if (struct_type == NM_INFO)
99 return (unsigned char *)NM_I(sbi);
95 else if (struct_type == F2FS_SBI) 100 else if (struct_type == F2FS_SBI)
96 return (unsigned char *)sbi; 101 return (unsigned char *)sbi;
97 return NULL; 102 return NULL;
@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
183F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 188F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
184F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 189F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
185F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 190F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
191F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
186F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); 192F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
193F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
187 194
188#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 195#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
189static struct attribute *f2fs_attrs[] = { 196static struct attribute *f2fs_attrs[] = {
@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = {
196 ATTR_LIST(ipu_policy), 203 ATTR_LIST(ipu_policy),
197 ATTR_LIST(min_ipu_util), 204 ATTR_LIST(min_ipu_util),
198 ATTR_LIST(max_victim_search), 205 ATTR_LIST(max_victim_search),
206 ATTR_LIST(dir_level),
207 ATTR_LIST(ram_thresh),
199 NULL, 208 NULL,
200}; 209};
201 210
@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
256 265
257 if (!name) 266 if (!name)
258 return -ENOMEM; 267 return -ENOMEM;
259 if (!strncmp(name, "on", 2)) 268 if (strlen(name) == 2 && !strncmp(name, "on", 2))
260 set_opt(sbi, BG_GC); 269 set_opt(sbi, BG_GC);
261 else if (!strncmp(name, "off", 3)) 270 else if (strlen(name) == 3 && !strncmp(name, "off", 3))
262 clear_opt(sbi, BG_GC); 271 clear_opt(sbi, BG_GC);
263 else { 272 else {
264 kfree(name); 273 kfree(name);
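The background_gc parser now insists on an exact-length match: the old bare strncmp() accepted any argument that merely starts with "on" or "off" (for example background_gc=online). A quick stand-alone demonstration of the difference; only the option strings mirror parse_options, the rest is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool old_match_on(const char *name)
{
	return !strncmp(name, "on", 2);				/* prefix match only */
}

static bool new_match_on(const char *name)
{
	return strlen(name) == 2 && !strncmp(name, "on", 2);	/* exact match */
}

int main(void)
{
	const char *args[] = { "on", "online", "off" };

	for (int i = 0; i < 3; i++)
		printf("%-7s old:%d new:%d\n", args[i],
		       old_match_on(args[i]), new_match_on(args[i]));
	/* "online" matched the old test but is rejected by the new one */
	return 0;
}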
@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options)
327 case Opt_inline_data: 336 case Opt_inline_data:
328 set_opt(sbi, INLINE_DATA); 337 set_opt(sbi, INLINE_DATA);
329 break; 338 break;
339 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE);
341 break;
330 default: 342 default:
331 f2fs_msg(sb, KERN_ERR, 343 f2fs_msg(sb, KERN_ERR,
332 "Unrecognized mount option \"%s\" or missing value", 344 "Unrecognized mount option \"%s\" or missing value",
@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
353 fi->i_current_depth = 1; 365 fi->i_current_depth = 1;
354 fi->i_advise = 0; 366 fi->i_advise = 0;
355 rwlock_init(&fi->ext.ext_lock); 367 rwlock_init(&fi->ext.ext_lock);
368 init_rwsem(&fi->i_sem);
356 369
357 set_inode_flag(fi, FI_NEW_INODE); 370 set_inode_flag(fi, FI_NEW_INODE);
358 371
359 if (test_opt(F2FS_SB(sb), INLINE_XATTR)) 372 if (test_opt(F2FS_SB(sb), INLINE_XATTR))
360 set_inode_flag(fi, FI_INLINE_XATTR); 373 set_inode_flag(fi, FI_INLINE_XATTR);
361 374
375 /* Will be used by directory only */
376 fi->i_dir_level = F2FS_SB(sb)->dir_level;
377
362 return &fi->vfs_inode; 378 return &fi->vfs_inode;
363} 379}
364 380
@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
526 seq_puts(seq, ",disable_ext_identify"); 542 seq_puts(seq, ",disable_ext_identify");
527 if (test_opt(sbi, INLINE_DATA)) 543 if (test_opt(sbi, INLINE_DATA))
528 seq_puts(seq, ",inline_data"); 544 seq_puts(seq, ",inline_data");
545 if (test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge");
529 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 547 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
530 548
531 return 0; 549 return 0;
@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
539 le32_to_cpu(sbi->raw_super->segment_count_main); 557 le32_to_cpu(sbi->raw_super->segment_count_main);
540 int i; 558 int i;
541 559
560 seq_puts(seq, "format: segment_type|valid_blocks\n"
561 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
562
542 for (i = 0; i < total_segs; i++) { 563 for (i = 0; i < total_segs; i++) {
543 seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); 564 struct seg_entry *se = get_seg_entry(sbi, i);
544 if (i != 0 && (i % 10) == 0) 565
545 seq_puts(seq, "\n"); 566 if ((i % 10) == 0)
567 seq_printf(seq, "%-5d", i);
568 seq_printf(seq, "%d|%-3u", se->type,
569 get_valid_blocks(sbi, i, 1));
570 if ((i % 10) == 9 || i == (total_segs - 1))
571 seq_putc(seq, '\n');
546 else 572 else
547 seq_puts(seq, " "); 573 seq_putc(seq, ' ');
548 } 574 }
575
549 return 0; 576 return 0;
550} 577}
551 578
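segment_info_seq_show() now emits a two-line header, a row label every ten segments, and a segment_type|valid_blocks pair per entry instead of a bare block count. A user-space loop reproducing that layout over made-up per-segment data:

#include <stdio.h>

int main(void)
{
	int total_segs = 23;	/* illustrative */

	printf("format: segment_type|valid_blocks\n"
	       "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");

	for (int i = 0; i < total_segs; i++) {
		int type = i % 6;			/* fake seg_entry->type */
		unsigned int valid = (i * 37) % 512;	/* fake valid block count */

		if ((i % 10) == 0)
			printf("%-5d", i);	/* row label every ten segments */
		printf("%d|%-3u", type, valid);
		if ((i % 10) == 9 || i == total_segs - 1)
			putchar('\n');
		else
			putchar(' ');
	}
	return 0;
}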
@@ -640,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
640 667
641 if (unlikely(ino < F2FS_ROOT_INO(sbi))) 668 if (unlikely(ino < F2FS_ROOT_INO(sbi)))
642 return ERR_PTR(-ESTALE); 669 return ERR_PTR(-ESTALE);
670 if (unlikely(ino >= NM_I(sbi)->max_nid))
671 return ERR_PTR(-ESTALE);
643 672
644 /* 673 /*
645 * f2fs_iget isn't quite right if the inode is currently unallocated! 674 * f2fs_iget isn't quite right if the inode is currently unallocated!
@@ -787,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
787 816
788 for (i = 0; i < NR_COUNT_TYPE; i++) 817 for (i = 0; i < NR_COUNT_TYPE; i++)
789 atomic_set(&sbi->nr_pages[i], 0); 818 atomic_set(&sbi->nr_pages[i], 0);
819
820 sbi->dir_level = DEF_DIR_LEVEL;
790} 821}
791 822
792/* 823/*
@@ -898,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
898 sbi->por_doing = false; 929 sbi->por_doing = false;
899 spin_lock_init(&sbi->stat_lock); 930 spin_lock_init(&sbi->stat_lock);
900 931
901 mutex_init(&sbi->read_io.io_mutex); 932 init_rwsem(&sbi->read_io.io_rwsem);
902 sbi->read_io.sbi = sbi; 933 sbi->read_io.sbi = sbi;
903 sbi->read_io.bio = NULL; 934 sbi->read_io.bio = NULL;
904 for (i = 0; i < NR_PAGE_TYPE; i++) { 935 for (i = 0; i < NR_PAGE_TYPE; i++) {
905 mutex_init(&sbi->write_io[i].io_mutex); 936 init_rwsem(&sbi->write_io[i].io_rwsem);
906 sbi->write_io[i].sbi = sbi; 937 sbi->write_io[i].sbi = sbi;
907 sbi->write_io[i].bio = NULL; 938 sbi->write_io[i].bio = NULL;
908 } 939 }
@@ -991,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
991 goto free_root_inode; 1022 goto free_root_inode;
992 } 1023 }
993 1024
994 /* recover fsynced data */
995 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
996 err = recover_fsync_data(sbi);
997 if (err)
998 f2fs_msg(sb, KERN_ERR,
999 "Cannot recover all fsync data errno=%ld", err);
1000 }
1001
1002 /*
1003 * If filesystem is not mounted as read-only then
1004 * do start the gc_thread.
1005 */
1006 if (!(sb->s_flags & MS_RDONLY)) {
1007 /* After POR, we can run background GC thread.*/
1008 err = start_gc_thread(sbi);
1009 if (err)
1010 goto free_gc;
1011 }
1012
1013 err = f2fs_build_stats(sbi); 1025 err = f2fs_build_stats(sbi);
1014 if (err) 1026 if (err)
1015 goto free_gc; 1027 goto free_root_inode;
1016 1028
1017 if (f2fs_proc_root) 1029 if (f2fs_proc_root)
1018 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1030 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1034,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, 1046 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
1035 "%s", sb->s_id); 1047 "%s", sb->s_id);
1036 if (err) 1048 if (err)
1037 goto fail; 1049 goto free_proc;
1050
1051 /* recover fsynced data */
1052 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1053 err = recover_fsync_data(sbi);
1054 if (err)
1055 f2fs_msg(sb, KERN_ERR,
1056 "Cannot recover all fsync data errno=%ld", err);
1057 }
1038 1058
1059 /*
1060 * If filesystem is not mounted as read-only then
1061 * do start the gc_thread.
1062 */
1063 if (!(sb->s_flags & MS_RDONLY)) {
1064 /* After POR, we can run background GC thread.*/
1065 err = start_gc_thread(sbi);
1066 if (err)
1067 goto free_kobj;
1068 }
1039 return 0; 1069 return 0;
1040fail: 1070
1071free_kobj:
1072 kobject_del(&sbi->s_kobj);
1073free_proc:
1041 if (sbi->s_proc) { 1074 if (sbi->s_proc) {
1042 remove_proc_entry("segment_info", sbi->s_proc); 1075 remove_proc_entry("segment_info", sbi->s_proc);
1043 remove_proc_entry(sb->s_id, f2fs_proc_root); 1076 remove_proc_entry(sb->s_id, f2fs_proc_root);
1044 } 1077 }
1045 f2fs_destroy_stats(sbi); 1078 f2fs_destroy_stats(sbi);
1046free_gc:
1047 stop_gc_thread(sbi);
1048free_root_inode: 1079free_root_inode:
1049 dput(sb->s_root); 1080 dput(sb->s_root);
1050 sb->s_root = NULL; 1081 sb->s_root = NULL;
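The tail of f2fs_fill_super() is reordered so the proc entries and the sysfs kobject exist before roll-forward recovery and the GC thread start, and the old catch-all fail label becomes staged free_kobj/free_proc labels that undo only what has already been set up. That is the usual goto-unwinding idiom; a minimal self-contained sketch with hypothetical setup steps:

#include <stdio.h>

static int setup_a(void) { return 0; }
static int setup_b(void) { return 0; }
static int setup_c(void) { return -1; }		/* pretend the last step fails */
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

/* Each label undoes only the steps that already succeeded, in reverse order. */
static int fill_super_like(void)
{
	int err;

	err = setup_a();
	if (err)
		goto out;
	err = setup_b();
	if (err)
		goto free_a;
	err = setup_c();
	if (err)
		goto free_b;
	return 0;

free_b:
	undo_b();
free_a:
	undo_a();
out:
	return err;
}

int main(void)
{
	printf("mount result: %d\n", fill_super_like());
	return 0;
}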
@@ -1084,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs");
1084static int __init init_inodecache(void) 1115static int __init init_inodecache(void)
1085{ 1116{
1086 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 1117 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
1087 sizeof(struct f2fs_inode_info), NULL); 1118 sizeof(struct f2fs_inode_info));
1088 if (!f2fs_inode_cachep) 1119 if (!f2fs_inode_cachep)
1089 return -ENOMEM; 1120 return -ENOMEM;
1090 return 0; 1121 return 0;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 89d0422a91a8..503c2451131e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
275 275
276 inline_size = inline_xattr_size(inode); 276 inline_size = inline_xattr_size(inode);
277 277
278 txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); 278 txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
279 if (!txattr_addr) 279 if (!txattr_addr)
280 return NULL; 280 return NULL;
281 281
@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
407 if (name == NULL) 407 if (name == NULL)
408 return -EINVAL; 408 return -EINVAL;
409 name_len = strlen(name); 409 name_len = strlen(name);
410 if (name_len > F2FS_NAME_LEN)
411 return -ERANGE;
410 412
411 base_addr = read_all_xattrs(inode, NULL); 413 base_addr = read_all_xattrs(inode, NULL);
412 if (!base_addr) 414 if (!base_addr)
@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
590 f2fs_balance_fs(sbi); 592 f2fs_balance_fs(sbi);
591 593
592 f2fs_lock_op(sbi); 594 f2fs_lock_op(sbi);
595 /* protect xattr_ver */
596 down_write(&F2FS_I(inode)->i_sem);
593 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); 597 err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
598 up_write(&F2FS_I(inode)->i_sem);
594 f2fs_unlock_op(sbi); 599 f2fs_unlock_op(sbi);
595 600
596 return err; 601 return err;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 23e363f38302..13b691a8a7d2 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -569,7 +569,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev,
569 569
570 return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); 570 return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
571} 571}
572static DEVICE_ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL); 572static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL);
573 573
574static ssize_t cuse_class_abort_store(struct device *dev, 574static ssize_t cuse_class_abort_store(struct device *dev,
575 struct device_attribute *attr, 575 struct device_attribute *attr,
@@ -580,7 +580,7 @@ static ssize_t cuse_class_abort_store(struct device *dev,
580 fuse_abort_conn(&cc->fc); 580 fuse_abort_conn(&cc->fc);
581 return count; 581 return count;
582} 582}
583static DEVICE_ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store); 583static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
584 584
585static struct attribute *cuse_class_dev_attrs[] = { 585static struct attribute *cuse_class_dev_attrs[] = {
586 &dev_attr_waiting.attr, 586 &dev_attr_waiting.attr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 65df7d8be4f5..48992cac714b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2117,6 +2117,7 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2117static const struct vm_operations_struct fuse_file_vm_ops = { 2117static const struct vm_operations_struct fuse_file_vm_ops = {
2118 .close = fuse_vma_close, 2118 .close = fuse_vma_close,
2119 .fault = filemap_fault, 2119 .fault = filemap_fault,
2120 .map_pages = filemap_map_pages,
2120 .page_mkwrite = fuse_page_mkwrite, 2121 .page_mkwrite = fuse_page_mkwrite,
2121 .remap_pages = generic_file_remap_pages, 2122 .remap_pages = generic_file_remap_pages,
2122}; 2123};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6c794085abac..80d67253623c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -494,6 +494,7 @@ out:
494 494
495static const struct vm_operations_struct gfs2_vm_ops = { 495static const struct vm_operations_struct gfs2_vm_ops = {
496 .fault = filemap_fault, 496 .fault = filemap_fault,
497 .map_pages = filemap_map_pages,
497 .page_mkwrite = gfs2_page_mkwrite, 498 .page_mkwrite = gfs2_page_mkwrite,
498 .remap_pages = generic_file_remap_pages, 499 .remap_pages = generic_file_remap_pages,
499}; 500};
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 6af66ee56390..4556ce1af5b0 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -93,7 +93,7 @@ static void init_once(void *foo)
93 inode_init_once(&ei->vfs_inode); 93 inode_init_once(&ei->vfs_inode);
94} 94}
95 95
96static int init_inodecache(void) 96static int __init init_inodecache(void)
97{ 97{
98 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", 98 isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
99 sizeof(struct iso_inode_info), 99 sizeof(struct iso_inode_info),
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 16a5047903a6..406d9cc84ba8 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
33 unsigned char *cpage_out, 33 unsigned char *cpage_out,
34 uint32_t *sourcelen, uint32_t *dstlen) 34 uint32_t *sourcelen, uint32_t *dstlen)
35{ 35{
36 short positions[256]; 36 unsigned short positions[256];
37 int outpos = 0; 37 int outpos = 0;
38 int pos=0; 38 int pos=0;
39 39
@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
74 unsigned char *cpage_out, 74 unsigned char *cpage_out,
75 uint32_t srclen, uint32_t destlen) 75 uint32_t srclen, uint32_t destlen)
76{ 76{
77 short positions[256]; 77 unsigned short positions[256];
78 int outpos = 0; 78 int outpos = 0;
79 int pos=0; 79 int pos=0;
80 80
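positions[] records the last offset at which each byte value occurred; with a signed short, offsets beyond 32767 (possible once the chunk being compressed exceeds 32KB, e.g. with 64KB pages) would wrap negative, which is presumably what the switch to unsigned short addresses. A small demonstration of the wrap with an illustrative offset (typical two's-complement behaviour assumed):

#include <stdio.h>

int main(void)
{
	int offset = 40000;		/* an input offset past 32767 */

	short          s_pos = (short)offset;		/* old table element type */
	unsigned short u_pos = (unsigned short)offset;	/* new table element type */

	printf("stored as signed short:   %d\n", s_pos);	/* -25536 */
	printf("stored as unsigned short: %u\n", u_pos);	/* 40000  */
	return 0;
}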
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index f73991522672..601afd1afddf 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -457,12 +457,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
457 The umask is only applied if there's no default ACL */ 457 The umask is only applied if there's no default ACL */
458 ret = jffs2_init_acl_pre(dir_i, inode, &mode); 458 ret = jffs2_init_acl_pre(dir_i, inode, &mode);
459 if (ret) { 459 if (ret) {
460 make_bad_inode(inode); 460 mutex_unlock(&f->sem);
461 iput(inode); 461 make_bad_inode(inode);
462 return ERR_PTR(ret); 462 iput(inode);
463 return ERR_PTR(ret);
463 } 464 }
464 ret = jffs2_do_new_inode (c, f, mode, ri); 465 ret = jffs2_do_new_inode (c, f, mode, ri);
465 if (ret) { 466 if (ret) {
467 mutex_unlock(&f->sem);
466 make_bad_inode(inode); 468 make_bad_inode(inode);
467 iput(inode); 469 iput(inode);
468 return ERR_PTR(ret); 470 return ERR_PTR(ret);
@@ -479,6 +481,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
479 inode->i_size = 0; 481 inode->i_size = 0;
480 482
481 if (insert_inode_locked(inode) < 0) { 483 if (insert_inode_locked(inode) < 0) {
484 mutex_unlock(&f->sem);
482 make_bad_inode(inode); 485 make_bad_inode(inode);
483 iput(inode); 486 iput(inode);
484 return ERR_PTR(-EINVAL); 487 return ERR_PTR(-EINVAL);
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index e4619b00f7c5..fa35ff79ab35 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
231 uint32_t version; 231 uint32_t version;
232 uint32_t data_crc; 232 uint32_t data_crc;
233 uint32_t partial_crc; 233 uint32_t partial_crc;
234 uint16_t csize; 234 uint32_t csize;
235 uint16_t overlapped; 235 uint16_t overlapped;
236}; 236};
237 237
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 03310721712f..b6bd4affd9ad 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
179 spin_unlock(&c->erase_completion_lock); 179 spin_unlock(&c->erase_completion_lock);
180 180
181 schedule(); 181 schedule();
182 remove_wait_queue(&c->erase_wait, &wait);
182 } else 183 } else
183 spin_unlock(&c->erase_completion_lock); 184 spin_unlock(&c->erase_completion_lock);
184 } else if (ret) 185 } else if (ret)
@@ -211,20 +212,25 @@ out:
211int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, 212int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
212 uint32_t *len, uint32_t sumsize) 213 uint32_t *len, uint32_t sumsize)
213{ 214{
214 int ret = -EAGAIN; 215 int ret;
215 minsize = PAD(minsize); 216 minsize = PAD(minsize);
216 217
217 jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); 218 jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
218 219
219 spin_lock(&c->erase_completion_lock); 220 while (true) {
220 while(ret == -EAGAIN) { 221 spin_lock(&c->erase_completion_lock);
221 ret = jffs2_do_reserve_space(c, minsize, len, sumsize); 222 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
222 if (ret) { 223 if (ret) {
223 jffs2_dbg(1, "%s(): looping, ret is %d\n", 224 jffs2_dbg(1, "%s(): looping, ret is %d\n",
224 __func__, ret); 225 __func__, ret);
225 } 226 }
227 spin_unlock(&c->erase_completion_lock);
228
229 if (ret == -EAGAIN)
230 cond_resched();
231 else
232 break;
226 } 233 }
227 spin_unlock(&c->erase_completion_lock);
228 if (!ret) 234 if (!ret)
229 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); 235 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
230 236
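jffs2_reserve_space_gc() now takes and releases erase_completion_lock once per attempt and yields between -EAGAIN retries, rather than looping on the result while holding the lock. A user-space sketch of that retry shape, substituting a pthread mutex for the spinlock and sched_yield() for cond_resched(); the simulated failures are made up.

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Pretend the first two attempts fail with -EAGAIN, then succeed. */
static int do_reserve_space(void)
{
	static int attempts;
	return ++attempts < 3 ? -EAGAIN : 0;
}

static int reserve_space_gc(void)
{
	int ret;

	while (1) {
		pthread_mutex_lock(&lock);	/* lock held for one attempt only */
		ret = do_reserve_space();
		pthread_mutex_unlock(&lock);

		if (ret == -EAGAIN)
			sched_yield();		/* stand-in for cond_resched() */
		else
			break;
	}
	return ret;
}

int main(void)
{
	printf("reserve returned %d\n", reserve_space_gc());
	return 0;
}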
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 10d6c41aecad..6bf06a07f3e0 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -235,6 +235,7 @@ out_err:
235 if (warned++ == 0) 235 if (warned++ == 0)
236 printk(KERN_WARNING 236 printk(KERN_WARNING
237 "lockd_up: makesock failed, error=%d\n", err); 237 "lockd_up: makesock failed, error=%d\n", err);
238 svc_shutdown_net(serv, net);
238 return err; 239 return err;
239} 240}
240 241
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index c320ac52353e..08b8ea8c353e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -339,7 +339,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
339 if (val) 339 if (val)
340 goto finished; 340 goto finished;
341 341
342 DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n", 342 ncp_dbg(2, "%pd2 not valid, age=%ld, server lookup\n",
343 dentry, NCP_GET_AGE(dentry)); 343 dentry, NCP_GET_AGE(dentry));
344 344
345 len = sizeof(__name); 345 len = sizeof(__name);
@@ -358,7 +358,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
358 res = ncp_obtain_info(server, dir, __name, &(finfo.i)); 358 res = ncp_obtain_info(server, dir, __name, &(finfo.i));
359 } 359 }
360 finfo.volume = finfo.i.volNumber; 360 finfo.volume = finfo.i.volNumber;
361 DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n", 361 ncp_dbg(2, "looked for %pd/%s, res=%d\n",
362 dentry->d_parent, __name, res); 362 dentry->d_parent, __name, res);
363 /* 363 /*
364 * If we didn't find it, or if it has a different dirEntNum to 364 * If we didn't find it, or if it has a different dirEntNum to
@@ -372,14 +372,14 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
372 ncp_new_dentry(dentry); 372 ncp_new_dentry(dentry);
373 val=1; 373 val=1;
374 } else 374 } else
375 DDPRINTK("ncp_lookup_validate: found, but dirEntNum changed\n"); 375 ncp_dbg(2, "found, but dirEntNum changed\n");
376 376
377 ncp_update_inode2(inode, &finfo); 377 ncp_update_inode2(inode, &finfo);
378 mutex_unlock(&inode->i_mutex); 378 mutex_unlock(&inode->i_mutex);
379 } 379 }
380 380
381finished: 381finished:
382 DDPRINTK("ncp_lookup_validate: result=%d\n", val); 382 ncp_dbg(2, "result=%d\n", val);
383 dput(parent); 383 dput(parent);
384 return val; 384 return val;
385} 385}
@@ -453,8 +453,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
453 ctl.page = NULL; 453 ctl.page = NULL;
454 ctl.cache = NULL; 454 ctl.cache = NULL;
455 455
456 DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file, 456 ncp_dbg(2, "reading %pD2, pos=%d\n", file, (int)ctx->pos);
457 (int) ctx->pos);
458 457
459 result = -EIO; 458 result = -EIO;
460 /* Do not generate '.' and '..' when server is dead. */ 459 /* Do not generate '.' and '..' when server is dead. */
@@ -697,8 +696,7 @@ ncp_read_volume_list(struct file *file, struct dir_context *ctx,
697 struct ncp_entry_info entry; 696 struct ncp_entry_info entry;
698 int i; 697 int i;
699 698
700 DPRINTK("ncp_read_volume_list: pos=%ld\n", 699 ncp_dbg(1, "pos=%ld\n", (unsigned long)ctx->pos);
701 (unsigned long) ctx->pos);
702 700
703 for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { 701 for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) {
704 int inval_dentry; 702 int inval_dentry;
@@ -708,12 +706,11 @@ ncp_read_volume_list(struct file *file, struct dir_context *ctx,
708 if (!strlen(info.volume_name)) 706 if (!strlen(info.volume_name))
709 continue; 707 continue;
710 708
711 DPRINTK("ncp_read_volume_list: found vol: %s\n", 709 ncp_dbg(1, "found vol: %s\n", info.volume_name);
712 info.volume_name);
713 710
714 if (ncp_lookup_volume(server, info.volume_name, 711 if (ncp_lookup_volume(server, info.volume_name,
715 &entry.i)) { 712 &entry.i)) {
716 DPRINTK("ncpfs: could not lookup vol %s\n", 713 ncp_dbg(1, "could not lookup vol %s\n",
717 info.volume_name); 714 info.volume_name);
718 continue; 715 continue;
719 } 716 }
@@ -738,14 +735,13 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx,
738 int more; 735 int more;
739 size_t bufsize; 736 size_t bufsize;
740 737
741 DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file, 738 ncp_dbg(1, "%pD2, fpos=%ld\n", file, (unsigned long)ctx->pos);
742 (unsigned long) ctx->pos); 739 ncp_vdbg("init %pD, volnum=%d, dirent=%u\n",
743 PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n", 740 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
744 file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum);
745 741
746 err = ncp_initialize_search(server, dir, &seq); 742 err = ncp_initialize_search(server, dir, &seq);
747 if (err) { 743 if (err) {
748 DPRINTK("ncp_do_readdir: init failed, err=%d\n", err); 744 ncp_dbg(1, "init failed, err=%d\n", err);
749 return; 745 return;
750 } 746 }
751 /* We MUST NOT use server->buffer_size handshaked with server if we are 747 /* We MUST NOT use server->buffer_size handshaked with server if we are
@@ -808,8 +804,7 @@ int ncp_conn_logged_in(struct super_block *sb)
808 goto out; 804 goto out;
809 result = -ENOENT; 805 result = -ENOENT;
810 if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) { 806 if (ncp_get_volume_root(server, __name, &volNumber, &dirEntNum, &DosDirNum)) {
811 PPRINTK("ncp_conn_logged_in: %s not found\n", 807 ncp_vdbg("%s not found\n", server->m.mounted_vol);
812 server->m.mounted_vol);
813 goto out; 808 goto out;
814 } 809 }
815 dent = sb->s_root; 810 dent = sb->s_root;
@@ -822,10 +817,10 @@ int ncp_conn_logged_in(struct super_block *sb)
822 NCP_FINFO(ino)->DosDirNum = DosDirNum; 817 NCP_FINFO(ino)->DosDirNum = DosDirNum;
823 result = 0; 818 result = 0;
824 } else { 819 } else {
825 DPRINTK("ncpfs: sb->s_root->d_inode == NULL!\n"); 820 ncp_dbg(1, "sb->s_root->d_inode == NULL!\n");
826 } 821 }
827 } else { 822 } else {
828 DPRINTK("ncpfs: sb->s_root == NULL!\n"); 823 ncp_dbg(1, "sb->s_root == NULL!\n");
829 } 824 }
830 } else 825 } else
831 result = 0; 826 result = 0;
@@ -846,7 +841,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
846 if (!ncp_conn_valid(server)) 841 if (!ncp_conn_valid(server))
847 goto finished; 842 goto finished;
848 843
849 PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry); 844 ncp_vdbg("server lookup for %pd2\n", dentry);
850 845
851 len = sizeof(__name); 846 len = sizeof(__name);
852 if (ncp_is_server_root(dir)) { 847 if (ncp_is_server_root(dir)) {
@@ -854,15 +849,15 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig
854 dentry->d_name.len, 1); 849 dentry->d_name.len, 1);
855 if (!res) 850 if (!res)
856 res = ncp_lookup_volume(server, __name, &(finfo.i)); 851 res = ncp_lookup_volume(server, __name, &(finfo.i));
857 if (!res) 852 if (!res)
858 ncp_update_known_namespace(server, finfo.i.volNumber, NULL); 853 ncp_update_known_namespace(server, finfo.i.volNumber, NULL);
859 } else { 854 } else {
860 res = ncp_io2vol(server, __name, &len, dentry->d_name.name, 855 res = ncp_io2vol(server, __name, &len, dentry->d_name.name,
861 dentry->d_name.len, !ncp_preserve_case(dir)); 856 dentry->d_name.len, !ncp_preserve_case(dir));
862 if (!res) 857 if (!res)
863 res = ncp_obtain_info(server, dir, __name, &(finfo.i)); 858 res = ncp_obtain_info(server, dir, __name, &(finfo.i));
864 } 859 }
865 PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res); 860 ncp_vdbg("looked for %pd2, res=%d\n", dentry, res);
866 /* 861 /*
867 * If we didn't find an entry, make a negative dentry. 862 * If we didn't find an entry, make a negative dentry.
868 */ 863 */
@@ -886,7 +881,7 @@ add_entry:
886 } 881 }
887 882
888finished: 883finished:
889 PPRINTK("ncp_lookup: result=%d\n", error); 884 ncp_vdbg("result=%d\n", error);
890 return ERR_PTR(error); 885 return ERR_PTR(error);
891} 886}
892 887
@@ -909,7 +904,7 @@ out:
909 return error; 904 return error;
910 905
911out_close: 906out_close:
912 PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry); 907 ncp_vdbg("%pd2 failed, closing file\n", dentry);
913 ncp_close_file(NCP_SERVER(dir), finfo->file_handle); 908 ncp_close_file(NCP_SERVER(dir), finfo->file_handle);
914 goto out; 909 goto out;
915} 910}
@@ -923,7 +918,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
923 int opmode; 918 int opmode;
924 __u8 __name[NCP_MAXPATHLEN + 1]; 919 __u8 __name[NCP_MAXPATHLEN + 1];
925 920
926 PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode); 921 ncp_vdbg("creating %pd2, mode=%hx\n", dentry, mode);
927 922
928 ncp_age_dentry(server, dentry); 923 ncp_age_dentry(server, dentry);
929 len = sizeof(__name); 924 len = sizeof(__name);
@@ -952,7 +947,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
952 error = -ENAMETOOLONG; 947 error = -ENAMETOOLONG;
953 else if (result < 0) 948 else if (result < 0)
954 error = result; 949 error = result;
955 DPRINTK("ncp_create: %pd2 failed\n", dentry); 950 ncp_dbg(1, "%pd2 failed\n", dentry);
956 goto out; 951 goto out;
957 } 952 }
958 opmode = O_WRONLY; 953 opmode = O_WRONLY;
@@ -985,7 +980,7 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
985 int error, len; 980 int error, len;
986 __u8 __name[NCP_MAXPATHLEN + 1]; 981 __u8 __name[NCP_MAXPATHLEN + 1];
987 982
988 DPRINTK("ncp_mkdir: making %pd2\n", dentry); 983 ncp_dbg(1, "making %pd2\n", dentry);
989 984
990 ncp_age_dentry(server, dentry); 985 ncp_age_dentry(server, dentry);
991 len = sizeof(__name); 986 len = sizeof(__name);
@@ -1022,7 +1017,7 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
1022 int error, result, len; 1017 int error, result, len;
1023 __u8 __name[NCP_MAXPATHLEN + 1]; 1018 __u8 __name[NCP_MAXPATHLEN + 1];
1024 1019
1025 DPRINTK("ncp_rmdir: removing %pd2\n", dentry); 1020 ncp_dbg(1, "removing %pd2\n", dentry);
1026 1021
1027 len = sizeof(__name); 1022 len = sizeof(__name);
1028 error = ncp_io2vol(server, __name, &len, dentry->d_name.name, 1023 error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
@@ -1067,13 +1062,13 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
1067 int error; 1062 int error;
1068 1063
1069 server = NCP_SERVER(dir); 1064 server = NCP_SERVER(dir);
1070 DPRINTK("ncp_unlink: unlinking %pd2\n", dentry); 1065 ncp_dbg(1, "unlinking %pd2\n", dentry);
1071 1066
1072 /* 1067 /*
1073 * Check whether to close the file ... 1068 * Check whether to close the file ...
1074 */ 1069 */
1075 if (inode) { 1070 if (inode) {
1076 PPRINTK("ncp_unlink: closing file\n"); 1071 ncp_vdbg("closing file\n");
1077 ncp_make_closed(inode); 1072 ncp_make_closed(inode);
1078 } 1073 }
1079 1074
@@ -1087,7 +1082,7 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry)
1087#endif 1082#endif
1088 switch (error) { 1083 switch (error) {
1089 case 0x00: 1084 case 0x00:
1090 DPRINTK("ncp: removed %pd2\n", dentry); 1085 ncp_dbg(1, "removed %pd2\n", dentry);
1091 break; 1086 break;
1092 case 0x85: 1087 case 0x85:
1093 case 0x8A: 1088 case 0x8A:
@@ -1120,7 +1115,7 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
1120 int old_len, new_len; 1115 int old_len, new_len;
1121 __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; 1116 __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1];
1122 1117
1123 DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry); 1118 ncp_dbg(1, "%pd2 to %pd2\n", old_dentry, new_dentry);
1124 1119
1125 ncp_age_dentry(server, old_dentry); 1120 ncp_age_dentry(server, old_dentry);
1126 ncp_age_dentry(server, new_dentry); 1121 ncp_age_dentry(server, new_dentry);
@@ -1150,8 +1145,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry,
1150#endif 1145#endif
1151 switch (error) { 1146 switch (error) {
1152 case 0x00: 1147 case 0x00:
1153 DPRINTK("ncp renamed %pd -> %pd.\n", 1148 ncp_dbg(1, "renamed %pd -> %pd\n",
1154 old_dentry, new_dentry); 1149 old_dentry, new_dentry);
1155 break; 1150 break;
1156 case 0x9E: 1151 case 0x9E:
1157 error = -ENAMETOOLONG; 1152 error = -ENAMETOOLONG;
@@ -1173,7 +1168,7 @@ static int ncp_mknod(struct inode * dir, struct dentry *dentry,
1173 if (!new_valid_dev(rdev)) 1168 if (!new_valid_dev(rdev))
1174 return -EINVAL; 1169 return -EINVAL;
1175 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { 1170 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
1176 DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode); 1171 ncp_dbg(1, "mode = 0%ho\n", mode);
1177 return ncp_create_new(dir, dentry, mode, rdev, 0); 1172 return ncp_create_new(dir, dentry, mode, rdev, 0);
1178 } 1173 }
1179 return -EPERM; /* Strange, but true */ 1174 return -EPERM; /* Strange, but true */
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 8f5074e1ecb9..77640a8bfb87 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -6,6 +6,8 @@
6 * 6 *
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
9#include <asm/uaccess.h> 11#include <asm/uaccess.h>
10 12
11#include <linux/time.h> 13#include <linux/time.h>
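Defining pr_fmt ahead of the includes is what lets the pr_err/pr_info calls introduced throughout fs/ncpfs drop their hand-written "ncp_xxx:" prefixes: the kernel's pr_* macros paste pr_fmt() in front of every format string. A user-space imitation of that layering (KBUILD_MODNAME is faked here, and the GNU-style variadic macros match what kernel code relies on):

#include <stdio.h>

/* Fake what the kernel build system normally provides. */
#define KBUILD_MODNAME "ncpfs"

/* In the kernel this must precede the printk.h include; here it only has to
 * precede the call sites below. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("%s: got NULL inode\n", "ncp_make_open");
	/* prints: ncpfs: ncp_make_open: got NULL inode */
	return 0;
}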
@@ -34,11 +36,11 @@ int ncp_make_open(struct inode *inode, int right)
34 36
35 error = -EINVAL; 37 error = -EINVAL;
36 if (!inode) { 38 if (!inode) {
37 printk(KERN_ERR "ncp_make_open: got NULL inode\n"); 39 pr_err("%s: got NULL inode\n", __func__);
38 goto out; 40 goto out;
39 } 41 }
40 42
41 DPRINTK("ncp_make_open: opened=%d, volume # %u, dir entry # %u\n", 43 ncp_dbg(1, "opened=%d, volume # %u, dir entry # %u\n",
42 atomic_read(&NCP_FINFO(inode)->opened), 44 atomic_read(&NCP_FINFO(inode)->opened),
43 NCP_FINFO(inode)->volNumber, 45 NCP_FINFO(inode)->volNumber,
44 NCP_FINFO(inode)->dirEntNum); 46 NCP_FINFO(inode)->dirEntNum);
@@ -71,7 +73,7 @@ int ncp_make_open(struct inode *inode, int right)
71 break; 73 break;
72 } 74 }
73 if (result) { 75 if (result) {
74 PPRINTK("ncp_make_open: failed, result=%d\n", result); 76 ncp_vdbg("failed, result=%d\n", result);
75 goto out_unlock; 77 goto out_unlock;
76 } 78 }
77 /* 79 /*
@@ -83,7 +85,7 @@ int ncp_make_open(struct inode *inode, int right)
83 } 85 }
84 86
85 access = NCP_FINFO(inode)->access; 87 access = NCP_FINFO(inode)->access;
86 PPRINTK("ncp_make_open: file open, access=%x\n", access); 88 ncp_vdbg("file open, access=%x\n", access);
87 if (access == right || access == O_RDWR) { 89 if (access == right || access == O_RDWR) {
88 atomic_inc(&NCP_FINFO(inode)->opened); 90 atomic_inc(&NCP_FINFO(inode)->opened);
89 error = 0; 91 error = 0;
@@ -107,7 +109,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
107 void* freepage; 109 void* freepage;
108 size_t freelen; 110 size_t freelen;
109 111
110 DPRINTK("ncp_file_read: enter %pd2\n", dentry); 112 ncp_dbg(1, "enter %pd2\n", dentry);
111 113
112 pos = *ppos; 114 pos = *ppos;
113 115
@@ -124,7 +126,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
124 126
125 error = ncp_make_open(inode, O_RDONLY); 127 error = ncp_make_open(inode, O_RDONLY);
126 if (error) { 128 if (error) {
127 DPRINTK(KERN_ERR "ncp_file_read: open failed, error=%d\n", error); 129 ncp_dbg(1, "open failed, error=%d\n", error);
128 return error; 130 return error;
129 } 131 }
130 132
@@ -165,7 +167,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
165 167
166 file_accessed(file); 168 file_accessed(file);
167 169
168 DPRINTK("ncp_file_read: exit %pd2\n", dentry); 170 ncp_dbg(1, "exit %pd2\n", dentry);
169outrel: 171outrel:
170 ncp_inode_close(inode); 172 ncp_inode_close(inode);
171 return already_read ? already_read : error; 173 return already_read ? already_read : error;
@@ -182,7 +184,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
182 int errno; 184 int errno;
183 void* bouncebuffer; 185 void* bouncebuffer;
184 186
185 DPRINTK("ncp_file_write: enter %pd2\n", dentry); 187 ncp_dbg(1, "enter %pd2\n", dentry);
186 if ((ssize_t) count < 0) 188 if ((ssize_t) count < 0)
187 return -EINVAL; 189 return -EINVAL;
188 pos = *ppos; 190 pos = *ppos;
@@ -211,7 +213,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
211 return 0; 213 return 0;
212 errno = ncp_make_open(inode, O_WRONLY); 214 errno = ncp_make_open(inode, O_WRONLY);
213 if (errno) { 215 if (errno) {
214 DPRINTK(KERN_ERR "ncp_file_write: open failed, error=%d\n", errno); 216 ncp_dbg(1, "open failed, error=%d\n", errno);
215 return errno; 217 return errno;
216 } 218 }
217 bufsize = NCP_SERVER(inode)->buffer_size; 219 bufsize = NCP_SERVER(inode)->buffer_size;
@@ -261,7 +263,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
261 i_size_write(inode, pos); 263 i_size_write(inode, pos);
262 mutex_unlock(&inode->i_mutex); 264 mutex_unlock(&inode->i_mutex);
263 } 265 }
264 DPRINTK("ncp_file_write: exit %pd2\n", dentry); 266 ncp_dbg(1, "exit %pd2\n", dentry);
265outrel: 267outrel:
266 ncp_inode_close(inode); 268 ncp_inode_close(inode);
267 return already_written ? already_written : errno; 269 return already_written ? already_written : errno;
@@ -269,7 +271,7 @@ outrel:
269 271
270static int ncp_release(struct inode *inode, struct file *file) { 272static int ncp_release(struct inode *inode, struct file *file) {
271 if (ncp_make_closed(inode)) { 273 if (ncp_make_closed(inode)) {
272 DPRINTK("ncp_release: failed to close\n"); 274 ncp_dbg(1, "failed to close\n");
273 } 275 }
274 return 0; 276 return 0;
275} 277}
diff --git a/fs/ncpfs/getopt.c b/fs/ncpfs/getopt.c
index 0af3349de851..03ffde1f44d6 100644
--- a/fs/ncpfs/getopt.c
+++ b/fs/ncpfs/getopt.c
@@ -2,6 +2,8 @@
2 * getopt.c 2 * getopt.c
3 */ 3 */
4 4
5#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
6
5#include <linux/kernel.h> 7#include <linux/kernel.h>
6#include <linux/string.h> 8#include <linux/string.h>
7 9
@@ -46,8 +48,8 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts
46 if (opts->has_arg & OPT_NOPARAM) { 48 if (opts->has_arg & OPT_NOPARAM) {
47 return opts->val; 49 return opts->val;
48 } 50 }
49 printk(KERN_INFO "%s: the %s option requires an argument\n", 51 pr_info("%s: the %s option requires an argument\n",
50 caller, token); 52 caller, token);
51 return -EINVAL; 53 return -EINVAL;
52 } 54 }
53 if (opts->has_arg & OPT_INT) { 55 if (opts->has_arg & OPT_INT) {
@@ -57,18 +59,18 @@ int ncp_getopt(const char *caller, char **options, const struct ncp_option *opts
57 if (!*v) { 59 if (!*v) {
58 return opts->val; 60 return opts->val;
59 } 61 }
60 printk(KERN_INFO "%s: invalid numeric value in %s=%s\n", 62 pr_info("%s: invalid numeric value in %s=%s\n",
61 caller, token, val); 63 caller, token, val);
62 return -EDOM; 64 return -EDOM;
63 } 65 }
64 if (opts->has_arg & OPT_STRING) { 66 if (opts->has_arg & OPT_STRING) {
65 return opts->val; 67 return opts->val;
66 } 68 }
67 printk(KERN_INFO "%s: unexpected argument %s to the %s option\n", 69 pr_info("%s: unexpected argument %s to the %s option\n",
68 caller, val, token); 70 caller, val, token);
69 return -EINVAL; 71 return -EINVAL;
70 } 72 }
71 } 73 }
72 printk(KERN_INFO "%s: Unrecognized mount option %s\n", caller, token); 74 pr_info("%s: Unrecognized mount option %s\n", caller, token);
73 return -EOPNOTSUPP; 75 return -EOPNOTSUPP;
74} 76}
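
These getopt.c hunks follow the pattern used throughout the series: a pr_fmt() definition is added ahead of the includes and the raw printk(KERN_INFO ...) calls become pr_info(), so every message from the file picks up a "ncpfs: " prefix automatically. A minimal userspace sketch of the same prefixing trick (KBUILD_MODNAME and the pr_info() wrapper are stubbed out here; in the kernel both come from kbuild and printk.h):

#include <stdio.h>

/* stand-in for the kbuild-provided module name */
#define KBUILD_MODNAME "ncpfs"

/* must appear before the printk helpers are pulled in,
 * exactly as the patch places it before the #includes */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

/* toy pr_info(): routes the format through pr_fmt() first */
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
    /* prints "ncpfs: Unrecognized mount option foo" */
    pr_info("Unrecognized mount option %s\n", "foo");
    return 0;
}
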
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 647d86d2db39..81b4f643ecef 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -9,6 +9,8 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
12#include <linux/module.h> 14#include <linux/module.h>
13 15
14#include <asm/uaccess.h> 16#include <asm/uaccess.h>
@@ -133,7 +135,7 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
133 NCP_FINFO(inode)->access = nwinfo->access; 135 NCP_FINFO(inode)->access = nwinfo->access;
134 memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle, 136 memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle,
135 sizeof(nwinfo->file_handle)); 137 sizeof(nwinfo->file_handle));
136 DPRINTK("ncp_update_inode: updated %s, volnum=%d, dirent=%u\n", 138 ncp_dbg(1, "updated %s, volnum=%d, dirent=%u\n",
137 nwinfo->i.entryName, NCP_FINFO(inode)->volNumber, 139 nwinfo->i.entryName, NCP_FINFO(inode)->volNumber,
138 NCP_FINFO(inode)->dirEntNum); 140 NCP_FINFO(inode)->dirEntNum);
139} 141}
@@ -141,8 +143,7 @@ void ncp_update_inode(struct inode *inode, struct ncp_entry_info *nwinfo)
141static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi) 143static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi)
142{ 144{
143 /* NFS namespace mode overrides others if it's set. */ 145 /* NFS namespace mode overrides others if it's set. */
144 DPRINTK(KERN_DEBUG "ncp_update_dates_and_mode: (%s) nfs.mode=0%o\n", 146 ncp_dbg(1, "(%s) nfs.mode=0%o\n", nwi->entryName, nwi->nfs.mode);
145 nwi->entryName, nwi->nfs.mode);
146 if (nwi->nfs.mode) { 147 if (nwi->nfs.mode) {
147 /* XXX Security? */ 148 /* XXX Security? */
148 inode->i_mode = nwi->nfs.mode; 149 inode->i_mode = nwi->nfs.mode;
@@ -230,7 +231,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
230 231
231 ncp_update_attrs(inode, nwinfo); 232 ncp_update_attrs(inode, nwinfo);
232 233
233 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); 234 ncp_dbg(2, "inode->i_mode = %u\n", inode->i_mode);
234 235
235 set_nlink(inode, 1); 236 set_nlink(inode, 1);
236 inode->i_uid = server->m.uid; 237 inode->i_uid = server->m.uid;
@@ -258,7 +259,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
258 struct inode *inode; 259 struct inode *inode;
259 260
260 if (info == NULL) { 261 if (info == NULL) {
261 printk(KERN_ERR "ncp_iget: info is NULL\n"); 262 pr_err("%s: info is NULL\n", __func__);
262 return NULL; 263 return NULL;
263 } 264 }
264 265
@@ -290,7 +291,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
290 } 291 }
291 insert_inode_hash(inode); 292 insert_inode_hash(inode);
292 } else 293 } else
293 printk(KERN_ERR "ncp_iget: iget failed!\n"); 294 pr_err("%s: iget failed!\n", __func__);
294 return inode; 295 return inode;
295} 296}
296 297
@@ -301,12 +302,12 @@ ncp_evict_inode(struct inode *inode)
301 clear_inode(inode); 302 clear_inode(inode);
302 303
303 if (S_ISDIR(inode->i_mode)) { 304 if (S_ISDIR(inode->i_mode)) {
304 DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino); 305 ncp_dbg(2, "put directory %ld\n", inode->i_ino);
305 } 306 }
306 307
307 if (ncp_make_closed(inode) != 0) { 308 if (ncp_make_closed(inode) != 0) {
308 /* We can't do anything but complain. */ 309 /* We can't do anything but complain. */
309 printk(KERN_ERR "ncp_evict_inode: could not close\n"); 310 pr_err("%s: could not close\n", __func__);
310 } 311 }
311} 312}
312 313
@@ -621,7 +622,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
621 now because of PATH_MAX changes.. */ 622 now because of PATH_MAX changes.. */
622 if (server->m.time_out < 1) { 623 if (server->m.time_out < 1) {
623 server->m.time_out = 10; 624 server->m.time_out = 10;
624 printk(KERN_INFO "You need to recompile your ncpfs utils..\n"); 625 pr_info("You need to recompile your ncpfs utils..\n");
625 } 626 }
626 server->m.time_out = server->m.time_out * HZ / 100; 627 server->m.time_out = server->m.time_out * HZ / 100;
627 server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG; 628 server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG;
@@ -682,7 +683,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
682 ncp_unlock_server(server); 683 ncp_unlock_server(server);
683 if (error < 0) 684 if (error < 0)
684 goto out_rxbuf; 685 goto out_rxbuf;
685 DPRINTK("ncp_fill_super: NCP_SBP(sb) = %x\n", (int) NCP_SBP(sb)); 686 ncp_dbg(1, "NCP_SBP(sb) = %p\n", NCP_SBP(sb));
686 687
687 error = -EMSGSIZE; /* -EREMOTESIDEINCOMPATIBLE */ 688 error = -EMSGSIZE; /* -EREMOTESIDEINCOMPATIBLE */
688#ifdef CONFIG_NCPFS_PACKET_SIGNING 689#ifdef CONFIG_NCPFS_PACKET_SIGNING
@@ -710,7 +711,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
710 if (ncp_negotiate_buffersize(server, default_bufsize, 711 if (ncp_negotiate_buffersize(server, default_bufsize,
711 &(server->buffer_size)) != 0) 712 &(server->buffer_size)) != 0)
712 goto out_disconnect; 713 goto out_disconnect;
713 DPRINTK("ncpfs: bufsize = %d\n", server->buffer_size); 714 ncp_dbg(1, "bufsize = %d\n", server->buffer_size);
714 715
715 memset(&finfo, 0, sizeof(finfo)); 716 memset(&finfo, 0, sizeof(finfo));
716 finfo.i.attributes = aDIR; 717 finfo.i.attributes = aDIR;
@@ -739,7 +740,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
739 root_inode = ncp_iget(sb, &finfo); 740 root_inode = ncp_iget(sb, &finfo);
740 if (!root_inode) 741 if (!root_inode)
741 goto out_disconnect; 742 goto out_disconnect;
742 DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber); 743 ncp_dbg(1, "root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
743 sb->s_root = d_make_root(root_inode); 744 sb->s_root = d_make_root(root_inode);
744 if (!sb->s_root) 745 if (!sb->s_root)
745 goto out_disconnect; 746 goto out_disconnect;
@@ -985,8 +986,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
985 if ((attr->ia_valid & ATTR_SIZE) != 0) { 986 if ((attr->ia_valid & ATTR_SIZE) != 0) {
986 int written; 987 int written;
987 988
988 DPRINTK("ncpfs: trying to change size to %ld\n", 989 ncp_dbg(1, "trying to change size to %llu\n", attr->ia_size);
989 attr->ia_size);
990 990
991 if ((result = ncp_make_open(inode, O_WRONLY)) < 0) { 991 if ((result = ncp_make_open(inode, O_WRONLY)) < 0) {
992 result = -EACCES; 992 result = -EACCES;
@@ -1072,7 +1072,7 @@ MODULE_ALIAS_FS("ncpfs");
1072static int __init init_ncp_fs(void) 1072static int __init init_ncp_fs(void)
1073{ 1073{
1074 int err; 1074 int err;
1075 DPRINTK("ncpfs: init_ncp_fs called\n"); 1075 ncp_dbg(1, "called\n");
1076 1076
1077 err = init_inodecache(); 1077 err = init_inodecache();
1078 if (err) 1078 if (err)
@@ -1089,7 +1089,7 @@ out1:
1089 1089
1090static void __exit exit_ncp_fs(void) 1090static void __exit exit_ncp_fs(void)
1091{ 1091{
1092 DPRINTK("ncpfs: exit_ncp_fs called\n"); 1092 ncp_dbg(1, "called\n");
1093 unregister_filesystem(&ncp_fs_type); 1093 unregister_filesystem(&ncp_fs_type);
1094 destroy_inodecache(); 1094 destroy_inodecache();
1095} 1095}
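
Two printf-format fixes ride along with the inode.c conversion: NCP_SBP(sb) is now printed with %p instead of being cast to int, and attr->ia_size, a 64-bit loff_t, is printed with %llu instead of the previously wrong %ld. A small standalone illustration (the typedef and the explicit cast exist only for this userspace sketch):

#include <stdio.h>

/* userspace stand-in for the kernel's loff_t */
typedef long long loff_t;

int main(void)
{
    loff_t ia_size = 5368709120LL;  /* 5 GiB: would overflow a 32-bit %ld */
    void *sbp = &ia_size;

    /* 64-bit sizes need %llu (cast kept for portability) ... */
    printf("trying to change size to %llu\n",
           (unsigned long long)ia_size);
    /* ... and pointers should be printed with %p, not cast to int */
    printf("NCP_SBP(sb) = %p\n", sbp);
    return 0;
}
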
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 60426ccb3b65..d5659d96ee7f 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -41,7 +41,7 @@ ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
41 return -EFAULT; 41 return -EFAULT;
42 42
43 if (info.version != NCP_GET_FS_INFO_VERSION) { 43 if (info.version != NCP_GET_FS_INFO_VERSION) {
44 DPRINTK("info.version invalid: %d\n", info.version); 44 ncp_dbg(1, "info.version invalid: %d\n", info.version);
45 return -EINVAL; 45 return -EINVAL;
46 } 46 }
47 /* TODO: info.addr = server->m.serv_addr; */ 47 /* TODO: info.addr = server->m.serv_addr; */
@@ -66,7 +66,7 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
66 return -EFAULT; 66 return -EFAULT;
67 67
68 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { 68 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
69 DPRINTK("info.version invalid: %d\n", info2.version); 69 ncp_dbg(1, "info.version invalid: %d\n", info2.version);
70 return -EINVAL; 70 return -EINVAL;
71 } 71 }
72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); 72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
@@ -132,7 +132,7 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
132 return -EFAULT; 132 return -EFAULT;
133 133
134 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) { 134 if (info2.version != NCP_GET_FS_INFO_VERSION_V2) {
135 DPRINTK("info.version invalid: %d\n", info2.version); 135 ncp_dbg(1, "info.version invalid: %d\n", info2.version);
136 return -EINVAL; 136 return -EINVAL;
137 } 137 }
138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid); 138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
@@ -308,8 +308,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
308 else 308 else
309 result = server->reply_size; 309 result = server->reply_size;
310 ncp_unlock_server(server); 310 ncp_unlock_server(server);
311 DPRINTK("ncp_ioctl: copy %d bytes\n", 311 ncp_dbg(1, "copy %d bytes\n", result);
312 result);
313 if (result >= 0) 312 if (result >= 0)
314 if (copy_to_user(request.data, bouncebuffer, result)) 313 if (copy_to_user(request.data, bouncebuffer, result))
315 result = -EFAULT; 314 result = -EFAULT;
@@ -385,9 +384,9 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
385 sr.namespace = server->name_space[sr.volNumber]; 384 sr.namespace = server->name_space[sr.volNumber];
386 result = 0; 385 result = 0;
387 } else 386 } else
388 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 387 ncp_dbg(1, "s_root->d_inode==NULL\n");
389 } else 388 } else
390 DPRINTK("ncpfs: s_root==NULL\n"); 389 ncp_dbg(1, "s_root==NULL\n");
391 } else { 390 } else {
392 sr.volNumber = -1; 391 sr.volNumber = -1;
393 sr.namespace = 0; 392 sr.namespace = 0;
@@ -440,11 +439,11 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
440 NCP_FINFO(s_inode)->DosDirNum = dosde; 439 NCP_FINFO(s_inode)->DosDirNum = dosde;
441 server->root_setuped = 1; 440 server->root_setuped = 1;
442 } else { 441 } else {
443 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 442 ncp_dbg(1, "s_root->d_inode==NULL\n");
444 result = -EIO; 443 result = -EIO;
445 } 444 }
446 } else { 445 } else {
447 DPRINTK("ncpfs: s_root==NULL\n"); 446 ncp_dbg(1, "s_root==NULL\n");
448 result = -EIO; 447 result = -EIO;
449 } 448 }
450 } 449 }
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 3c5dd55d284c..b359d12eb359 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -107,7 +107,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
107{ 107{
108 struct inode *inode = file_inode(file); 108 struct inode *inode = file_inode(file);
109 109
110 DPRINTK("ncp_mmap: called\n"); 110 ncp_dbg(1, "called\n");
111 111
112 if (!ncp_conn_valid(NCP_SERVER(inode))) 112 if (!ncp_conn_valid(NCP_SERVER(inode)))
113 return -EIO; 113 return -EIO;
diff --git a/fs/ncpfs/ncp_fs.h b/fs/ncpfs/ncp_fs.h
index 31831afe1c3b..b9f69e1b1f43 100644
--- a/fs/ncpfs/ncp_fs.h
+++ b/fs/ncpfs/ncp_fs.h
@@ -2,30 +2,32 @@
2#include "ncp_fs_i.h" 2#include "ncp_fs_i.h"
3#include "ncp_fs_sb.h" 3#include "ncp_fs_sb.h"
4 4
5/* define because it is easy to change PRINTK to {*}PRINTK */
6#define PRINTK(format, args...) printk(KERN_DEBUG format , ## args)
7
8#undef NCPFS_PARANOIA 5#undef NCPFS_PARANOIA
9#ifdef NCPFS_PARANOIA 6#ifdef NCPFS_PARANOIA
10#define PPRINTK(format, args...) PRINTK(format , ## args) 7#define ncp_vdbg(fmt, ...) \
8 pr_debug(fmt, ##__VA_ARGS__)
11#else 9#else
12#define PPRINTK(format, args...) 10#define ncp_vdbg(fmt, ...) \
11do { \
12 if (0) \
13 pr_debug(fmt, ##__VA_ARGS__); \
14} while (0)
13#endif 15#endif
14 16
15#ifndef DEBUG_NCP 17#ifndef DEBUG_NCP
16#define DEBUG_NCP 0 18#define DEBUG_NCP 0
17#endif 19#endif
18#if DEBUG_NCP > 0 20
19#define DPRINTK(format, args...) PRINTK(format , ## args) 21#if DEBUG_NCP > 0 && !defined(DEBUG)
20#else 22#define DEBUG
21#define DPRINTK(format, args...)
22#endif
23#if DEBUG_NCP > 1
24#define DDPRINTK(format, args...) PRINTK(format , ## args)
25#else
26#define DDPRINTK(format, args...)
27#endif 23#endif
28 24
25#define ncp_dbg(level, fmt, ...) \
26do { \
27 if (level <= DEBUG_NCP) \
28 pr_debug(fmt, ##__VA_ARGS__); \
29} while (0)
30
29#define NCP_MAX_RPC_TIMEOUT (6*HZ) 31#define NCP_MAX_RPC_TIMEOUT (6*HZ)
30 32
31 33
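
This ncp_fs.h hunk is the core of the ncpfs part of the series: the PRINTK/DPRINTK/DDPRINTK/PPRINTK family is replaced by ncp_dbg(level, ...), gated on DEBUG_NCP, and ncp_vdbg(...), gated on NCPFS_PARANOIA, both built on pr_debug() (which in the kernel is additionally controlled by DEBUG and dynamic debug). The disabled variant still expands to "if (0) pr_debug(...)", so format strings and arguments keep being compile-checked even when no output is produced. A self-contained approximation with pr_debug() mapped to stderr:

#include <stdio.h>

#define DEBUG_NCP 1             /* compile-time verbosity, as in ncp_fs.h */

#define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

/* level-gated debug: messages above DEBUG_NCP disappear, but the
 * format string and arguments are still checked by the compiler */
#define ncp_dbg(level, fmt, ...)            \
do {                                        \
    if ((level) <= DEBUG_NCP)               \
        pr_debug(fmt, ##__VA_ARGS__);       \
} while (0)

/* "paranoia" variant: emitted only if NCPFS_PARANOIA is defined */
#undef NCPFS_PARANOIA
#ifdef NCPFS_PARANOIA
#define ncp_vdbg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__)
#else
#define ncp_vdbg(fmt, ...)                  \
do {                                        \
    if (0)                                  \
        pr_debug(fmt, ##__VA_ARGS__);       \
} while (0)
#endif

int main(void)
{
    ncp_dbg(1, "bufsize = %d\n", 1024);     /* printed */
    ncp_dbg(2, "verbose detail %d\n", 7);   /* suppressed, still type-checked */
    ncp_vdbg("completion code=%x\n", 0);    /* suppressed, still type-checked */
    return 0;
}
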
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 981a95617fc9..482387532f54 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -9,14 +9,14 @@
9 * 9 *
10 */ 10 */
11 11
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14#include "ncp_fs.h" 14#include "ncp_fs.h"
15 15
16static inline void assert_server_locked(struct ncp_server *server) 16static inline void assert_server_locked(struct ncp_server *server)
17{ 17{
18 if (server->lock == 0) { 18 if (server->lock == 0) {
19 DPRINTK("ncpfs: server not locked!\n"); 19 ncp_dbg(1, "server not locked!\n");
20 } 20 }
21} 21}
22 22
@@ -75,7 +75,7 @@ static void ncp_add_pstring(struct ncp_server *server, const char *s)
75 int len = strlen(s); 75 int len = strlen(s);
76 assert_server_locked(server); 76 assert_server_locked(server);
77 if (len > 255) { 77 if (len > 255) {
78 DPRINTK("ncpfs: string too long: %s\n", s); 78 ncp_dbg(1, "string too long: %s\n", s);
79 len = 255; 79 len = 255;
80 } 80 }
81 ncp_add_byte(server, len); 81 ncp_add_byte(server, len);
@@ -225,7 +225,7 @@ int ncp_get_volume_info_with_number(struct ncp_server* server,
225 result = -EIO; 225 result = -EIO;
226 len = ncp_reply_byte(server, 29); 226 len = ncp_reply_byte(server, 29);
227 if (len > NCP_VOLNAME_LEN) { 227 if (len > NCP_VOLNAME_LEN) {
228 DPRINTK("ncpfs: volume name too long: %d\n", len); 228 ncp_dbg(1, "volume name too long: %d\n", len);
229 goto out; 229 goto out;
230 } 230 }
231 memcpy(&(target->volume_name), ncp_reply_data(server, 30), len); 231 memcpy(&(target->volume_name), ncp_reply_data(server, 30), len);
@@ -259,7 +259,7 @@ int ncp_get_directory_info(struct ncp_server* server, __u8 n,
259 result = -EIO; 259 result = -EIO;
260 len = ncp_reply_byte(server, 21); 260 len = ncp_reply_byte(server, 21);
261 if (len > NCP_VOLNAME_LEN) { 261 if (len > NCP_VOLNAME_LEN) {
262 DPRINTK("ncpfs: volume name too long: %d\n", len); 262 ncp_dbg(1, "volume name too long: %d\n", len);
263 goto out; 263 goto out;
264 } 264 }
265 memcpy(&(target->volume_name), ncp_reply_data(server, 22), len); 265 memcpy(&(target->volume_name), ncp_reply_data(server, 22), len);
@@ -295,9 +295,9 @@ ncp_make_closed(struct inode *inode)
295 err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle); 295 err = ncp_close_file(NCP_SERVER(inode), NCP_FINFO(inode)->file_handle);
296 296
297 if (!err) 297 if (!err)
298 PPRINTK("ncp_make_closed: volnum=%d, dirent=%u, error=%d\n", 298 ncp_vdbg("volnum=%d, dirent=%u, error=%d\n",
299 NCP_FINFO(inode)->volNumber, 299 NCP_FINFO(inode)->volNumber,
300 NCP_FINFO(inode)->dirEntNum, err); 300 NCP_FINFO(inode)->dirEntNum, err);
301 } 301 }
302 mutex_unlock(&NCP_FINFO(inode)->open_mutex); 302 mutex_unlock(&NCP_FINFO(inode)->open_mutex);
303 return err; 303 return err;
@@ -394,8 +394,7 @@ int ncp_obtain_nfs_info(struct ncp_server *server,
394 394
395 if ((result = ncp_request(server, 87)) == 0) { 395 if ((result = ncp_request(server, 87)) == 0) {
396 ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs); 396 ncp_extract_nfs_info(ncp_reply_data(server, 0), &target->nfs);
397 DPRINTK(KERN_DEBUG 397 ncp_dbg(1, "(%s) mode=0%o, rdev=0x%x\n",
398 "ncp_obtain_nfs_info: (%s) mode=0%o, rdev=0x%x\n",
399 target->entryName, target->nfs.mode, 398 target->entryName, target->nfs.mode,
400 target->nfs.rdev); 399 target->nfs.rdev);
401 } else { 400 } else {
@@ -425,7 +424,7 @@ int ncp_obtain_info(struct ncp_server *server, struct inode *dir, const char *pa
425 int result; 424 int result;
426 425
427 if (target == NULL) { 426 if (target == NULL) {
428 printk(KERN_ERR "ncp_obtain_info: invalid call\n"); 427 pr_err("%s: invalid call\n", __func__);
429 return -EINVAL; 428 return -EINVAL;
430 } 429 }
431 ncp_init_request(server); 430 ncp_init_request(server);
@@ -498,7 +497,7 @@ ncp_get_known_namespace(struct ncp_server *server, __u8 volume)
498 namespace = ncp_reply_data(server, 2); 497 namespace = ncp_reply_data(server, 2);
499 498
500 while (no_namespaces > 0) { 499 while (no_namespaces > 0) {
501 DPRINTK("get_namespaces: found %d on %d\n", *namespace, volume); 500 ncp_dbg(1, "found %d on %d\n", *namespace, volume);
502 501
503#ifdef CONFIG_NCPFS_NFS_NS 502#ifdef CONFIG_NCPFS_NFS_NS
504 if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS)) 503 if ((*namespace == NW_NS_NFS) && !(server->m.flags&NCP_MOUNT_NO_NFS))
@@ -531,8 +530,7 @@ ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns)
531 if (ret_ns) 530 if (ret_ns)
532 *ret_ns = ns; 531 *ret_ns = ns;
533 532
534 DPRINTK("lookup_vol: namespace[%d] = %d\n", 533 ncp_dbg(1, "namespace[%d] = %d\n", volume, server->name_space[volume]);
535 volume, server->name_space[volume]);
536 534
537 if (server->name_space[volume] == ns) 535 if (server->name_space[volume] == ns)
538 return 0; 536 return 0;
@@ -596,7 +594,7 @@ ncp_get_volume_root(struct ncp_server *server,
596{ 594{
597 int result; 595 int result;
598 596
599 DPRINTK("ncp_get_volume_root: looking up vol %s\n", volname); 597 ncp_dbg(1, "looking up vol %s\n", volname);
600 598
601 ncp_init_request(server); 599 ncp_init_request(server);
602 ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */ 600 ncp_add_byte(server, 22); /* Subfunction: Generate dir handle */
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index 3a1587222c8a..04a69a4d8e96 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -8,6 +8,7 @@
8 * 8 *
9 */ 9 */
10 10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 12
12#include <linux/time.h> 13#include <linux/time.h>
13#include <linux/errno.h> 14#include <linux/errno.h>
@@ -231,7 +232,7 @@ static void __ncptcp_try_send(struct ncp_server *server)
231 return; 232 return;
232 233
233 if (result < 0) { 234 if (result < 0) {
234 printk(KERN_ERR "ncpfs: tcp: Send failed: %d\n", result); 235 pr_err("tcp: Send failed: %d\n", result);
235 __ncp_abort_request(server, rq, result); 236 __ncp_abort_request(server, rq, result);
236 return; 237 return;
237 } 238 }
@@ -332,7 +333,7 @@ static int ncp_add_request(struct ncp_server *server, struct ncp_request_reply *
332 mutex_lock(&server->rcv.creq_mutex); 333 mutex_lock(&server->rcv.creq_mutex);
333 if (!ncp_conn_valid(server)) { 334 if (!ncp_conn_valid(server)) {
334 mutex_unlock(&server->rcv.creq_mutex); 335 mutex_unlock(&server->rcv.creq_mutex);
335 printk(KERN_ERR "ncpfs: tcp: Server died\n"); 336 pr_err("tcp: Server died\n");
336 return -EIO; 337 return -EIO;
337 } 338 }
338 ncp_req_get(req); 339 ncp_req_get(req);
@@ -405,15 +406,15 @@ void ncpdgram_rcv_proc(struct work_struct *work)
405 } 406 }
406 result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT); 407 result = _recv(sock, buf, sizeof(buf), MSG_DONTWAIT);
407 if (result < 0) { 408 if (result < 0) {
408 DPRINTK("recv failed with %d\n", result); 409 ncp_dbg(1, "recv failed with %d\n", result);
409 continue; 410 continue;
410 } 411 }
411 if (result < 10) { 412 if (result < 10) {
412 DPRINTK("too short (%u) watchdog packet\n", result); 413 ncp_dbg(1, "too short (%u) watchdog packet\n", result);
413 continue; 414 continue;
414 } 415 }
415 if (buf[9] != '?') { 416 if (buf[9] != '?') {
416 DPRINTK("bad signature (%02X) in watchdog packet\n", buf[9]); 417 ncp_dbg(1, "bad signature (%02X) in watchdog packet\n", buf[9]);
417 continue; 418 continue;
418 } 419 }
419 buf[9] = 'Y'; 420 buf[9] = 'Y';
@@ -448,7 +449,7 @@ void ncpdgram_rcv_proc(struct work_struct *work)
448 result -= 8; 449 result -= 8;
449 hdrl = sock->sk->sk_family == AF_INET ? 8 : 6; 450 hdrl = sock->sk->sk_family == AF_INET ? 8 : 6;
450 if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) { 451 if (sign_verify_reply(server, server->rxbuf + hdrl, result - hdrl, cpu_to_le32(result), server->rxbuf + result)) {
451 printk(KERN_INFO "ncpfs: Signature violation\n"); 452 pr_info("Signature violation\n");
452 result = -EIO; 453 result = -EIO;
453 } 454 }
454 } 455 }
@@ -524,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
524 return result; 525 return result;
525 } 526 }
526 if (result > len) { 527 if (result > len) {
527 printk(KERN_ERR "ncpfs: tcp: bug in recvmsg (%u > %Zu)\n", result, len); 528 pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len);
528 return -EIO; 529 return -EIO;
529 } 530 }
530 return result; 531 return result;
@@ -552,9 +553,9 @@ static int __ncptcp_rcv_proc(struct ncp_server *server)
552 __ncptcp_abort(server); 553 __ncptcp_abort(server);
553 } 554 }
554 if (result < 0) { 555 if (result < 0) {
555 printk(KERN_ERR "ncpfs: tcp: error in recvmsg: %d\n", result); 556 pr_err("tcp: error in recvmsg: %d\n", result);
556 } else { 557 } else {
557 DPRINTK(KERN_ERR "ncpfs: tcp: EOF\n"); 558 ncp_dbg(1, "tcp: EOF\n");
558 } 559 }
559 return -EIO; 560 return -EIO;
560 } 561 }
@@ -566,20 +567,20 @@ static int __ncptcp_rcv_proc(struct ncp_server *server)
566 switch (server->rcv.state) { 567 switch (server->rcv.state) {
567 case 0: 568 case 0:
568 if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) { 569 if (server->rcv.buf.magic != htonl(NCP_TCP_RCVD_MAGIC)) {
569 printk(KERN_ERR "ncpfs: tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic)); 570 pr_err("tcp: Unexpected reply type %08X\n", ntohl(server->rcv.buf.magic));
570 __ncptcp_abort(server); 571 __ncptcp_abort(server);
571 return -EIO; 572 return -EIO;
572 } 573 }
573 datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF; 574 datalen = ntohl(server->rcv.buf.len) & 0x0FFFFFFF;
574 if (datalen < 10) { 575 if (datalen < 10) {
575 printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d\n", datalen); 576 pr_err("tcp: Unexpected reply len %d\n", datalen);
576 __ncptcp_abort(server); 577 __ncptcp_abort(server);
577 return -EIO; 578 return -EIO;
578 } 579 }
579#ifdef CONFIG_NCPFS_PACKET_SIGNING 580#ifdef CONFIG_NCPFS_PACKET_SIGNING
580 if (server->sign_active) { 581 if (server->sign_active) {
581 if (datalen < 18) { 582 if (datalen < 18) {
582 printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d\n", datalen); 583 pr_err("tcp: Unexpected reply len %d\n", datalen);
583 __ncptcp_abort(server); 584 __ncptcp_abort(server);
584 return -EIO; 585 return -EIO;
585 } 586 }
@@ -604,7 +605,7 @@ cont:;
604 server->rcv.len = datalen - 10; 605 server->rcv.len = datalen - 10;
605 break; 606 break;
606 } 607 }
607 DPRINTK("ncpfs: tcp: Unexpected NCP type %02X\n", type); 608 ncp_dbg(1, "tcp: Unexpected NCP type %02X\n", type);
608skipdata2:; 609skipdata2:;
609 server->rcv.state = 2; 610 server->rcv.state = 2;
610skipdata:; 611skipdata:;
@@ -614,11 +615,11 @@ skipdata:;
614 } 615 }
615 req = server->rcv.creq; 616 req = server->rcv.creq;
616 if (!req) { 617 if (!req) {
617 DPRINTK(KERN_ERR "ncpfs: Reply without appropriate request\n"); 618 ncp_dbg(1, "Reply without appropriate request\n");
618 goto skipdata2; 619 goto skipdata2;
619 } 620 }
620 if (datalen > req->datalen + 8) { 621 if (datalen > req->datalen + 8) {
621 printk(KERN_ERR "ncpfs: tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8); 622 pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
622 server->rcv.state = 3; 623 server->rcv.state = 3;
623 goto skipdata; 624 goto skipdata;
624 } 625 }
@@ -638,12 +639,12 @@ skipdata:;
638 req = server->rcv.creq; 639 req = server->rcv.creq;
639 if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) { 640 if (req->tx_type != NCP_ALLOC_SLOT_REQUEST) {
640 if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) { 641 if (((struct ncp_reply_header*)server->rxbuf)->sequence != server->sequence) {
641 printk(KERN_ERR "ncpfs: tcp: Bad sequence number\n"); 642 pr_err("tcp: Bad sequence number\n");
642 __ncp_abort_request(server, req, -EIO); 643 __ncp_abort_request(server, req, -EIO);
643 return -EIO; 644 return -EIO;
644 } 645 }
645 if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) { 646 if ((((struct ncp_reply_header*)server->rxbuf)->conn_low | (((struct ncp_reply_header*)server->rxbuf)->conn_high << 8)) != server->connection) {
646 printk(KERN_ERR "ncpfs: tcp: Connection number mismatch\n"); 647 pr_err("tcp: Connection number mismatch\n");
647 __ncp_abort_request(server, req, -EIO); 648 __ncp_abort_request(server, req, -EIO);
648 return -EIO; 649 return -EIO;
649 } 650 }
@@ -651,7 +652,7 @@ skipdata:;
651#ifdef CONFIG_NCPFS_PACKET_SIGNING 652#ifdef CONFIG_NCPFS_PACKET_SIGNING
652 if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) { 653 if (server->sign_active && req->tx_type != NCP_DEALLOC_SLOT_REQUEST) {
653 if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) { 654 if (sign_verify_reply(server, server->rxbuf + 6, req->datalen - 6, cpu_to_be32(req->datalen + 16), &server->rcv.buf.type)) {
654 printk(KERN_ERR "ncpfs: tcp: Signature violation\n"); 655 pr_err("tcp: Signature violation\n");
655 __ncp_abort_request(server, req, -EIO); 656 __ncp_abort_request(server, req, -EIO);
656 return -EIO; 657 return -EIO;
657 } 658 }
@@ -742,7 +743,7 @@ static int ncp_do_request(struct ncp_server *server, int size,
742 int result; 743 int result;
743 744
744 if (server->lock == 0) { 745 if (server->lock == 0) {
745 printk(KERN_ERR "ncpfs: Server not locked!\n"); 746 pr_err("Server not locked!\n");
746 return -EIO; 747 return -EIO;
747 } 748 }
748 if (!ncp_conn_valid(server)) { 749 if (!ncp_conn_valid(server)) {
@@ -781,7 +782,7 @@ static int ncp_do_request(struct ncp_server *server, int size,
781 spin_unlock_irqrestore(&current->sighand->siglock, flags); 782 spin_unlock_irqrestore(&current->sighand->siglock, flags);
782 } 783 }
783 784
784 DDPRINTK("do_ncp_rpc_call returned %d\n", result); 785 ncp_dbg(2, "do_ncp_rpc_call returned %d\n", result);
785 786
786 return result; 787 return result;
787} 788}
@@ -811,7 +812,7 @@ int ncp_request2(struct ncp_server *server, int function,
811 812
812 result = ncp_do_request(server, server->current_size, reply, size); 813 result = ncp_do_request(server, server->current_size, reply, size);
813 if (result < 0) { 814 if (result < 0) {
814 DPRINTK("ncp_request_error: %d\n", result); 815 ncp_dbg(1, "ncp_request_error: %d\n", result);
815 goto out; 816 goto out;
816 } 817 }
817 server->completion = reply->completion_code; 818 server->completion = reply->completion_code;
@@ -822,7 +823,7 @@ int ncp_request2(struct ncp_server *server, int function,
822 result = reply->completion_code; 823 result = reply->completion_code;
823 824
824 if (result != 0) 825 if (result != 0)
825 PPRINTK("ncp_request: completion code=%x\n", result); 826 ncp_vdbg("completion code=%x\n", result);
826out: 827out:
827 return result; 828 return result;
828} 829}
@@ -865,14 +866,14 @@ void ncp_lock_server(struct ncp_server *server)
865{ 866{
866 mutex_lock(&server->mutex); 867 mutex_lock(&server->mutex);
867 if (server->lock) 868 if (server->lock)
868 printk(KERN_WARNING "ncp_lock_server: was locked!\n"); 869 pr_warn("%s: was locked!\n", __func__);
869 server->lock = 1; 870 server->lock = 1;
870} 871}
871 872
872void ncp_unlock_server(struct ncp_server *server) 873void ncp_unlock_server(struct ncp_server *server)
873{ 874{
874 if (!server->lock) { 875 if (!server->lock) {
875 printk(KERN_WARNING "ncp_unlock_server: was not locked!\n"); 876 pr_warn("%s: was not locked!\n", __func__);
876 return; 877 return;
877 } 878 }
878 server->lock = 0; 879 server->lock = 0;
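
sock.c keeps its error reporting at printk severity but switches to pr_err()/pr_warn() and drops the hand-written "ncpfs: " and function-name prefixes: the module prefix now comes from pr_fmt() and the function name from "%s", __func__, so neither can drift out of date after a rename. A tiny demonstration of the __func__ form (the helper name below is invented for the sketch):

#include <stdio.h>

#define KBUILD_MODNAME "ncpfs"
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define pr_warn(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

static void lock_server_demo(int already_locked)
{
    if (already_locked)
        /* prints "ncpfs: lock_server_demo: was locked!" */
        pr_warn("%s: was locked!\n", __func__);
}

int main(void)
{
    lock_server_demo(1);
    return 0;
}
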
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 52439ddc8de0..1a63bfdb4a65 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -112,7 +112,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
112 __le32 attr; 112 __le32 attr;
113 unsigned int hdr; 113 unsigned int hdr;
114 114
115 DPRINTK("ncp_symlink(dir=%p,dentry=%p,symname=%s)\n",dir,dentry,symname); 115 ncp_dbg(1, "dir=%p, dentry=%p, symname=%s\n", dir, dentry, symname);
116 116
117 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) 117 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber))
118 kludge = 0; 118 kludge = 0;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index ae2e87b95453..41db5258e7a7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -112,7 +112,8 @@ out:
112 * TODO: keep track of all layouts (and delegations) in a hash table 112 * TODO: keep track of all layouts (and delegations) in a hash table
113 * hashed by filehandle. 113 * hashed by filehandle.
114 */ 114 */
115static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) 115static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
116 struct nfs_fh *fh, nfs4_stateid *stateid)
116{ 117{
117 struct nfs_server *server; 118 struct nfs_server *server;
118 struct inode *ino; 119 struct inode *ino;
@@ -120,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
120 121
121 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 122 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
122 list_for_each_entry(lo, &server->layouts, plh_layouts) { 123 list_for_each_entry(lo, &server->layouts, plh_layouts) {
124 if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
125 continue;
123 if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) 126 if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
124 continue; 127 continue;
125 ino = igrab(lo->plh_inode); 128 ino = igrab(lo->plh_inode);
126 if (!ino) 129 if (!ino)
127 continue; 130 break;
128 spin_lock(&ino->i_lock); 131 spin_lock(&ino->i_lock);
129 /* Is this layout in the process of being freed? */ 132 /* Is this layout in the process of being freed? */
130 if (NFS_I(ino)->layout != lo) { 133 if (NFS_I(ino)->layout != lo) {
131 spin_unlock(&ino->i_lock); 134 spin_unlock(&ino->i_lock);
132 iput(ino); 135 iput(ino);
133 continue; 136 break;
134 } 137 }
135 pnfs_get_layout_hdr(lo); 138 pnfs_get_layout_hdr(lo);
136 spin_unlock(&ino->i_lock); 139 spin_unlock(&ino->i_lock);
@@ -141,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
141 return NULL; 144 return NULL;
142} 145}
143 146
144static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) 147static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
148 struct nfs_fh *fh, nfs4_stateid *stateid)
145{ 149{
146 struct pnfs_layout_hdr *lo; 150 struct pnfs_layout_hdr *lo;
147 151
148 spin_lock(&clp->cl_lock); 152 spin_lock(&clp->cl_lock);
149 rcu_read_lock(); 153 rcu_read_lock();
150 lo = get_layout_by_fh_locked(clp, fh); 154 lo = get_layout_by_fh_locked(clp, fh, stateid);
151 rcu_read_unlock(); 155 rcu_read_unlock();
152 spin_unlock(&clp->cl_lock); 156 spin_unlock(&clp->cl_lock);
153 157
@@ -162,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp,
162 u32 rv = NFS4ERR_NOMATCHING_LAYOUT; 166 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
163 LIST_HEAD(free_me_list); 167 LIST_HEAD(free_me_list);
164 168
165 lo = get_layout_by_fh(clp, &args->cbl_fh); 169 lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
166 if (!lo) 170 if (!lo)
167 return NFS4ERR_NOMATCHING_LAYOUT; 171 goto out;
168 172
169 ino = lo->plh_inode; 173 ino = lo->plh_inode;
170 spin_lock(&ino->i_lock); 174 spin_lock(&ino->i_lock);
@@ -179,6 +183,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
179 pnfs_free_lseg_list(&free_me_list); 183 pnfs_free_lseg_list(&free_me_list);
180 pnfs_put_layout_hdr(lo); 184 pnfs_put_layout_hdr(lo);
181 iput(ino); 185 iput(ino);
186out:
182 return rv; 187 return rv;
183} 188}
184 189
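
The CB_LAYOUTRECALL lookup in callback_proc.c now takes the recalled stateid as well as the filehandle: get_layout_by_fh_locked() first demands nfs4_stateid_match_other() (added in the nfs4_fs.h hunk further down) and, once that unique match is found, bails out with break rather than continue if the inode is being torn down; initiate_file_draining() keeps returning NFS4ERR_NOMATCHING_LAYOUT through the new out label. The "other" comparison itself is just a memcmp over the 12-byte opaque part of the stateid, ignoring seqid; a standalone sketch (struct layout and values simplified for illustration):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NFS4_STATEID_OTHER_SIZE 12

/* simplified stateid: 4-byte sequence + 12-byte opaque "other" part */
typedef struct {
    uint32_t seqid;
    char other[NFS4_STATEID_OTHER_SIZE];
} nfs4_stateid;

/* same idea as the helper added in nfs4_fs.h: two stateids name the
 * same state if their "other" fields match, regardless of seqid */
static int nfs4_stateid_match_other(const nfs4_stateid *a,
                                    const nfs4_stateid *b)
{
    return memcmp(a->other, b->other, NFS4_STATEID_OTHER_SIZE) == 0;
}

int main(void)
{
    nfs4_stateid recalled = { .seqid = 3, .other = "layout-abc" };
    nfs4_stateid held     = { .seqid = 5, .other = "layout-abc" };

    /* different seqids, same underlying layout state */
    printf("match: %d\n", nfs4_stateid_match_other(&recalled, &held));
    return 0;
}
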
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a48fe4b84b6..d9f3d067cd15 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = {
69 69
70static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) 70static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
71{ 71{
72 struct nfs_inode *nfsi = NFS_I(dir);
72 struct nfs_open_dir_context *ctx; 73 struct nfs_open_dir_context *ctx;
73 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 74 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
74 if (ctx != NULL) { 75 if (ctx != NULL) {
75 ctx->duped = 0; 76 ctx->duped = 0;
76 ctx->attr_gencount = NFS_I(dir)->attr_gencount; 77 ctx->attr_gencount = nfsi->attr_gencount;
77 ctx->dir_cookie = 0; 78 ctx->dir_cookie = 0;
78 ctx->dup_cookie = 0; 79 ctx->dup_cookie = 0;
79 ctx->cred = get_rpccred(cred); 80 ctx->cred = get_rpccred(cred);
81 spin_lock(&dir->i_lock);
82 list_add(&ctx->list, &nfsi->open_files);
83 spin_unlock(&dir->i_lock);
80 return ctx; 84 return ctx;
81 } 85 }
82 return ERR_PTR(-ENOMEM); 86 return ERR_PTR(-ENOMEM);
83} 87}
84 88
85static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) 89static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
86{ 90{
91 spin_lock(&dir->i_lock);
92 list_del(&ctx->list);
93 spin_unlock(&dir->i_lock);
87 put_rpccred(ctx->cred); 94 put_rpccred(ctx->cred);
88 kfree(ctx); 95 kfree(ctx);
89} 96}
@@ -126,7 +133,7 @@ out:
126static int 133static int
127nfs_closedir(struct inode *inode, struct file *filp) 134nfs_closedir(struct inode *inode, struct file *filp)
128{ 135{
129 put_nfs_open_dir_context(filp->private_data); 136 put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data);
130 return 0; 137 return 0;
131} 138}
132 139
@@ -306,10 +313,9 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
306 if (printk_ratelimit()) { 313 if (printk_ratelimit()) {
307 pr_notice("NFS: directory %pD2 contains a readdir loop." 314 pr_notice("NFS: directory %pD2 contains a readdir loop."
308 "Please contact your server vendor. " 315 "Please contact your server vendor. "
309 "The file: %s has duplicate cookie %llu\n", 316 "The file: %.*s has duplicate cookie %llu\n",
310 desc->file, 317 desc->file, array->array[i].string.len,
311 array->array[i].string.name, 318 array->array[i].string.name, *desc->dir_cookie);
312 *desc->dir_cookie);
313 } 319 }
314 status = -ELOOP; 320 status = -ELOOP;
315 goto out; 321 goto out;
@@ -437,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir)
437 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); 443 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
438} 444}
439 445
446/*
447 * This function is mainly for use by nfs_getattr().
448 *
449 * If this is an 'ls -l', we want to force use of readdirplus.
450 * Do this by checking if there is an active file descriptor
451 * and calling nfs_advise_use_readdirplus, then forcing a
452 * cache flush.
453 */
454void nfs_force_use_readdirplus(struct inode *dir)
455{
456 if (!list_empty(&NFS_I(dir)->open_files)) {
457 nfs_advise_use_readdirplus(dir);
458 nfs_zap_mapping(dir, dir->i_mapping);
459 }
460}
461
440static 462static
441void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) 463void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
442{ 464{
@@ -815,6 +837,17 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
815 goto out; 837 goto out;
816} 838}
817 839
840static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
841{
842 struct nfs_inode *nfsi = NFS_I(dir);
843
844 if (nfs_attribute_cache_expired(dir))
845 return true;
846 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
847 return true;
848 return false;
849}
850
818/* The file offset position represents the dirent entry number. A 851/* The file offset position represents the dirent entry number. A
819 last cookie cache takes care of the common case of reading the 852 last cookie cache takes care of the common case of reading the
820 whole directory. 853 whole directory.
@@ -847,7 +880,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
847 desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; 880 desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
848 881
849 nfs_block_sillyrename(dentry); 882 nfs_block_sillyrename(dentry);
850 if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) 883 if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
851 res = nfs_revalidate_mapping(inode, file->f_mapping); 884 res = nfs_revalidate_mapping(inode, file->f_mapping);
852 if (res < 0) 885 if (res < 0)
853 goto out; 886 goto out;
@@ -1911,6 +1944,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1911 struct inode *old_inode = old_dentry->d_inode; 1944 struct inode *old_inode = old_dentry->d_inode;
1912 struct inode *new_inode = new_dentry->d_inode; 1945 struct inode *new_inode = new_dentry->d_inode;
1913 struct dentry *dentry = NULL, *rehash = NULL; 1946 struct dentry *dentry = NULL, *rehash = NULL;
1947 struct rpc_task *task;
1914 int error = -EBUSY; 1948 int error = -EBUSY;
1915 1949
1916 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", 1950 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
@@ -1958,8 +1992,16 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1958 if (new_inode != NULL) 1992 if (new_inode != NULL)
1959 NFS_PROTO(new_inode)->return_delegation(new_inode); 1993 NFS_PROTO(new_inode)->return_delegation(new_inode);
1960 1994
1961 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, 1995 task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
1962 new_dir, &new_dentry->d_name); 1996 if (IS_ERR(task)) {
1997 error = PTR_ERR(task);
1998 goto out;
1999 }
2000
2001 error = rpc_wait_for_completion_task(task);
2002 if (error == 0)
2003 error = task->tk_status;
2004 rpc_put_task(task);
1963 nfs_mark_for_revalidate(old_inode); 2005 nfs_mark_for_revalidate(old_inode);
1964out: 2006out:
1965 if (rehash) 2007 if (rehash)
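
nfs_rename() stops calling the per-version synchronous ->rename() method (the NFSv3 and NFSv4 copies of it are deleted later in this series) and instead submits the rename as an asynchronous RPC via nfs_async_rename(), waits for the task, and reads the outcome from task->tk_status before dropping the reference. A rough userspace analogue of that submit/wait/collect pattern, with a thread standing in for the rpc_task (all names in this sketch are invented):

#include <stdio.h>
#include <pthread.h>

/* userspace analogue of an async rpc_task: a thread plus a status */
struct task {
    pthread_t thread;
    int tk_status;
};

static void *do_rename(void *arg)
{
    struct task *t = arg;
    t->tk_status = 0;           /* pretend the RENAME RPC succeeded */
    return NULL;
}

/* analogue of nfs_async_rename(): kick off the work, return a handle */
static int async_rename(struct task *t)
{
    return pthread_create(&t->thread, NULL, do_rename, t);
}

int main(void)
{
    struct task t;
    int error = async_rename(&t);

    if (error)
        return 1;
    /* analogue of rpc_wait_for_completion_task(): block until done,
     * then take the operation's own result from the task */
    pthread_join(t.thread, NULL);
    error = t.tk_status;
    printf("rename result: %d\n", error);
    return error ? 1 : 0;
}
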
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5bb790a69c71..284ca901fe16 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -617,6 +617,7 @@ out:
617 617
618static const struct vm_operations_struct nfs_file_vm_ops = { 618static const struct vm_operations_struct nfs_file_vm_ops = {
619 .fault = filemap_fault, 619 .fault = filemap_fault,
620 .map_pages = filemap_map_pages,
620 .page_mkwrite = nfs_vm_page_mkwrite, 621 .page_mkwrite = nfs_vm_page_mkwrite,
621 .remap_pages = generic_file_remap_pages, 622 .remap_pages = generic_file_remap_pages,
622}; 623};
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c4702baa22b8..0c438973f3c8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -588,6 +588,25 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
588} 588}
589EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); 589EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
590 590
591static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
592{
593 struct dentry *parent;
594
595 parent = dget_parent(dentry);
596 nfs_force_use_readdirplus(parent->d_inode);
597 dput(parent);
598}
599
600static bool nfs_need_revalidate_inode(struct inode *inode)
601{
602 if (NFS_I(inode)->cache_validity &
603 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
604 return true;
605 if (nfs_attribute_cache_expired(inode))
606 return true;
607 return false;
608}
609
591int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 610int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
592{ 611{
593 struct inode *inode = dentry->d_inode; 612 struct inode *inode = dentry->d_inode;
@@ -616,10 +635,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
616 ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) 635 ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
617 need_atime = 0; 636 need_atime = 0;
618 637
619 if (need_atime) 638 if (need_atime || nfs_need_revalidate_inode(inode)) {
620 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 639 struct nfs_server *server = NFS_SERVER(inode);
621 else 640
622 err = nfs_revalidate_inode(NFS_SERVER(inode), inode); 641 if (server->caps & NFS_CAP_READDIRPLUS)
642 nfs_request_parent_use_readdirplus(dentry);
643 err = __nfs_revalidate_inode(server, inode);
644 }
623 if (!err) { 645 if (!err) {
624 generic_fillattr(inode, stat); 646 generic_fillattr(inode, stat);
625 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); 647 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
@@ -961,9 +983,7 @@ int nfs_attribute_cache_expired(struct inode *inode)
961 */ 983 */
962int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 984int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
963{ 985{
964 if (!(NFS_I(inode)->cache_validity & 986 if (!nfs_need_revalidate_inode(inode))
965 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
966 && !nfs_attribute_cache_expired(inode))
967 return NFS_STALE(inode) ? -ESTALE : 0; 987 return NFS_STALE(inode) ? -ESTALE : 0;
968 return __nfs_revalidate_inode(server, inode); 988 return __nfs_revalidate_inode(server, inode);
969} 989}
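
On the inode side, nfs_getattr() now routes its cache check through nfs_need_revalidate_inode() and, whenever it is about to revalidate and the server supports READDIRPLUS, calls nfs_force_use_readdirplus() on the parent directory (which only acts if that directory has entries on the open_files list maintained in dir.c above). The heuristic: a burst of per-file GETATTRs underneath an open directory looks like an "ls -l", which is served far more cheaply by one READDIRPLUS sweep. A compressed, userspace view of the decision, with the kernel's validity flags reduced to booleans (all names here are illustrative):

#include <stdio.h>
#include <stdbool.h>

struct inode_state {
    bool invalid_attr;          /* NFS_INO_INVALID_ATTR / _LABEL set */
    bool cache_expired;         /* attribute cache timeout elapsed */
    bool parent_has_open_files; /* parent dir on the open_files list */
    bool server_has_readdirplus;
};

static bool need_revalidate(const struct inode_state *i)
{
    return i->invalid_attr || i->cache_expired;
}

/* sketch of the nfs_getattr() decision added by this series */
static void getattr(const struct inode_state *i, bool need_atime)
{
    if (need_atime || need_revalidate(i)) {
        if (i->server_has_readdirplus && i->parent_has_open_files)
            printf("force READDIRPLUS on parent, flush its cache\n");
        printf("revalidate inode over the wire\n");
    } else {
        printf("serve getattr from cache\n");
    }
}

int main(void)
{
    struct inode_state i = { true, false, true, true };
    getattr(&i, false);
    return 0;
}
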
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b46cf5a67329..dd8bfc2e2464 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -301,6 +301,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
301 const char *ip_addr); 301 const char *ip_addr);
302 302
303/* dir.c */ 303/* dir.c */
304extern void nfs_force_use_readdirplus(struct inode *dir);
304extern unsigned long nfs_access_cache_count(struct shrinker *shrink, 305extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
305 struct shrink_control *sc); 306 struct shrink_control *sc);
306extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, 307extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
@@ -474,6 +475,13 @@ extern int nfs_migrate_page(struct address_space *,
474#define nfs_migrate_page NULL 475#define nfs_migrate_page NULL
475#endif 476#endif
476 477
478/* unlink.c */
479extern struct rpc_task *
480nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
481 struct dentry *old_dentry, struct dentry *new_dentry,
482 void (*complete)(struct rpc_task *, struct nfs_renamedata *));
483extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
484
477/* direct.c */ 485/* direct.c */
478void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, 486void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
479 struct nfs_direct_req *dreq); 487 struct nfs_direct_req *dreq);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index a462ef0fb5d6..db60149c4579 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -479,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
479} 479}
480 480
481static int 481static int
482nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
483 struct inode *new_dir, struct qstr *new_name)
484{
485 struct nfs_renameargs arg = {
486 .old_dir = NFS_FH(old_dir),
487 .old_name = old_name,
488 .new_dir = NFS_FH(new_dir),
489 .new_name = new_name,
490 };
491 struct nfs_renameres res;
492 struct rpc_message msg = {
493 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
494 .rpc_argp = &arg,
495 .rpc_resp = &res,
496 };
497 int status = -ENOMEM;
498
499 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
500
501 res.old_fattr = nfs_alloc_fattr();
502 res.new_fattr = nfs_alloc_fattr();
503 if (res.old_fattr == NULL || res.new_fattr == NULL)
504 goto out;
505
506 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
507 nfs_post_op_update_inode(old_dir, res.old_fattr);
508 nfs_post_op_update_inode(new_dir, res.new_fattr);
509out:
510 nfs_free_fattr(res.old_fattr);
511 nfs_free_fattr(res.new_fattr);
512 dprintk("NFS reply rename: %d\n", status);
513 return status;
514}
515
516static int
517nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 482nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
518{ 483{
519 struct nfs3_linkargs arg = { 484 struct nfs3_linkargs arg = {
@@ -968,7 +933,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
968 .unlink_setup = nfs3_proc_unlink_setup, 933 .unlink_setup = nfs3_proc_unlink_setup,
969 .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, 934 .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare,
970 .unlink_done = nfs3_proc_unlink_done, 935 .unlink_done = nfs3_proc_unlink_done,
971 .rename = nfs3_proc_rename,
972 .rename_setup = nfs3_proc_rename_setup, 936 .rename_setup = nfs3_proc_rename_setup,
973 .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, 937 .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare,
974 .rename_done = nfs3_proc_rename_done, 938 .rename_done = nfs3_proc_rename_done,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a5b27c2d9689..e1d1badbe53c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -427,6 +427,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
427extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 427extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
428extern void nfs_inode_find_state_and_recover(struct inode *inode, 428extern void nfs_inode_find_state_and_recover(struct inode *inode,
429 const nfs4_stateid *stateid); 429 const nfs4_stateid *stateid);
430extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *);
430extern void nfs4_schedule_lease_recovery(struct nfs_client *); 431extern void nfs4_schedule_lease_recovery(struct nfs_client *);
431extern int nfs4_wait_clnt_recover(struct nfs_client *clp); 432extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
432extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); 433extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
@@ -500,6 +501,16 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
500 return memcmp(dst, src, sizeof(*dst)) == 0; 501 return memcmp(dst, src, sizeof(*dst)) == 0;
501} 502}
502 503
504static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
505{
506 return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0;
507}
508
509static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2)
510{
511 return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
512}
513
503static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) 514static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
504{ 515{
505 return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; 516 return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
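
Alongside nfs4_stateid_match_other(), the header gains nfs4_stateid_is_newer(), which compares sequence IDs with serial-number arithmetic: the unsigned difference is reinterpreted as signed, so the ordering stays correct even when the seqid wraps past 0xffffffff (the kernel first converts the on-the-wire big-endian seqid with be32_to_cpu()). A quick standalone check of the wraparound behaviour:

#include <stdio.h>
#include <stdint.h>

/* serial-number comparison, as in nfs4_stateid_is_newer() */
static int is_newer(uint32_t s1, uint32_t s2)
{
    return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
    /* ordinary case: 7 is newer than 5 */
    printf("%d\n", is_newer(7, 5));                 /* 1 */
    /* wraparound: 2 follows 0xfffffffe, so it is "newer"
     * despite being numerically smaller */
    printf("%d\n", is_newer(2, 0xfffffffeu));       /* 1 */
    /* and the reverse direction correctly reports "older" */
    printf("%d\n", is_newer(0xfffffffeu, 2));       /* 0 */
    return 0;
}
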
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0e46d3d1b6cc..aa9ef4876046 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -531,6 +531,13 @@ int nfs40_walk_client_list(struct nfs_client *new,
531 *result = pos; 531 *result = pos;
532 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 532 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
533 __func__, pos, atomic_read(&pos->cl_count)); 533 __func__, pos, atomic_read(&pos->cl_count));
534 goto out;
535 case -ERESTARTSYS:
536 case -ETIMEDOUT:
537 /* The callback path may have been inadvertently
538 * changed. Schedule recovery!
539 */
540 nfs4_schedule_path_down_recovery(pos);
534 default: 541 default:
535 goto out; 542 goto out;
536 } 543 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 450bfedbe2f4..397be39c6dc8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1068,6 +1068,7 @@ static void nfs4_opendata_free(struct kref *kref)
1068 dput(p->dentry); 1068 dput(p->dentry);
1069 nfs_sb_deactive(sb); 1069 nfs_sb_deactive(sb);
1070 nfs_fattr_free_names(&p->f_attr); 1070 nfs_fattr_free_names(&p->f_attr);
1071 kfree(p->f_attr.mdsthreshold);
1071 kfree(p); 1072 kfree(p);
1072} 1073}
1073 1074
@@ -1137,12 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
1137 nfs4_state_set_mode_locked(state, state->state | fmode); 1138 nfs4_state_set_mode_locked(state, state->state | fmode);
1138} 1139}
1139 1140
1140static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) 1141static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
1142{
1143 struct nfs_client *clp = state->owner->so_server->nfs_client;
1144 bool need_recover = false;
1145
1146 if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly)
1147 need_recover = true;
1148 if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly)
1149 need_recover = true;
1150 if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr)
1151 need_recover = true;
1152 if (need_recover)
1153 nfs4_state_mark_reclaim_nograce(clp, state);
1154}
1155
1156static bool nfs_need_update_open_stateid(struct nfs4_state *state,
1157 nfs4_stateid *stateid)
1158{
1159 if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
1160 return true;
1161 if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
1162 nfs_test_and_clear_all_open_stateid(state);
1163 return true;
1164 }
1165 if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
1166 return true;
1167 return false;
1168}
1169
1170static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
1171 nfs4_stateid *stateid, fmode_t fmode)
1141{ 1172{
1173 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1174 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1175 case FMODE_WRITE:
1176 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1177 break;
1178 case FMODE_READ:
1179 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1180 break;
1181 case 0:
1182 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1183 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1184 clear_bit(NFS_OPEN_STATE, &state->flags);
1185 }
1186 if (stateid == NULL)
1187 return;
1188 if (!nfs_need_update_open_stateid(state, stateid))
1189 return;
1142 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 1190 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1143 nfs4_stateid_copy(&state->stateid, stateid); 1191 nfs4_stateid_copy(&state->stateid, stateid);
1144 nfs4_stateid_copy(&state->open_stateid, stateid); 1192 nfs4_stateid_copy(&state->open_stateid, stateid);
1145 set_bit(NFS_OPEN_STATE, &state->flags); 1193}
1194
1195static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
1196{
1197 write_seqlock(&state->seqlock);
1198 nfs_clear_open_stateid_locked(state, stateid, fmode);
1199 write_sequnlock(&state->seqlock);
1200 if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
1201 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
1202}
1203
1204static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
1205{
1146 switch (fmode) { 1206 switch (fmode) {
1147 case FMODE_READ: 1207 case FMODE_READ:
1148 set_bit(NFS_O_RDONLY_STATE, &state->flags); 1208 set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -1153,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
1153 case FMODE_READ|FMODE_WRITE: 1213 case FMODE_READ|FMODE_WRITE:
1154 set_bit(NFS_O_RDWR_STATE, &state->flags); 1214 set_bit(NFS_O_RDWR_STATE, &state->flags);
1155 } 1215 }
1156} 1216 if (!nfs_need_update_open_stateid(state, stateid))
1157 1217 return;
1158static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) 1218 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1159{ 1219 nfs4_stateid_copy(&state->stateid, stateid);
1160 write_seqlock(&state->seqlock); 1220 nfs4_stateid_copy(&state->open_stateid, stateid);
1161 nfs_set_open_stateid_locked(state, stateid, fmode);
1162 write_sequnlock(&state->seqlock);
1163} 1221}
1164 1222
1165static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) 1223static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
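
The open-stateid rework in nfs4proc.c splits the old nfs_set_open_stateid_locked() behaviour in two. nfs_need_update_open_stateid() decides whether a stateid returned by the server should be recorded at all: yes if it is the first one seen, yes if its "other" field names different state (in which case any still-referenced open modes are scheduled for no-grace reclaim via nfs_test_and_clear_all_open_stateid()), and otherwise only if its seqid is newer, so delayed or replayed OPEN replies can no longer clobber newer state. CLOSE replies now go through the new nfs_clear_open_stateid() instead of the removed nfs4_close_clear_stateid_flags(). A compressed model of that accept/ignore policy (structure and names simplified for the sketch; the kernel uses atomic bit operations and a seqlock around this):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>

typedef struct {
    uint32_t seqid;
    char other[12];
} stateid_t;

struct open_state {
    bool have_stateid;          /* stands in for NFS_OPEN_STATE */
    stateid_t open_stateid;
};

static bool is_newer(uint32_t a, uint32_t b)
{
    return (int32_t)(a - b) > 0;
}

/* roughly the policy of nfs_need_update_open_stateid(): take the new
 * stateid if we had none, if it names different state (the kernel also
 * schedules state recovery in that case), or if it is strictly newer */
static bool need_update(struct open_state *st, const stateid_t *new)
{
    if (!st->have_stateid) {
        st->have_stateid = true;
        return true;
    }
    if (memcmp(st->open_stateid.other, new->other, sizeof(new->other)) != 0)
        return true;
    return is_newer(new->seqid, st->open_stateid.seqid);
}

int main(void)
{
    struct open_state st = { 0 };
    stateid_t first = { .seqid = 1, .other = "abc" };
    stateid_t stale = { .seqid = 1, .other = "abc" };

    printf("%d\n", need_update(&st, &first));   /* 1: first stateid seen */
    st.open_stateid = first;
    printf("%d\n", need_update(&st, &stale));   /* 0: replay, ignored */
    return 0;
}
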
@@ -1217,6 +1275,8 @@ no_delegation:
1217 __update_open_stateid(state, open_stateid, NULL, fmode); 1275 __update_open_stateid(state, open_stateid, NULL, fmode);
1218 ret = 1; 1276 ret = 1;
1219 } 1277 }
1278 if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
1279 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
1220 1280
1221 return ret; 1281 return ret;
1222} 1282}
@@ -1450,12 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1450 struct nfs4_state *newstate; 1510 struct nfs4_state *newstate;
1451 int ret; 1511 int ret;
1452 1512
1513 /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
1514 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1515 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1516 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1453 /* memory barrier prior to reading state->n_* */ 1517 /* memory barrier prior to reading state->n_* */
1454 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1518 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1455 clear_bit(NFS_OPEN_STATE, &state->flags); 1519 clear_bit(NFS_OPEN_STATE, &state->flags);
1456 smp_rmb(); 1520 smp_rmb();
1457 if (state->n_rdwr != 0) { 1521 if (state->n_rdwr != 0) {
1458 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1459 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1522 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
1460 if (ret != 0) 1523 if (ret != 0)
1461 return ret; 1524 return ret;
@@ -1463,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1463 return -ESTALE; 1526 return -ESTALE;
1464 } 1527 }
1465 if (state->n_wronly != 0) { 1528 if (state->n_wronly != 0) {
1466 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1467 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1529 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
1468 if (ret != 0) 1530 if (ret != 0)
1469 return ret; 1531 return ret;
@@ -1471,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1471 return -ESTALE; 1533 return -ESTALE;
1472 } 1534 }
1473 if (state->n_rdonly != 0) { 1535 if (state->n_rdonly != 0) {
1474 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1475 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); 1536 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1476 if (ret != 0) 1537 if (ret != 0)
1477 return ret; 1538 return ret;
@@ -2244,10 +2305,12 @@ static int _nfs4_do_open(struct inode *dir,
2244 } 2305 }
2245 } 2306 }
2246 2307
2247 if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { 2308 if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
2248 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); 2309 if (!opendata->f_attr.mdsthreshold) {
2249 if (!opendata->f_attr.mdsthreshold) 2310 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
2250 goto err_free_label; 2311 if (!opendata->f_attr.mdsthreshold)
2312 goto err_free_label;
2313 }
2251 opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; 2314 opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
2252 } 2315 }
2253 if (dentry->d_inode != NULL) 2316 if (dentry->d_inode != NULL)
@@ -2275,11 +2338,10 @@ static int _nfs4_do_open(struct inode *dir,
2275 if (opendata->file_created) 2338 if (opendata->file_created)
2276 *opened |= FILE_CREATED; 2339 *opened |= FILE_CREATED;
2277 2340
2278 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) 2341 if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
2279 *ctx_th = opendata->f_attr.mdsthreshold; 2342 *ctx_th = opendata->f_attr.mdsthreshold;
2280 else 2343 opendata->f_attr.mdsthreshold = NULL;
2281 kfree(opendata->f_attr.mdsthreshold); 2344 }
2282 opendata->f_attr.mdsthreshold = NULL;
2283 2345
2284 nfs4_label_free(olabel); 2346 nfs4_label_free(olabel);
2285 2347
@@ -2289,7 +2351,6 @@ static int _nfs4_do_open(struct inode *dir,
2289err_free_label: 2351err_free_label:
2290 nfs4_label_free(olabel); 2352 nfs4_label_free(olabel);
2291err_opendata_put: 2353err_opendata_put:
2292 kfree(opendata->f_attr.mdsthreshold);
2293 nfs4_opendata_put(opendata); 2354 nfs4_opendata_put(opendata);
2294err_put_state_owner: 2355err_put_state_owner:
2295 nfs4_put_state_owner(sp); 2356 nfs4_put_state_owner(sp);
@@ -2479,26 +2540,6 @@ static void nfs4_free_closedata(void *data)
2479 kfree(calldata); 2540 kfree(calldata);
2480} 2541}
2481 2542
2482static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
2483 fmode_t fmode)
2484{
2485 spin_lock(&state->owner->so_lock);
2486 clear_bit(NFS_O_RDWR_STATE, &state->flags);
2487 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
2488 case FMODE_WRITE:
2489 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2490 break;
2491 case FMODE_READ:
2492 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2493 break;
2494 case 0:
2495 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2496 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2497 clear_bit(NFS_OPEN_STATE, &state->flags);
2498 }
2499 spin_unlock(&state->owner->so_lock);
2500}
2501
2502static void nfs4_close_done(struct rpc_task *task, void *data) 2543static void nfs4_close_done(struct rpc_task *task, void *data)
2503{ 2544{
2504 struct nfs4_closedata *calldata = data; 2545 struct nfs4_closedata *calldata = data;
@@ -2517,9 +2558,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2517 if (calldata->roc) 2558 if (calldata->roc)
2518 pnfs_roc_set_barrier(state->inode, 2559 pnfs_roc_set_barrier(state->inode,
2519 calldata->roc_barrier); 2560 calldata->roc_barrier);
2520 nfs_set_open_stateid(state, &calldata->res.stateid, 0); 2561 nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
2521 renew_lease(server, calldata->timestamp); 2562 renew_lease(server, calldata->timestamp);
2522 break; 2563 goto out_release;
2523 case -NFS4ERR_ADMIN_REVOKED: 2564 case -NFS4ERR_ADMIN_REVOKED:
2524 case -NFS4ERR_STALE_STATEID: 2565 case -NFS4ERR_STALE_STATEID:
2525 case -NFS4ERR_OLD_STATEID: 2566 case -NFS4ERR_OLD_STATEID:
@@ -2533,7 +2574,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2533 goto out_release; 2574 goto out_release;
2534 } 2575 }
2535 } 2576 }
2536 nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); 2577 nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
2537out_release: 2578out_release:
2538 nfs_release_seqid(calldata->arg.seqid); 2579 nfs_release_seqid(calldata->arg.seqid);
2539 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2580 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -3507,49 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
3507 return 1; 3548 return 1;
3508} 3549}
3509 3550
3510static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
3511 struct inode *new_dir, struct qstr *new_name)
3512{
3513 struct nfs_server *server = NFS_SERVER(old_dir);
3514 struct nfs_renameargs arg = {
3515 .old_dir = NFS_FH(old_dir),
3516 .new_dir = NFS_FH(new_dir),
3517 .old_name = old_name,
3518 .new_name = new_name,
3519 };
3520 struct nfs_renameres res = {
3521 .server = server,
3522 };
3523 struct rpc_message msg = {
3524 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
3525 .rpc_argp = &arg,
3526 .rpc_resp = &res,
3527 };
3528 int status = -ENOMEM;
3529
3530 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3531 if (!status) {
3532 update_changeattr(old_dir, &res.old_cinfo);
3533 update_changeattr(new_dir, &res.new_cinfo);
3534 }
3535 return status;
3536}
3537
3538static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
3539 struct inode *new_dir, struct qstr *new_name)
3540{
3541 struct nfs4_exception exception = { };
3542 int err;
3543 do {
3544 err = _nfs4_proc_rename(old_dir, old_name,
3545 new_dir, new_name);
3546 trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err);
3547 err = nfs4_handle_exception(NFS_SERVER(old_dir), err,
3548 &exception);
3549 } while (exception.retry);
3550 return err;
3551}
3552
3553static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 3551static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
3554{ 3552{
3555 struct nfs_server *server = NFS_SERVER(inode); 3553 struct nfs_server *server = NFS_SERVER(inode);
@@ -4884,6 +4882,20 @@ nfs4_init_uniform_client_string(const struct nfs_client *clp,
4884 nodename); 4882 nodename);
4885} 4883}
4886 4884
4885/*
4886 * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback
4887 * services. Advertise one based on the address family of the
4888 * clientaddr.
4889 */
4890static unsigned int
4891nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
4892{
4893 if (strchr(clp->cl_ipaddr, ':') != NULL)
4894 return scnprintf(buf, len, "tcp6");
4895 else
4896 return scnprintf(buf, len, "tcp");
4897}
4898
4887/** 4899/**
4888 * nfs4_proc_setclientid - Negotiate client ID 4900 * nfs4_proc_setclientid - Negotiate client ID
4889 * @clp: state data structure 4901 * @clp: state data structure
@@ -4925,12 +4937,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4925 setclientid.sc_name, 4937 setclientid.sc_name,
4926 sizeof(setclientid.sc_name)); 4938 sizeof(setclientid.sc_name));
4927 /* cb_client4 */ 4939 /* cb_client4 */
4928 rcu_read_lock(); 4940 setclientid.sc_netid_len =
4929 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, 4941 nfs4_init_callback_netid(clp,
4930 sizeof(setclientid.sc_netid), "%s", 4942 setclientid.sc_netid,
4931 rpc_peeraddr2str(clp->cl_rpcclient, 4943 sizeof(setclientid.sc_netid));
4932 RPC_DISPLAY_NETID));
4933 rcu_read_unlock();
4934 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, 4944 setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
4935 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 4945 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
4936 clp->cl_ipaddr, port >> 8, port & 255); 4946 clp->cl_ipaddr, port >> 8, port & 255);
@@ -8408,7 +8418,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
8408 .unlink_setup = nfs4_proc_unlink_setup, 8418 .unlink_setup = nfs4_proc_unlink_setup,
8409 .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, 8419 .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare,
8410 .unlink_done = nfs4_proc_unlink_done, 8420 .unlink_done = nfs4_proc_unlink_done,
8411 .rename = nfs4_proc_rename,
8412 .rename_setup = nfs4_proc_rename_setup, 8421 .rename_setup = nfs4_proc_rename_setup,
8413 .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, 8422 .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare,
8414 .rename_done = nfs4_proc_rename_done, 8423 .rename_done = nfs4_proc_rename_done,
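[Editor's note] The new nfs4_init_callback_netid() helper in the hunk above replaces the rpc_peeraddr2str() lookup: since nfs4_callback_up_net() only registers "tcp" and "tcp6" listeners, the netid advertised in SETCLIENTID can be derived purely from the address family of clp->cl_ipaddr. A minimal userspace sketch of the same decision (the helper name here is illustrative, not the kernel code itself):

#include <stdio.h>
#include <string.h>

/* An IPv6 presentation address always contains ':', so it maps to
 * "tcp6"; anything else is treated as IPv4 and maps to "tcp". */
static const char *callback_netid(const char *clientaddr)
{
	return strchr(clientaddr, ':') ? "tcp6" : "tcp";
}

int main(void)
{
	printf("%s\n", callback_netid("192.0.2.1"));   /* tcp  */
	printf("%s\n", callback_netid("2001:db8::1")); /* tcp6 */
	return 0;
}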
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0deb32105ccf..2349518eef2c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1316,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st
1316 return 1; 1316 return 1;
1317} 1317}
1318 1318
1319static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1319int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1320{ 1320{
1321 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1321 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1322 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1322 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -2075,8 +2075,10 @@ again:
2075 switch (status) { 2075 switch (status) {
2076 case 0: 2076 case 0:
2077 break; 2077 break;
2078 case -NFS4ERR_DELAY:
2079 case -ETIMEDOUT: 2078 case -ETIMEDOUT:
2079 if (clnt->cl_softrtry)
2080 break;
2081 case -NFS4ERR_DELAY:
2080 case -EAGAIN: 2082 case -EAGAIN:
2081 ssleep(1); 2083 ssleep(1);
2082 case -NFS4ERR_STALE_CLIENTID: 2084 case -NFS4ERR_STALE_CLIENTID:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 72f3bf1754ef..73ce8d4fe2c8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -203,8 +203,7 @@ static int nfs4_stat_to_errno(int);
203 2 + encode_verifier_maxsz + 5 + \ 203 2 + encode_verifier_maxsz + 5 + \
204 nfs4_label_maxsz) 204 nfs4_label_maxsz)
205#define decode_readdir_maxsz (op_decode_hdr_maxsz + \ 205#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
206 decode_verifier_maxsz + \ 206 decode_verifier_maxsz)
207 nfs4_label_maxsz + nfs4_fattr_maxsz)
208#define encode_readlink_maxsz (op_encode_hdr_maxsz) 207#define encode_readlink_maxsz (op_encode_hdr_maxsz)
209#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) 208#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
210#define encode_write_maxsz (op_encode_hdr_maxsz + \ 209#define encode_write_maxsz (op_encode_hdr_maxsz + \
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4755858e37a0..cb53d450ae32 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -662,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
662 */ 662 */
663static bool pnfs_seqid_is_newer(u32 s1, u32 s2) 663static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
664{ 664{
665 return (s32)s1 - (s32)s2 > 0; 665 return (s32)(s1 - s2) > 0;
666}
667
668static void
669pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo,
670 const nfs4_stateid *new,
671 struct list_head *free_me_list)
672{
673 if (nfs4_stateid_match_other(&lo->plh_stateid, new))
674 return;
675 /* Layout is new! Kill existing layout segments */
676 pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL);
666} 677}
667 678
668/* update lo->plh_stateid with new if is more recent */ 679/* update lo->plh_stateid with new if is more recent */
@@ -1315,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1315 struct nfs4_layoutget_res *res = &lgp->res; 1326 struct nfs4_layoutget_res *res = &lgp->res;
1316 struct pnfs_layout_segment *lseg; 1327 struct pnfs_layout_segment *lseg;
1317 struct inode *ino = lo->plh_inode; 1328 struct inode *ino = lo->plh_inode;
1329 LIST_HEAD(free_me);
1318 int status = 0; 1330 int status = 0;
1319 1331
1320 /* Inject layout blob into I/O device driver */ 1332 /* Inject layout blob into I/O device driver */
@@ -1341,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1341 goto out_forget_reply; 1353 goto out_forget_reply;
1342 } 1354 }
1343 1355
1356 /* Check that the new stateid matches the old stateid */
1357 pnfs_verify_layout_stateid(lo, &res->stateid, &free_me);
1344 /* Done processing layoutget. Set the layout stateid */ 1358 /* Done processing layoutget. Set the layout stateid */
1345 pnfs_set_layout_stateid(lo, &res->stateid, false); 1359 pnfs_set_layout_stateid(lo, &res->stateid, false);
1346 1360
@@ -1355,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1355 } 1369 }
1356 1370
1357 spin_unlock(&ino->i_lock); 1371 spin_unlock(&ino->i_lock);
1372 pnfs_free_lseg_list(&free_me);
1358 return lseg; 1373 return lseg;
1359out: 1374out:
1360 return ERR_PTR(status); 1375 return ERR_PTR(status);
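[Editor's note] The one-character change to pnfs_seqid_is_newer() is easy to miss: the subtraction now happens in unsigned arithmetic and only the result is cast, so the comparison stays well defined (no signed overflow on distant values) and still treats a wrapped 32-bit sequence counter as newer. A standalone sketch of the same idiom:

#include <stdint.h>
#include <stdio.h>

/* Serial-number style comparison: s1 is "newer" than s2 if the
 * unsigned difference, reinterpreted as signed, is positive. */
static int seqid_is_newer(uint32_t s1, uint32_t s2)
{
	return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
	/* s1 has just wrapped past zero but is still newer than s2. */
	printf("%d\n", seqid_is_newer(2u, 0xfffffffeu)); /* prints 1 */
	printf("%d\n", seqid_is_newer(0xfffffffeu, 2u)); /* prints 0 */
	return 0;
}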
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index fddbba2d9eff..e55ce9e8b034 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -357,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
357} 357}
358 358
359static int 359static int
360nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
361 struct inode *new_dir, struct qstr *new_name)
362{
363 struct nfs_renameargs arg = {
364 .old_dir = NFS_FH(old_dir),
365 .old_name = old_name,
366 .new_dir = NFS_FH(new_dir),
367 .new_name = new_name,
368 };
369 struct rpc_message msg = {
370 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
371 .rpc_argp = &arg,
372 };
373 int status;
374
375 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
376 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
377 nfs_mark_for_revalidate(old_dir);
378 nfs_mark_for_revalidate(new_dir);
379 dprintk("NFS reply rename: %d\n", status);
380 return status;
381}
382
383static int
384nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 360nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
385{ 361{
386 struct nfs_linkargs arg = { 362 struct nfs_linkargs arg = {
@@ -745,7 +721,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
745 .unlink_setup = nfs_proc_unlink_setup, 721 .unlink_setup = nfs_proc_unlink_setup,
746 .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, 722 .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare,
747 .unlink_done = nfs_proc_unlink_done, 723 .unlink_done = nfs_proc_unlink_done,
748 .rename = nfs_proc_rename,
749 .rename_setup = nfs_proc_rename_setup, 724 .rename_setup = nfs_proc_rename_setup,
750 .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, 725 .rename_rpc_prepare = nfs_proc_rename_rpc_prepare,
751 .rename_done = nfs_proc_rename_done, 726 .rename_done = nfs_proc_rename_done,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 11d78944de79..de54129336c6 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -14,6 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/fsnotify.h>
17 18
18#include "internal.h" 19#include "internal.h"
19#include "nfs4_fs.h" 20#include "nfs4_fs.h"
@@ -353,8 +354,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
353 return; 354 return;
354 } 355 }
355 356
356 if (task->tk_status != 0) 357 if (data->complete)
357 nfs_cancel_async_unlink(old_dentry); 358 data->complete(task, data);
358} 359}
359 360
360/** 361/**
@@ -399,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = {
399 * 400 *
400 * It's expected that valid references to the dentries and inodes are held 401 * It's expected that valid references to the dentries and inodes are held
401 */ 402 */
402static struct rpc_task * 403struct rpc_task *
403nfs_async_rename(struct inode *old_dir, struct inode *new_dir, 404nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
404 struct dentry *old_dentry, struct dentry *new_dentry) 405 struct dentry *old_dentry, struct dentry *new_dentry,
406 void (*complete)(struct rpc_task *, struct nfs_renamedata *))
405{ 407{
406 struct nfs_renamedata *data; 408 struct nfs_renamedata *data;
407 struct rpc_message msg = { }; 409 struct rpc_message msg = { };
@@ -438,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
438 data->new_dentry = dget(new_dentry); 440 data->new_dentry = dget(new_dentry);
439 nfs_fattr_init(&data->old_fattr); 441 nfs_fattr_init(&data->old_fattr);
440 nfs_fattr_init(&data->new_fattr); 442 nfs_fattr_init(&data->new_fattr);
443 data->complete = complete;
441 444
442 /* set up nfs_renameargs */ 445 /* set up nfs_renameargs */
443 data->args.old_dir = NFS_FH(old_dir); 446 data->args.old_dir = NFS_FH(old_dir);
@@ -456,6 +459,27 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
456 return rpc_run_task(&task_setup_data); 459 return rpc_run_task(&task_setup_data);
457} 460}
458 461
462/*
463 * Perform tasks needed when a sillyrename is done such as cancelling the
464 * queued async unlink if it failed.
465 */
466static void
467nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
468{
469 struct dentry *dentry = data->old_dentry;
470
471 if (task->tk_status != 0) {
472 nfs_cancel_async_unlink(dentry);
473 return;
474 }
475
476 /*
477 * vfs_unlink and the like do not issue this when a file is
478 * sillyrenamed, so do it here.
479 */
480 fsnotify_nameremove(dentry, 0);
481}
482
459#define SILLYNAME_PREFIX ".nfs" 483#define SILLYNAME_PREFIX ".nfs"
460#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) 484#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1)
461#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) 485#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1)
@@ -548,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
548 } 572 }
549 573
550 /* run the rename task, undo unlink if it fails */ 574 /* run the rename task, undo unlink if it fails */
551 task = nfs_async_rename(dir, dir, dentry, sdentry); 575 task = nfs_async_rename(dir, dir, dentry, sdentry,
576 nfs_complete_sillyrename);
552 if (IS_ERR(task)) { 577 if (IS_ERR(task)) {
553 error = -EBUSY; 578 error = -EBUSY;
554 nfs_cancel_async_unlink(dentry); 579 nfs_cancel_async_unlink(dentry);
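[Editor's note] The unlink.c change turns nfs_async_rename() into a small callback-driven interface: the caller passes a completion function, and the sillyrename path supplies nfs_complete_sillyrename() to cancel the queued unlink on failure and raise the fsnotify delete event on success. A compact sketch of that shape (all names and types below are illustrative, not the kernel structures):

#include <stdio.h>

struct renamedata {
	int status;           /* result of the RENAME call */
	const char *old_name; /* name being sillyrenamed away */
};

typedef void (*rename_complete_fn)(struct renamedata *data);

/* Stand-in for the async RENAME: run it, then invoke the completion. */
static void async_rename(struct renamedata *data, rename_complete_fn complete)
{
	if (complete)
		complete(data);
}

static void complete_sillyrename(struct renamedata *data)
{
	if (data->status != 0) {
		printf("cancel queued unlink of %s\n", data->old_name);
		return;
	}
	printf("notify watchers that %s was removed\n", data->old_name);
}

int main(void)
{
	struct renamedata d = { .status = 0, .old_name = "file.txt" };
	async_rename(&d, complete_sillyrename);
	return 0;
}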
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index a812fd1b92a4..b481e1f5eecc 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -39,9 +39,13 @@ struct nfs4_acl;
39struct svc_fh; 39struct svc_fh;
40struct svc_rqst; 40struct svc_rqst;
41 41
42/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to 42/*
43 * fit in a page: */ 43 * Maximum ACL we'll accept from a client; chosen (somewhat
44#define NFS4_ACL_MAX 170 44 * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
45 * high-order allocation. This allows 204 ACEs on x86_64:
46 */
47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
48 / sizeof(struct nfs4_ace))
45 49
46struct nfs4_acl *nfs4_acl_new(int); 50struct nfs4_acl *nfs4_acl_new(int);
47int nfs4_acl_get_whotype(char *, u32); 51int nfs4_acl_get_whotype(char *, u32);
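[Editor's note] The rewritten NFS4_ACL_MAX ties the limit to what fits in a single page rather than the hard-coded 170. With the sizes the comment alludes to on x86_64 (4096-byte pages, a 4-byte struct nfs4_acl header and 20-byte ACEs, assumed here purely for illustration), the arithmetic comes out to the quoted 204 entries:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096; /* assumed PAGE_SIZE */
	unsigned long acl_hdr   = 4;    /* assumed sizeof(struct nfs4_acl) */
	unsigned long ace_size  = 20;   /* assumed sizeof(struct nfs4_ace) */

	/* (4096 - 4) / 20 = 204 */
	printf("NFS4_ACL_MAX = %lu\n", (page_size - acl_hdr) / ace_size);
	return 0;
}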
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index d190e33d0ec2..6f3f392d48af 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -542,7 +542,10 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
542 * up setting a 3-element effective posix ACL with all 542 * up setting a 3-element effective posix ACL with all
543 * permissions zero. 543 * permissions zero.
544 */ 544 */
545 nace = 4 + state->users->n + state->groups->n; 545 if (!state->users->n && !state->groups->n)
546 nace = 3;
547 else /* Note we also include a MASK ACE in this case: */
548 nace = 4 + state->users->n + state->groups->n;
546 pacl = posix_acl_alloc(nace, GFP_KERNEL); 549 pacl = posix_acl_alloc(nace, GFP_KERNEL);
547 if (!pacl) 550 if (!pacl)
548 return ERR_PTR(-ENOMEM); 551 return ERR_PTR(-ENOMEM);
@@ -586,9 +589,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
586 add_to_mask(state, &state->groups->aces[i].perms); 589 add_to_mask(state, &state->groups->aces[i].perms);
587 } 590 }
588 591
589 pace++; 592 if (!state->users->n && !state->groups->n) {
590 pace->e_tag = ACL_MASK; 593 pace++;
591 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); 594 pace->e_tag = ACL_MASK;
595 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
596 }
592 597
593 pace++; 598 pace++;
594 pace->e_tag = ACL_OTHER; 599 pace->e_tag = ACL_OTHER;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7f05cd140de3..39c8ef875f91 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
32 */ 32 */
33 33
34#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
35#include <linux/sunrpc/xprt.h>
35#include <linux/sunrpc/svc_xprt.h> 36#include <linux/sunrpc/svc_xprt.h>
36#include <linux/slab.h> 37#include <linux/slab.h>
37#include "nfsd.h" 38#include "nfsd.h"
@@ -635,6 +636,22 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
635 } 636 }
636} 637}
637 638
639static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
640{
641 struct rpc_xprt *xprt;
642
643 if (args->protocol != XPRT_TRANSPORT_BC_TCP)
644 return rpc_create(args);
645
646 xprt = args->bc_xprt->xpt_bc_xprt;
647 if (xprt) {
648 xprt_get(xprt);
649 return rpc_create_xprt(args, xprt);
650 }
651
652 return rpc_create(args);
653}
654
638static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) 655static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
639{ 656{
640 struct rpc_timeout timeparms = { 657 struct rpc_timeout timeparms = {
@@ -674,7 +691,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
674 args.authflavor = ses->se_cb_sec.flavor; 691 args.authflavor = ses->se_cb_sec.flavor;
675 } 692 }
676 /* Create RPC client */ 693 /* Create RPC client */
677 client = rpc_create(&args); 694 client = create_backchannel_client(&args);
678 if (IS_ERR(client)) { 695 if (IS_ERR(client)) {
679 dprintk("NFSD: couldn't create callback client: %ld\n", 696 dprintk("NFSD: couldn't create callback client: %ld\n",
680 PTR_ERR(client)); 697 PTR_ERR(client));
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 82189b208af3..d543222babf3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1273,6 +1273,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1273 struct nfsd4_op *op; 1273 struct nfsd4_op *op;
1274 struct nfsd4_operation *opdesc; 1274 struct nfsd4_operation *opdesc;
1275 struct nfsd4_compound_state *cstate = &resp->cstate; 1275 struct nfsd4_compound_state *cstate = &resp->cstate;
1276 struct svc_fh *current_fh = &cstate->current_fh;
1277 struct svc_fh *save_fh = &cstate->save_fh;
1276 int slack_bytes; 1278 int slack_bytes;
1277 u32 plen = 0; 1279 u32 plen = 0;
1278 __be32 status; 1280 __be32 status;
@@ -1288,11 +1290,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1288 resp->tag = args->tag; 1290 resp->tag = args->tag;
1289 resp->opcnt = 0; 1291 resp->opcnt = 0;
1290 resp->rqstp = rqstp; 1292 resp->rqstp = rqstp;
1291 resp->cstate.minorversion = args->minorversion; 1293 cstate->minorversion = args->minorversion;
1292 resp->cstate.replay_owner = NULL; 1294 cstate->replay_owner = NULL;
1293 resp->cstate.session = NULL; 1295 cstate->session = NULL;
1294 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1296 fh_init(current_fh, NFS4_FHSIZE);
1295 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1297 fh_init(save_fh, NFS4_FHSIZE);
1296 /* 1298 /*
1297 * Don't use the deferral mechanism for NFSv4; compounds make it 1299 * Don't use the deferral mechanism for NFSv4; compounds make it
1298 * too hard to avoid non-idempotency problems. 1300 * too hard to avoid non-idempotency problems.
@@ -1345,20 +1347,28 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1345 1347
1346 opdesc = OPDESC(op); 1348 opdesc = OPDESC(op);
1347 1349
1348 if (!cstate->current_fh.fh_dentry) { 1350 if (!current_fh->fh_dentry) {
1349 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { 1351 if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
1350 op->status = nfserr_nofilehandle; 1352 op->status = nfserr_nofilehandle;
1351 goto encode_op; 1353 goto encode_op;
1352 } 1354 }
1353 } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && 1355 } else if (current_fh->fh_export->ex_fslocs.migrated &&
1354 !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { 1356 !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
1355 op->status = nfserr_moved; 1357 op->status = nfserr_moved;
1356 goto encode_op; 1358 goto encode_op;
1357 } 1359 }
1358 1360
1361 fh_clear_wcc(current_fh);
1362
1359 /* If op is non-idempotent */ 1363 /* If op is non-idempotent */
1360 if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { 1364 if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
1361 plen = opdesc->op_rsize_bop(rqstp, op); 1365 plen = opdesc->op_rsize_bop(rqstp, op);
1366 /*
1367 * If there's still another operation, make sure
1368 * we'll have space to at least encode an error:
1369 */
1370 if (resp->opcnt < args->opcnt)
1371 plen += COMPOUND_ERR_SLACK_SPACE;
1362 op->status = nfsd4_check_resp_size(resp, plen); 1372 op->status = nfsd4_check_resp_size(resp, plen);
1363 } 1373 }
1364 1374
@@ -1377,12 +1387,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1377 clear_current_stateid(cstate); 1387 clear_current_stateid(cstate);
1378 1388
1379 if (need_wrongsec_check(rqstp)) 1389 if (need_wrongsec_check(rqstp))
1380 op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); 1390 op->status = check_nfsd_access(current_fh->fh_export, rqstp);
1381 } 1391 }
1382 1392
1383encode_op: 1393encode_op:
1384 /* Only from SEQUENCE */ 1394 /* Only from SEQUENCE */
1385 if (resp->cstate.status == nfserr_replay_cache) { 1395 if (cstate->status == nfserr_replay_cache) {
1386 dprintk("%s NFS4.1 replay from cache\n", __func__); 1396 dprintk("%s NFS4.1 replay from cache\n", __func__);
1387 status = op->status; 1397 status = op->status;
1388 goto out; 1398 goto out;
@@ -1411,10 +1421,10 @@ encode_op:
1411 nfsd4_increment_op_stats(op->opnum); 1421 nfsd4_increment_op_stats(op->opnum);
1412 } 1422 }
1413 1423
1414 resp->cstate.status = status; 1424 cstate->status = status;
1415 fh_put(&resp->cstate.current_fh); 1425 fh_put(current_fh);
1416 fh_put(&resp->cstate.save_fh); 1426 fh_put(save_fh);
1417 BUG_ON(resp->cstate.replay_owner); 1427 BUG_ON(cstate->replay_owner);
1418out: 1428out:
1419 /* Reset deferral mechanism for RPC deferrals */ 1429 /* Reset deferral mechanism for RPC deferrals */
1420 rqstp->rq_usedeferral = 1; 1430 rqstp->rq_usedeferral = 1;
@@ -1523,7 +1533,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
1523 1533
1524static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1534static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1525{ 1535{
1526 return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); 1536 return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
1537 sizeof(__be32);
1527} 1538}
1528 1539
1529static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1540static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5d070fbeb35..3ba65979a3cd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1538,7 +1538,7 @@ out_err:
1538} 1538}
1539 1539
1540/* 1540/*
1541 * Cache a reply. nfsd4_check_drc_limit() has bounded the cache size. 1541 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
1542 */ 1542 */
1543void 1543void
1544nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 1544nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
@@ -1596,7 +1596,7 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
1596 * The sequence operation is not cached because we can use the slot and 1596 * The sequence operation is not cached because we can use the slot and
1597 * session values. 1597 * session values.
1598 */ 1598 */
1599__be32 1599static __be32
1600nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 1600nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1601 struct nfsd4_sequence *seq) 1601 struct nfsd4_sequence *seq)
1602{ 1602{
@@ -1605,9 +1605,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
1605 1605
1606 dprintk("--> %s slot %p\n", __func__, slot); 1606 dprintk("--> %s slot %p\n", __func__, slot);
1607 1607
1608 /* Either returns 0 or nfserr_retry_uncached */
1609 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp); 1608 status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
1610 if (status == nfserr_retry_uncached_rep) 1609 if (status)
1611 return status; 1610 return status;
1612 1611
1613 /* The sequence operation has been encoded, cstate->datap set. */ 1612 /* The sequence operation has been encoded, cstate->datap set. */
@@ -2287,7 +2286,8 @@ out:
2287 if (!list_empty(&clp->cl_revoked)) 2286 if (!list_empty(&clp->cl_revoked))
2288 seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; 2287 seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
2289out_no_session: 2288out_no_session:
2290 kfree(conn); 2289 if (conn)
2290 free_conn(conn);
2291 spin_unlock(&nn->client_lock); 2291 spin_unlock(&nn->client_lock);
2292 return status; 2292 return status;
2293out_put_session: 2293out_put_session:
@@ -3627,8 +3627,11 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
3627 return nfserr_bad_stateid; 3627 return nfserr_bad_stateid;
3628 status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, 3628 status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,
3629 nn, &cl); 3629 nn, &cl);
3630 if (status == nfserr_stale_clientid) 3630 if (status == nfserr_stale_clientid) {
3631 if (sessions)
3632 return nfserr_bad_stateid;
3631 return nfserr_stale_stateid; 3633 return nfserr_stale_stateid;
3634 }
3632 if (status) 3635 if (status)
3633 return status; 3636 return status;
3634 *s = find_stateid_by_type(cl, stateid, typemask); 3637 *s = find_stateid_by_type(cl, stateid, typemask);
@@ -5062,7 +5065,6 @@ nfs4_state_destroy_net(struct net *net)
5062 int i; 5065 int i;
5063 struct nfs4_client *clp = NULL; 5066 struct nfs4_client *clp = NULL;
5064 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 5067 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
5065 struct rb_node *node, *tmp;
5066 5068
5067 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 5069 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
5068 while (!list_empty(&nn->conf_id_hashtbl[i])) { 5070 while (!list_empty(&nn->conf_id_hashtbl[i])) {
@@ -5071,13 +5073,11 @@ nfs4_state_destroy_net(struct net *net)
5071 } 5073 }
5072 } 5074 }
5073 5075
5074 node = rb_first(&nn->unconf_name_tree); 5076 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
5075 while (node != NULL) { 5077 while (!list_empty(&nn->unconf_id_hashtbl[i])) {
5076 tmp = node; 5078 clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
5077 node = rb_next(tmp); 5079 destroy_client(clp);
5078 clp = rb_entry(tmp, struct nfs4_client, cl_namenode); 5080 }
5079 rb_erase(tmp, &nn->unconf_name_tree);
5080 destroy_client(clp);
5081 } 5081 }
5082 5082
5083 kfree(nn->sessionid_hashtbl); 5083 kfree(nn->sessionid_hashtbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 63f2395c57ed..2723c1badd01 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -294,7 +294,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
294 READ32(nace); 294 READ32(nace);
295 295
296 if (nace > NFS4_ACL_MAX) 296 if (nace > NFS4_ACL_MAX)
297 return nfserr_resource; 297 return nfserr_fbig;
298 298
299 *acl = nfs4_acl_new(nace); 299 *acl = nfs4_acl_new(nace);
300 if (*acl == NULL) 300 if (*acl == NULL)
@@ -1222,7 +1222,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
1222 } 1222 }
1223 write->wr_head.iov_base = p; 1223 write->wr_head.iov_base = p;
1224 write->wr_head.iov_len = avail; 1224 write->wr_head.iov_len = avail;
1225 WARN_ON(avail != (XDR_QUADLEN(avail) << 2));
1226 write->wr_pagelist = argp->pagelist; 1225 write->wr_pagelist = argp->pagelist;
1227 1226
1228 len = XDR_QUADLEN(write->wr_buflen) << 2; 1227 len = XDR_QUADLEN(write->wr_buflen) << 2;
@@ -2483,6 +2482,8 @@ out_acl:
2483 goto out; 2482 goto out;
2484 } 2483 }
2485 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { 2484 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
2485 if ((buflen -= 16) < 0)
2486 goto out_resource;
2486 WRITE32(3); 2487 WRITE32(3);
2487 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); 2488 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
2488 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); 2489 WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
@@ -2499,8 +2500,10 @@ out:
2499 security_release_secctx(context, contextlen); 2500 security_release_secctx(context, contextlen);
2500#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ 2501#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
2501 kfree(acl); 2502 kfree(acl);
2502 if (tempfh) 2503 if (tempfh) {
2503 fh_put(tempfh); 2504 fh_put(tempfh);
2505 kfree(tempfh);
2506 }
2504 return status; 2507 return status;
2505out_nfserr: 2508out_nfserr:
2506 status = nfserrno(err); 2509 status = nfserrno(err);
@@ -3471,6 +3474,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3471 struct nfsd4_test_stateid_id *stateid, *next; 3474 struct nfsd4_test_stateid_id *stateid, *next;
3472 __be32 *p; 3475 __be32 *p;
3473 3476
3477 if (nfserr)
3478 return nfserr;
3479
3474 RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); 3480 RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
3475 *p++ = htonl(test_stateid->ts_num_ids); 3481 *p++ = htonl(test_stateid->ts_num_ids);
3476 3482
@@ -3579,8 +3585,6 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
3579 return 0; 3585 return 0;
3580 3586
3581 session = resp->cstate.session; 3587 session = resp->cstate.session;
3582 if (session == NULL)
3583 return 0;
3584 3588
3585 if (xb->page_len == 0) { 3589 if (xb->page_len == 0) {
3586 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; 3590 length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
@@ -3620,9 +3624,17 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3620 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || 3624 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
3621 !nfsd4_enc_ops[op->opnum]); 3625 !nfsd4_enc_ops[op->opnum]);
3622 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); 3626 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
3623 /* nfsd4_check_drc_limit guarantees enough room for error status */ 3627 /* nfsd4_check_resp_size guarantees enough room for error status */
3624 if (!op->status) 3628 if (!op->status)
3625 op->status = nfsd4_check_resp_size(resp, 0); 3629 op->status = nfsd4_check_resp_size(resp, 0);
3630 if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
3631 struct nfsd4_slot *slot = resp->cstate.slot;
3632
3633 if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
3634 op->status = nfserr_rep_too_big_to_cache;
3635 else
3636 op->status = nfserr_rep_too_big;
3637 }
3626 if (so) { 3638 if (so) {
3627 so->so_replay.rp_status = op->status; 3639 so->so_replay.rp_status = op->status;
3628 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); 3640 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
@@ -3691,6 +3703,12 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
3691int 3703int
3692nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) 3704nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
3693{ 3705{
3706 if (rqstp->rq_arg.head[0].iov_len % 4) {
3707 /* client is nuts */
3708 dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
3709 __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
3710 return 0;
3711 }
3694 args->p = p; 3712 args->p = p;
3695 args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; 3713 args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
3696 args->pagelist = rqstp->rq_arg.pages; 3714 args->pagelist = rqstp->rq_arg.pages;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7f555179bf81..f34d9de802ab 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
699 if (err != 0 || fd < 0) 699 if (err != 0 || fd < 0)
700 return -EINVAL; 700 return -EINVAL;
701 701
702 if (svc_alien_sock(net, fd)) {
703 printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
704 return -EINVAL;
705 }
706
702 err = nfsd_create_serv(net); 707 err = nfsd_create_serv(net);
703 if (err != 0) 708 if (err != 0)
704 return err; 709 return err;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 30f34ab02137..479eb681c27c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -282,7 +282,7 @@ void nfsd_lockd_shutdown(void);
282 * reason. 282 * reason.
283 */ 283 */
284#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 284#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
285#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 285#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
286 286
287#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ 287#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
288 288
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4775bc4896c8..ad67964d0bb1 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -133,6 +133,17 @@ fh_init(struct svc_fh *fhp, int maxsize)
133 133
134#ifdef CONFIG_NFSD_V3 134#ifdef CONFIG_NFSD_V3
135/* 135/*
136 * The wcc data stored in current_fh should be cleared
137 * between compound ops.
138 */
139static inline void
140fh_clear_wcc(struct svc_fh *fhp)
141{
142 fhp->fh_post_saved = 0;
143 fhp->fh_pre_saved = 0;
144}
145
146/*
136 * Fill in the pre_op attr for the wcc data 147 * Fill in the pre_op attr for the wcc data
137 */ 148 */
138static inline void 149static inline void
@@ -152,7 +163,8 @@ fill_pre_wcc(struct svc_fh *fhp)
152 163
153extern void fill_post_wcc(struct svc_fh *); 164extern void fill_post_wcc(struct svc_fh *);
154#else 165#else
155#define fill_pre_wcc(ignored) 166#define fh_clear_wcc(ignored)
167#define fill_pre_wcc(ignored)
156#define fill_post_wcc(notused) 168#define fill_post_wcc(notused)
157#endif /* CONFIG_NFSD_V3 */ 169#endif /* CONFIG_NFSD_V3 */
158 170
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b17d93214d01..9c769a47ac5a 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,7 +152,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
152 type = (stat->mode & S_IFMT); 152 type = (stat->mode & S_IFMT);
153 153
154 *p++ = htonl(nfs_ftypes[type >> 12]); 154 *p++ = htonl(nfs_ftypes[type >> 12]);
155 *p++ = htonl((u32) (stat->mode & S_IALLUGO)); 155 *p++ = htonl((u32) stat->mode);
156 *p++ = htonl((u32) stat->nlink); 156 *p++ = htonl((u32) stat->nlink);
157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid)); 157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid)); 158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 915808b36df7..16f0673a423c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -404,6 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
404 umode_t ftype = 0; 404 umode_t ftype = 0;
405 __be32 err; 405 __be32 err;
406 int host_err; 406 int host_err;
407 bool get_write_count;
407 int size_change = 0; 408 int size_change = 0;
408 409
409 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 410 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
@@ -411,10 +412,18 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
411 if (iap->ia_valid & ATTR_SIZE) 412 if (iap->ia_valid & ATTR_SIZE)
412 ftype = S_IFREG; 413 ftype = S_IFREG;
413 414
415 /* Callers that do fh_verify should do the fh_want_write: */
416 get_write_count = !fhp->fh_dentry;
417
414 /* Get inode */ 418 /* Get inode */
415 err = fh_verify(rqstp, fhp, ftype, accmode); 419 err = fh_verify(rqstp, fhp, ftype, accmode);
416 if (err) 420 if (err)
417 goto out; 421 goto out;
422 if (get_write_count) {
423 host_err = fh_want_write(fhp);
424 if (host_err)
425 return nfserrno(host_err);
426 }
418 427
419 dentry = fhp->fh_dentry; 428 dentry = fhp->fh_dentry;
420 inode = dentry->d_inode; 429 inode = dentry->d_inode;
@@ -1706,10 +1715,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1706 dput(odentry); 1715 dput(odentry);
1707 out_nfserr: 1716 out_nfserr:
1708 err = nfserrno(host_err); 1717 err = nfserrno(host_err);
1709 1718 /*
1710 /* we cannot reply on fh_unlock on the two filehandles, 1719 * We cannot rely on fh_unlock on the two filehandles,
1711 * as that would do the wrong thing if the two directories 1720 * as that would do the wrong thing if the two directories
1712 * were the same, so again we do it by hand 1721 * were the same, so again we do it by hand.
1713 */ 1722 */
1714 fill_post_wcc(ffhp); 1723 fill_post_wcc(ffhp);
1715 fill_post_wcc(tfhp); 1724 fill_post_wcc(tfhp);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d278a0d03496..5ea7df305083 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -574,8 +574,6 @@ extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
574 struct nfsd4_compound_state *, 574 struct nfsd4_compound_state *,
575 struct nfsd4_setclientid_confirm *setclientid_confirm); 575 struct nfsd4_setclientid_confirm *setclientid_confirm);
576extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); 576extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
577extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
578 struct nfsd4_sequence *seq);
579extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, 577extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
580 struct nfsd4_compound_state *, struct nfsd4_exchange_id *); 578 struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
581extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); 579extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 08fdb77852ac..f3a82fbcae02 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -134,6 +134,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
134 134
135static const struct vm_operations_struct nilfs_file_vm_ops = { 135static const struct vm_operations_struct nilfs_file_vm_ops = {
136 .fault = filemap_fault, 136 .fault = filemap_fault,
137 .map_pages = filemap_map_pages,
137 .page_mkwrite = nilfs_page_mkwrite, 138 .page_mkwrite = nilfs_page_mkwrite,
138 .remap_pages = generic_file_remap_pages, 139 .remap_pages = generic_file_remap_pages,
139}; 140};
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 807150e2c2b9..dd6103cc93c1 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -18,16 +18,9 @@
18 * distribution in the file COPYING); if not, write to the Free Software 18 * distribution in the file COPYING); if not, write to the Free Software
19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */ 20 */
21 21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22#include "debug.h" 22#include "debug.h"
23 23
24/*
25 * A static buffer to hold the error string being displayed and a spinlock
26 * to protect concurrent accesses to it.
27 */
28static char err_buf[1024];
29static DEFINE_SPINLOCK(err_buf_lock);
30
31/** 24/**
32 * __ntfs_warning - output a warning to the syslog 25 * __ntfs_warning - output a warning to the syslog
33 * @function: name of function outputting the warning 26 * @function: name of function outputting the warning
@@ -50,6 +43,7 @@ static DEFINE_SPINLOCK(err_buf_lock);
50void __ntfs_warning(const char *function, const struct super_block *sb, 43void __ntfs_warning(const char *function, const struct super_block *sb,
51 const char *fmt, ...) 44 const char *fmt, ...)
52{ 45{
46 struct va_format vaf;
53 va_list args; 47 va_list args;
54 int flen = 0; 48 int flen = 0;
55 49
@@ -59,17 +53,15 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
59#endif 53#endif
60 if (function) 54 if (function)
61 flen = strlen(function); 55 flen = strlen(function);
62 spin_lock(&err_buf_lock);
63 va_start(args, fmt); 56 va_start(args, fmt);
64 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 57 vaf.fmt = fmt;
65 va_end(args); 58 vaf.va = &args;
66 if (sb) 59 if (sb)
67 printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", 60 pr_warn("(device %s): %s(): %pV\n",
68 sb->s_id, flen ? function : "", err_buf); 61 sb->s_id, flen ? function : "", &vaf);
69 else 62 else
70 printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", 63 pr_warn("%s(): %pV\n", flen ? function : "", &vaf);
71 flen ? function : "", err_buf); 64 va_end(args);
72 spin_unlock(&err_buf_lock);
73} 65}
74 66
75/** 67/**
@@ -94,6 +86,7 @@ void __ntfs_warning(const char *function, const struct super_block *sb,
94void __ntfs_error(const char *function, const struct super_block *sb, 86void __ntfs_error(const char *function, const struct super_block *sb,
95 const char *fmt, ...) 87 const char *fmt, ...)
96{ 88{
89 struct va_format vaf;
97 va_list args; 90 va_list args;
98 int flen = 0; 91 int flen = 0;
99 92
@@ -103,17 +96,15 @@ void __ntfs_error(const char *function, const struct super_block *sb,
103#endif 96#endif
104 if (function) 97 if (function)
105 flen = strlen(function); 98 flen = strlen(function);
106 spin_lock(&err_buf_lock);
107 va_start(args, fmt); 99 va_start(args, fmt);
108 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 100 vaf.fmt = fmt;
109 va_end(args); 101 vaf.va = &args;
110 if (sb) 102 if (sb)
111 printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", 103 pr_err("(device %s): %s(): %pV\n",
112 sb->s_id, flen ? function : "", err_buf); 104 sb->s_id, flen ? function : "", &vaf);
113 else 105 else
114 printk(KERN_ERR "NTFS-fs error: %s(): %s\n", 106 pr_err("%s(): %pV\n", flen ? function : "", &vaf);
115 flen ? function : "", err_buf); 107 va_end(args);
116 spin_unlock(&err_buf_lock);
117} 108}
118 109
119#ifdef DEBUG 110#ifdef DEBUG
@@ -124,6 +115,7 @@ int debug_msgs = 0;
124void __ntfs_debug (const char *file, int line, const char *function, 115void __ntfs_debug (const char *file, int line, const char *function,
125 const char *fmt, ...) 116 const char *fmt, ...)
126{ 117{
118 struct va_format vaf;
127 va_list args; 119 va_list args;
128 int flen = 0; 120 int flen = 0;
129 121
@@ -131,13 +123,11 @@ void __ntfs_debug (const char *file, int line, const char *function,
131 return; 123 return;
132 if (function) 124 if (function)
133 flen = strlen(function); 125 flen = strlen(function);
134 spin_lock(&err_buf_lock);
135 va_start(args, fmt); 126 va_start(args, fmt);
136 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 127 vaf.fmt = fmt;
128 vaf.va = &args;
129 pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf);
137 va_end(args); 130 va_end(args);
138 printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line,
139 flen ? function : "", err_buf);
140 spin_unlock(&err_buf_lock);
141} 131}
142 132
143/* Dump a runlist. Caller has to provide synchronisation for @rl. */ 133/* Dump a runlist. Caller has to provide synchronisation for @rl. */
@@ -149,12 +139,12 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
149 139
150 if (!debug_msgs) 140 if (!debug_msgs)
151 return; 141 return;
152 printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n"); 142 pr_debug("Dumping runlist (values in hex):\n");
153 if (!rl) { 143 if (!rl) {
154 printk(KERN_DEBUG "Run list not present.\n"); 144 pr_debug("Run list not present.\n");
155 return; 145 return;
156 } 146 }
157 printk(KERN_DEBUG "VCN LCN Run length\n"); 147 pr_debug("VCN LCN Run length\n");
158 for (i = 0; ; i++) { 148 for (i = 0; ; i++) {
159 LCN lcn = (rl + i)->lcn; 149 LCN lcn = (rl + i)->lcn;
160 150
@@ -163,13 +153,13 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
163 153
164 if (index > -LCN_ENOENT - 1) 154 if (index > -LCN_ENOENT - 1)
165 index = 3; 155 index = 3;
166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", 156 pr_debug("%-16Lx %s %-16Lx%s\n",
167 (long long)(rl + i)->vcn, lcn_str[index], 157 (long long)(rl + i)->vcn, lcn_str[index],
168 (long long)(rl + i)->length, 158 (long long)(rl + i)->length,
169 (rl + i)->length ? "" : 159 (rl + i)->length ? "" :
170 " (runlist end)"); 160 " (runlist end)");
171 } else 161 } else
172 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", 162 pr_debug("%-16Lx %-16Lx %-16Lx%s\n",
173 (long long)(rl + i)->vcn, 163 (long long)(rl + i)->vcn,
174 (long long)(rl + i)->lcn, 164 (long long)(rl + i)->lcn,
175 (long long)(rl + i)->length, 165 (long long)(rl + i)->length,
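[Editor's note] The debug.c rewrite drops the shared err_buf/err_buf_lock pair by handing the va_list to printk through struct va_format and the %pV specifier, so nothing has to be formatted into an intermediate buffer under a spinlock. The same idea in plain userspace C, with vfprintf standing in for %pV:

#include <stdarg.h>
#include <stdio.h>

static void ntfs_style_warning(const char *function, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "ntfs: %s(): ", function);
	vfprintf(stderr, fmt, args); /* forward the va_list, no temp buffer */
	fputc('\n', stderr);
	va_end(args);
}

int main(void)
{
	ntfs_style_warning("load_system_files", "volume is dirty (flags 0x%x)", 1);
	return 0;
}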
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 53c27eaf2307..61bf091e32a8 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -48,7 +48,12 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
48 48
49#else /* !DEBUG */ 49#else /* !DEBUG */
50 50
51#define ntfs_debug(f, a...) do {} while (0) 51#define ntfs_debug(fmt, ...) \
52do { \
53 if (0) \
54 no_printk(fmt, ##__VA_ARGS__); \
55} while (0)
56
52#define ntfs_debug_dump_runlist(rl) do {} while (0) 57#define ntfs_debug_dump_runlist(rl) do {} while (0)
53 58
54#endif /* !DEBUG */ 59#endif /* !DEBUG */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index bd5610d48242..9de2491f2926 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -19,6 +19,7 @@
19 * distribution in the file COPYING); if not, write to the Free Software 19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */ 21 */
22#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 23
23#include <linux/stddef.h> 24#include <linux/stddef.h>
24#include <linux/init.h> 25#include <linux/init.h>
@@ -1896,7 +1897,7 @@ get_ctx_vol_failed:
1896 vol->minor_ver = vi->minor_ver; 1897 vol->minor_ver = vi->minor_ver;
1897 ntfs_attr_put_search_ctx(ctx); 1898 ntfs_attr_put_search_ctx(ctx);
1898 unmap_mft_record(NTFS_I(vol->vol_ino)); 1899 unmap_mft_record(NTFS_I(vol->vol_ino));
1899 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, 1900 pr_info("volume version %i.%i.\n", vol->major_ver,
1900 vol->minor_ver); 1901 vol->minor_ver);
1901 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { 1902 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
1902 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " 1903 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
@@ -3095,7 +3096,7 @@ static int __init init_ntfs_fs(void)
3095 int err = 0; 3096 int err = 0;
3096 3097
3097 /* This may be ugly but it results in pretty output so who cares. (-8 */ 3098 /* This may be ugly but it results in pretty output so who cares. (-8 */
3098 printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" 3099 pr_info("driver " NTFS_VERSION " [Flags: R/"
3099#ifdef NTFS_RW 3100#ifdef NTFS_RW
3100 "W" 3101 "W"
3101#else 3102#else
@@ -3115,16 +3116,15 @@ static int __init init_ntfs_fs(void)
3115 sizeof(ntfs_index_context), 0 /* offset */, 3116 sizeof(ntfs_index_context), 0 /* offset */,
3116 SLAB_HWCACHE_ALIGN, NULL /* ctor */); 3117 SLAB_HWCACHE_ALIGN, NULL /* ctor */);
3117 if (!ntfs_index_ctx_cache) { 3118 if (!ntfs_index_ctx_cache) {
3118 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3119 pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
3119 ntfs_index_ctx_cache_name);
3120 goto ictx_err_out; 3120 goto ictx_err_out;
3121 } 3121 }
3122 ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, 3122 ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
3123 sizeof(ntfs_attr_search_ctx), 0 /* offset */, 3123 sizeof(ntfs_attr_search_ctx), 0 /* offset */,
3124 SLAB_HWCACHE_ALIGN, NULL /* ctor */); 3124 SLAB_HWCACHE_ALIGN, NULL /* ctor */);
3125 if (!ntfs_attr_ctx_cache) { 3125 if (!ntfs_attr_ctx_cache) {
3126 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3126 pr_crit("NTFS: Failed to create %s!\n",
3127 ntfs_attr_ctx_cache_name); 3127 ntfs_attr_ctx_cache_name);
3128 goto actx_err_out; 3128 goto actx_err_out;
3129 } 3129 }
3130 3130
@@ -3132,8 +3132,7 @@ static int __init init_ntfs_fs(void)
3132 (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, 3132 (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0,
3133 SLAB_HWCACHE_ALIGN, NULL); 3133 SLAB_HWCACHE_ALIGN, NULL);
3134 if (!ntfs_name_cache) { 3134 if (!ntfs_name_cache) {
3135 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3135 pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
3136 ntfs_name_cache_name);
3137 goto name_err_out; 3136 goto name_err_out;
3138 } 3137 }
3139 3138
@@ -3141,8 +3140,7 @@ static int __init init_ntfs_fs(void)
3141 sizeof(ntfs_inode), 0, 3140 sizeof(ntfs_inode), 0,
3142 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 3141 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
3143 if (!ntfs_inode_cache) { 3142 if (!ntfs_inode_cache) {
3144 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3143 pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
3145 ntfs_inode_cache_name);
3146 goto inode_err_out; 3144 goto inode_err_out;
3147 } 3145 }
3148 3146
@@ -3151,15 +3149,14 @@ static int __init init_ntfs_fs(void)
3151 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, 3149 SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
3152 ntfs_big_inode_init_once); 3150 ntfs_big_inode_init_once);
3153 if (!ntfs_big_inode_cache) { 3151 if (!ntfs_big_inode_cache) {
3154 printk(KERN_CRIT "NTFS: Failed to create %s!\n", 3152 pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
3155 ntfs_big_inode_cache_name);
3156 goto big_inode_err_out; 3153 goto big_inode_err_out;
3157 } 3154 }
3158 3155
3159 /* Register the ntfs sysctls. */ 3156 /* Register the ntfs sysctls. */
3160 err = ntfs_sysctl(1); 3157 err = ntfs_sysctl(1);
3161 if (err) { 3158 if (err) {
3162 printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); 3159 pr_crit("Failed to register NTFS sysctls!\n");
3163 goto sysctl_err_out; 3160 goto sysctl_err_out;
3164 } 3161 }
3165 3162
@@ -3168,7 +3165,7 @@ static int __init init_ntfs_fs(void)
3168 ntfs_debug("NTFS driver registered successfully."); 3165 ntfs_debug("NTFS driver registered successfully.");
3169 return 0; /* Success! */ 3166 return 0; /* Success! */
3170 } 3167 }
3171 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); 3168 pr_crit("Failed to register NTFS filesystem driver!\n");
3172 3169
3173 /* Unregister the ntfs sysctls. */ 3170 /* Unregister the ntfs sysctls. */
3174 ntfs_sysctl(0); 3171 ntfs_sysctl(0);
@@ -3184,8 +3181,7 @@ actx_err_out:
3184 kmem_cache_destroy(ntfs_index_ctx_cache); 3181 kmem_cache_destroy(ntfs_index_ctx_cache);
3185ictx_err_out: 3182ictx_err_out:
3186 if (!err) { 3183 if (!err) {
3187 printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver " 3184 pr_crit("Aborting NTFS filesystem driver registration...\n");
3188 "registration...\n");
3189 err = -ENOMEM; 3185 err = -ENOMEM;
3190 } 3186 }
3191 return err; 3187 return err;
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index a4b07730b2e1..b7f57271d49c 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr,
41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); 41 return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION);
42} 42}
43static struct kobj_attribute attr_version = 43static struct kobj_attribute attr_version =
44 __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); 44 __ATTR(interface_revision, S_IRUGO, version_show, NULL);
45 45
46static struct attribute *o2cb_attrs[] = { 46static struct attribute *o2cb_attrs[] = {
47 &attr_version.attr, 47 &attr_version.attr,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 5c8343fe7438..83f1a665ae97 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -496,7 +496,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj,
496} 496}
497 497
498static struct kobj_attribute ocfs2_attr_max_locking_protocol = 498static struct kobj_attribute ocfs2_attr_max_locking_protocol =
499 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 499 __ATTR(max_locking_protocol, S_IRUGO,
500 ocfs2_max_locking_protocol_show, NULL); 500 ocfs2_max_locking_protocol_show, NULL);
501 501
502static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 502static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
@@ -528,7 +528,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
528} 528}
529 529
530static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 530static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins =
531 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 531 __ATTR(loaded_cluster_plugins, S_IRUGO,
532 ocfs2_loaded_cluster_plugins_show, NULL); 532 ocfs2_loaded_cluster_plugins_show, NULL);
533 533
534static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 534static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
@@ -550,7 +550,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
550} 550}
551 551
552static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 552static struct kobj_attribute ocfs2_attr_active_cluster_plugin =
553 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 553 __ATTR(active_cluster_plugin, S_IRUGO,
554 ocfs2_active_cluster_plugin_show, NULL); 554 ocfs2_active_cluster_plugin_show, NULL);
555 555
556static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 556static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj,
@@ -599,7 +599,7 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
599 599
600 600
601static struct kobj_attribute ocfs2_attr_cluster_stack = 601static struct kobj_attribute ocfs2_attr_cluster_stack =
602 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 602 __ATTR(cluster_stack, S_IRUGO | S_IWUSR,
603 ocfs2_cluster_stack_show, 603 ocfs2_cluster_stack_show,
604 ocfs2_cluster_stack_store); 604 ocfs2_cluster_stack_store);
605 605
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 656e401794de..64db2bceac59 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -138,8 +138,8 @@ static const char * const task_state_array[] = {
138 "D (disk sleep)", /* 2 */ 138 "D (disk sleep)", /* 2 */
139 "T (stopped)", /* 4 */ 139 "T (stopped)", /* 4 */
140 "t (tracing stop)", /* 8 */ 140 "t (tracing stop)", /* 8 */
141 "Z (zombie)", /* 16 */ 141 "X (dead)", /* 16 */
142 "X (dead)", /* 32 */ 142 "Z (zombie)", /* 32 */
143}; 143};
144 144
145static inline const char *get_task_state(struct task_struct *tsk) 145static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b9760628e1fd..6b7087e2e8fb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1236,6 +1236,9 @@ static ssize_t proc_fault_inject_write(struct file * file,
1236 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1236 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1237 if (*end) 1237 if (*end)
1238 return -EINVAL; 1238 return -EINVAL;
1239 if (make_it_fail < 0 || make_it_fail > 1)
1240 return -EINVAL;
1241
1239 task = get_proc_task(file_inode(file)); 1242 task = get_proc_task(file_inode(file));
1240 if (!task) 1243 if (!task)
1241 return -ESRCH; 1244 return -ESRCH;
@@ -2588,7 +2591,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2588 REG("environ", S_IRUSR, proc_environ_operations), 2591 REG("environ", S_IRUSR, proc_environ_operations),
2589 INF("auxv", S_IRUSR, proc_pid_auxv), 2592 INF("auxv", S_IRUSR, proc_pid_auxv),
2590 ONE("status", S_IRUGO, proc_pid_status), 2593 ONE("status", S_IRUGO, proc_pid_status),
2591 ONE("personality", S_IRUGO, proc_pid_personality), 2594 ONE("personality", S_IRUSR, proc_pid_personality),
2592 INF("limits", S_IRUGO, proc_pid_limits), 2595 INF("limits", S_IRUGO, proc_pid_limits),
2593#ifdef CONFIG_SCHED_DEBUG 2596#ifdef CONFIG_SCHED_DEBUG
2594 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2597 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2598,7 +2601,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2598#endif 2601#endif
2599 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2602 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2600#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2603#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2601 INF("syscall", S_IRUGO, proc_pid_syscall), 2604 INF("syscall", S_IRUSR, proc_pid_syscall),
2602#endif 2605#endif
2603 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2606 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2604 ONE("stat", S_IRUGO, proc_tgid_stat), 2607 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2617,7 +2620,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2617#ifdef CONFIG_PROC_PAGE_MONITOR 2620#ifdef CONFIG_PROC_PAGE_MONITOR
2618 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2621 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2619 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 2622 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
2620 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2623 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2621#endif 2624#endif
2622#ifdef CONFIG_SECURITY 2625#ifdef CONFIG_SECURITY
2623 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2626 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2626,7 +2629,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2626 INF("wchan", S_IRUGO, proc_pid_wchan), 2629 INF("wchan", S_IRUGO, proc_pid_wchan),
2627#endif 2630#endif
2628#ifdef CONFIG_STACKTRACE 2631#ifdef CONFIG_STACKTRACE
2629 ONE("stack", S_IRUGO, proc_pid_stack), 2632 ONE("stack", S_IRUSR, proc_pid_stack),
2630#endif 2633#endif
2631#ifdef CONFIG_SCHEDSTATS 2634#ifdef CONFIG_SCHEDSTATS
2632 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2635 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -2927,14 +2930,14 @@ static const struct pid_entry tid_base_stuff[] = {
2927 REG("environ", S_IRUSR, proc_environ_operations), 2930 REG("environ", S_IRUSR, proc_environ_operations),
2928 INF("auxv", S_IRUSR, proc_pid_auxv), 2931 INF("auxv", S_IRUSR, proc_pid_auxv),
2929 ONE("status", S_IRUGO, proc_pid_status), 2932 ONE("status", S_IRUGO, proc_pid_status),
2930 ONE("personality", S_IRUGO, proc_pid_personality), 2933 ONE("personality", S_IRUSR, proc_pid_personality),
2931 INF("limits", S_IRUGO, proc_pid_limits), 2934 INF("limits", S_IRUGO, proc_pid_limits),
2932#ifdef CONFIG_SCHED_DEBUG 2935#ifdef CONFIG_SCHED_DEBUG
2933 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2936 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2934#endif 2937#endif
2935 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2938 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2936#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2939#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2937 INF("syscall", S_IRUGO, proc_pid_syscall), 2940 INF("syscall", S_IRUSR, proc_pid_syscall),
2938#endif 2941#endif
2939 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2942 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2940 ONE("stat", S_IRUGO, proc_tid_stat), 2943 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -2955,7 +2958,7 @@ static const struct pid_entry tid_base_stuff[] = {
2955#ifdef CONFIG_PROC_PAGE_MONITOR 2958#ifdef CONFIG_PROC_PAGE_MONITOR
2956 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2959 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2957 REG("smaps", S_IRUGO, proc_tid_smaps_operations), 2960 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
2958 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2961 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2959#endif 2962#endif
2960#ifdef CONFIG_SECURITY 2963#ifdef CONFIG_SECURITY
2961 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2964 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2964,7 +2967,7 @@ static const struct pid_entry tid_base_stuff[] = {
2964 INF("wchan", S_IRUGO, proc_pid_wchan), 2967 INF("wchan", S_IRUGO, proc_pid_wchan),
2965#endif 2968#endif
2966#ifdef CONFIG_STACKTRACE 2969#ifdef CONFIG_STACKTRACE
2967 ONE("stack", S_IRUGO, proc_pid_stack), 2970 ONE("stack", S_IRUSR, proc_pid_stack),
2968#endif 2971#endif
2969#ifdef CONFIG_SCHEDSTATS 2972#ifdef CONFIG_SCHEDSTATS
2970 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2973 INF("schedstat", S_IRUGO, proc_pid_schedstat),
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 985ea881b5bc..0788d093f5d8 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14#include "../mount.h"
14#include "internal.h" 15#include "internal.h"
15#include "fd.h" 16#include "fd.h"
16 17
@@ -48,8 +49,9 @@ static int seq_show(struct seq_file *m, void *v)
48 } 49 }
49 50
50 if (!ret) { 51 if (!ret) {
51 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", 52 seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
52 (long long)file->f_pos, f_flags); 53 (long long)file->f_pos, f_flags,
54 real_mount(file->f_path.mnt)->mnt_id);
53 if (file->f_op->show_fdinfo) 55 if (file->f_op->show_fdinfo)
54 ret = file->f_op->show_fdinfo(m, file); 56 ret = file->f_op->show_fdinfo(m, file);
55 fput(file); 57 fput(file);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8f20e3404fd2..0adbc02d60e3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -47,7 +47,7 @@ static void proc_evict_inode(struct inode *inode)
47 pde_put(de); 47 pde_put(de);
48 head = PROC_I(inode)->sysctl; 48 head = PROC_I(inode)->sysctl;
49 if (head) { 49 if (head) {
50 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 50 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
51 sysctl_head_put(head); 51 sysctl_head_put(head);
52 } 52 }
53 /* Release any associated namespace */ 53 /* Release any associated namespace */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 136e548d9567..7445af0b1aa3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -73,7 +73,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
73 available += pagecache; 73 available += pagecache;
74 74
75 /* 75 /*
76 * Part of the reclaimable swap consists of items that are in use, 76 * Part of the reclaimable slab consists of items that are in use,
77 * and cannot be freed. Cap this estimate at the low watermark. 77 * and cannot be freed. Cap this estimate at the low watermark.
78 */ 78 */
79 available += global_page_state(NR_SLAB_RECLAIMABLE) - 79 available += global_page_state(NR_SLAB_RECLAIMABLE) -
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fb52b548080d..442177b1119a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/vmacache.h>
2#include <linux/hugetlb.h> 3#include <linux/hugetlb.h>
3#include <linux/huge_mm.h> 4#include <linux/huge_mm.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
152 153
153 /* 154 /*
154 * We remember last_addr rather than next_addr to hit with 155 * We remember last_addr rather than next_addr to hit with
155 * mmap_cache most of the time. We have zero last_addr at 156 * vmacache most of the time. We have zero last_addr at
156 * the beginning and also after lseek. We will have -1 last_addr 157 * the beginning and also after lseek. We will have -1 last_addr
157 * after the end of the vmas. 158 * after the end of the vmas.
158 */ 159 */
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 88d4585b30f1..6a8e785b29da 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -484,7 +484,6 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
484 phdr_ptr->p_memsz = real_sz; 484 phdr_ptr->p_memsz = real_sz;
485 if (real_sz == 0) { 485 if (real_sz == 0) {
486 pr_warn("Warning: Zero PT_NOTE entries found\n"); 486 pr_warn("Warning: Zero PT_NOTE entries found\n");
487 return -EINVAL;
488 } 487 }
489 } 488 }
490 489
@@ -671,7 +670,6 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
671 phdr_ptr->p_memsz = real_sz; 670 phdr_ptr->p_memsz = real_sz;
672 if (real_sz == 0) { 671 if (real_sz == 0) {
673 pr_warn("Warning: Zero PT_NOTE entries found\n"); 672 pr_warn("Warning: Zero PT_NOTE entries found\n");
674 return -EINVAL;
675 } 673 }
676 } 674 }
677 675
@@ -1118,4 +1116,3 @@ void vmcore_cleanup(void)
1118 } 1116 }
1119 free_elfcorebuf(); 1117 free_elfcorebuf();
1120} 1118}
1121EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 880fd9884366..c51df1dd237e 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -8,9 +8,10 @@ config QUOTA
8 help 8 help
9 If you say Y here, you will be able to set per user limits for disk 9 If you say Y here, you will be able to set per user limits for disk
10 usage (also called disk quotas). Currently, it works for the 10 usage (also called disk quotas). Currently, it works for the
11 ext2, ext3, and reiserfs file system. ext3 also supports journalled 11 ext2, ext3, ext4, jfs, ocfs2 and reiserfs file systems.
12 quotas for which you don't need to run quotacheck(8) after an unclean 12 Note that gfs2 and xfs use their own quota system.
13 shutdown. 13 Ext3, ext4 and reiserfs also support journaled quotas for which
14 you don't need to run quotacheck(8) after an unclean shutdown.
14 For further details, read the Quota mini-HOWTO, available from 15 For further details, read the Quota mini-HOWTO, available from
15 <http://www.tldp.org/docs.html#howto>, or the documentation provided 16 <http://www.tldp.org/docs.html#howto>, or the documentation provided
16 with the quota tools. Probably the quota support is only useful for 17 with the quota tools. Probably the quota support is only useful for
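
For illustration, a typical setup matching the revised help text might look like the following (a sketch only, not part of this patch; CONFIG_QUOTA/CONFIG_QFMT_V2 and the usrjquota/grpjquota/jqfmt mount options are the standard quota and ext3/ext4 journaled-quota knobs, while the device path and mount point are hypothetical):

    # kernel .config fragment: core quota support plus the vfsv0 quota format
    CONFIG_QUOTA=y
    CONFIG_QFMT_V2=y

    # journaled quotas on ext4 -- no quotacheck(8) run needed after an unclean shutdown
    mount -o usrjquota=aquota.user,grpjquota=aquota.group,jqfmt=vfsv0 /dev/sdb1 /mnt
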
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 1fd2051109a3..af677353a3f5 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -125,6 +125,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
125 int d_reclen; 125 int d_reclen;
126 char *d_name; 126 char *d_name;
127 ino_t d_ino; 127 ino_t d_ino;
128 loff_t cur_pos = deh_offset(deh);
128 129
129 if (!de_visible(deh)) 130 if (!de_visible(deh))
130 /* it is hidden entry */ 131 /* it is hidden entry */
@@ -196,8 +197,9 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
196 if (local_buf != small_buf) { 197 if (local_buf != small_buf) {
197 kfree(local_buf); 198 kfree(local_buf);
198 } 199 }
199 // next entry should be looked for with such offset 200
200 next_pos = deh_offset(deh) + 1; 201 /* deh_offset(deh) may be invalid now. */
202 next_pos = cur_pos + 1;
201 203
202 if (item_moved(&tmp_ih, &path_to_entry)) { 204 if (item_moved(&tmp_ih, &path_to_entry)) {
203 set_cpu_key_k_offset(&pos_key, 205 set_cpu_key_k_offset(&pos_key,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 123c79b7261e..4f34dbae823d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1538,6 +1538,7 @@ out_unlock:
1538 1538
1539static const struct vm_operations_struct ubifs_file_vm_ops = { 1539static const struct vm_operations_struct ubifs_file_vm_ops = {
1540 .fault = filemap_fault, 1540 .fault = filemap_fault,
1541 .map_pages = filemap_map_pages,
1541 .page_mkwrite = ubifs_vm_page_mkwrite, 1542 .page_mkwrite = ubifs_vm_page_mkwrite,
1542 .remap_pages = generic_file_remap_pages, 1543 .remap_pages = generic_file_remap_pages,
1543}; 1544};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 64f2b7334d08..3286db047a40 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -175,7 +175,7 @@ static void init_once(void *foo)
175 inode_init_once(&ei->vfs_inode); 175 inode_init_once(&ei->vfs_inode);
176} 176}
177 177
178static int init_inodecache(void) 178static int __init init_inodecache(void)
179{ 179{
180 udf_inode_cachep = kmem_cache_create("udf_inode_cache", 180 udf_inode_cachep = kmem_cache_create("udf_inode_cache",
181 sizeof(struct udf_inode_info), 181 sizeof(struct udf_inode_info),
@@ -505,6 +505,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
505 while ((p = strsep(&options, ",")) != NULL) { 505 while ((p = strsep(&options, ",")) != NULL) {
506 substring_t args[MAX_OPT_ARGS]; 506 substring_t args[MAX_OPT_ARGS];
507 int token; 507 int token;
508 unsigned n;
508 if (!*p) 509 if (!*p)
509 continue; 510 continue;
510 511
@@ -516,7 +517,10 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
516 case Opt_bs: 517 case Opt_bs:
517 if (match_int(&args[0], &option)) 518 if (match_int(&args[0], &option))
518 return 0; 519 return 0;
519 uopt->blocksize = option; 520 n = option;
521 if (n != 512 && n != 1024 && n != 2048 && n != 4096)
522 return 0;
523 uopt->blocksize = n;
520 uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); 524 uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET);
521 break; 525 break;
522 case Opt_unhide: 526 case Opt_unhide:
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7ea492ae660..0ab1de4b39a5 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -38,7 +38,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
38{ 38{
39 struct super_block * sb; 39 struct super_block * sb;
40 struct ufs_sb_private_info * uspi; 40 struct ufs_sb_private_info * uspi;
41 struct ufs_super_block_first * usb1;
42 struct ufs_cg_private_info * ucpi; 41 struct ufs_cg_private_info * ucpi;
43 struct ufs_cylinder_group * ucg; 42 struct ufs_cylinder_group * ucg;
44 unsigned cgno, bit, end_bit, bbase, blkmap, i; 43 unsigned cgno, bit, end_bit, bbase, blkmap, i;
@@ -46,7 +45,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
46 45
47 sb = inode->i_sb; 46 sb = inode->i_sb;
48 uspi = UFS_SB(sb)->s_uspi; 47 uspi = UFS_SB(sb)->s_uspi;
49 usb1 = ubh_get_usb_first(uspi);
50 48
51 UFSD("ENTER, fragment %llu, count %u\n", 49 UFSD("ENTER, fragment %llu, count %u\n",
52 (unsigned long long)fragment, count); 50 (unsigned long long)fragment, count);
@@ -135,7 +133,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
135{ 133{
136 struct super_block * sb; 134 struct super_block * sb;
137 struct ufs_sb_private_info * uspi; 135 struct ufs_sb_private_info * uspi;
138 struct ufs_super_block_first * usb1;
139 struct ufs_cg_private_info * ucpi; 136 struct ufs_cg_private_info * ucpi;
140 struct ufs_cylinder_group * ucg; 137 struct ufs_cylinder_group * ucg;
141 unsigned overflow, cgno, bit, end_bit, i; 138 unsigned overflow, cgno, bit, end_bit, i;
@@ -143,7 +140,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
143 140
144 sb = inode->i_sb; 141 sb = inode->i_sb;
145 uspi = UFS_SB(sb)->s_uspi; 142 uspi = UFS_SB(sb)->s_uspi;
146 usb1 = ubh_get_usb_first(uspi);
147 143
148 UFSD("ENTER, fragment %llu, count %u\n", 144 UFSD("ENTER, fragment %llu, count %u\n",
149 (unsigned long long)fragment, count); 145 (unsigned long long)fragment, count);
@@ -499,7 +495,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
499{ 495{
500 struct super_block * sb; 496 struct super_block * sb;
501 struct ufs_sb_private_info * uspi; 497 struct ufs_sb_private_info * uspi;
502 struct ufs_super_block_first * usb1;
503 struct ufs_cg_private_info * ucpi; 498 struct ufs_cg_private_info * ucpi;
504 struct ufs_cylinder_group * ucg; 499 struct ufs_cylinder_group * ucg;
505 unsigned cgno, fragno, fragoff, count, fragsize, i; 500 unsigned cgno, fragno, fragoff, count, fragsize, i;
@@ -509,7 +504,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
509 504
510 sb = inode->i_sb; 505 sb = inode->i_sb;
511 uspi = UFS_SB(sb)->s_uspi; 506 uspi = UFS_SB(sb)->s_uspi;
512 usb1 = ubh_get_usb_first (uspi);
513 count = newcount - oldcount; 507 count = newcount - oldcount;
514 508
515 cgno = ufs_dtog(uspi, fragment); 509 cgno = ufs_dtog(uspi, fragment);
@@ -577,7 +571,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
577{ 571{
578 struct super_block * sb; 572 struct super_block * sb;
579 struct ufs_sb_private_info * uspi; 573 struct ufs_sb_private_info * uspi;
580 struct ufs_super_block_first * usb1;
581 struct ufs_cg_private_info * ucpi; 574 struct ufs_cg_private_info * ucpi;
582 struct ufs_cylinder_group * ucg; 575 struct ufs_cylinder_group * ucg;
583 unsigned oldcg, i, j, k, allocsize; 576 unsigned oldcg, i, j, k, allocsize;
@@ -588,7 +581,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
588 581
589 sb = inode->i_sb; 582 sb = inode->i_sb;
590 uspi = UFS_SB(sb)->s_uspi; 583 uspi = UFS_SB(sb)->s_uspi;
591 usb1 = ubh_get_usb_first(uspi);
592 oldcg = cgno; 584 oldcg = cgno;
593 585
594 /* 586 /*
@@ -690,7 +682,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
690{ 682{
691 struct super_block * sb; 683 struct super_block * sb;
692 struct ufs_sb_private_info * uspi; 684 struct ufs_sb_private_info * uspi;
693 struct ufs_super_block_first * usb1;
694 struct ufs_cylinder_group * ucg; 685 struct ufs_cylinder_group * ucg;
695 u64 result, blkno; 686 u64 result, blkno;
696 687
@@ -698,7 +689,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
698 689
699 sb = inode->i_sb; 690 sb = inode->i_sb;
700 uspi = UFS_SB(sb)->s_uspi; 691 uspi = UFS_SB(sb)->s_uspi;
701 usb1 = ubh_get_usb_first(uspi);
702 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 692 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
703 693
704 if (goal == 0) { 694 if (goal == 0) {
@@ -794,7 +784,6 @@ static u64 ufs_bitmap_search(struct super_block *sb,
794 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe 784 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
795 }; 785 };
796 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 786 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
797 struct ufs_super_block_first *usb1;
798 struct ufs_cylinder_group *ucg; 787 struct ufs_cylinder_group *ucg;
799 unsigned start, length, loc; 788 unsigned start, length, loc;
800 unsigned pos, want, blockmap, mask, end; 789 unsigned pos, want, blockmap, mask, end;
@@ -803,7 +792,6 @@ static u64 ufs_bitmap_search(struct super_block *sb,
803 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx, 792 UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
804 (unsigned long long)goal, count); 793 (unsigned long long)goal, count);
805 794
806 usb1 = ubh_get_usb_first (uspi);
807 ucg = ubh_get_ucg(UCPI_UBH(ucpi)); 795 ucg = ubh_get_ucg(UCPI_UBH(ucpi));
808 796
809 if (goal) 797 if (goal)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index d0426d74817b..98f7211599ff 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -57,7 +57,6 @@ void ufs_free_inode (struct inode * inode)
57{ 57{
58 struct super_block * sb; 58 struct super_block * sb;
59 struct ufs_sb_private_info * uspi; 59 struct ufs_sb_private_info * uspi;
60 struct ufs_super_block_first * usb1;
61 struct ufs_cg_private_info * ucpi; 60 struct ufs_cg_private_info * ucpi;
62 struct ufs_cylinder_group * ucg; 61 struct ufs_cylinder_group * ucg;
63 int is_directory; 62 int is_directory;
@@ -67,7 +66,6 @@ void ufs_free_inode (struct inode * inode)
67 66
68 sb = inode->i_sb; 67 sb = inode->i_sb;
69 uspi = UFS_SB(sb)->s_uspi; 68 uspi = UFS_SB(sb)->s_uspi;
70 usb1 = ubh_get_usb_first(uspi);
71 69
72 ino = inode->i_ino; 70 ino = inode->i_ino;
73 71
@@ -175,7 +173,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
175 struct super_block * sb; 173 struct super_block * sb;
176 struct ufs_sb_info * sbi; 174 struct ufs_sb_info * sbi;
177 struct ufs_sb_private_info * uspi; 175 struct ufs_sb_private_info * uspi;
178 struct ufs_super_block_first * usb1;
179 struct ufs_cg_private_info * ucpi; 176 struct ufs_cg_private_info * ucpi;
180 struct ufs_cylinder_group * ucg; 177 struct ufs_cylinder_group * ucg;
181 struct inode * inode; 178 struct inode * inode;
@@ -195,7 +192,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
195 ufsi = UFS_I(inode); 192 ufsi = UFS_I(inode);
196 sbi = UFS_SB(sb); 193 sbi = UFS_SB(sb);
197 uspi = sbi->s_uspi; 194 uspi = sbi->s_uspi;
198 usb1 = ubh_get_usb_first(uspi);
199 195
200 mutex_lock(&sbi->s_lock); 196 mutex_lock(&sbi->s_lock);
201 197
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b8c6791f046f..c1183f9f69dc 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -524,11 +524,9 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
524 struct ufs_buffer_head * ubh; 524 struct ufs_buffer_head * ubh;
525 unsigned char * base, * space; 525 unsigned char * base, * space;
526 unsigned size, blks, i; 526 unsigned size, blks, i;
527 struct ufs_super_block_third *usb3;
528 527
529 UFSD("ENTER\n"); 528 UFSD("ENTER\n");
530 529
531 usb3 = ubh_get_usb_third(uspi);
532 /* 530 /*
533 * Read cs structures from (usually) first data block 531 * Read cs structures from (usually) first data block
534 * on the device. 532 * on the device.
@@ -1390,15 +1388,11 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1390 struct super_block *sb = dentry->d_sb; 1388 struct super_block *sb = dentry->d_sb;
1391 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; 1389 struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi;
1392 unsigned flags = UFS_SB(sb)->s_flags; 1390 unsigned flags = UFS_SB(sb)->s_flags;
1393 struct ufs_super_block_first *usb1;
1394 struct ufs_super_block_second *usb2;
1395 struct ufs_super_block_third *usb3; 1391 struct ufs_super_block_third *usb3;
1396 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 1392 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
1397 1393
1398 lock_ufs(sb); 1394 lock_ufs(sb);
1399 1395
1400 usb1 = ubh_get_usb_first(uspi);
1401 usb2 = ubh_get_usb_second(uspi);
1402 usb3 = ubh_get_usb_third(uspi); 1396 usb3 = ubh_get_usb_third(uspi);
1403 1397
1404 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 1398 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
@@ -1454,7 +1448,7 @@ static void init_once(void *foo)
1454 inode_init_once(&ei->vfs_inode); 1448 inode_init_once(&ei->vfs_inode);
1455} 1449}
1456 1450
1457static int init_inodecache(void) 1451static int __init init_inodecache(void)
1458{ 1452{
1459 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", 1453 ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
1460 sizeof(struct ufs_inode_info), 1454 sizeof(struct ufs_inode_info),
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7abff8c16ca..003c0051b62f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1483,6 +1483,7 @@ const struct file_operations xfs_dir_file_operations = {
1483 1483
1484static const struct vm_operations_struct xfs_file_vm_ops = { 1484static const struct vm_operations_struct xfs_file_vm_ops = {
1485 .fault = filemap_fault, 1485 .fault = filemap_fault,
1486 .map_pages = filemap_map_pages,
1486 .page_mkwrite = xfs_vm_page_mkwrite, 1487 .page_mkwrite = xfs_vm_page_mkwrite,
1487 .remap_pages = generic_file_remap_pages, 1488 .remap_pages = generic_file_remap_pages,
1488}; 1489};