Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/acl.c | 22
-rw-r--r--  fs/9p/acl.h | 6
-rw-r--r--  fs/9p/cache.c | 20
-rw-r--r--  fs/9p/cache.h | 9
-rw-r--r--  fs/9p/v9fs.c | 45
-rw-r--r--  fs/9p/v9fs.h | 29
-rw-r--r--  fs/9p/v9fs_vfs.h | 6
-rw-r--r--  fs/9p/vfs_file.c | 36
-rw-r--r--  fs/9p/vfs_inode.c | 177
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 162
-rw-r--r--  fs/9p/vfs_super.c | 2
-rw-r--r--  fs/Kconfig | 4
-rw-r--r--  fs/Makefile | 3
-rw-r--r--  fs/afs/internal.h | 2
-rw-r--r--  fs/afs/rxrpc.c | 3
-rw-r--r--  fs/aio.c | 56
-rw-r--r--  fs/autofs4/autofs_i.h | 11
-rw-r--r--  fs/autofs4/dev-ioctl.c | 2
-rw-r--r--  fs/autofs4/inode.c | 2
-rw-r--r--  fs/autofs4/waitq.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 23
-rw-r--r--  fs/binfmt_elf.c | 13
-rw-r--r--  fs/block_dev.c | 51
-rw-r--r--  fs/btrfs/async-thread.c | 9
-rw-r--r--  fs/btrfs/ctree.h | 2
-rw-r--r--  fs/btrfs/disk-io.c | 5
-rw-r--r--  fs/btrfs/extent-tree.c | 15
-rw-r--r--  fs/btrfs/inode.c | 5
-rw-r--r--  fs/btrfs/tree-log.c | 34
-rw-r--r--  fs/btrfs/volumes.c | 17
-rw-r--r--  fs/btrfs/volumes.h | 2
-rw-r--r--  fs/buffer.c | 28
-rw-r--r--  fs/cifs/cifsfs.c | 6
-rw-r--r--  fs/cifs/cifsglob.h | 1
-rw-r--r--  fs/cifs/cifssmb.c | 3
-rw-r--r--  fs/cifs/connect.c | 14
-rw-r--r--  fs/cifs/dir.c | 26
-rw-r--r--  fs/cifs/file.c | 57
-rw-r--r--  fs/cifs/inode.c | 33
-rw-r--r--  fs/cifs/readdir.c | 7
-rw-r--r--  fs/cifs/sess.c | 7
-rw-r--r--  fs/cifs/transport.c | 2
-rw-r--r--  fs/dcache.c | 88
-rw-r--r--  fs/ecryptfs/crypto.c | 53
-rw-r--r--  fs/ecryptfs/file.c | 23
-rw-r--r--  fs/ecryptfs/inode.c | 51
-rw-r--r--  fs/ecryptfs/keystore.c | 47
-rw-r--r--  fs/ecryptfs/kthread.c | 2
-rw-r--r--  fs/ecryptfs/main.c | 23
-rw-r--r--  fs/ecryptfs/miscdev.c | 104
-rw-r--r--  fs/ecryptfs/read_write.c | 41
-rw-r--r--  fs/eventpoll.c | 293
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/ext3/ialloc.c | 8
-rw-r--r--  fs/ext3/inode.c | 24
-rw-r--r--  fs/ext3/namei.c | 6
-rw-r--r--  fs/ext3/xattr.c | 12
-rw-r--r--  fs/ext4/balloc.c | 3
-rw-r--r--  fs/ext4/bitmap.c | 8
-rw-r--r--  fs/ext4/ext4.h | 7
-rw-r--r--  fs/ext4/ext4_jbd2.h | 56
-rw-r--r--  fs/ext4/extents.c | 15
-rw-r--r--  fs/ext4/ialloc.c | 11
-rw-r--r--  fs/ext4/inode.c | 98
-rw-r--r--  fs/ext4/ioctl.c | 13
-rw-r--r--  fs/ext4/mballoc.c | 8
-rw-r--r--  fs/ext4/namei.c | 14
-rw-r--r--  fs/ext4/page-io.c | 36
-rw-r--r--  fs/ext4/super.c | 30
-rw-r--r--  fs/ext4/xattr.c | 13
-rw-r--r--  fs/fat/dir.c | 9
-rw-r--r--  fs/fat/fat.h | 1
-rw-r--r--  fs/fat/inode.c | 9
-rw-r--r--  fs/fifo.c | 9
-rw-r--r--  fs/fs-writeback.c | 464
-rw-r--r--  fs/fuse/dev.c | 25
-rw-r--r--  fs/fuse/dir.c | 1
-rw-r--r--  fs/fuse/file.c | 2
-rw-r--r--  fs/fuse/fuse_i.h | 3
-rw-r--r--  fs/fuse/inode.c | 17
-rw-r--r--  fs/gfs2/ops_fstype.c | 4
-rw-r--r--  fs/hfs/btree.c | 20
-rw-r--r--  fs/hfs/trans.c | 2
-rw-r--r--  fs/hfsplus/catalog.c | 4
-rw-r--r--  fs/hfsplus/dir.c | 11
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 16
-rw-r--r--  fs/hfsplus/part_tbl.c | 32
-rw-r--r--  fs/hfsplus/super.c | 16
-rw-r--r--  fs/hfsplus/wrapper.c | 87
-rw-r--r--  fs/hppfs/hppfs.c | 1
-rw-r--r--  fs/hugetlbfs/inode.c | 28
-rw-r--r--  fs/inode.c | 5
-rw-r--r--  fs/internal.h | 3
-rw-r--r--  fs/ioprio.c | 9
-rw-r--r--  fs/jbd/journal.c | 8
-rw-r--r--  fs/jbd2/commit.c | 4
-rw-r--r--  fs/jbd2/journal.c | 8
-rw-r--r--  fs/jbd2/transaction.c | 2
-rw-r--r--  fs/jffs2/gc.c | 2
-rw-r--r--  fs/lockd/clnt4xdr.c | 2
-rw-r--r--  fs/lockd/clntxdr.c | 2
-rw-r--r--  fs/lockd/svc.c | 2
-rw-r--r--  fs/locks.c | 6
-rw-r--r--  fs/namei.c | 61
-rw-r--r--  fs/namespace.c | 26
-rw-r--r--  fs/nfs/callback.h | 2
-rw-r--r--  fs/nfs/callback_proc.c | 27
-rw-r--r--  fs/nfs/callback_xdr.c | 24
-rw-r--r--  fs/nfs/delegation.c | 27
-rw-r--r--  fs/nfs/delegation.h | 1
-rw-r--r--  fs/nfs/dir.c | 58
-rw-r--r--  fs/nfs/file.c | 39
-rw-r--r--  fs/nfs/inode.c | 2
-rw-r--r--  fs/nfs/internal.h | 2
-rw-r--r--  fs/nfs/nfs3proc.c | 1
-rw-r--r--  fs/nfs/nfs4_fs.h | 3
-rw-r--r--  fs/nfs/nfs4filelayout.c | 10
-rw-r--r--  fs/nfs/nfs4proc.c | 66
-rw-r--r--  fs/nfs/nfs4state.c | 35
-rw-r--r--  fs/nfs/nfs4xdr.c | 2
-rw-r--r--  fs/nfs/objlayout/objio_osd.c | 31
-rw-r--r--  fs/nfs/objlayout/objlayout.c | 10
-rw-r--r--  fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3
-rw-r--r--  fs/nfs/pnfs.c | 78
-rw-r--r--  fs/nfs/pnfs.h | 7
-rw-r--r--  fs/nfs/proc.c | 1
-rw-r--r--  fs/nfs/super.c | 53
-rw-r--r--  fs/nfs/write.c | 48
-rw-r--r--  fs/nfsd/export.c | 2
-rw-r--r--  fs/nfsd/nfs3xdr.c | 22
-rw-r--r--  fs/nfsd/nfs4proc.c | 13
-rw-r--r--  fs/nfsd/nfs4recover.c | 2
-rw-r--r--  fs/nfsd/nfs4state.c | 115
-rw-r--r--  fs/nfsd/nfs4xdr.c | 14
-rw-r--r--  fs/nfsd/state.h | 6
-rw-r--r--  fs/nfsd/vfs.c | 3
-rw-r--r--  fs/nfsd/vfs.h | 1
-rw-r--r--  fs/nilfs2/gcinode.c | 2
-rw-r--r--  fs/nilfs2/ioctl.c | 17
-rw-r--r--  fs/nilfs2/segment.c | 2
-rw-r--r--  fs/nilfs2/super.c | 3
-rw-r--r--  fs/nilfs2/the_nilfs.c | 2
-rw-r--r--  fs/nilfs2/the_nilfs.h | 2
-rw-r--r--  fs/notify/mark.c | 8
-rw-r--r--  fs/ocfs2/alloc.c | 2
-rw-r--r--  fs/ocfs2/refcounttree.c | 12
-rw-r--r--  fs/ocfs2/suballoc.c | 4
-rw-r--r--  fs/open.c | 6
-rw-r--r--  fs/partitions/check.c | 59
-rw-r--r--  fs/pipe.c | 31
-rw-r--r--  fs/proc/base.c | 225
-rw-r--r--  fs/proc/meminfo.c | 7
-rw-r--r--  fs/proc/namespaces.c | 2
-rw-r--r--  fs/proc/task_mmu.c | 95
-rw-r--r--  fs/proc/uptime.c | 9
-rw-r--r--  fs/quota/quota.c | 4
-rw-r--r--  fs/ramfs/file-nommu.c | 1
-rw-r--r--  fs/reiserfs/super.c | 27
-rw-r--r--  fs/seq_file.c | 6
-rw-r--r--  fs/signalfd.c | 15
-rw-r--r--  fs/splice.c | 9
-rw-r--r--  fs/stat.c | 5
-rw-r--r--  fs/statfs.c | 2
-rw-r--r--  fs/super.c | 39
-rw-r--r--  fs/sync.c | 31
-rw-r--r--  fs/sysfs/inode.c | 11
-rw-r--r--  fs/ubifs/budget.c | 2
-rw-r--r--  fs/ubifs/debug.h | 17
-rw-r--r--  fs/ubifs/sb.c | 8
-rw-r--r--  fs/udf/file.c | 8
-rw-r--r--  fs/udf/inode.c | 21
-rw-r--r--  fs/udf/super.c | 108
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_discard.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c | 8
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 34
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 16
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 102
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 48
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 2
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 10
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 3
-rw-r--r--  fs/xfs/xfs_attr.c | 14
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 64
-rw-r--r--  fs/xfs/xfs_bmap.c | 10
-rw-r--r--  fs/xfs/xfs_buf_item.c | 4
-rw-r--r--  fs/xfs/xfs_iget.c | 18
-rw-r--r--  fs/xfs/xfs_inode.c | 29
-rw-r--r--  fs/xfs/xfs_inode.h | 1
-rw-r--r--  fs/xfs/xfs_inode_item.c | 10
-rw-r--r--  fs/xfs/xfs_log_recover.c | 33
-rw-r--r--  fs/xfs/xfs_mount.c | 29
-rw-r--r--  fs/xfs/xfs_trans.c | 27
-rw-r--r--  fs/xfs/xfs_trans.h | 2
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 192
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 18
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 15
-rw-r--r--  fs/yaffs2/Kconfig | 161
-rw-r--r--  fs/yaffs2/Makefile | 17
-rw-r--r--  fs/yaffs2/yaffs_allocator.c | 396
-rw-r--r--  fs/yaffs2/yaffs_allocator.h | 30
-rw-r--r--  fs/yaffs2/yaffs_attribs.c | 124
-rw-r--r--  fs/yaffs2/yaffs_attribs.h | 28
-rw-r--r--  fs/yaffs2/yaffs_bitmap.c | 98
-rw-r--r--  fs/yaffs2/yaffs_bitmap.h | 33
-rw-r--r--  fs/yaffs2/yaffs_checkptrw.c | 415
-rw-r--r--  fs/yaffs2/yaffs_checkptrw.h | 33
-rw-r--r--  fs/yaffs2/yaffs_ecc.c | 298
-rw-r--r--  fs/yaffs2/yaffs_ecc.h | 44
-rw-r--r--  fs/yaffs2/yaffs_getblockinfo.h | 35
-rw-r--r--  fs/yaffs2/yaffs_guts.c | 5164
-rw-r--r--  fs/yaffs2/yaffs_guts.h | 915
-rw-r--r--  fs/yaffs2/yaffs_linux.h | 41
-rw-r--r--  fs/yaffs2/yaffs_mtdif.c | 54
-rw-r--r--  fs/yaffs2/yaffs_mtdif.h | 23
-rw-r--r--  fs/yaffs2/yaffs_mtdif1.c | 330
-rw-r--r--  fs/yaffs2/yaffs_mtdif1.h | 29
-rw-r--r--  fs/yaffs2/yaffs_mtdif2.c | 225
-rw-r--r--  fs/yaffs2/yaffs_mtdif2.h | 29
-rw-r--r--  fs/yaffs2/yaffs_nameval.c | 201
-rw-r--r--  fs/yaffs2/yaffs_nameval.h | 28
-rw-r--r--  fs/yaffs2/yaffs_nand.c | 127
-rw-r--r--  fs/yaffs2/yaffs_nand.h | 38
-rw-r--r--  fs/yaffs2/yaffs_packedtags1.c | 53
-rw-r--r--  fs/yaffs2/yaffs_packedtags1.h | 39
-rw-r--r--  fs/yaffs2/yaffs_packedtags2.c | 196
-rw-r--r--  fs/yaffs2/yaffs_packedtags2.h | 47
-rw-r--r--  fs/yaffs2/yaffs_tagscompat.c | 422
-rw-r--r--  fs/yaffs2/yaffs_tagscompat.h | 36
-rw-r--r--  fs/yaffs2/yaffs_tagsvalidity.c | 27
-rw-r--r--  fs/yaffs2/yaffs_tagsvalidity.h | 23
-rw-r--r--  fs/yaffs2/yaffs_trace.h | 57
-rw-r--r--  fs/yaffs2/yaffs_verify.c | 535
-rw-r--r--  fs/yaffs2/yaffs_verify.h | 43
-rw-r--r--  fs/yaffs2/yaffs_vfs.c | 2792
-rw-r--r--  fs/yaffs2/yaffs_yaffs1.c | 433
-rw-r--r--  fs/yaffs2/yaffs_yaffs1.h | 22
-rw-r--r--  fs/yaffs2/yaffs_yaffs2.c | 1598
-rw-r--r--  fs/yaffs2/yaffs_yaffs2.h | 39
-rw-r--r--  fs/yaffs2/yportenv.h | 70
242 files changed, 18761 insertions, 1746 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 535ab6eccb1..4a866cd0628 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -185,12 +185,15 @@ int v9fs_acl_chmod(struct dentry *dentry)
 }
 
 int v9fs_set_create_acl(struct dentry *dentry,
-		struct posix_acl *dpacl, struct posix_acl *pacl)
+		struct posix_acl **dpacl, struct posix_acl **pacl)
 {
-	v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
-	v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
-	posix_acl_release(dpacl);
-	posix_acl_release(pacl);
+	if (dentry) {
+		v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, *dpacl);
+		v9fs_set_acl(dentry, ACL_TYPE_ACCESS, *pacl);
+	}
+	posix_acl_release(*dpacl);
+	posix_acl_release(*pacl);
+	*dpacl = *pacl = NULL;
 	return 0;
 }
 
@@ -212,11 +215,11 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep,
 		struct posix_acl *clone;
 
 		if (S_ISDIR(mode))
-			*dpacl = acl;
+			*dpacl = posix_acl_dup(acl);
 		clone = posix_acl_clone(acl, GFP_NOFS);
-		retval = -ENOMEM;
+		posix_acl_release(acl);
 		if (!clone)
-			goto cleanup;
+			return -ENOMEM;
 
 		retval = posix_acl_create_masq(clone, &mode);
 		if (retval < 0) {
@@ -225,11 +228,12 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep,
 		}
 		if (retval > 0)
 			*pacl = clone;
+		else
+			posix_acl_release(clone);
 	}
 	*modep = mode;
 	return 0;
 cleanup:
-	posix_acl_release(acl);
 	return retval;
 
 }
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 7ef3ac9f6d9..c47ea9cf303 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -19,7 +19,7 @@ extern int v9fs_get_acl(struct inode *, struct p9_fid *);
 extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
-			       struct posix_acl *, struct posix_acl *);
+			       struct posix_acl **, struct posix_acl **);
 extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
 			 struct posix_acl **dpacl, struct posix_acl **pacl);
 #else
@@ -33,8 +33,8 @@ static inline int v9fs_acl_chmod(struct dentry *dentry)
 	return 0;
 }
 static inline int v9fs_set_create_acl(struct dentry *dentry,
-				      struct posix_acl *dpacl,
-				      struct posix_acl *pacl)
+				      struct posix_acl **dpacl,
+				      struct posix_acl **pacl)
 {
 	return 0;
 }
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 5b335c5086a..945aa5f02f9 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -108,11 +108,10 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
 					  void *buffer, uint16_t bufmax)
 {
 	const struct v9fs_inode *v9inode = cookie_netfs_data;
-	memcpy(buffer, &v9inode->fscache_key->path,
-	       sizeof(v9inode->fscache_key->path));
+	memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
 	P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
-		   v9inode->fscache_key->path);
-	return sizeof(v9inode->fscache_key->path);
+		   v9inode->qid.path);
+	return sizeof(v9inode->qid.path);
 }
 
 static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
@@ -129,11 +128,10 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
 					 void *buffer, uint16_t buflen)
 {
 	const struct v9fs_inode *v9inode = cookie_netfs_data;
-	memcpy(buffer, &v9inode->fscache_key->version,
-	       sizeof(v9inode->fscache_key->version));
+	memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
 	P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
-		   v9inode->fscache_key->version);
-	return sizeof(v9inode->fscache_key->version);
+		   v9inode->qid.version);
+	return sizeof(v9inode->qid.version);
 }
 
 static enum
@@ -143,11 +141,11 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
 {
 	const struct v9fs_inode *v9inode = cookie_netfs_data;
 
-	if (buflen != sizeof(v9inode->fscache_key->version))
+	if (buflen != sizeof(v9inode->qid.version))
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
-	if (memcmp(buffer, &v9inode->fscache_key->version,
-		   sizeof(v9inode->fscache_key->version)))
+	if (memcmp(buffer, &v9inode->qid.version,
+		   sizeof(v9inode->qid.version)))
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
 	return FSCACHE_CHECKAUX_OKAY;
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 049507a5b01..40cc54ced5d 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -93,15 +93,6 @@ static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
 	BUG_ON(PageFsCache(page));
 }
 
-static inline void v9fs_fscache_set_key(struct inode *inode,
-					struct p9_qid *qid)
-{
-	struct v9fs_inode *v9inode = V9FS_I(inode);
-	spin_lock(&v9inode->fscache_lock);
-	v9inode->fscache_key = qid;
-	spin_unlock(&v9inode->fscache_lock);
-}
-
 static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
 						   struct page *page)
 {
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c82b017f51f..ef966188611 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -78,6 +78,25 @@ static const match_table_t tokens = {
 	{Opt_err, NULL}
 };
 
+/* Interpret mount options for cache mode */
+static int get_cache_mode(char *s)
+{
+	int version = -EINVAL;
+
+	if (!strcmp(s, "loose")) {
+		version = CACHE_LOOSE;
+		P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n");
+	} else if (!strcmp(s, "fscache")) {
+		version = CACHE_FSCACHE;
+		P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n");
+	} else if (!strcmp(s, "none")) {
+		version = CACHE_NONE;
+		P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n");
+	} else
+		printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s);
+	return version;
+}
+
 /**
  * v9fs_parse_options - parse mount options into session structure
  * @v9ses: existing v9fs session information
@@ -97,7 +116,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 	/* setup defaults */
 	v9ses->afid = ~0;
 	v9ses->debug = 0;
-	v9ses->cache = 0;
+	v9ses->cache = CACHE_NONE;
 #ifdef CONFIG_9P_FSCACHE
 	v9ses->cachetag = NULL;
 #endif
@@ -171,13 +190,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 					"problem allocating copy of cache arg\n");
 				goto free_and_return;
 			}
+			ret = get_cache_mode(s);
+			if (ret == -EINVAL) {
+				kfree(s);
+				goto free_and_return;
+			}
 
-			if (strcmp(s, "loose") == 0)
-				v9ses->cache = CACHE_LOOSE;
-			else if (strcmp(s, "fscache") == 0)
-				v9ses->cache = CACHE_FSCACHE;
-			else
-				v9ses->cache = CACHE_NONE;
+			v9ses->cache = ret;
 			kfree(s);
 			break;
 
@@ -200,9 +219,15 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 			} else {
 				v9ses->flags |= V9FS_ACCESS_SINGLE;
 				v9ses->uid = simple_strtoul(s, &e, 10);
-				if (*e != '\0')
-					v9ses->uid = ~0;
+				if (*e != '\0') {
+					ret = -EINVAL;
+					printk(KERN_INFO "9p: Unknown access "
+					       "argument %s.\n", s);
+					kfree(s);
+					goto free_and_return;
+				}
 			}
+
 			kfree(s);
 			break;
 
@@ -487,8 +512,8 @@ static void v9fs_inode_init_once(void *foo)
 	struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
 #ifdef CONFIG_9P_FSCACHE
 	v9inode->fscache = NULL;
-	v9inode->fscache_key = NULL;
 #endif
+	memset(&v9inode->qid, 0, sizeof(v9inode->qid));
 	inode_init_once(&v9inode->vfs_inode);
 }
 
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e5ebedfc5ed..e78956cbd70 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -125,8 +125,8 @@ struct v9fs_inode {
 #ifdef CONFIG_9P_FSCACHE
 	spinlock_t fscache_lock;
 	struct fscache_cookie *fscache;
-	struct p9_qid *fscache_key;
 #endif
+	struct p9_qid qid;
 	unsigned int cache_validity;
 	struct p9_fid *writeback_fid;
 	struct mutex v_mutex;
@@ -153,13 +153,13 @@ extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
 			      void *p);
 extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
 					 struct p9_fid *fid,
-					 struct super_block *sb);
+					 struct super_block *sb, int new);
 extern const struct inode_operations v9fs_dir_inode_operations_dotl;
 extern const struct inode_operations v9fs_file_inode_operations_dotl;
 extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
 extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
 					      struct p9_fid *fid,
					      struct super_block *sb);
-					      struct super_block *sb);
+					      struct super_block *sb, int new);
 
 /* other default globals */
 #define V9FS_PORT	564
@@ -201,8 +201,27 @@ v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 			struct super_block *sb)
 {
 	if (v9fs_proto_dotl(v9ses))
-		return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
+		return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0);
 	else
-		return v9fs_inode_from_fid(v9ses, fid, sb);
+		return v9fs_inode_from_fid(v9ses, fid, sb, 0);
 }
+
+/**
+ * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by
+ * issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+static inline struct inode *
+v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+			    struct super_block *sb)
+{
+	if (v9fs_proto_dotl(v9ses))
+		return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1);
+	else
+		return v9fs_inode_from_fid(v9ses, fid, sb, 1);
+}
+
 #endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 4014160903a..f9a28eab781 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
 
 struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode);
+		    struct inode *inode, int mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -82,4 +82,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
 	v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
 	return;
 }
+
+int v9fs_open_to_dotl_flags(int flags);
 #endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index ffed55817f0..9d6e1685d92 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	v9inode = V9FS_I(inode);
 	v9ses = v9fs_inode2v9ses(inode);
 	if (v9fs_proto_dotl(v9ses))
-		omode = file->f_flags;
+		omode = v9fs_open_to_dotl_flags(file->f_flags);
 	else
 		omode = v9fs_uflags2omode(file->f_flags,
 					v9fs_proto_dotu(v9ses));
@@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 
 	/* convert posix lock to p9 tlock args */
 	memset(&flock, 0, sizeof(flock));
-	flock.type = fl->fl_type;
+	/* map the lock type */
+	switch (fl->fl_type) {
+	case F_RDLCK:
+		flock.type = P9_LOCK_TYPE_RDLCK;
+		break;
+	case F_WRLCK:
+		flock.type = P9_LOCK_TYPE_WRLCK;
+		break;
+	case F_UNLCK:
+		flock.type = P9_LOCK_TYPE_UNLCK;
+		break;
+	}
 	flock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		flock.length = 0;
@@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 
 	/* convert posix lock to p9 tgetlock args */
 	memset(&glock, 0, sizeof(glock));
-	glock.type = fl->fl_type;
+	glock.type = P9_LOCK_TYPE_UNLCK;
 	glock.start = fl->fl_start;
 	if (fl->fl_end == OFFSET_MAX)
 		glock.length = 0;
@@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 	res = p9_client_getlock_dotl(fid, &glock);
 	if (res < 0)
 		return res;
-	if (glock.type != F_UNLCK) {
-		fl->fl_type = glock.type;
+	/* map 9p lock type to os lock type */
+	switch (glock.type) {
+	case P9_LOCK_TYPE_RDLCK:
+		fl->fl_type = F_RDLCK;
+		break;
+	case P9_LOCK_TYPE_WRLCK:
+		fl->fl_type = F_WRLCK;
+		break;
+	case P9_LOCK_TYPE_UNLCK:
+		fl->fl_type = F_UNLCK;
+		break;
+	}
+	if (glock.type != P9_LOCK_TYPE_UNLCK) {
 		fl->fl_start = glock.start;
 		if (glock.length == 0)
 			fl->fl_end = OFFSET_MAX;
 		else
 			fl->fl_end = glock.start + glock.length - 1;
 		fl->fl_pid = glock.proc_id;
-	} else
-		fl->fl_type = F_UNLCK;
-
+	}
 	return res;
 }
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7f6c6770319..c72e20cdfb9 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 /**
  * p9mode2unixmode- convert plan9 mode bits to unix mode bits
  * @v9ses: v9fs session information
- * @mode: mode to convert
+ * @stat: p9_wstat from which mode need to be derived
+ * @rdev: major number, minor number in case of device files.
  *
  */
-
-static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+			   struct p9_wstat *stat, dev_t *rdev)
 {
 	int res;
+	int mode = stat->mode;
 
-	res = mode & 0777;
+	res = mode & S_IALLUGO;
+	*rdev = 0;
 
 	if ((mode & P9_DMDIR) == P9_DMDIR)
 		res |= S_IFDIR;
@@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		 && (v9ses->nodev == 0))
 		res |= S_IFIFO;
 	else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
-		 && (v9ses->nodev == 0))
-		res |= S_IFBLK;
-	else
+		 && (v9ses->nodev == 0)) {
+		char type = 0, ext[32];
+		int major = -1, minor = -1;
+
+		strncpy(ext, stat->extension, sizeof(ext));
+		sscanf(ext, "%c %u %u", &type, &major, &minor);
+		switch (type) {
+		case 'c':
+			res |= S_IFCHR;
+			break;
+		case 'b':
+			res |= S_IFBLK;
+			break;
+		default:
+			P9_DPRINTK(P9_DEBUG_ERROR,
+				"Unknown special type %c %s\n", type,
+				stat->extension);
+		};
+		*rdev = MKDEV(major, minor);
+	} else
 		res |= S_IFREG;
 
 	if (v9fs_proto_dotu(v9ses)) {
@@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 		if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
 			res |= S_ISVTX;
 	}
-
 	return res;
 }
 
@@ -216,7 +235,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
 		return NULL;
 #ifdef CONFIG_9P_FSCACHE
 	v9inode->fscache = NULL;
-	v9inode->fscache_key = NULL;
 	spin_lock_init(&v9inode->fscache_lock);
 #endif
 	v9inode->writeback_fid = NULL;
@@ -243,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode)
 }
 
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-		    struct inode *inode, int mode)
+		    struct inode *inode, int mode, dev_t rdev)
 {
 	int err = 0;
 
 	inode_init_owner(inode, NULL, mode);
 	inode->i_blocks = 0;
-	inode->i_rdev = 0;
+	inode->i_rdev = rdev;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	inode->i_mapping->a_ops = &v9fs_addr_operations;
 
@@ -336,7 +354,7 @@ error:
  *
  */
 
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
 {
 	int err;
 	struct inode *inode;
@@ -349,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
 		P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	err = v9fs_init_inode(v9ses, inode, mode);
+	err = v9fs_init_inode(v9ses, inode, mode, rdev);
 	if (err) {
 		iput(inode);
 		return ERR_PTR(err);
@@ -433,17 +451,62 @@ void v9fs_evict_inode(struct inode *inode)
 	}
 }
 
+static int v9fs_test_inode(struct inode *inode, void *data)
+{
+	int umode;
+	dev_t rdev;
+	struct v9fs_inode *v9inode = V9FS_I(inode);
+	struct p9_wstat *st = (struct p9_wstat *)data;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	/* don't match inode of different type */
+	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+		return 0;
+
+	/* compare qid details */
+	if (memcmp(&v9inode->qid.version,
+		   &st->qid.version, sizeof(v9inode->qid.version)))
+		return 0;
+
+	if (v9inode->qid.type != st->qid.type)
+		return 0;
+	return 1;
+}
+
+static int v9fs_test_new_inode(struct inode *inode, void *data)
+{
+	return 0;
+}
+
+static int v9fs_set_inode(struct inode *inode, void *data)
+{
+	struct v9fs_inode *v9inode = V9FS_I(inode);
+	struct p9_wstat *st = (struct p9_wstat *)data;
+
+	memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
+	return 0;
+}
+
 static struct inode *v9fs_qid_iget(struct super_block *sb,
 				   struct p9_qid *qid,
-				   struct p9_wstat *st)
+				   struct p9_wstat *st,
+				   int new)
 {
+	dev_t rdev;
 	int retval, umode;
 	unsigned long i_ino;
 	struct inode *inode;
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
+	int (*test)(struct inode *, void *);
+
+	if (new)
+		test = v9fs_test_new_inode;
+	else
+		test = v9fs_test_inode;
 
 	i_ino = v9fs_qid2ino(qid);
-	inode = iget_locked(sb, i_ino);
+	inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	if (!(inode->i_state & I_NEW))
@@ -453,14 +516,14 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 	 * FIXME!! we may need support for stale inodes
 	 * later.
 	 */
-	umode = p9mode2unixmode(v9ses, st->mode);
-	retval = v9fs_init_inode(v9ses, inode, umode);
+	inode->i_ino = i_ino;
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	retval = v9fs_init_inode(v9ses, inode, umode, rdev);
 	if (retval)
 		goto error;
 
 	v9fs_stat2inode(st, inode, sb);
 #ifdef CONFIG_9P_FSCACHE
-	v9fs_fscache_set_key(inode, &st->qid);
 	v9fs_cache_inode_get_cookie(inode);
 #endif
 	unlock_new_inode(inode);
@@ -474,7 +537,7 @@ error:
 
 struct inode *
 v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-		    struct super_block *sb)
+		    struct super_block *sb, int new)
 {
 	struct p9_wstat *st;
 	struct inode *inode = NULL;
@@ -483,7 +546,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	if (IS_ERR(st))
 		return ERR_CAST(st);
 
-	inode = v9fs_qid_iget(sb, &st->qid, st);
+	inode = v9fs_qid_iget(sb, &st->qid, st, new);
 	p9stat_free(st);
 	kfree(st);
 	return inode;
@@ -585,19 +648,17 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	}
 
 	/* instantiate inode and assign the unopened fid to the dentry */
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
-
+	d_instantiate(dentry, inode);
 	return ofid;
-
 error:
 	if (ofid)
 		p9_client_clunk(ofid);
@@ -738,6 +799,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 				      struct nameidata *nameidata)
 {
+	struct dentry *res;
 	struct super_block *sb;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *dfid, *fid;
@@ -769,22 +831,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 		return ERR_PTR(result);
 	}
-
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	/*
+	 * Make sure we don't use a wrong inode due to parallel
+	 * unlink. For cached mode create calls request for new
+	 * inode. But with cache disabled, lookup should do this.
+	 */
+	if (v9ses->cache)
+		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	else
+		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		result = PTR_ERR(inode);
 		inode = NULL;
 		goto error;
 	}
-
 	result = v9fs_fid_add(dentry, fid);
 	if (result < 0)
 		goto error_iput;
-
 inst_out:
-	d_add(dentry, inode);
-	return NULL;
-
+	/*
+	 * If we had a rename on the server and a parallel lookup
+	 * for the new name, then make sure we instantiate with
+	 * the new name. ie look up for a/b, while on server somebody
+	 * moved b under k and client parallely did a lookup for
	 * k/b.
+	 * k/b.
+	 */
+	res = d_materialise_unique(dentry, inode);
+	if (!IS_ERR(res))
+		return res;
+	result = PTR_ERR(res);
 error_iput:
 	iput(inode);
 error:
@@ -950,7 +1025,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		return PTR_ERR(st);
 
 	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-	generic_fillattr(dentry->d_inode, stat);
+	generic_fillattr(dentry->d_inode, stat);
 
 	p9stat_free(st);
 	kfree(st);
@@ -1034,6 +1109,7 @@ void
 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
+	mode_t mode;
 	char ext[32];
 	char tag_name[14];
 	unsigned int i_nlink;
@@ -1069,31 +1145,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 			inode->i_nlink = i_nlink;
 		}
 	}
-	inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
-	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
-		char type = 0;
-		int major = -1;
-		int minor = -1;
-
-		strncpy(ext, stat->extension, sizeof(ext));
-		sscanf(ext, "%c %u %u", &type, &major, &minor);
-		switch (type) {
-		case 'c':
-			inode->i_mode &= ~S_IFBLK;
-			inode->i_mode |= S_IFCHR;
-			break;
-		case 'b':
-			break;
-		default:
-			P9_DPRINTK(P9_DEBUG_ERROR,
-				"Unknown special type %c %s\n", type,
-				stat->extension);
-		};
-		inode->i_rdev = MKDEV(major, minor);
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-	} else
-		inode->i_rdev = 0;
-
+	mode = stat->mode & S_IALLUGO;
+	mode |= inode->i_mode & ~S_IALLUGO;
+	inode->i_mode = mode;
 	i_size_write(inode, stat->length);
 
 	/* not real number of blocks, but 512 byte ones ... */
@@ -1359,6 +1413,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 {
+	int umode;
+	dev_t rdev;
 	loff_t i_size;
 	struct p9_wstat *st;
 	struct v9fs_session_info *v9ses;
@@ -1367,6 +1423,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_stat(fid);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	umode = p9mode2unixmode(v9ses, st, &rdev);
+	if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -1378,6 +1440,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	p9stat_free(st);
 	kfree(st);
 	return 0;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 691c78f58be..c873172ab37 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,18 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
 	return dentry;
 }
 
+static int v9fs_test_inode_dotl(struct inode *inode, void *data)
+{
+	struct v9fs_inode *v9inode = V9FS_I(inode);
+	struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
+
+	/* don't match inode of different type */
+	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+		return 0;
+
+	if (inode->i_generation != st->st_gen)
+		return 0;
+
+	/* compare qid details */
+	if (memcmp(&v9inode->qid.version,
+		   &st->qid.version, sizeof(v9inode->qid.version)))
+		return 0;
+
+	if (v9inode->qid.type != st->qid.type)
+		return 0;
+	return 1;
+}
+
+/* Always get a new inode */
+static int v9fs_test_new_inode_dotl(struct inode *inode, void *data)
+{
+	return 0;
+}
+
+static int v9fs_set_inode_dotl(struct inode *inode, void *data)
+{
+	struct v9fs_inode *v9inode = V9FS_I(inode);
+	struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
+
+	memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
+	inode->i_generation = st->st_gen;
+	return 0;
+}
+
 static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 					struct p9_qid *qid,
 					struct p9_fid *fid,
-					struct p9_stat_dotl *st)
+					struct p9_stat_dotl *st,
+					int new)
 {
 	int retval;
 	unsigned long i_ino;
 	struct inode *inode;
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
+	int (*test)(struct inode *, void *);
+
+	if (new)
+		test = v9fs_test_new_inode_dotl;
+	else
+		test = v9fs_test_inode_dotl;
 
 	i_ino = v9fs_qid2ino(qid);
-	inode = iget_locked(sb, i_ino);
+	inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	if (!(inode->i_state & I_NEW))
@@ -107,13 +152,14 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 	 * FIXME!! we may need support for stale inodes
 	 * later.
 	 */
-	retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+	inode->i_ino = i_ino;
+	retval = v9fs_init_inode(v9ses, inode,
+				 st->st_mode, new_decode_dev(st->st_rdev));
 	if (retval)
 		goto error;
 
 	v9fs_stat2inode_dotl(st, inode);
 #ifdef CONFIG_9P_FSCACHE
-	v9fs_fscache_set_key(inode, &st->qid);
 	v9fs_cache_inode_get_cookie(inode);
 #endif
 	retval = v9fs_get_acl(inode, fid);
@@ -131,20 +177,72 @@ error:
 
 struct inode *
 v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-			 struct super_block *sb)
+			 struct super_block *sb, int new)
 {
 	struct p9_stat_dotl *st;
 	struct inode *inode = NULL;
 
-	st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
+	st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN);
 	if (IS_ERR(st))
 		return ERR_CAST(st);
 
-	inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
+	inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new);
 	kfree(st);
 	return inode;
 }
 
+struct dotl_openflag_map {
+	int open_flag;
+	int dotl_flag;
+};
+
+static int v9fs_mapped_dotl_flags(int flags)
+{
+	int i;
+	int rflags = 0;
+	struct dotl_openflag_map dotl_oflag_map[] = {
+		{ O_CREAT,	P9_DOTL_CREATE },
+		{ O_EXCL,	P9_DOTL_EXCL },
+		{ O_NOCTTY,	P9_DOTL_NOCTTY },
+		{ O_TRUNC,	P9_DOTL_TRUNC },
+		{ O_APPEND,	P9_DOTL_APPEND },
+		{ O_NONBLOCK,	P9_DOTL_NONBLOCK },
+		{ O_DSYNC,	P9_DOTL_DSYNC },
+		{ FASYNC,	P9_DOTL_FASYNC },
+		{ O_DIRECT,	P9_DOTL_DIRECT },
+		{ O_LARGEFILE,	P9_DOTL_LARGEFILE },
+		{ O_DIRECTORY,	P9_DOTL_DIRECTORY },
+		{ O_NOFOLLOW,	P9_DOTL_NOFOLLOW },
+		{ O_NOATIME,	P9_DOTL_NOATIME },
+		{ O_CLOEXEC,	P9_DOTL_CLOEXEC },
+		{ O_SYNC,	P9_DOTL_SYNC},
+	};
+	for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
+		if (flags & dotl_oflag_map[i].open_flag)
+			rflags |= dotl_oflag_map[i].dotl_flag;
+	}
+	return rflags;
+}
+
+/**
+ * v9fs_open_to_dotl_flags- convert Linux specific open flags to
+ * plan 9 open flag.
+ * @flags: flags to convert
+ */
+int v9fs_open_to_dotl_flags(int flags)
+{
+	int rflags = 0;
+
+	/*
+	 * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
+	 * and P9_DOTL_NOACCESS
+	 */
+	rflags |= flags & O_ACCMODE;
+	rflags |= v9fs_mapped_dotl_flags(flags);
+
+	return rflags;
+}
+
 /**
  * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
  * @dir: directory inode that is being created
@@ -213,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			   "Failed to get acl values in creat %d\n", err);
 		goto error;
 	}
-	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+	err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+				    mode, gid, &qid);
 	if (err < 0) {
 		P9_DPRINTK(P9_DEBUG_VFS,
 			   "p9_client_open_dotl failed in creat %d\n",
@@ -230,19 +329,19 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		fid = NULL;
 		goto error;
 	}
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
+	d_instantiate(dentry, inode);
 
 	/* Now set the ACL based on the default value */
-	v9fs_set_create_acl(dentry, dacl, pacl);
+	v9fs_set_create_acl(dentry, &dacl, &pacl);
 
 	v9inode = V9FS_I(inode);
 	mutex_lock(&v9inode->v_mutex);
@@ -283,6 +382,7 @@ error:
 err_clunk_old_fid:
 	if (ofid)
 		p9_client_clunk(ofid);
+	v9fs_set_create_acl(NULL, &dacl, &pacl);
 	return err;
 }
 
@@ -350,17 +450,17 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 			goto error;
 		}
 
-		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
 				   err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
@@ -368,7 +468,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 		 * inode with stat. We need to get an inode
 		 * so that we can set the acl with dentry
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -376,12 +476,13 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
-	v9fs_set_create_acl(dentry, dacl, pacl);
+	v9fs_set_create_acl(dentry, &dacl, &pacl);
 	inc_nlink(dir);
 	v9fs_invalidate_inode_attr(dir);
 error:
 	if (fid)
 		p9_client_clunk(fid);
+	v9fs_set_create_acl(NULL, &dacl, &pacl);
 	return err;
 }
 
@@ -493,6 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 void
 v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 {
+	mode_t mode;
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
 	if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -505,11 +607,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		inode->i_uid = stat->st_uid;
 		inode->i_gid = stat->st_gid;
 		inode->i_nlink = stat->st_nlink;
-		inode->i_mode = stat->st_mode;
-		inode->i_rdev = new_decode_dev(stat->st_rdev);
 
-		if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
-			init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		mode = stat->st_mode & S_IALLUGO;
+		mode |= inode->i_mode & ~S_IALLUGO;
+		inode->i_mode = mode;
 
 		i_size_write(inode, stat->st_size);
 		inode->i_blocks = stat->st_blocks;
@@ -547,7 +648,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 		inode->i_blocks = stat->st_blocks;
 	}
 	if (stat->st_result_mask & P9_STATS_GEN)
-		inode->i_generation = stat->st_gen;
+		inode->i_generation = stat->st_gen;
 
 	/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
 	 * because the inode structure does not have fields for them.
@@ -603,21 +704,21 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 	}
 
 	/* instantiate inode and assign the unopened fid to dentry */
-	inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+	inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
 			   err);
 		goto error;
 	}
-	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
 		goto error;
+	d_instantiate(dentry, inode);
 	fid = NULL;
 	} else {
 		/* Not in cached mode. No need to populate inode with stat */
-		inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+		inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -756,24 +857,24 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			goto error;
 		}
 
-		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
 				   err);
 			goto error;
 		}
-		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
 			goto error;
+		d_instantiate(dentry, inode);
 		fid = NULL;
 	} else {
 		/*
 		 * Not in cached mode. No need to populate inode with stat.
 		 * socket syscall returns a fd, so we need instantiate
 		 */
-		inode = v9fs_get_inode(dir->i_sb, mode);
+		inode = v9fs_get_inode(dir->i_sb, mode, rdev);
 		if (IS_ERR(inode)) {
 			err = PTR_ERR(inode);
 			goto error;
@@ -781,10 +882,11 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
-	v9fs_set_create_acl(dentry, dacl, pacl);
+	v9fs_set_create_acl(dentry, &dacl, &pacl);
 error:
 	if (fid)
 		p9_client_clunk(fid);
+	v9fs_set_create_acl(NULL, &dacl, &pacl);
 	return err;
 }
 
@@ -838,6 +940,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
+	/*
+	 * Don't update inode if the file type is different
+	 */
+	if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+		goto out;
 
 	spin_lock(&inode->i_lock);
 	/*
@@ -849,6 +956,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
 	if (v9ses->cache)
 		inode->i_size = i_size;
 	spin_unlock(&inode->i_lock);
+out:
 	kfree(st);
 	return 0;
 }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index feef6cdc1fd..c70251d47ed 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
 	else
 		sb->s_d_op = &v9fs_dentry_operations;
 
-	inode = v9fs_get_inode(sb, S_IFDIR | mode);
+	inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
 	if (IS_ERR(inode)) {
 		retval = PTR_ERR(inode);
 		goto release_sb;
diff --git a/fs/Kconfig b/fs/Kconfig
index 19891aab9c6..88701cc00e0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -192,6 +192,10 @@ source "fs/hfsplus/Kconfig"
 source "fs/befs/Kconfig"
 source "fs/bfs/Kconfig"
 source "fs/efs/Kconfig"
+
+# Patched by YAFFS
+source "fs/yaffs2/Kconfig"
+
 source "fs/jffs2/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index fb68c2b8cf8..2999b4d4b26 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -124,3 +124,6 @@ obj-$(CONFIG_GFS2_FS) += gfs2/
 obj-$(CONFIG_EXOFS_FS)		+= exofs/
 obj-$(CONFIG_CEPH_FS)		+= ceph/
 obj-$(CONFIG_PSTORE)		+= pstore/
+
+# Patched by YAFFS
+obj-$(CONFIG_YAFFS_FS)		+= yaffs2/
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5a9b6843bac..1f3624d3b24 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
 	unsigned		reply_size;	/* current size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
 	unsigned		last_to;	/* amount of mapping[last] */
-	unsigned short		offset;		/* offset into received data store */
+	unsigned		offset;		/* offset into received data store */
 	unsigned char		unmarshall;	/* unmarshalling phase */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323aebb..8ad8c2a0703 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	struct msghdr msg;
 	struct kvec iov[1];
 	int ret;
+	struct sk_buff *skb;

 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));

@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,

 error_do_abort:
 	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+	while ((skb = skb_dequeue(&call->rx_queue)))
+		afs_free_skb(skb);
 	rxrpc_kernel_end_call(rxcall);
 	call->rxcall = NULL;
 error_kill_call:
diff --git a/fs/aio.c b/fs/aio.c
index e29ec485af2..99bb74461b0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx)
 	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }

-static inline void get_ioctx(struct kioctx *kioctx)
-{
-	BUG_ON(atomic_read(&kioctx->users) <= 0);
-	atomic_inc(&kioctx->users);
-}
-
 static inline int try_get_ioctx(struct kioctx *kioctx)
 {
 	return atomic_inc_not_zero(&kioctx->users);
@@ -273,7 +267,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	mm = ctx->mm = current->mm;
 	atomic_inc(&mm->mm_count);

-	atomic_set(&ctx->users, 1);
+	atomic_set(&ctx->users, 2);
 	spin_lock_init(&ctx->ctx_lock);
 	spin_lock_init(&ctx->ring_info.ring_lock);
 	init_waitqueue_head(&ctx->wait);
@@ -527,11 +521,16 @@ static void aio_fput_routine(struct work_struct *data)
 		fput(req->ki_filp);

 		/* Link the iocb into the context's free list */
+		rcu_read_lock();
 		spin_lock_irq(&ctx->ctx_lock);
 		really_put_req(ctx, req);
+		/*
+		 * at that point ctx might've been killed, but actual
+		 * freeing is RCU'd
+		 */
 		spin_unlock_irq(&ctx->ctx_lock);
+		rcu_read_unlock();

-		put_ioctx(ctx);
 		spin_lock_irq(&fput_lock);
 	}
 	spin_unlock_irq(&fput_lock);
@@ -562,7 +561,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	 * this function will be executed w/out any aio kthread wakeup.
 	 */
 	if (unlikely(!fput_atomic(req->ki_filp))) {
-		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
 		spin_unlock(&fput_lock);
@@ -1256,10 +1254,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret)
+		if (!ret) {
+			put_ioctx(ioctx);
 			return 0;
-
-		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+		}
 		io_destroy(ioctx);
 	}

@@ -1397,6 +1395,10 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
 	if (ret < 0)
 		goto out;

+	ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
+	if (ret < 0)
+		goto out;
+
 	kiocb->ki_nr_segs = kiocb->ki_nbytes;
 	kiocb->ki_cur_seg = 0;
 	/* ki_nbytes/left now reflect bytes instead of segs */
@@ -1408,11 +1410,17 @@out:
 	return ret;
 }

-static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
 {
+	int bytes;
+
+	bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
+	if (bytes < 0)
+		return bytes;
+
 	kiocb->ki_iovec = &kiocb->ki_inline_vec;
 	kiocb->ki_iovec->iov_base = kiocb->ki_buf;
-	kiocb->ki_iovec->iov_len = kiocb->ki_left;
+	kiocb->ki_iovec->iov_len = bytes;
 	kiocb->ki_nr_segs = 1;
 	kiocb->ki_cur_seg = 0;
 	return 0;
@@ -1437,10 +1445,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
 			kiocb->ki_left)))
 			break;
-		ret = security_file_permission(file, MAY_READ);
-		if (unlikely(ret))
-			break;
-		ret = aio_setup_single_vector(kiocb);
+		ret = aio_setup_single_vector(READ, file, kiocb);
 		if (ret)
 			break;
 		ret = -EINVAL;
@@ -1455,10 +1460,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
 			kiocb->ki_left)))
 			break;
-		ret = security_file_permission(file, MAY_WRITE);
-		if (unlikely(ret))
-			break;
-		ret = aio_setup_single_vector(kiocb);
+		ret = aio_setup_single_vector(WRITE, file, kiocb);
 		if (ret)
 			break;
 		ret = -EINVAL;
@@ -1469,9 +1471,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		ret = -EBADF;
 		if (unlikely(!(file->f_mode & FMODE_READ)))
 			break;
-		ret = security_file_permission(file, MAY_READ);
-		if (unlikely(ret))
-			break;
 		ret = aio_setup_vectored_rw(READ, kiocb, compat);
 		if (ret)
 			break;
@@ -1483,9 +1482,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 		ret = -EBADF;
 		if (unlikely(!(file->f_mode & FMODE_WRITE)))
 			break;
-		ret = security_file_permission(file, MAY_WRITE);
-		if (unlikely(ret))
-			break;
 		ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
 		if (ret)
 			break;
@@ -1622,7 +1618,6 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
-	struct blk_plug plug;

 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1639,8 +1634,6 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 		return -EINVAL;
 	}

-	blk_start_plug(&plug);
-
 	/*
 	 * AKPM: should this return a partial result if some of the IOs were
 	 * successfully submitted?
@@ -1663,7 +1656,6 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 		if (ret)
 			break;
 	}
-	blk_finish_plug(&plug);

 	put_ioctx(ctx);
 	return i ? i : ret;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 475f9c597cb..756d3286bee 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -278,6 +278,17 @@ int autofs4_fill_super(struct super_block *, void *, int);
 struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
 void autofs4_clean_ino(struct autofs_info *);

+static inline int autofs_prepare_pipe(struct file *pipe)
+{
+	if (!pipe->f_op || !pipe->f_op->write)
+		return -EINVAL;
+	if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode))
+		return -EINVAL;
+	/* We want a packet pipe */
+	pipe->f_flags |= O_DIRECT;
+	return 0;
+}
+
 /* Queue management functions */

 int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
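autofs_prepare_pipe() turns the daemon pipe into a "packet pipe" by setting O_DIRECT, so each kernel write() reaches the daemon as one discrete packet. The same behaviour can be observed from userspace with pipe2(), which accepts O_DIRECT on Linux 3.4 and later:

    /* Demo of packet-mode pipes: each write() is one packet, and a
     * read() returns at most one packet even with a larger buffer. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>

    int main(void)
    {
        int fd[2];
        char buf[64];

        if (pipe2(fd, O_DIRECT) < 0) {
            perror("pipe2");
            return 1;
        }
        write(fd[1], "first", 5);
        write(fd[1], "second", 6);
        /* a big read still returns only the first packet */
        ssize_t n = read(fd[0], buf, sizeof(buf));
        printf("got %zd bytes: %.*s\n", n, (int)n, buf);
        return 0;
    }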
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 509fe1eb66a..de542716245 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -376,7 +376,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		err = -EBADF;
 		goto out;
 	}
-	if (!pipe->f_op || !pipe->f_op->write) {
+	if (autofs_prepare_pipe(pipe) < 0) {
 		err = -EPIPE;
 		fput(pipe);
 		goto out;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 180fa2425e4..7c26678e2ca 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -292,7 +292,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		printk("autofs: could not open pipe file descriptor\n");
 		goto fail_dput;
 	}
-	if (!pipe->f_op || !pipe->f_op->write)
+	if (autofs_prepare_pipe(pipe) < 0)
 		goto fail_fput;
 	sbi->pipe = pipe;
 	sbi->pipefd = pipefd;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 25435987d6a..813ea10fdde 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -90,7 +90,7 @@ static int autofs4_write(struct file *file, const void *addr, int bytes)

 	return (bytes > 0);
 }

 static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 				 struct autofs_wait_queue *wq,
 				 int type)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 54b8c28bebc..720d885e8dc 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 		befs_data_stream *data = &befs_ino->i_data.ds;
 		befs_off_t len = data->size;

-		befs_debug(sb, "Follow long symlink");
-
-		link = kmalloc(len, GFP_NOFS);
-		if (!link) {
-			link = ERR_PTR(-ENOMEM);
-		} else if (befs_read_lsymlink(sb, data, link, len) != len) {
-			kfree(link);
-			befs_error(sb, "Failed to read entire long symlink");
+		if (len == 0) {
+			befs_error(sb, "Long symlink with illegal length");
 			link = ERR_PTR(-EIO);
 		} else {
-			link[len - 1] = '\0';
+			befs_debug(sb, "Follow long symlink");
+
+			link = kmalloc(len, GFP_NOFS);
+			if (!link) {
+				link = ERR_PTR(-ENOMEM);
+			} else if (befs_read_lsymlink(sb, data, link, len) != len) {
+				kfree(link);
+				befs_error(sb, "Failed to read entire long symlink");
+				link = ERR_PTR(-EIO);
+			} else {
+				link[len - 1] = '\0';
+			}
 		}
 	} else {
 		link = befs_ino->i_data.symlink;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 303983fabfd..618493e44ae 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -796,7 +796,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	 * might try to exec. This is because the brk will
 	 * follow the loader, and is not movable. */
 #if defined(CONFIG_X86) || defined(CONFIG_ARM)
-	load_bias = 0;
+	/* Memory randomization might have been switched off
+	 * in runtime via sysctl.
+	 * If that is the case, retain the original non-zero
+	 * load_bias value in order to establish proper
+	 * non-randomized mappings.
+	 */
+	if (current->flags & PF_RANDOMIZE)
+		load_bias = 0;
+	else
+		load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 #else
 	load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 #endif
@@ -1413,7 +1422,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 	for (i = 1; i < view->n; ++i) {
 		const struct user_regset *regset = &view->regsets[i];
 		do_thread_regset_writeback(t->task, regset);
-		if (regset->core_note_type &&
+		if (regset->core_note_type && regset->get &&
 		    (!regset->active || regset->active(t->task, regset))) {
 			int ret;
 			size_t size = regset->n * regset->size;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 610e8e0b04b..8d78633c00b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -44,27 +44,31 @@ inline struct block_device *I_BDEV(struct inode *inode)
 {
 	return &BDEV_I(inode)->bdev;
 }
-
 EXPORT_SYMBOL(I_BDEV);

 /*
- * move the inode from it's current bdi to the a new bdi. if the inode is dirty
- * we need to move it onto the dirty list of @dst so that the inode is always
- * on the right list.
+ * Move the inode from its current bdi to a new bdi. If the inode is dirty we
+ * need to move it onto the dirty list of @dst so that the inode is always on
+ * the right list.
  */
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_wb_list_lock);
+	struct backing_dev_info *old = inode->i_data.backing_dev_info;
+
+	if (unlikely(dst == old))		/* deadlock avoidance */
+		return;
+	bdi_lock_two(&old->wb, &dst->wb);
 	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_wb_list_lock);
+	spin_unlock(&old->wb.list_lock);
+	spin_unlock(&dst->wb.list_lock);
 }

-static sector_t max_block(struct block_device *bdev)
+sector_t blkdev_max_block(struct block_device *bdev)
 {
 	sector_t retval = ~((sector_t)0);
 	loff_t sz = i_size_read(bdev->bd_inode);
@@ -135,7 +139,7 @@ static int
 blkdev_get_block(struct inode *inode, sector_t iblock,
 		struct buffer_head *bh, int create)
 {
-	if (iblock >= max_block(I_BDEV(inode))) {
+	if (iblock >= blkdev_max_block(I_BDEV(inode))) {
 		if (create)
 			return -EIO;

@@ -157,7 +161,7 @@ static int
 blkdev_get_blocks(struct inode *inode, sector_t iblock,
 		struct buffer_head *bh, int create)
 {
-	sector_t end_block = max_block(I_BDEV(inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(inode));
 	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

 	if ((iblock + max_blocks) > end_block) {
@@ -1075,6 +1079,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 {
 	struct gendisk *disk;
+	struct module *owner;
 	int ret;
 	int partno;
 	int perm = 0;
@@ -1100,6 +1105,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk = get_gendisk(bdev->bd_dev, &partno);
 	if (!disk)
 		goto out;
+	owner = disk->fops->owner;

 	disk_block_events(disk);
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1127,8 +1133,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				bdev->bd_disk = NULL;
 				mutex_unlock(&bdev->bd_mutex);
 				disk_unblock_events(disk);
-				module_put(disk->fops->owner);
 				put_disk(disk);
+				module_put(owner);
 				goto restart;
 			}
 		}
@@ -1147,8 +1153,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			 * The latter is necessary to prevent ghost
 			 * partitions on a removed medium.
 			 */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(disk, bdev);
+			}
 			if (ret)
 				goto out_clear;
 		} else {
@@ -1178,14 +1188,18 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
 			/* the same as first opener case, read comment there */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(bdev->bd_disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(bdev->bd_disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(bdev->bd_disk, bdev);
+			}
 			if (ret)
 				goto out_unlock_bdev;
 		}
 		/* only one opener holds refs to the module and disk */
-		module_put(disk->fops->owner);
 		put_disk(disk);
+		module_put(owner);
 	}
 	bdev->bd_openers++;
 	if (for_part)
@@ -1205,8 +1219,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
 	disk_unblock_events(disk);
-	module_put(disk->fops->owner);
 	put_disk(disk);
+	module_put(owner);
 out:
 	bdput(bdev);

@@ -1427,8 +1441,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	if (!bdev->bd_openers) {
 		struct module *owner = disk->fops->owner;

-		put_disk(disk);
-		module_put(owner);
 		disk_put_part(bdev->bd_part);
 		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
@@ -1437,6 +1449,9 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		if (bdev != bdev->bd_contains)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
+
+		put_disk(disk);
+		module_put(owner);
 	}
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
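The __blkdev_get()/__blkdev_put() reordering above follows one rule: cache disk->fops->owner in a local before put_disk(), because put_disk() may drop the last reference and free the structure the owner pointer lives in. A hypothetical userspace sketch of that use-after-free pattern (all names invented):

    #include <stdio.h>
    #include <stdlib.h>

    struct provider { int handle; };

    struct disk {
        struct provider *owner;
    };

    static void put_disk(struct disk *d)
    {
        free(d); /* may be the last reference */
    }

    static void module_put(struct provider *p)
    {
        printf("released provider %d\n", p->handle);
    }

    int main(void)
    {
        static struct provider prov = { 42 };
        struct disk *d = malloc(sizeof(*d));
        d->owner = &prov;

        /* wrong order would be module_put(d->owner) after put_disk(d),
         * which reads freed memory; cache the pointer first instead */
        struct provider *owner = d->owner;
        put_disk(d);
        module_put(owner);
        return 0;
    }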
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef..8006a28390f 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -212,10 +212,17 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,

 		work->ordered_func(work);

-		/* now take the lock again and call the freeing code */
+		/* now take the lock again and drop our item from the list */
 		spin_lock(&workers->order_lock);
 		list_del(&work->order_list);
+		spin_unlock(&workers->order_lock);
+
+		/*
+		 * we don't want to call the ordered free functions
+		 * with the lock held though
+		 */
 		work->ordered_free(work);
+		spin_lock(&workers->order_lock);
 	}

 	spin_unlock(&workers->order_lock);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3b859a3e6a0..66179bcb16f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1972,7 +1972,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,

 static inline bool btrfs_root_readonly(struct btrfs_root *root)
 {
-	return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
 }

 /* struct btrfs_super_block */
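The btrfs_root_readonly() change is an endianness fix: root_item.flags is stored little-endian on disk, so the CPU-native flag constant must be converted before masking. A self-contained demonstration with glibc's <endian.h> — SUBVOL_RDONLY here is a stand-in bit, not the real btrfs value:

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SUBVOL_RDONLY (1ULL << 0)

    int main(void)
    {
        /* flags as they sit in the on-disk (little-endian) structure */
        uint64_t disk_flags = htole64(SUBVOL_RDONLY);

        /* naive: masks LE storage with a CPU-order constant; on a
         * big-endian host this prints 0 even though the flag is set */
        printf("naive:   %d\n", (disk_flags & SUBVOL_RDONLY) != 0);
        /* correct on every host: convert the constant to storage order */
        printf("correct: %d\n", (disk_flags & htole64(SUBVOL_RDONLY)) != 0);
        return 0;
    }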
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ac8db5dc0a..57106a99b52 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -801,7 +801,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,

 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page,
+			enum migrate_mode mode)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -816,7 +817,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71cd456fdb6..f88d5363114 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1784,6 +1784,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,


 	for (i = 0; i < multi->num_stripes; i++, stripe++) {
+		if (!stripe->dev->can_discard)
+			continue;
+
 		ret = btrfs_issue_discard(stripe->dev->bdev,
 					  stripe->physical,
 					  stripe->length);
@@ -1791,11 +1794,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 			discarded_bytes += stripe->length;
 		else if (ret != -EOPNOTSUPP)
 			break;
+
+		/*
+		 * Just in case we get back EOPNOTSUPP for some reason,
+		 * just ignore the return value so we don't screw up
+		 * people calling discard_extent.
+		 */
+		ret = 0;
 	}
 	kfree(multi);
 }
-	if (discarded_bytes && ret == -EOPNOTSUPP)
-		ret = 0;

 	if (actual_bytes)
 		*actual_bytes = discarded_bytes;
@@ -3321,7 +3329,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	smp_mb();
 	nr_pages = min_t(unsigned long, nr_pages,
 			 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-	writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+	writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+				       WB_REASON_FS_FREE_SPACE);

 	spin_lock(&space_info->lock);
 	if (reserved > space_info->bytes_reserved)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3601f0aebdd..d42e6bfdd3a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4124,7 +4124,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,

 	/* special case for "." */
 	if (filp->f_pos == 0) {
-		over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
+		over = filldir(dirent, ".", 1,
+			       filp->f_pos, btrfs_ino(inode), DT_DIR);
 		if (over)
 			return 0;
 		filp->f_pos = 1;
@@ -4133,7 +4134,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	if (filp->f_pos == 1) {
 		u64 pino = parent_ino(filp->f_path.dentry);
 		over = filldir(dirent, "..", 2,
-			       2, pino, DT_DIR);
+			       filp->f_pos, pino, DT_DIR);
 		if (over)
 			return 0;
 		filp->f_pos = 2;
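The filldir fix above passes filp->f_pos rather than a hard-coded 1 or 2: the offset handed to filldir is recorded with the entry, and a later readdir resumes from it, so it must match the position actually being emitted. A toy iterator, with invented names rather than the VFS interfaces, showing that contract:

    #include <stdio.h>

    struct dirctx {
        long pos; /* next offset to emit */
    };

    /* emit one entry; returns nonzero when the user buffer is "full" */
    static int fill(const char *name, long offset)
    {
        printf("entry %-8s stored at offset %ld\n", name, offset);
        return 0;
    }

    int main(void)
    {
        struct dirctx ctx = { 0 };
        const char *names[] = { ".", "..", "a", "b" };

        for (; ctx.pos < 4; ctx.pos++) {
            /* pass ctx.pos, not a constant: a reader resuming at the
             * stored offset must land back on this same entry */
            if (fill(names[ctx.pos], ctx.pos))
                break;
        }
        return 0;
    }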
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4ce8a9f41d1..faf7d0bc0a0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -691,6 +691,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 	kfree(name);

 	iput(inode);
+
+	btrfs_run_delayed_items(trans, root);
 	return ret;
 }

@@ -799,14 +801,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 				  struct extent_buffer *eb, int slot,
 				  struct btrfs_key *key)
 {
-	struct inode *dir;
-	int ret;
 	struct btrfs_inode_ref *ref;
+	struct btrfs_dir_item *di;
+	struct inode *dir;
 	struct inode *inode;
-	char *name;
-	int namelen;
 	unsigned long ref_ptr;
 	unsigned long ref_end;
+	char *name;
+	int namelen;
+	int ret;
 	int search_done = 0;

 	/*
@@ -895,6 +898,7 @@again:
 				ret = btrfs_unlink_inode(trans, root, dir,
 							 inode, victim_name,
 							 victim_name_len);
+				btrfs_run_delayed_items(trans, root);
 			}
 			kfree(victim_name);
 			ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -909,6 +913,25 @@ again:
 	}
 	btrfs_release_path(path);

+	/* look for a conflicting sequence number */
+	di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+					 btrfs_inode_ref_index(eb, ref),
+					 name, namelen, 0);
+	if (di && !IS_ERR(di)) {
+		ret = drop_one_dir_item(trans, root, path, dir, di);
+		BUG_ON(ret);
+	}
+	btrfs_release_path(path);
+
+	/* look for a conflicting name */
+	di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
+				   name, namelen, 0);
+	if (di && !IS_ERR(di)) {
+		ret = drop_one_dir_item(trans, root, path, dir, di);
+		BUG_ON(ret);
+	}
+	btrfs_release_path(path);
+
 insert:
 	/* insert our name */
 	ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
@@ -1456,6 +1479,9 @@ again:
 		ret = btrfs_unlink_inode(trans, root, dir, inode,
 					 name, name_len);
 		BUG_ON(ret);
+
+		btrfs_run_delayed_items(trans, root);
+
 		kfree(name);
 		iput(inode);

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19450bc5363..43baaf0c674 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -500,6 +500,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 			fs_devices->rw_devices--;
 		}

+		if (device->can_discard)
+			fs_devices->num_can_discard--;
+
 		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
 		BUG_ON(!new_device);
 		memcpy(new_device, device, sizeof(*new_device));
@@ -508,6 +511,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		new_device->bdev = NULL;
 		new_device->writeable = 0;
 		new_device->in_fs_metadata = 0;
+		new_device->can_discard = 0;
 		list_replace_rcu(&device->dev_list, &new_device->dev_list);

 		call_rcu(&device->rcu, free_device);
@@ -547,6 +551,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 				fmode_t flags, void *holder)
 {
+	struct request_queue *q;
 	struct block_device *bdev;
 	struct list_head *head = &fs_devices->devices;
 	struct btrfs_device *device;
@@ -603,6 +608,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 			seeding = 0;
 		}

+		q = bdev_get_queue(bdev);
+		if (blk_queue_discard(q)) {
+			device->can_discard = 1;
+			fs_devices->num_can_discard++;
+		}
+
 		device->bdev = bdev;
 		device->in_fs_metadata = 0;
 		device->mode = flags;
@@ -1542,6 +1553,7 @@error:

 int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 {
+	struct request_queue *q;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct block_device *bdev;
@@ -1611,6 +1623,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)

 	lock_chunks(root);

+	q = bdev_get_queue(bdev);
+	if (blk_queue_discard(q))
+		device->can_discard = 1;
 	device->writeable = 1;
 	device->work.func = pending_bios_fn;
 	generate_random_uuid(device->uuid);
@@ -1646,6 +1661,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	root->fs_info->fs_devices->num_devices++;
 	root->fs_info->fs_devices->open_devices++;
 	root->fs_info->fs_devices->rw_devices++;
+	if (device->can_discard)
+		root->fs_info->fs_devices->num_can_discard++;
 	root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;

 	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7c12d61ae7a..6d866db4e17 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -48,6 +48,7 @@ struct btrfs_device {
 	int writeable;
 	int in_fs_metadata;
 	int missing;
+	int can_discard;

 	spinlock_t io_lock;

@@ -104,6 +105,7 @@ struct btrfs_fs_devices {
 	u64 rw_devices;
 	u64 missing_devices;
 	u64 total_rw_bytes;
+	u64 num_can_discard;
 	struct block_device *latest_bdev;

 	/* all of the devices in the FS, protected by a mutex
diff --git a/fs/buffer.c b/fs/buffer.c
index 1a80b048ade..d7586936b25 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -285,7 +285,7 @@ static void free_more_memory(void)
 	struct zone *zone;
 	int nid;

-	wakeup_flusher_threads(1024);
+	wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
 	yield();

 	for_each_online_node(nid) {
@@ -968,6 +968,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 	struct buffer_head *head = page_buffers(page);
 	struct buffer_head *bh = head;
 	int uptodate = PageUptodate(page);
+	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));

 	do {
 		if (!buffer_mapped(bh)) {
@@ -976,7 +977,8 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 			bh->b_blocknr = block;
 			if (uptodate)
 				set_buffer_uptodate(bh);
-			set_buffer_mapped(bh);
+			if (block < end_block)
+				set_buffer_mapped(bh);
 		}
 		block++;
 		bh = bh->b_this_page;
@@ -1082,6 +1084,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
+	int ret;
+	struct buffer_head *bh;
+
 	/* Size must be multiple of hard sectorsize */
 	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
 			(size < 512 || size > PAGE_SIZE))) {
@@ -1094,20 +1099,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 		return NULL;
 	}

-	for (;;) {
-		struct buffer_head * bh;
-		int ret;
+retry:
+	bh = __find_get_block(bdev, block, size);
+	if (bh)
+		return bh;

+	ret = grow_buffers(bdev, block, size);
+	if (ret == 0) {
+		free_more_memory();
+		goto retry;
+	} else if (ret > 0) {
 		bh = __find_get_block(bdev, block, size);
 		if (bh)
 			return bh;
-
-		ret = grow_buffers(bdev, block, size);
-		if (ret < 0)
-			return NULL;
-		if (ret == 0)
-			free_more_memory();
 	}
+	return NULL;
 }

 /*
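The __getblk_slow() rewrite replaces the open-coded for(;;) with an explicit retry label: loop back only after grow_buffers() reports "no progress, memory was freed" (0), re-do the lookup once after successful growth (>0), and fail outright on a hard error. The same control flow as a standalone sketch, all names invented:

    #include <stdio.h>

    static int have; /* pretend cache state */

    static int find(int key) { return have == key; }
    /* returns >0 on success, 0 when the caller should free memory
     * and retry, <0 on a hard error */
    static int grow(int key) { have = key; return 1; }

    static int get(int key)
    {
    retry:
        if (find(key))
            return 1;
        int ret = grow(key);
        if (ret == 0)
            goto retry;   /* transient: reclaim, then try again */
        else if (ret > 0) {
            if (find(key))
                return 1;
        }
        return 0;         /* hard failure */
    }

    int main(void)
    {
        printf("got: %d\n", get(7));
        return 0;
    }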
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index bc4b12ca537..53e7d72177d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -566,6 +566,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 		struct inode *dir = dentry->d_inode;
 		struct dentry *child;

+		if (!dir) {
+			dput(dentry);
+			dentry = ERR_PTR(-ENOENT);
+			break;
+		}
+
 		/* skip separators */
 		while (*s == sep)
 			s++;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6255fa812c7..7cb9dd22531 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -43,6 +43,7 @@

 #define CIFS_MIN_RCV_POOL 4

+#define MAX_REOPEN_ATT	5 /* maximum number of attempts to reopen a file */
 /*
  * default attribute cache timeout (jiffies)
  */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1a9fe7f816d..07132c4e99f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4079,7 +4079,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon,
 	T2_FNEXT_RSP_PARMS *parms;
 	char *response_data;
 	int rc = 0;
-	int bytes_returned, name_len;
+	int bytes_returned;
+	unsigned int name_len;
 	__u16 params, byte_count;

 	cFYI(1, "In FindNext");
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ccc1afa0bf3..b7758094770 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1258,7 +1258,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			/* ignore */
 		} else if (strnicmp(data, "guest", 5) == 0) {
 			/* ignore */
-		} else if (strnicmp(data, "rw", 2) == 0) {
+		} else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) {
 			/* ignore */
 		} else if (strnicmp(data, "ro", 2) == 0) {
 			/* ignore */
@@ -1361,7 +1361,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			vol->server_ino = 1;
 		} else if (strnicmp(data, "noserverino", 9) == 0) {
 			vol->server_ino = 0;
-		} else if (strnicmp(data, "rwpidforward", 4) == 0) {
+		} else if (strnicmp(data, "rwpidforward", 12) == 0) {
 			vol->rwpidforward = 1;
 		} else if (strnicmp(data, "cifsacl", 7) == 0) {
 			vol->cifs_acl = 1;
@@ -2767,10 +2767,10 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,

 /*
  * When the server doesn't allow large posix writes, only allow a wsize of
- * 128k minus the size of the WRITE_AND_X header. That allows for a write up
+ * 2^17-1 minus the size of the WRITE_AND_X header. That allows for a write up
  * to the maximum size described by RFC1002.
  */
-#define CIFS_MAX_RFC1002_WSIZE (128 * 1024 - sizeof(WRITE_REQ) + 4)
+#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)

 /*
  * The default wsize is 1M. find_get_pages seems to return a maximum of 256
@@ -2838,7 +2838,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info)
 	kfree(volume_info->username);
 	kzfree(volume_info->password);
 	kfree(volume_info->UNC);
-	kfree(volume_info->UNCip);
+	if (volume_info->UNCip != volume_info->UNC + 2)
+		kfree(volume_info->UNCip);
 	kfree(volume_info->domainname);
 	kfree(volume_info->iocharset);
 	kfree(volume_info->prepath);
@@ -3003,7 +3004,7 @@ cifs_get_volume_info(char *mount_data, const char *devname)
 int
 cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
 {
-	int rc = 0;
+	int rc;
 	int xid;
 	struct cifs_ses *pSesInfo;
 	struct cifs_tcon *tcon;
@@ -3032,6 +3033,7 @@ try_mount_again:
 		FreeXid(xid);
 	}
 #endif
+	rc = 0;
 	tcon = NULL;
 	pSesInfo = NULL;
 	srvTcp = NULL;
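The "rw" change in cifs_parse_mount_options() guards a classic pitfall: a fixed-length prefix compare such as strnicmp(data, "rw", 2) also matches longer options like "rwpidforward", which is why the strlen(data) == 2 check (and the full length 12 for rwpidforward) is needed. The userspace equivalent with strncasecmp:

    #include <stdio.h>
    #include <string.h>

    static int is_rw(const char *opt)
    {
        /* the prefix test alone is wrong ... */
        int naive = strncasecmp(opt, "rw", 2) == 0;
        /* ... unless the option length is pinned down too */
        int fixed = strncasecmp(opt, "rw", 2) == 0 && strlen(opt) == 2;
        printf("%-14s naive=%d fixed=%d\n", opt, naive, fixed);
        return fixed;
    }

    int main(void)
    {
        is_rw("rw");
        is_rw("rwpidforward"); /* naive=1 here is exactly the bug */
        return 0;
    }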
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fa8c21d913b..ed5c07b0cdb 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -110,8 +110,8 @@ cifs_bp_rename_retry:
 	}
 	rcu_read_unlock();
 	if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
-		cERROR(1, "did not end path lookup where expected namelen is %d",
-			namelen);
+		cFYI(1, "did not end path lookup where expected. namelen=%d "
+			"dfsplen=%d", namelen, dfsplen);
 		/* presumably this is only possible if racing with a rename
 		   of one of the parent directories (we can not lock the dentries
 		   above us to prevent this, but retrying should be harmless) */
@@ -583,10 +583,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		 * If either that or op not supported returned, follow
 		 * the normal lookup.
 		 */
-		if ((rc == 0) || (rc == -ENOENT))
+		switch (rc) {
+		case 0:
+			/*
+			 * The server may allow us to open things like
+			 * FIFOs, but the client isn't set up to deal
+			 * with that. If it's not a regular file, just
+			 * close it and proceed as if it were a normal
+			 * lookup.
+			 */
+			if (newInode && !S_ISREG(newInode->i_mode)) {
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				break;
+			}
+		case -ENOENT:
 			posix_open = true;
-		else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
+		case -EOPNOTSUPP:
+			break;
+		default:
 			pTcon->broken_posix_open = true;
+		}
 	}
 	if (!posix_open)
 		rc = cifs_get_inode_info_unix(&newInode, full_path,
@@ -641,7 +657,7 @@ lookup_out:
 static int
 cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
 {
-	if (nd->flags & LOOKUP_RCU)
+	if (nd && (nd->flags & LOOKUP_RCU))
 		return -ECHILD;

 	if (direntry->d_inode) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a9b4a24f2a1..9040cb0695c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -973,10 +973,11 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 					bool fsuid_only)
 {
-	struct cifsFileInfo *open_file;
+	struct cifsFileInfo *open_file, *inv_file = NULL;
 	struct cifs_sb_info *cifs_sb;
 	bool any_available = false;
 	int rc;
+	unsigned int refind = 0;

 	/* Having a null inode here (because mapping->host was set to zero by
 	   the VFS or MM) should not happen but we had reports of on oops (due to
@@ -996,40 +997,25 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,

 	spin_lock(&cifs_file_list_lock);
 refind_writable:
+	if (refind > MAX_REOPEN_ATT) {
+		spin_unlock(&cifs_file_list_lock);
+		return NULL;
+	}
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (!any_available && open_file->pid != current->tgid)
 			continue;
 		if (fsuid_only && open_file->uid != current_fsuid())
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
-			cifsFileInfo_get(open_file);
-
 			if (!open_file->invalidHandle) {
 				/* found a good writable file */
+				cifsFileInfo_get(open_file);
 				spin_unlock(&cifs_file_list_lock);
 				return open_file;
+			} else {
+				if (!inv_file)
+					inv_file = open_file;
 			}
-
-			spin_unlock(&cifs_file_list_lock);
-
-			/* Had to unlock since following call can block */
-			rc = cifs_reopen_file(open_file, false);
-			if (!rc)
-				return open_file;
-
-			/* if it fails, try another handle if possible */
-			cFYI(1, "wp failed on reopen file");
-			cifsFileInfo_put(open_file);
-
-			spin_lock(&cifs_file_list_lock);
-
-			/* else we simply continue to the next entry. Thus
-			   we do not loop on reopen errors. If we
-			   can not reopen the file, for example if we
-			   reconnected to a server with another client
-			   racing to delete or lock the file we would not
-			   make progress if we restarted before the beginning
-			   of the loop here. */
 		}
 	}
 	/* couldn't find useable FH with same pid, try any available */
@@ -1037,7 +1023,30 @@refind_writable:
 		any_available = true;
 		goto refind_writable;
 	}
+
+	if (inv_file) {
+		any_available = false;
+		cifsFileInfo_get(inv_file);
+	}
+
 	spin_unlock(&cifs_file_list_lock);
+
+	if (inv_file) {
+		rc = cifs_reopen_file(inv_file, false);
+		if (!rc)
+			return inv_file;
+		else {
+			spin_lock(&cifs_file_list_lock);
+			list_move_tail(&inv_file->flist,
+				       &cifs_inode->openFileList);
+			spin_unlock(&cifs_file_list_lock);
+			cifsFileInfo_put(inv_file);
+			spin_lock(&cifs_file_list_lock);
+			++refind;
+			goto refind_writable;
+		}
+	}
+
 	return NULL;
 }

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9b018c8334f..745e5cdca8f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -562,7 +562,16 @@ int cifs_get_file_info(struct file *filp)

 	xid = GetXid();
 	rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
-	if (rc == -EOPNOTSUPP || rc == -EINVAL) {
+	switch (rc) {
+	case 0:
+		cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
+		break;
+	case -EREMOTE:
+		cifs_create_dfs_fattr(&fattr, inode->i_sb);
+		rc = 0;
+		break;
+	case -EOPNOTSUPP:
+	case -EINVAL:
 		/*
 		 * FIXME: legacy server -- fall back to path-based call?
 		 * for now, just skip revalidating and mark inode for
@@ -570,18 +579,14 @@int cifs_get_file_info(struct file *filp)
 		 */
 		rc = 0;
 		CIFS_I(inode)->time = 0;
+	default:
 		goto cgfi_exit;
-	} else if (rc == -EREMOTE) {
-		cifs_create_dfs_fattr(&fattr, inode->i_sb);
-		rc = 0;
-	} else if (rc)
-		goto cgfi_exit;
+	}

 	/*
 	 * don't bother with SFU junk here -- just mark inode as needing
 	 * revalidation.
 	 */
-	cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
 	fattr.cf_uniqueid = CIFS_I(inode)->uniqueid;
 	fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
 	cifs_fattr_to_inode(inode, &fattr);
@@ -764,20 +769,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 	if (full_path == NULL)
 		return full_path;

-	if (dfsplen) {
+	if (dfsplen)
 		strncpy(full_path, tcon->treeName, dfsplen);
-		/* switch slash direction in prepath depending on whether
-		 * windows or posix style path names
-		 */
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
-			int i;
-			for (i = 0; i < dfsplen; i++) {
-				if (full_path[i] == '\\')
-					full_path[i] = '/';
-			}
-		}
-	}
 	strncpy(full_path + dfsplen, vol->prepath, pplen);
+	convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
 	full_path[dfsplen + pplen] = 0; /* add trailing null */
 	return full_path;
 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 6751e745bbc..c71032ba5b7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -85,9 +85,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,

 	dentry = d_lookup(parent, name);
 	if (dentry) {
-		/* FIXME: check for inode number changes? */
-		if (dentry->d_inode != NULL)
+		inode = dentry->d_inode;
+		/* update inode in place if i_ino didn't change */
+		if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+			cifs_fattr_to_inode(inode, fattr);
 			return dentry;
+		}
 		d_drop(dentry);
 		dput(dentry);
 	}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d3e619692ee..0cfae19129b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -244,16 +244,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
 	/* copy user */
 	/* BB what about null user mounts - check that we do this BB */
 	/* copy user */
-	if (ses->user_name != NULL)
+	if (ses->user_name != NULL) {
 		strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
+		bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
+	}
 	/* else null user mount */
-
-	bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
 	*bcc_ptr = 0;
 	bcc_ptr++; /* account for null termination */

 	/* copy domain */
-
 	if (ses->domainName != NULL) {
 		strncpy(bcc_ptr, ses->domainName, 256);
 		bcc_ptr += strnlen(ses->domainName, 256);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 147aa22c3c3..c1b9c4b1073 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -362,6 +362,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	mid = AllocMidQEntry(hdr, server);
 	if (mid == NULL) {
 		mutex_unlock(&server->srv_mutex);
+		atomic_dec(&server->inFlight);
+		wake_up(&server->request_q);
 		return -ENOMEM;
 	}

diff --git a/fs/dcache.c b/fs/dcache.c
index fbdcbca4072..0b51cfc9291 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -241,6 +241,7 @@ static void dentry_lru_add(struct dentry *dentry)
241static void __dentry_lru_del(struct dentry *dentry) 241static void __dentry_lru_del(struct dentry *dentry)
242{ 242{
243 list_del_init(&dentry->d_lru); 243 list_del_init(&dentry->d_lru);
244 dentry->d_flags &= ~DCACHE_SHRINK_LIST;
244 dentry->d_sb->s_nr_dentry_unused--; 245 dentry->d_sb->s_nr_dentry_unused--;
245 dentry_stat.nr_unused--; 246 dentry_stat.nr_unused--;
246} 247}
@@ -753,6 +754,7 @@ relock:
753 spin_unlock(&dentry->d_lock); 754 spin_unlock(&dentry->d_lock);
754 } else { 755 } else {
755 list_move_tail(&dentry->d_lru, &tmp); 756 list_move_tail(&dentry->d_lru, &tmp);
757 dentry->d_flags |= DCACHE_SHRINK_LIST;
756 spin_unlock(&dentry->d_lock); 758 spin_unlock(&dentry->d_lock);
757 if (!--cnt) 759 if (!--cnt)
758 break; 760 break;
@@ -1144,14 +1146,18 @@ resume:
1144 /* 1146 /*
1145 * move only zero ref count dentries to the end 1147 * move only zero ref count dentries to the end
1146 * of the unused list for prune_dcache 1148 * of the unused list for prune_dcache
1149 *
1150 * Those which are presently on the shrink list, being processed
1151 * by shrink_dentry_list(), shouldn't be moved. Otherwise the
1152 * loop in shrink_dcache_parent() might not make any progress
1153 * and loop forever.
1147 */ 1154 */
1148 if (!dentry->d_count) { 1155 if (dentry->d_count) {
1156 dentry_lru_del(dentry);
1157 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1149 dentry_lru_move_tail(dentry); 1158 dentry_lru_move_tail(dentry);
1150 found++; 1159 found++;
1151 } else {
1152 dentry_lru_del(dentry);
1153 } 1160 }
1154
1155 /* 1161 /*
1156 * We can return to the caller if we have found some (this 1162 * We can return to the caller if we have found some (this
1157 * ensures forward progress). We'll be coming back to find 1163 * ensures forward progress). We'll be coming back to find
@@ -2427,6 +2433,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 	if (d_ancestor(alias, dentry)) {
 		/* Check for loops */
 		actual = ERR_PTR(-ELOOP);
+		spin_unlock(&inode->i_lock);
 	} else if (IS_ROOT(alias)) {
 		/* Is this an anonymous mountpoint that we
 		 * could splice into our tree? */
@@ -2436,7 +2443,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 			goto found;
 		} else {
 			/* Nope, but we must(!) avoid directory
-			 * aliasing */
+			 * aliasing. This drops inode->i_lock */
 			actual = __d_unalias(inode, dentry, alias);
 		}
 		write_sequnlock(&rename_lock);
@@ -2487,16 +2494,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 /**
  * prepend_path - Prepend path string to a buffer
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
  * Caller holds the rename_lock.
- *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
  */
-static int prepend_path(const struct path *path, struct path *root,
+static int prepend_path(const struct path *path,
+			const struct path *root,
 			char **buffer, int *buflen)
 {
 	struct dentry *dentry = path->dentry;
@@ -2531,10 +2536,10 @@ static int prepend_path(const struct path *path, struct path *root,
 		dentry = parent;
 	}
 
-out:
 	if (!error && !slash)
 		error = prepend(buffer, buflen, "/", 1);
 
+out:
 	br_read_unlock(vfsmount_lock);
 	return error;
 
@@ -2548,15 +2553,17 @@ global_root:
 		WARN(1, "Root dentry has weird name <%.*s>\n",
 		     (int) dentry->d_name.len, dentry->d_name.name);
 	}
-	root->mnt = vfsmnt;
-	root->dentry = dentry;
+	if (!slash)
+		error = prepend(buffer, buflen, "/", 1);
+	if (!error)
+		error = vfsmnt->mnt_ns ? 1 : 2;
 	goto out;
 }
 
 /**
  * __d_path - return the path of a dentry
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buf: buffer to return value in
  * @buflen: buffer length
 *
@@ -2567,10 +2574,10 @@ global_root:
 *
  * "buflen" should be positive.
 *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
+ * If the path is not reachable from the supplied root, return %NULL.
 */
-char *__d_path(const struct path *path, struct path *root,
+char *__d_path(const struct path *path,
+	       const struct path *root,
 	       char *buf, int buflen)
 {
 	char *res = buf + buflen;
@@ -2581,7 +2588,28 @@ char *__d_path(const struct path *path, struct path *root,
 	error = prepend_path(path, root, &res, &buflen);
 	write_sequnlock(&rename_lock);
 
-	if (error)
+	if (error < 0)
+		return ERR_PTR(error);
+	if (error > 0)
+		return NULL;
+	return res;
+}
+
+char *d_absolute_path(const struct path *path,
+		      char *buf, int buflen)
+{
+	struct path root = {};
+	char *res = buf + buflen;
+	int error;
+
+	prepend(&res, &buflen, "\0", 1);
+	write_seqlock(&rename_lock);
+	error = prepend_path(path, &root, &res, &buflen);
+	write_sequnlock(&rename_lock);
+
+	if (error > 1)
+		error = -EINVAL;
+	if (error < 0)
 		return ERR_PTR(error);
 	return res;
 }
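prepend_path() now reports reachability in its return value instead of mutating the caller's root: 0 means the path was reached from @root, a positive value means it was not (1 if the mount is still attached to a namespace, 2 if it is detached), and negative values are errors. That is what lets __d_path() take a const root and return NULL for unreachable paths, with d_absolute_path() as the variant that resolves against the global root. A hedged sketch of a caller under the new convention (report_path() is an invented name):

	static char *report_path(const struct path *p, const struct path *root,
				 char *buf, int buflen)
	{
		char *s = __d_path(p, root, buf, buflen);

		if (IS_ERR(s))		/* hard error, e.g. buffer too small */
			return NULL;
		if (!s)			/* valid path, but outside @root */
			s = d_absolute_path(p, buf, buflen);
		return IS_ERR(s) ? NULL : s;
	}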
@@ -2589,8 +2617,9 @@ char *__d_path(const struct path *path, struct path *root,
 /*
  * same as __d_path but appends "(deleted)" for unlinked files.
  */
-static int path_with_deleted(const struct path *path, struct path *root,
-			     char **buf, int *buflen)
+static int path_with_deleted(const struct path *path,
+			     const struct path *root,
+			     char **buf, int *buflen)
 {
 	prepend(buf, buflen, "\0", 1);
 	if (d_unlinked(path->dentry)) {
@@ -2627,7 +2656,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
 {
 	char *res = buf + buflen;
 	struct path root;
-	struct path tmp;
 	int error;
 
 	/*
@@ -2642,9 +2670,8 @@ char *d_path(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	write_seqlock(&rename_lock);
-	tmp = root;
-	error = path_with_deleted(path, &tmp, &res, &buflen);
-	if (error)
+	error = path_with_deleted(path, &root, &res, &buflen);
+	if (error < 0)
 		res = ERR_PTR(error);
 	write_sequnlock(&rename_lock);
 	path_put(&root);
@@ -2665,7 +2692,6 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 {
 	char *res = buf + buflen;
 	struct path root;
-	struct path tmp;
 	int error;
 
 	if (path->dentry->d_op && path->dentry->d_op->d_dname)
@@ -2673,9 +2699,8 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 
 	get_fs_root(current->fs, &root);
 	write_seqlock(&rename_lock);
-	tmp = root;
-	error = path_with_deleted(path, &tmp, &res, &buflen);
-	if (!error && !path_equal(&tmp, &root))
+	error = path_with_deleted(path, &root, &res, &buflen);
+	if (error > 0)
 		error = prepend_unreachable(&res, &buflen);
 	write_sequnlock(&rename_lock);
 	path_put(&root);
@@ -2806,19 +2831,18 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	write_seqlock(&rename_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
-		struct path tmp = root;
 		char *cwd = page + PAGE_SIZE;
 		int buflen = PAGE_SIZE;
 
 		prepend(&cwd, &buflen, "\0", 1);
-		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
+		error = prepend_path(&pwd, &root, &cwd, &buflen);
 		write_sequnlock(&rename_lock);
 
-		if (error)
+		if (error < 0)
 			goto out;
 
 		/* Unreachable from current root */
-		if (!path_equal(&tmp, &root)) {
+		if (error > 0) {
 			error = prepend_unreachable(&cwd, &buflen);
 			if (error)
 				goto out;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 58609bde3b9..c6602d24517 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -417,17 +417,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
 			(unsigned long long)(extent_base + extent_offset), rc);
 		goto out;
 	}
-	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
-				"with iv:\n");
-		ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
-		ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
-				"encryption:\n");
-		ecryptfs_dump_hex((char *)
-				  (page_address(page)
-				   + (extent_offset * crypt_stat->extent_size)),
-				  8);
-	}
 	rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0,
 					  page, (extent_offset
 						 * crypt_stat->extent_size),
@@ -440,14 +429,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
 		goto out;
 	}
 	rc = 0;
-	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; "
-				"rc = [%d]\n",
-				(unsigned long long)(extent_base + extent_offset), rc);
-		ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
-				"encryption:\n");
-		ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
-	}
 out:
 	return rc;
 }
@@ -543,17 +524,6 @@ static int ecryptfs_decrypt_extent(struct page *page,
 			(unsigned long long)(extent_base + extent_offset), rc);
 		goto out;
 	}
-	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
-				"with iv:\n");
-		ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
-		ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
-				"decryption:\n");
-		ecryptfs_dump_hex((char *)
-				  (page_address(enc_extent_page)
-				   + (extent_offset * crypt_stat->extent_size)),
-				  8);
-	}
 	rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
 					  (extent_offset
 					   * crypt_stat->extent_size),
@@ -567,16 +537,6 @@ static int ecryptfs_decrypt_extent(struct page *page,
 		goto out;
 	}
 	rc = 0;
-	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; "
-				"rc = [%d]\n",
-				(unsigned long long)(extent_base + extent_offset), rc);
-		ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
-				"decryption:\n");
-		ecryptfs_dump_hex((char *)(page_address(page)
-					   + (extent_offset
-					      * crypt_stat->extent_size)), 8);
-	}
 out:
 	return rc;
 }
@@ -1618,7 +1578,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 		rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
-			       "file header region or xattr region\n");
+			       "file header region or xattr region, inode %lu\n",
+			       ecryptfs_inode->i_ino);
 			rc = -EINVAL;
 			goto out;
 		}
@@ -1627,7 +1588,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 				ECRYPTFS_DONT_VALIDATE_HEADER_SIZE);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
-			       "file xattr region either\n");
+			       "file xattr region either, inode %lu\n",
+			       ecryptfs_inode->i_ino);
 			rc = -EINVAL;
 		}
 		if (crypt_stat->mount_crypt_stat->flags
@@ -1638,7 +1600,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 				"crypto metadata only in the extended attribute "
 				"region, but eCryptfs was mounted without "
 				"xattr support enabled. eCryptfs will not treat "
-				"this like an encrypted file.\n");
+				"this like an encrypted file, inode %lu\n",
+				ecryptfs_inode->i_ino);
 			rc = -EINVAL;
 		}
 	}
@@ -1943,7 +1906,7 @@ static unsigned char *portable_filename_chars = ("-.0123456789ABCD"
 
 /* We could either offset on every reverse map or just pad some 0x00's
  * at the front here */
-static const unsigned char filename_rev_map[] = {
+static const unsigned char filename_rev_map[256] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 7 */
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 15 */
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 23 */
@@ -1959,7 +1922,7 @@ static const unsigned char filename_rev_map[] = {
 	0x00, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, /* 103 */
 	0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, /* 111 */
 	0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, /* 119 */
-	0x3D, 0x3E, 0x3F
+	0x3D, 0x3E, 0x3F	/* 123 - 255 initialized to 0x00 */
 };
 
 /**
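The array-size change above is a bounds fix, not cosmetics: the filename decoder indexes filename_rev_map[] with raw bytes taken from an encoded lower filename. Declared as [], the table had only 123 entries, so any input byte above 0x7A read past the end of the array; [256] makes every possible unsigned char a valid index, with the tail zero-filled. Simplified illustration of the indexing pattern:

	/* Safe only because the table now has one entry per possible byte. */
	static unsigned char decode_char(const unsigned char map[256],
					 unsigned char c)
	{
		return map[c];
	}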
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 4ec9eb00a24..0c1a6527004 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -139,6 +139,27 @@ out:
 	return rc;
 }
 
+static void ecryptfs_vma_close(struct vm_area_struct *vma)
+{
+	filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+static const struct vm_operations_struct ecryptfs_file_vm_ops = {
+	.close = ecryptfs_vma_close,
+	.fault = filemap_fault,
+};
+
+static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int rc;
+
+	rc = generic_file_mmap(file, vma);
+	if (!rc)
+		vma->vm_ops = &ecryptfs_file_vm_ops;
+
+	return rc;
+}
+
 struct kmem_cache *ecryptfs_file_info_cache;
 
 /**
@@ -348,7 +369,7 @@ const struct file_operations ecryptfs_main_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ecryptfs_compat_ioctl,
 #endif
-	.mmap = generic_file_mmap,
+	.mmap = ecryptfs_file_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
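With plain generic_file_mmap(), stores through a shared mapping only reach the eCryptfs page cache; nothing guarantees they are encrypted and pushed to the lower file before the mapping goes away. The new ->close hook flushes on VMA teardown. A user-space check of the behavior (assumes an eCryptfs mount at ./mnt; path and size are arbitrary):

	#include <fcntl.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("mnt/testfile", O_RDWR | O_CREAT, 0600);

		if (fd < 0 || ftruncate(fd, 4096))
			return 1;
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			return 1;
		memcpy(p, "hello", 5);
		munmap(p, 4096);	/* triggers ecryptfs_vma_close() -> flush */
		close(fd);
		return 0;
	}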
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 7349ade17de..2717329386d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -69,6 +69,7 @@ static int ecryptfs_inode_set(struct inode *inode, void *opaque)
 	inode->i_ino = lower_inode->i_ino;
 	inode->i_version++;
 	inode->i_mapping->a_ops = &ecryptfs_aops;
+	inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
 
 	if (S_ISLNK(inode->i_mode))
 		inode->i_op = &ecryptfs_symlink_iops;
@@ -853,18 +854,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 		size_t num_zeros = (PAGE_CACHE_SIZE
 				    - (ia->ia_size & ~PAGE_CACHE_MASK));
 
-
-		/*
-		 * XXX(truncate) this should really happen at the begginning
-		 * of ->setattr. But the code is too messy to that as part
-		 * of a larger patch. ecryptfs is also totally missing out
-		 * on the inode_change_ok check at the beginning of
-		 * ->setattr while would include this.
-		 */
-		rc = inode_newsize_ok(inode, ia->ia_size);
-		if (rc)
-			goto out;
-
 		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
 			truncate_setsize(inode, ia->ia_size);
 			lower_ia->ia_size = ia->ia_size;
@@ -914,6 +903,28 @@ out:
 	return rc;
 }
 
+static int ecryptfs_inode_newsize_ok(struct inode *inode, loff_t offset)
+{
+	struct ecryptfs_crypt_stat *crypt_stat;
+	loff_t lower_oldsize, lower_newsize;
+
+	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+	lower_oldsize = upper_size_to_lower_size(crypt_stat,
+						 i_size_read(inode));
+	lower_newsize = upper_size_to_lower_size(crypt_stat, offset);
+	if (lower_newsize > lower_oldsize) {
+		/*
+		 * The eCryptfs inode and the new *lower* size are mixed here
+		 * because we may not have the lower i_mutex held and/or it may
+		 * not be appropriate to call inode_newsize_ok() with inodes
+		 * from other filesystems.
+		 */
+		return inode_newsize_ok(inode, lower_newsize);
+	}
+
+	return 0;
+}
+
 /**
  * ecryptfs_truncate
  * @dentry: The ecryptfs layer dentry
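The reason for converting to a lower size before calling inode_newsize_ok(): an eCryptfs file is larger on the lower filesystem than its logical size, roughly the metadata header plus the data rounded up to whole extents, so checking the upper size alone can accept an extension that the lower write would then push past RLIMIT_FSIZE or s_maxbytes. A rough model of the mapping (the real arithmetic lives in upper_size_to_lower_size(); the header/extent values below are assumptions for illustration):

	/* e.g. header = 8192, extent = 4096: a 5000-byte upper file
	 * occupies 8192 + 8192 = 16384 bytes on the lower filesystem. */
	static long long lower_size(long long upper, long long header,
				    long long extent)
	{
		if (!upper)
			return header;
		return header + ((upper + extent - 1) / extent) * extent;
	}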
@@ -930,6 +941,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	struct iattr lower_ia = { .ia_valid = 0 };
 	int rc;
 
+	rc = ecryptfs_inode_newsize_ok(dentry->d_inode, new_length);
+	if (rc)
+		return rc;
+
 	rc = truncate_upper(dentry, &ia, &lower_ia);
 	if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
 		struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
@@ -1011,6 +1026,16 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 		}
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
+
+	rc = inode_change_ok(inode, ia);
+	if (rc)
+		goto out;
+	if (ia->ia_valid & ATTR_SIZE) {
+		rc = ecryptfs_inode_newsize_ok(inode, ia->ia_size);
+		if (rc)
+			goto out;
+	}
+
 	if (S_ISREG(inode->i_mode)) {
 		rc = filemap_write_and_wait(inode->i_mapping);
 		if (rc)
@@ -1094,6 +1119,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	}
 
 	rc = vfs_setxattr(lower_dentry, name, value, size, flags);
+	if (!rc)
+		fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 27a7fefb83e..89dc18e7e95 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1868,11 +1868,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 	 * just one will be sufficient to decrypt to get the FEK. */
 find_next_matching_auth_tok:
 	found_auth_tok = 0;
-	if (auth_tok_key) {
-		up_write(&(auth_tok_key->sem));
-		key_put(auth_tok_key);
-		auth_tok_key = NULL;
-	}
 	list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
 		candidate_auth_tok = &auth_tok_list_item->auth_tok;
 		if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1909,14 +1904,22 @@ found_matching_auth_tok:
 		memcpy(&(candidate_auth_tok->token.private_key),
 		       &(matching_auth_tok->token.private_key),
 		       sizeof(struct ecryptfs_private_key));
+		up_write(&(auth_tok_key->sem));
+		key_put(auth_tok_key);
 		rc = decrypt_pki_encrypted_session_key(candidate_auth_tok,
 						       crypt_stat);
 	} else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) {
 		memcpy(&(candidate_auth_tok->token.password),
 		       &(matching_auth_tok->token.password),
 		       sizeof(struct ecryptfs_password));
+		up_write(&(auth_tok_key->sem));
+		key_put(auth_tok_key);
 		rc = decrypt_passphrase_encrypted_session_key(
 			candidate_auth_tok, crypt_stat);
+	} else {
+		up_write(&(auth_tok_key->sem));
+		key_put(auth_tok_key);
+		rc = -EINVAL;
 	}
 	if (rc) {
 		struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp;
@@ -1956,15 +1959,12 @@ found_matching_auth_tok:
 out_wipe_list:
 	wipe_auth_tok_list(&auth_tok_list);
 out:
-	if (auth_tok_key) {
-		up_write(&(auth_tok_key->sem));
-		key_put(auth_tok_key);
-	}
 	return rc;
 }
 
 static int
-pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
+pki_encrypt_session_key(struct key *auth_tok_key,
+			struct ecryptfs_auth_tok *auth_tok,
 			struct ecryptfs_crypt_stat *crypt_stat,
 			struct ecryptfs_key_record *key_rec)
 {
@@ -1979,6 +1979,8 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 					  crypt_stat->cipher,
 					  crypt_stat->key_size),
 				 crypt_stat, &payload, &payload_len);
+	up_write(&(auth_tok_key->sem));
+	key_put(auth_tok_key);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n");
 		goto out;
@@ -2008,6 +2010,8 @@ out:
  * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet
  * @dest: Buffer into which to write the packet
  * @remaining_bytes: Maximum number of bytes that can be writtn
+ * @auth_tok_key: The authentication token key to unlock and put when done with
+ *                @auth_tok
  * @auth_tok: The authentication token used for generating the tag 1 packet
  * @crypt_stat: The cryptographic context
  * @key_rec: The key record struct for the tag 1 packet
@@ -2018,7 +2022,7 @@ out:
  */
 static int
 write_tag_1_packet(char *dest, size_t *remaining_bytes,
-		   struct ecryptfs_auth_tok *auth_tok,
+		   struct key *auth_tok_key, struct ecryptfs_auth_tok *auth_tok,
 		   struct ecryptfs_crypt_stat *crypt_stat,
 		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
 {
@@ -2039,12 +2043,15 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
 		memcpy(key_rec->enc_key,
 		       auth_tok->session_key.encrypted_key,
 		       auth_tok->session_key.encrypted_key_size);
+		up_write(&(auth_tok_key->sem));
+		key_put(auth_tok_key);
 		goto encrypted_session_key_set;
 	}
 	if (auth_tok->session_key.encrypted_key_size == 0)
 		auth_tok->session_key.encrypted_key_size =
 			auth_tok->token.private_key.key_size;
-	rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
+	rc = pki_encrypt_session_key(auth_tok_key, auth_tok, crypt_stat,
+				     key_rec);
 	if (rc) {
 		printk(KERN_ERR "Failed to encrypt session key via a key "
 		       "module; rc = [%d]\n", rc);
@@ -2421,6 +2428,8 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 						&max, auth_tok,
 						crypt_stat, key_rec,
 						&written);
+			up_write(&(auth_tok_key->sem));
+			key_put(auth_tok_key);
 			if (rc) {
 				ecryptfs_printk(KERN_WARNING, "Error "
 						"writing tag 3 packet\n");
@@ -2438,8 +2447,8 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 			}
 			(*len) += written;
 		} else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
-			rc = write_tag_1_packet(dest_base + (*len),
-						&max, auth_tok,
+			rc = write_tag_1_packet(dest_base + (*len), &max,
						auth_tok_key, auth_tok,
 						crypt_stat, key_rec, &written);
 			if (rc) {
 				ecryptfs_printk(KERN_WARNING, "Error "
@@ -2448,14 +2457,13 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 			}
 			(*len) += written;
 		} else {
+			up_write(&(auth_tok_key->sem));
+			key_put(auth_tok_key);
 			ecryptfs_printk(KERN_WARNING, "Unsupported "
 					"authentication token type\n");
 			rc = -EINVAL;
 			goto out_free;
 		}
-		up_write(&(auth_tok_key->sem));
-		key_put(auth_tok_key);
-		auth_tok_key = NULL;
 	}
 	if (likely(max > 0)) {
 		dest_base[(*len)] = 0x00;
@@ -2468,11 +2476,6 @@ out_free:
 out:
 	if (rc)
 		(*len) = 0;
-	if (auth_tok_key) {
-		up_write(&(auth_tok_key->sem));
-		key_put(auth_tok_key);
-	}
-
 	mutex_unlock(&crypt_stat->keysig_list_mutex);
 	return rc;
 }
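The net effect of this hunk set is an ownership change: instead of one cleanup site dropping auth_tok_key at the end, every branch (and now pki_encrypt_session_key() and write_tag_1_packet(), which receive the key) releases the semaphore and the reference exactly once, before the potentially slow decrypt/encrypt calls rather than after. The contract is easiest to read as "callee consumes the key"; an illustrative helper expressing it (not ecryptfs code):

	/* Whoever is handed auth_tok_key must up_write() + key_put() it
	 * on every path, exactly once. */
	static int consume_auth_tok_key(struct key *auth_tok_key, int rc)
	{
		up_write(&(auth_tok_key->sem));
		key_put(auth_tok_key);
		return rc;
	}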
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 69f994a7d52..0dbe58a8b17 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	(*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
 	if (!IS_ERR(*lower_file))
 		goto out;
-	if (flags & O_RDONLY) {
+	if ((flags & O_ACCMODE) == O_RDONLY) {
 		rc = PTR_ERR((*lower_file));
 		goto out;
 	}
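This is the classic open-flags pitfall: O_RDONLY is defined as 0, so "flags & O_RDONLY" is always false and the old test could never fire; the access mode has to be extracted with the O_ACCMODE mask first. A runnable demonstration:

	#include <fcntl.h>
	#include <stdio.h>

	int main(void)
	{
		int flags = O_RDONLY;	/* numerically 0 */

		/* always prints 0, even for a read-only open */
		printf("flags & O_RDONLY: %d\n", flags & O_RDONLY);
		/* prints 1: mask the access-mode bits, then compare */
		printf("is read-only: %d\n",
		       (flags & O_ACCMODE) == O_RDONLY);
		return 0;
	}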
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 9f1bb747d77..b4a6befb121 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
 	 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
 	 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
 	 ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+	 ecryptfs_opt_check_dev_ruid,
 	 ecryptfs_opt_err };
 
 static const match_table_t tokens = {
@@ -191,6 +192,7 @@ static const match_table_t tokens = {
 	{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
 	{ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
 	{ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
+	{ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"},
 	{ecryptfs_opt_err, NULL}
 };
 
@@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat(
  * ecryptfs_parse_options
  * @sb: The ecryptfs super block
  * @options: The options passed to the kernel
+ * @check_ruid: set to 1 if device uid should be checked against the ruid
  *
  * Parse mount options:
  * debug=N - ecryptfs_verbosity level for debug output
@@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat(
  *
  * Returns zero on success; non-zero on error
  */
-static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
+static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
+				  uid_t *check_ruid)
 {
 	char *p;
 	int rc = 0;
@@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
 	char *cipher_key_bytes_src;
 	char *fn_cipher_key_bytes_src;
 
+	*check_ruid = 0;
+
 	if (!options) {
 		rc = -EINVAL;
 		goto out;
@@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
 			mount_crypt_stat->flags |=
 				ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
 			break;
+		case ecryptfs_opt_check_dev_ruid:
+			*check_ruid = 1;
+			break;
 		case ecryptfs_opt_err:
 		default:
 			printk(KERN_WARNING
@@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 	const char *err = "Getting sb failed";
 	struct inode *inode;
 	struct path path;
+	uid_t check_ruid;
 	int rc;
 
 	sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL);
@@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		goto out;
 	}
 
-	rc = ecryptfs_parse_options(sbi, raw_data);
+	rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
 	if (rc) {
 		err = "Error parsing options";
 		goto out;
@@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 				"known incompatibilities\n");
 		goto out_free;
 	}
+
+	if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) {
+		rc = -EPERM;
+		printk(KERN_ERR "Mount of device (uid: %d) not owned by "
+		       "requested user (uid: %d)\n",
+		       path.dentry->d_inode->i_uid, current_uid());
+		goto out_free;
+	}
+
 	ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
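The new ecryptfs_check_dev_ruid option lets a privileged mount helper refuse to mount a backing file or device that the requesting (real) user does not own; without the check, a setuid helper would mount anything it can reach. An example invocation (illustrative; the remaining options depend on the local setup):

	mount -t ecryptfs /home/alice/secret.img /mnt/secret \
	      -o ecryptfs_check_dev_ruid,ecryptfs_cipher=aes,ecryptfs_key_bytes=16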
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 940a82e63dc..de42310cd4f 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
 	mutex_lock(&ecryptfs_daemon_hash_mux);
 	/* TODO: Just use file->private_data? */
 	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
-	BUG_ON(rc || !daemon);
+	if (rc || !daemon) {
+		mutex_unlock(&ecryptfs_daemon_hash_mux);
+		return -EINVAL;
+	}
 	mutex_lock(&daemon->mux);
 	mutex_unlock(&ecryptfs_daemon_hash_mux);
 	if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
 		goto out_unlock_daemon;
 	}
 	daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
+	file->private_data = daemon;
 	atomic_inc(&ecryptfs_num_miscdev_opens);
 out_unlock_daemon:
 	mutex_unlock(&daemon->mux);
@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
 
 	mutex_lock(&ecryptfs_daemon_hash_mux);
 	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
-	BUG_ON(rc || !daemon);
+	if (rc || !daemon)
+		daemon = file->private_data;
 	mutex_lock(&daemon->mux);
-	BUG_ON(daemon->pid != task_pid(current));
 	BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
 	daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
 	atomic_dec(&ecryptfs_num_miscdev_opens);
@@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
 			  struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
 			  u16 msg_flags, struct ecryptfs_daemon *daemon)
 {
-	int rc = 0;
+	struct ecryptfs_message *msg;
 
-	mutex_lock(&msg_ctx->mux);
-	msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
-			       GFP_KERNEL);
-	if (!msg_ctx->msg) {
-		rc = -ENOMEM;
+	msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
+	if (!msg) {
 		printk(KERN_ERR "%s: Out of memory whilst attempting "
 		       "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
-		       (sizeof(*msg_ctx->msg) + data_size));
-		goto out_unlock;
+		       (sizeof(*msg) + data_size));
+		return -ENOMEM;
 	}
+
+	mutex_lock(&msg_ctx->mux);
+	msg_ctx->msg = msg;
 	msg_ctx->msg->index = msg_ctx->index;
 	msg_ctx->msg->data_len = data_size;
 	msg_ctx->type = msg_type;
 	memcpy(msg_ctx->msg->data, data, data_size);
 	msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
-	mutex_lock(&daemon->mux);
 	list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
+	mutex_unlock(&msg_ctx->mux);
+
+	mutex_lock(&daemon->mux);
 	daemon->num_queued_msg_ctx++;
 	wake_up_interruptible(&daemon->wait);
 	mutex_unlock(&daemon->mux);
-out_unlock:
-	mutex_unlock(&msg_ctx->mux);
-	return rc;
+
+	return 0;
 }
 
 /**
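Two locking improvements are folded into ecryptfs_send_miscdev(): the GFP_KERNEL allocation, which may sleep, now happens before any mutex is taken, and msg_ctx->mux is dropped before daemon->mux is acquired, so the two mutexes are never held at once. The resulting shape is a common pattern (sketch of the structure, not the ecryptfs code):

	/* allocate outside the locks, then publish under each lock separately */
	msg = kmalloc(sizeof(*msg) + data_size, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&msg_ctx->mux);
	msg_ctx->msg = msg;		/* publish under the context lock */
	mutex_unlock(&msg_ctx->mux);

	mutex_lock(&daemon->mux);	/* taken only after ctx lock is gone */
	daemon->num_queued_msg_ctx++;
	wake_up_interruptible(&daemon->wait);
	mutex_unlock(&daemon->mux);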
@@ -246,8 +251,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
 	mutex_lock(&ecryptfs_daemon_hash_mux);
 	/* TODO: Just use file->private_data? */
 	rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
-	BUG_ON(rc || !daemon);
+	if (rc || !daemon) {
+		mutex_unlock(&ecryptfs_daemon_hash_mux);
+		return -EINVAL;
+	}
 	mutex_lock(&daemon->mux);
+	if (task_pid(current) != daemon->pid) {
+		mutex_unlock(&daemon->mux);
+		mutex_unlock(&ecryptfs_daemon_hash_mux);
+		return -EPERM;
+	}
 	if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
 		rc = 0;
 		mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -284,9 +297,6 @@ check_list:
 		 * message from the queue; try again */
 		goto check_list;
 	}
-	BUG_ON(euid != daemon->euid);
-	BUG_ON(current_user_ns() != daemon->user_ns);
-	BUG_ON(task_pid(current) != daemon->pid);
 	msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
 				   struct ecryptfs_msg_ctx, daemon_out_list);
 	BUG_ON(!msg_ctx);
@@ -409,11 +419,47 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
 	ssize_t sz = 0;
 	char *data;
 	uid_t euid = current_euid();
+	unsigned char packet_size_peek[3];
 	int rc;
 
-	if (count == 0)
+	if (count == 0) {
 		goto out;
+	} else if (count == (1 + 4)) {
+		/* Likely a harmless MSG_HELO or MSG_QUIT - no packet length */
+		goto memdup;
+	} else if (count < (1 + 4 + 1)
+		   || count > (1 + 4 + 2 + sizeof(struct ecryptfs_message) + 4
+			       + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES)) {
+		printk(KERN_WARNING "%s: Acceptable packet size range is "
+		       "[%d-%lu], but amount of data written is [%zu].",
+		       __func__, (1 + 4 + 1),
+		       (1 + 4 + 2 + sizeof(struct ecryptfs_message) + 4
+			+ ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES), count);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(packet_size_peek, (buf + 1 + 4),
+			   sizeof(packet_size_peek))) {
+		printk(KERN_WARNING "%s: Error while inspecting packet size\n",
+		       __func__);
+		return -EFAULT;
+	}
+
+	rc = ecryptfs_parse_packet_length(packet_size_peek, &packet_size,
+					  &packet_size_length);
+	if (rc) {
+		printk(KERN_WARNING "%s: Error parsing packet length; "
+		       "rc = [%d]\n", __func__, rc);
+		return rc;
+	}
 
+	if ((1 + 4 + packet_size_length + packet_size) != count) {
+		printk(KERN_WARNING "%s: Invalid packet size [%zu]\n", __func__,
+		       packet_size);
+		return -EINVAL;
+	}
+
+memdup:
 	data = memdup_user(buf, count);
 	if (IS_ERR(data)) {
 		printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
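Worked out, the new sanity window is: a message is 1 type byte plus a 4-byte counter, optionally followed by a length-prefixed packet. A 5-byte write is passed through as a likely MSG_HELO/MSG_QUIT; anything else must be at least 1 + 4 + 1 = 6 bytes (one length byte) and at most 1 + 4 + 2 + sizeof(struct ecryptfs_message) + 4 + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES, and the peeked length must satisfy 1 + 4 + packet_size_length + packet_size == count exactly. For example, a response carrying a 16-byte payload with a one-byte length field is valid only when count == 1 + 4 + 1 + 16 = 22. Doing all of this before memdup_user() means malformed writes are rejected without allocating or parsing anything.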
@@ -435,23 +481,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
 	}
 	memcpy(&counter_nbo, &data[i], 4);
 	seq = be32_to_cpu(counter_nbo);
-	i += 4;
-	rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
-					  &packet_size_length);
-	if (rc) {
-		printk(KERN_WARNING "%s: Error parsing packet length; "
-		       "rc = [%d]\n", __func__, rc);
-		goto out_free;
-	}
-	i += packet_size_length;
-	if ((1 + 4 + packet_size_length + packet_size) != count) {
-		printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])"
-		       " + packet_size([%zd]))([%zd]) != "
-		       "count([%zd]). Invalid packet format.\n",
-		       __func__, packet_size_length, packet_size,
-		       (1 + packet_size_length + packet_size), count);
-		goto out_free;
-	}
+	i += 4 + packet_size_length;
 	rc = ecryptfs_miscdev_response(&data[i], packet_size,
 				       euid, current_user_ns(),
 				       task_pid(current), seq);
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 85d43096311..608c1c3fde1 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -39,15 +39,16 @@
 int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
 			 loff_t offset, size_t size)
 {
-	struct ecryptfs_inode_info *inode_info;
+	struct file *lower_file;
 	mm_segment_t fs_save;
 	ssize_t rc;
 
-	inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
-	BUG_ON(!inode_info->lower_file);
+	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+	if (!lower_file)
+		return -EIO;
 	fs_save = get_fs();
 	set_fs(get_ds());
-	rc = vfs_write(inode_info->lower_file, data, size, &offset);
+	rc = vfs_write(lower_file, data, size, &offset);
 	set_fs(fs_save);
 	mark_inode_dirty_sync(ecryptfs_inode);
 	return rc;
@@ -129,13 +130,18 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
 		pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
 		size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
 		size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
-		size_t total_remaining_bytes = ((offset + size) - pos);
+		loff_t total_remaining_bytes = ((offset + size) - pos);
+
+		if (fatal_signal_pending(current)) {
+			rc = -EINTR;
+			break;
+		}
 
 		if (num_bytes > total_remaining_bytes)
 			num_bytes = total_remaining_bytes;
 		if (pos < offset) {
 			/* remaining zeros to write, up to destination offset */
-			size_t total_remaining_zeros = (offset - pos);
+			loff_t total_remaining_zeros = (offset - pos);
 
 			if (num_bytes > total_remaining_zeros)
 				num_bytes = total_remaining_zeros;
@@ -192,15 +198,19 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
 		}
 		pos += num_bytes;
 	}
-	if ((offset + size) > ecryptfs_file_size) {
-		i_size_write(ecryptfs_inode, (offset + size));
+	if (pos > ecryptfs_file_size) {
+		i_size_write(ecryptfs_inode, pos);
 		if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) {
-			rc = ecryptfs_write_inode_size_to_metadata(
+			int rc2;
+
+			rc2 = ecryptfs_write_inode_size_to_metadata(
 								ecryptfs_inode);
-			if (rc) {
+			if (rc2) {
 				printk(KERN_ERR "Problem with "
 				       "ecryptfs_write_inode_size_to_metadata; "
-				       "rc = [%d]\n", rc);
+				       "rc = [%d]\n", rc2);
+				if (!rc)
+					rc = rc2;
 				goto out;
 			}
 		}
@@ -225,15 +235,16 @@ out:
 int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
 			struct inode *ecryptfs_inode)
 {
-	struct ecryptfs_inode_info *inode_info =
-		ecryptfs_inode_to_private(ecryptfs_inode);
+	struct file *lower_file;
 	mm_segment_t fs_save;
 	ssize_t rc;
 
-	BUG_ON(!inode_info->lower_file);
+	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+	if (!lower_file)
+		return -EIO;
 	fs_save = get_fs();
 	set_fs(get_ds());
-	rc = vfs_read(inode_info->lower_file, data, size, &offset);
+	rc = vfs_read(lower_file, data, size, &offset);
 	set_fs(fs_save);
 	return rc;
 }
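Three separate hardenings land in read_write.c: a missing lower file now returns -EIO instead of crashing the machine via BUG_ON(); the remaining-byte counters become loff_t, since on 32-bit builds a 64-bit remainder silently truncates when stored in a 32-bit size_t; and a long zero-filling write can now be aborted by a fatal signal. The truncation hazard in three lines (assumes a 32-bit size_t):

	loff_t offset = 0, size = 6LL << 30, pos = 0;	/* a 6 GiB write */
	size_t wrong = (offset + size) - pos;	/* wraps when size_t is 32-bit */
	loff_t right = (offset + size) - pos;	/* keeps the full value */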
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f9cfd168fbe..35a852a2682 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -70,6 +70,15 @@
  * simultaneous inserts (A into B and B into A) from racing and
  * constructing a cycle without either insert observing that it is
  * going to.
+ * It is necessary to acquire multiple "ep->mtx"es at once in the
+ * case when one epoll fd is added to another. In this case, we
+ * always acquire the locks in the order of nesting (i.e. after
+ * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
+ * before e2->mtx). Since we disallow cycles of epoll file
+ * descriptors, this ensures that the mutexes are well-ordered. In
+ * order to communicate this nesting to lockdep, when walking a tree
+ * of epoll file descriptors, we use the current recursion depth as
+ * the lockdep subkey.
  * It is possible to drop the "ep->mtx" and to use the global
  * mutex "epmutex" (together with "ep->lock") to have it working,
  * but having "ep->mtx" will make the interface more scalable.
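Lockdep normally treats all ep->mtx instances as one lock class, so taking e1->mtx and then e2->mtx looks like a self-deadlock even though cycles are forbidden. mutex_lock_nested() exists for exactly this case: the subclass argument (here the recursion depth) tells lockdep that the second acquisition is a deliberate, well-ordered nesting. A minimal kernel-style sketch:

	/* Illustrative: parent/child locks of the same class, told apart
	 * for lockdep by subclass. With lock debugging disabled this
	 * compiles down to plain mutex_lock(). */
	static void lock_parent_then_child(struct mutex *parent,
					   struct mutex *child)
	{
		mutex_lock_nested(parent, 0);	/* depth 0 */
		mutex_lock_nested(child, 1);	/* depth 1: no false positive */
		/* ... work on both objects ... */
		mutex_unlock(child);
		mutex_unlock(parent);
	}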
@@ -188,6 +197,12 @@ struct eventpoll {
 
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visited_list_link;
 };
 
 /* Wait structure used by the poll hooks */
@@ -246,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -267,6 +291,12 @@ ctl_table epoll_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
 
 /* Setup the structure that is used as key for the RB tree */
 static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -290,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+	return container_of(p, struct eppoll_entry, wait);
+}
+
 /* Get the "struct epitem" from a wait queue pointer */
 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
 {
@@ -437,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
 	put_cpu();
 }
 
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+	wait_queue_head_t *whead;
+
+	rcu_read_lock();
+	/* If it is cleared by POLLFREE, it should be rcu-safe */
+	whead = rcu_dereference(pwq->whead);
+	if (whead)
+		remove_wait_queue(whead, &pwq->wait);
+	rcu_read_unlock();
+}
+
 /*
  * This function unregisters poll callbacks from the associated file
  * descriptor. Must be called with "mtx" held (or "epmutex" if called from
@@ -451,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 		pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
 
 		list_del(&pwq->llink);
-		remove_wait_queue(pwq->whead, &pwq->wait);
+		ep_remove_wait_queue(pwq);
 		kmem_cache_free(pwq_cache, pwq);
 	}
 }
@@ -464,13 +511,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
  * @ep: Pointer to the epoll private data structure.
  * @sproc: Pointer to the scan callback.
  * @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
  *
  * Returns: The same integer error code returned by the @sproc callback.
  */
 static int ep_scan_ready_list(struct eventpoll *ep,
 			      int (*sproc)(struct eventpoll *,
 					   struct list_head *, void *),
-			      void *priv)
+			      void *priv,
+			      int depth)
 {
 	int error, pwake = 0;
 	unsigned long flags;
@@ -481,7 +530,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 	 * We need to lock this because we could be hit by
 	 * eventpoll_release_file() and epoll_ctl().
 	 */
-	mutex_lock(&ep->mtx);
+	mutex_lock_nested(&ep->mtx, depth);
 
 	/*
 	 * Steal the ready list, and re-init the original one to the
@@ -670,7 +719,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
 
 static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
 {
-	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+	return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
 }
 
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
@@ -700,12 +749,6 @@ static const struct file_operations eventpoll_fops = {
 	.llseek = noop_llseek,
 };
 
-/* Fast test to see if the file is an evenpoll file */
-static inline int is_file_epoll(struct file *f)
-{
-	return f->f_op == &eventpoll_fops;
-}
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
 * interface. We need to have this facility to cleanup correctly files that are
@@ -737,7 +780,7 @@ void eventpoll_release_file(struct file *file)
 
 		ep = epi->ep;
 		list_del_init(&epi->fllink);
-		mutex_lock(&ep->mtx);
+		mutex_lock_nested(&ep->mtx, 0);
 		ep_remove(ep, epi);
 		mutex_unlock(&ep->mtx);
 	}
@@ -816,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
+	if ((unsigned long)key & POLLFREE) {
+		ep_pwq_from_wait(wait)->whead = NULL;
+		/*
+		 * whead = NULL above can race with ep_remove_wait_queue()
+		 * which can do another remove_wait_queue() after us, so we
+		 * can't use __remove_wait_queue(). whead->lock is held by
+		 * the caller.
+		 */
+		list_del_init(&wait->task_list);
+	}
+
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
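Context for the POLLFREE branch: a wait-queue head living inside another subsystem (signalfd is the known case) can be freed while epoll still has a callback registered on it. The owner signals this by waking the queue with POLLFREE; the callback then clears pwq->whead and unlinks itself with list_del_init() under the caller's whead->lock, and ep_remove_wait_queue() re-checks whead under rcu_read_lock() so that a later ep_unregister_pollwait() never calls remove_wait_queue() on freed memory.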
@@ -915,6 +969,103 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
915 rb_insert_color(&epi->rbn, &ep->rbr); 969 rb_insert_color(&epi->rbn, &ep->rbr);
916} 970}
917 971
972
973
974#define PATH_ARR_SIZE 5
975/*
976 * These are the number paths of length 1 to 5, that we are allowing to emanate
977 * from a single file of interest. For example, we allow 1000 paths of length
978 * 1, to emanate from each file of interest. This essentially represents the
979 * potential wakeup paths, which need to be limited in order to avoid massive
980 * uncontrolled wakeup storms. The common use case should be a single ep which
981 * is connected to n file sources. In this case each file source has 1 path
982 * of length 1. Thus, the numbers below should be more than sufficient. These
983 * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
984 * and delete can't add additional paths. Protected by the epmutex.
985 */
986static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
987static int path_count[PATH_ARR_SIZE];
988
989static int path_count_inc(int nests)
990{
991 /* Allow an arbitrary number of depth 1 paths */
992 if (nests == 0)
993 return 0;
994
995 if (++path_count[nests] > path_limits[nests])
996 return -1;
997 return 0;
998}
999
1000static void path_count_init(void)
1001{
1002 int i;
1003
1004 for (i = 0; i < PATH_ARR_SIZE; i++)
1005 path_count[i] = 0;
1006}
1007
1008static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
1009{
1010 int error = 0;
1011 struct file *file = priv;
1012 struct file *child_file;
1013 struct epitem *epi;
1014
1015 list_for_each_entry(epi, &file->f_ep_links, fllink) {
1016 child_file = epi->ep->file;
1017 if (is_file_epoll(child_file)) {
1018 if (list_empty(&child_file->f_ep_links)) {
1019 if (path_count_inc(call_nests)) {
1020 error = -1;
1021 break;
1022 }
1023 } else {
1024 error = ep_call_nested(&poll_loop_ncalls,
1025 EP_MAX_NESTS,
1026 reverse_path_check_proc,
1027 child_file, child_file,
1028 current);
1029 }
1030 if (error != 0)
1031 break;
1032 } else {
1033 printk(KERN_ERR "reverse_path_check_proc: "
1034 "file is not an ep!\n");
1035 }
1036 }
1037 return error;
1038}
1039
1040/**
 1041 * reverse_path_check - The tfile_check_list is a list of file *, which have
1042 * links that are proposed to be newly added. We need to
1043 * make sure that those added links don't add too many
1044 * paths such that we will spend all our time waking up
1045 * eventpoll objects.
1046 *
 1047 * Returns: zero if the proposed links don't create too many paths,
1048 * -1 otherwise.
1049 */
1050static int reverse_path_check(void)
1051{
1052 int length = 0;
1053 int error = 0;
1054 struct file *current_file;
1055
1056 /* let's call this for all tfiles */
1057 list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
1058 length++;
1059 path_count_init();
1060 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1061 reverse_path_check_proc, current_file,
1062 current_file, current);
1063 if (error)
1064 break;
1065 }
1066 return error;
1067}
1068
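
The limits above are observable from user space: each epoll instance that watches another epoll instance, which in turn watches a file, adds a length-2 wakeup path from that file, and path_limits[1] caps those at 500. A hedged test-program sketch (not part of the patch; the failing iteration follows from the limits above):

#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
        int p[2], e0, i;
        struct epoll_event ev = { .events = EPOLLIN };

        pipe(p);
        e0 = epoll_create1(0);
        ev.data.fd = p[0];
        epoll_ctl(e0, EPOLL_CTL_ADD, p[0], &ev);   /* length-1 paths are unlimited */

        for (i = 0; i < 501; i++) {
                int e = epoll_create1(0);

                ev.data.fd = e0;
                if (epoll_ctl(e, EPOLL_CTL_ADD, e0, &ev) < 0) {
                        /* expected at i == 500: EINVAL from reverse_path_check() */
                        printf("add %d failed, errno %d\n", i, errno);
                        return 0;
                }
        }
        return 0;
}
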
918/* 1069/*
919 * Must be called with "mtx" held. 1070 * Must be called with "mtx" held.
920 */ 1071 */
@@ -976,6 +1127,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
976 */ 1127 */
977 ep_rbtree_insert(ep, epi); 1128 ep_rbtree_insert(ep, epi);
978 1129
1130 /* now check if we've created too many backpaths */
1131 error = -EINVAL;
1132 if (reverse_path_check())
1133 goto error_remove_epi;
1134
979 /* We have to drop the new item inside our item list to keep track of it */ 1135 /* We have to drop the new item inside our item list to keep track of it */
980 spin_lock_irqsave(&ep->lock, flags); 1136 spin_lock_irqsave(&ep->lock, flags);
981 1137
@@ -1000,6 +1156,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
1000 1156
1001 return 0; 1157 return 0;
1002 1158
1159error_remove_epi:
1160 spin_lock(&tfile->f_lock);
1161 if (ep_is_linked(&epi->fllink))
1162 list_del_init(&epi->fllink);
1163 spin_unlock(&tfile->f_lock);
1164
1165 rb_erase(&epi->rbn, &ep->rbr);
1166
1003error_unregister: 1167error_unregister:
1004 ep_unregister_pollwait(ep, epi); 1168 ep_unregister_pollwait(ep, epi);
1005 1169
@@ -1134,7 +1298,7 @@ static int ep_send_events(struct eventpoll *ep,
1134 esed.maxevents = maxevents; 1298 esed.maxevents = maxevents;
1135 esed.events = events; 1299 esed.events = events;
1136 1300
1137 return ep_scan_ready_list(ep, ep_send_events_proc, &esed); 1301 return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
1138} 1302}
1139 1303
1140static inline struct timespec ep_set_mstimeout(long ms) 1304static inline struct timespec ep_set_mstimeout(long ms)
@@ -1264,18 +1428,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1264 int error = 0; 1428 int error = 0;
1265 struct file *file = priv; 1429 struct file *file = priv;
1266 struct eventpoll *ep = file->private_data; 1430 struct eventpoll *ep = file->private_data;
1431 struct eventpoll *ep_tovisit;
1267 struct rb_node *rbp; 1432 struct rb_node *rbp;
1268 struct epitem *epi; 1433 struct epitem *epi;
1269 1434
1270 mutex_lock(&ep->mtx); 1435 mutex_lock_nested(&ep->mtx, call_nests + 1);
1436 ep->visited = 1;
1437 list_add(&ep->visited_list_link, &visited_list);
1271 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { 1438 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1272 epi = rb_entry(rbp, struct epitem, rbn); 1439 epi = rb_entry(rbp, struct epitem, rbn);
1273 if (unlikely(is_file_epoll(epi->ffd.file))) { 1440 if (unlikely(is_file_epoll(epi->ffd.file))) {
1441 ep_tovisit = epi->ffd.file->private_data;
1442 if (ep_tovisit->visited)
1443 continue;
1274 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1444 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1275 ep_loop_check_proc, epi->ffd.file, 1445 ep_loop_check_proc, epi->ffd.file,
1276 epi->ffd.file->private_data, current); 1446 ep_tovisit, current);
1277 if (error != 0) 1447 if (error != 0)
1278 break; 1448 break;
1449 } else {
1450 /*
1451 * If we've reached a file that is not associated with
1452 * an ep, then we need to check if the newly added
1453 * links are going to add too many wakeup paths. We do
1454 * this by adding it to the tfile_check_list, if it's
1455 * not already there, and calling reverse_path_check()
1456 * during ep_insert().
1457 */
1458 if (list_empty(&epi->ffd.file->f_tfile_llink))
1459 list_add(&epi->ffd.file->f_tfile_llink,
1460 &tfile_check_list);
1279 } 1461 }
1280 } 1462 }
1281 mutex_unlock(&ep->mtx); 1463 mutex_unlock(&ep->mtx);
@@ -1296,8 +1478,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1296 */ 1478 */
1297static int ep_loop_check(struct eventpoll *ep, struct file *file) 1479static int ep_loop_check(struct eventpoll *ep, struct file *file)
1298{ 1480{
1299 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, 1481 int ret;
1482 struct eventpoll *ep_cur, *ep_next;
1483
1484 ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1300 ep_loop_check_proc, file, ep, current); 1485 ep_loop_check_proc, file, ep, current);
1486 /* clear visited list */
1487 list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
1488 visited_list_link) {
1489 ep_cur->visited = 0;
1490 list_del(&ep_cur->visited_list_link);
1491 }
1492 return ret;
1493}
1494
1495static void clear_tfile_check_list(void)
1496{
1497 struct file *file;
1498
1499 /* first clear the tfile_check_list */
1500 while (!list_empty(&tfile_check_list)) {
1501 file = list_first_entry(&tfile_check_list, struct file,
1502 f_tfile_llink);
1503 list_del_init(&file->f_tfile_llink);
1504 }
1505 INIT_LIST_HEAD(&tfile_check_list);
1301} 1506}
1302 1507
1303/* 1508/*
@@ -1305,8 +1510,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
1305 */ 1510 */
1306SYSCALL_DEFINE1(epoll_create1, int, flags) 1511SYSCALL_DEFINE1(epoll_create1, int, flags)
1307{ 1512{
1308 int error; 1513 int error, fd;
1309 struct eventpoll *ep = NULL; 1514 struct eventpoll *ep = NULL;
1515 struct file *file;
1310 1516
1311 /* Check the EPOLL_* constant for consistency. */ 1517 /* Check the EPOLL_* constant for consistency. */
1312 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); 1518 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1323,11 +1529,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1323 * Creates all the items needed to setup an eventpoll file. That is, 1529 * Creates all the items needed to setup an eventpoll file. That is,
1324 * a file structure and a free file descriptor. 1530 * a file structure and a free file descriptor.
1325 */ 1531 */
1326 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1532 fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
1533 if (fd < 0) {
1534 error = fd;
1535 goto out_free_ep;
1536 }
1537 file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
1327 O_RDWR | (flags & O_CLOEXEC)); 1538 O_RDWR | (flags & O_CLOEXEC));
1328 if (error < 0) 1539 if (IS_ERR(file)) {
1329 ep_free(ep); 1540 error = PTR_ERR(file);
1330 1541 goto out_free_fd;
1542 }
1543 fd_install(fd, file);
1544 ep->file = file;
1545 return fd;
1546
1547out_free_fd:
1548 put_unused_fd(fd);
1549out_free_ep:
1550 ep_free(ep);
1331 return error; 1551 return error;
1332} 1552}
1333 1553
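
The rewrite above replaces anon_inode_getfd() with its three components so that ep->file can be recorded before the descriptor is published; fd_install() must come last because the fd becomes reachable by other threads the moment it lands in the descriptor table. A hedged sketch of the same pattern for a hypothetical 'mydev' object:

static int mydev_create_fd(struct mydev *dev, int flags)
{
        struct file *file;
        int fd = get_unused_fd_flags(flags);

        if (fd < 0)
                return fd;
        file = anon_inode_getfile("[mydev]", &mydev_fops, dev, O_RDWR);
        if (IS_ERR(file)) {
                put_unused_fd(fd);      /* nothing was published yet */
                return PTR_ERR(file);
        }
        dev->file = file;               /* finish initialization first ... */
        fd_install(fd, file);           /* ... then make the fd visible */
        return fd;
}
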
@@ -1393,23 +1613,29 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1393 /* 1613 /*
 1394 * When we insert an epoll file descriptor inside another epoll file 1614 * When we insert an epoll file descriptor inside another epoll file
 1395 * descriptor, there is the chance of creating closed loops, which are 1615 * descriptor, there is the chance of creating closed loops, which are
 1396 * better handled here than in more critical paths. 1616 * better handled here than in more critical paths. While we are
1617 * checking for loops we also determine the list of files reachable
1618 * and hang them on the tfile_check_list, so we can check that we
1619 * haven't created too many possible wakeup paths.
1397 * 1620 *
1398 * We hold epmutex across the loop check and the insert in this case, in 1621 * We need to hold the epmutex across both ep_insert and ep_remove
 1399 * order to prevent two separate inserts from racing and each doing the 1622 * because we want to make sure we are looking at a coherent view of
 1400 * insert "at the same time" such that ep_loop_check passes on both 1623 * the epoll network.
1401 * before either one does the insert, thereby creating a cycle.
1402 */ 1624 */
1403 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) { 1625 if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
1404 mutex_lock(&epmutex); 1626 mutex_lock(&epmutex);
1405 did_lock_epmutex = 1; 1627 did_lock_epmutex = 1;
1406 error = -ELOOP; 1628 }
1407 if (ep_loop_check(ep, tfile) != 0) 1629 if (op == EPOLL_CTL_ADD) {
1408 goto error_tgt_fput; 1630 if (is_file_epoll(tfile)) {
1631 error = -ELOOP;
1632 if (ep_loop_check(ep, tfile) != 0)
1633 goto error_tgt_fput;
1634 } else
1635 list_add(&tfile->f_tfile_llink, &tfile_check_list);
1409 } 1636 }
1410 1637
1411 1638 mutex_lock_nested(&ep->mtx, 0);
1412 mutex_lock(&ep->mtx);
1413 1639
1414 /* 1640 /*
 1415 * Try to look up the file inside our RB tree. Since we grabbed "mtx" 1641 * Try to look up the file inside our RB tree. Since we grabbed "mtx"
@@ -1426,6 +1652,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1426 error = ep_insert(ep, &epds, tfile, fd); 1652 error = ep_insert(ep, &epds, tfile, fd);
1427 } else 1653 } else
1428 error = -EEXIST; 1654 error = -EEXIST;
1655 clear_tfile_check_list();
1429 break; 1656 break;
1430 case EPOLL_CTL_DEL: 1657 case EPOLL_CTL_DEL:
1431 if (epi) 1658 if (epi)
@@ -1444,7 +1671,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1444 mutex_unlock(&ep->mtx); 1671 mutex_unlock(&ep->mtx);
1445 1672
1446error_tgt_fput: 1673error_tgt_fput:
1447 if (unlikely(did_lock_epmutex)) 1674 if (did_lock_epmutex)
1448 mutex_unlock(&epmutex); 1675 mutex_unlock(&epmutex);
1449 1676
1450 fput(tfile); 1677 fput(tfile);
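
From user space the loop check surfaces as -ELOOP on the EPOLL_CTL_ADD that would close a cycle. A hedged sketch:

#include <errno.h>
#include <stdio.h>
#include <sys/epoll.h>

int main(void)
{
        int e1 = epoll_create1(0);
        int e2 = epoll_create1(0);
        struct epoll_event ev = { .events = EPOLLIN };

        ev.data.fd = e2;
        epoll_ctl(e1, EPOLL_CTL_ADD, e2, &ev);  /* e1 watches e2: allowed */
        ev.data.fd = e1;
        if (epoll_ctl(e2, EPOLL_CTL_ADD, e1, &ev) < 0)
                printf("cycle rejected: %s\n",
                       errno == ELOOP ? "ELOOP" : "unexpected errno");
        return 0;
}
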
diff --git a/fs/exec.c b/fs/exec.c
index 99845622986..188d5974f3e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1413,6 +1413,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1413 printable(bprm->buf[2]) && 1413 printable(bprm->buf[2]) &&
1414 printable(bprm->buf[3])) 1414 printable(bprm->buf[3]))
1415 break; /* -ENOEXEC */ 1415 break; /* -ENOEXEC */
1416 if (try)
1417 break; /* -ENOEXEC */
1416 request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); 1418 request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
1417#endif 1419#endif
1418 } 1420 }
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bfc2dc43681..0b3da7cc8ab 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -561,8 +561,12 @@ got:
561 if (IS_DIRSYNC(inode)) 561 if (IS_DIRSYNC(inode))
562 handle->h_sync = 1; 562 handle->h_sync = 1;
563 if (insert_inode_locked(inode) < 0) { 563 if (insert_inode_locked(inode) < 0) {
564 err = -EINVAL; 564 /*
565 goto fail_drop; 565 * Likely a bitmap corruption causing inode to be allocated
566 * twice.
567 */
568 err = -EIO;
569 goto fail;
566 } 570 }
567 spin_lock(&sbi->s_next_gen_lock); 571 spin_lock(&sbi->s_next_gen_lock);
568 inode->i_generation = sbi->s_next_generation++; 572 inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23c3ba..db9ba1a3f7f 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1568,7 +1568,13 @@ static int ext3_ordered_writepage(struct page *page,
1568 int err; 1568 int err;
1569 1569
1570 J_ASSERT(PageLocked(page)); 1570 J_ASSERT(PageLocked(page));
1571 WARN_ON_ONCE(IS_RDONLY(inode)); 1571 /*
1572 * We don't want to warn for emergency remount. The condition is
 1573 * ordered to avoid dereferencing inode->i_sb in the non-error case,
 1574 * which would slow things down.
1575 */
1576 WARN_ON_ONCE(IS_RDONLY(inode) &&
1577 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1572 1578
1573 /* 1579 /*
1574 * We give up here if we're reentered, because it might be for a 1580 * We give up here if we're reentered, because it might be for a
@@ -1642,7 +1648,13 @@ static int ext3_writeback_writepage(struct page *page,
1642 int err; 1648 int err;
1643 1649
1644 J_ASSERT(PageLocked(page)); 1650 J_ASSERT(PageLocked(page));
1645 WARN_ON_ONCE(IS_RDONLY(inode)); 1651 /*
1652 * We don't want to warn for emergency remount. The condition is
1653 * ordered to avoid dereferencing inode->i_sb in non-error case to
1654 * avoid slow-downs.
1655 */
1656 WARN_ON_ONCE(IS_RDONLY(inode) &&
1657 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1646 1658
1647 if (ext3_journal_current_handle()) 1659 if (ext3_journal_current_handle())
1648 goto out_fail; 1660 goto out_fail;
@@ -1684,7 +1696,13 @@ static int ext3_journalled_writepage(struct page *page,
1684 int err; 1696 int err;
1685 1697
1686 J_ASSERT(PageLocked(page)); 1698 J_ASSERT(PageLocked(page));
1687 WARN_ON_ONCE(IS_RDONLY(inode)); 1699 /*
1700 * We don't want to warn for emergency remount. The condition is
 1701 * ordered to avoid dereferencing inode->i_sb in the non-error case,
 1702 * which would slow things down.
1703 */
1704 WARN_ON_ONCE(IS_RDONLY(inode) &&
1705 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1688 1706
1689 if (ext3_journal_current_handle()) 1707 if (ext3_journal_current_handle())
1690 goto no_write; 1708 goto no_write;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9bfc48..e5a71111cb3 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2210,9 +2210,11 @@ static int ext3_symlink (struct inode * dir,
2210 /* 2210 /*
2211 * For non-fast symlinks, we just allocate inode and put it on 2211 * For non-fast symlinks, we just allocate inode and put it on
2212 * orphan list in the first transaction => we need bitmap, 2212 * orphan list in the first transaction => we need bitmap,
2213 * group descriptor, sb, inode block, quota blocks. 2213 * group descriptor, sb, inode block, quota blocks, and
2214 * possibly selinux xattr blocks.
2214 */ 2215 */
2215 credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); 2216 credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2217 EXT3_XATTR_TRANS_BLOCKS;
2216 } else { 2218 } else {
2217 /* 2219 /*
2218 * Fast symlink. We have to add entry to directory 2220 * Fast symlink. We have to add entry to directory
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 32e6cc23bd9..d565759d82e 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted:
803 /* We need to allocate a new block */ 803 /* We need to allocate a new block */
804 ext3_fsblk_t goal = ext3_group_first_block_no(sb, 804 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
805 EXT3_I(inode)->i_block_group); 805 EXT3_I(inode)->i_block_group);
806 ext3_fsblk_t block = ext3_new_block(handle, inode, 806 ext3_fsblk_t block;
807 goal, &error); 807
808 /*
 809 * Protect us against concurrent allocations to the
810 * same inode from ext3_..._writepage(). Reservation
811 * code does not expect racing allocations.
812 */
813 mutex_lock(&EXT3_I(inode)->truncate_mutex);
814 block = ext3_new_block(handle, inode, goal, &error);
815 mutex_unlock(&EXT3_I(inode)->truncate_mutex);
808 if (error) 816 if (error)
809 goto cleanup; 817 goto cleanup;
810 ea_idebug(inode, "creating block %d", block); 818 ea_idebug(inode, "creating block %d", block);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 264f6949511..ebe95f56514 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -514,7 +514,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
514 if (bitmap_bh == NULL) 514 if (bitmap_bh == NULL)
515 continue; 515 continue;
516 516
517 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 517 x = ext4_count_free(bitmap_bh->b_data,
518 EXT4_BLOCKS_PER_GROUP(sb) / 8);
518 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 519 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
519 i, ext4_free_blks_count(sb, gdp), x); 520 i, ext4_free_blks_count(sb, gdp), x);
520 bitmap_count += x; 521 bitmap_count += x;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index fa3af81ac56..012faaaec4a 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,15 +15,13 @@
15 15
16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 16static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
17 17
18unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) 18unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
19{ 19{
20 unsigned int i, sum = 0; 20 unsigned int i, sum = 0;
21 21
22 if (!map)
23 return 0;
24 for (i = 0; i < numchars; i++) 22 for (i = 0; i < numchars; i++)
25 sum += nibblemap[map->b_data[i] & 0xf] + 23 sum += nibblemap[bitmap[i] & 0xf] +
26 nibblemap[(map->b_data[i] >> 4) & 0xf]; 24 nibblemap[(bitmap[i] >> 4) & 0xf];
27 return sum; 25 return sum;
28} 26}
29 27
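
The counting itself is a nibble-at-a-time table lookup: nibblemap[] maps each 4-bit value to its number of zero bits, so a byte costs two array reads instead of eight bit tests. A hedged user-space rendition of the same routine:

#include <stdio.h>

static const int nibblemap[16] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

static unsigned int count_free(const unsigned char *bitmap, unsigned int numchars)
{
        unsigned int i, sum = 0;

        for (i = 0; i < numchars; i++)
                sum += nibblemap[bitmap[i] & 0xf] + nibblemap[(bitmap[i] >> 4) & 0xf];
        return sum;
}

int main(void)
{
        unsigned char bm[2] = { 0x0f, 0xf0 };   /* 8 of 16 bits clear */

        printf("%u free\n", count_free(bm, 2)); /* prints "8 free" */
        return 0;
}
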
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1921392cd70..e0113aa0d3a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -175,6 +175,7 @@ struct mpage_da_data {
175 */ 175 */
176#define EXT4_IO_END_UNWRITTEN 0x0001 176#define EXT4_IO_END_UNWRITTEN 0x0001
177#define EXT4_IO_END_ERROR 0x0002 177#define EXT4_IO_END_ERROR 0x0002
178#define EXT4_IO_END_QUEUED 0x0004
178 179
179struct ext4_io_page { 180struct ext4_io_page {
180 struct page *p_page; 181 struct page *p_page;
@@ -357,8 +358,7 @@ struct flex_groups {
357 358
358/* Flags that should be inherited by new inodes from their parent. */ 359/* Flags that should be inherited by new inodes from their parent. */
359#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 360#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
360 EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\ 361 EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
361 EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
362 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ 362 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
363 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) 363 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
364 364
@@ -526,6 +526,7 @@ struct ext4_new_group_data {
526#define EXT4_FREE_BLOCKS_METADATA 0x0001 526#define EXT4_FREE_BLOCKS_METADATA 0x0001
527#define EXT4_FREE_BLOCKS_FORGET 0x0002 527#define EXT4_FREE_BLOCKS_FORGET 0x0002
528#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 528#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
529#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
529 530
530/* 531/*
531 * ioctl commands 532 * ioctl commands
@@ -1712,7 +1713,7 @@ struct mmpd_data {
1712# define NORET_AND noreturn, 1713# define NORET_AND noreturn,
1713 1714
1714/* bitmap.c */ 1715/* bitmap.c */
1715extern unsigned int ext4_count_free(struct buffer_head *, unsigned); 1716extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
1716 1717
1717/* balloc.c */ 1718/* balloc.c */
1718extern unsigned int ext4_block_group(struct super_block *sb, 1719extern unsigned int ext4_block_group(struct super_block *sb,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index bb85757689b..95af6f87850 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -261,43 +261,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle,
261/* super.c */ 261/* super.c */
262int ext4_force_commit(struct super_block *sb); 262int ext4_force_commit(struct super_block *sb);
263 263
264static inline int ext4_should_journal_data(struct inode *inode) 264/*
265 * Ext4 inode journal modes
266 */
267#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */
268#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
269#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
270
271static inline int ext4_inode_journal_mode(struct inode *inode)
265{ 272{
266 if (EXT4_JOURNAL(inode) == NULL) 273 if (EXT4_JOURNAL(inode) == NULL)
267 return 0; 274 return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
268 if (!S_ISREG(inode->i_mode)) 275 /* We do not support data journalling with delayed allocation */
269 return 1; 276 if (!S_ISREG(inode->i_mode) ||
270 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) 277 test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
271 return 1; 278 return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
272 if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) 279 if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
273 return 1; 280 !test_opt(inode->i_sb, DELALLOC))
274 return 0; 281 return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
282 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
283 return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
284 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
285 return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
286 else
287 BUG();
288}
289
290static inline int ext4_should_journal_data(struct inode *inode)
291{
292 return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
275} 293}
276 294
277static inline int ext4_should_order_data(struct inode *inode) 295static inline int ext4_should_order_data(struct inode *inode)
278{ 296{
279 if (EXT4_JOURNAL(inode) == NULL) 297 return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
280 return 0;
281 if (!S_ISREG(inode->i_mode))
282 return 0;
283 if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
284 return 0;
285 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
286 return 1;
287 return 0;
288} 298}
289 299
290static inline int ext4_should_writeback_data(struct inode *inode) 300static inline int ext4_should_writeback_data(struct inode *inode)
291{ 301{
292 if (!S_ISREG(inode->i_mode)) 302 return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
293 return 0;
294 if (EXT4_JOURNAL(inode) == NULL)
295 return 1;
296 if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
297 return 0;
298 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
299 return 1;
300 return 0;
301} 303}
302 304
303/* 305/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f815cc81e7a..611647b28a4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -341,6 +341,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
341 ext4_fsblk_t block = ext4_ext_pblock(ext); 341 ext4_fsblk_t block = ext4_ext_pblock(ext);
342 int len = ext4_ext_get_actual_len(ext); 342 int len = ext4_ext_get_actual_len(ext);
343 343
344 if (len == 0)
345 return 0;
344 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); 346 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
345} 347}
346 348
@@ -2844,7 +2846,7 @@ static int ext4_split_extent_at(handle_t *handle,
2844 if (err) 2846 if (err)
2845 goto fix_extent_len; 2847 goto fix_extent_len;
2846 /* update the extent length and mark as initialized */ 2848 /* update the extent length and mark as initialized */
2847 ex->ee_len = cpu_to_le32(ee_len); 2849 ex->ee_len = cpu_to_le16(ee_len);
2848 ext4_ext_try_to_merge(inode, path, ex); 2850 ext4_ext_try_to_merge(inode, path, ex);
2849 err = ext4_ext_dirty(handle, inode, path + depth); 2851 err = ext4_ext_dirty(handle, inode, path + depth);
2850 goto out; 2852 goto out;
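
The cpu_to_le32 -> cpu_to_le16 change fixes a conversion-width bug: ee_len is a 16-bit on-disk field, and on little-endian hosts the wrong macro happens to work because the implicit truncation keeps the right bytes, while on big-endian hosts it stores zero. A hedged user-space illustration using stand-in byte-swap helpers for what the macros do on a big-endian host:

#include <stdint.h>
#include <stdio.h>

static uint16_t bswap16(uint16_t x) { return (uint16_t)((x >> 8) | (x << 8)); }
static uint32_t bswap32(uint32_t x)
{
        return (x >> 24) | ((x >> 8) & 0x0000ff00) |
               ((x << 8) & 0x00ff0000) | (x << 24);
}

int main(void)
{
        uint16_t len = 0x1234, field;

        field = bswap16(len);           /* cpu_to_le16: 0x3412, correct */
        printf("le16 store:      %#06x\n", field);
        field = (uint16_t)bswap32(len); /* cpu_to_le32 then truncate: 0x0000 */
        printf("le32 + truncate: %#06x\n", field);
        return 0;
}
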
@@ -3596,17 +3598,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3596 } 3598 }
3597 3599
3598 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); 3600 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
3599 if (err) 3601 if (!err)
3600 goto out2; 3602 err = ext4_ext_insert_extent(handle, inode, path,
3601 3603 &newex, flags);
3602 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3603 if (err) { 3604 if (err) {
3605 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
3606 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
3604 /* free data blocks we just allocated */ 3607 /* free data blocks we just allocated */
3605 /* not a good idea to call discard here directly, 3608 /* not a good idea to call discard here directly,
 3606 * but otherwise we'd need to call it on every free() */ 3609 * but otherwise we'd need to call it on every free() */
3607 ext4_discard_preallocations(inode); 3610 ext4_discard_preallocations(inode);
3608 ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), 3611 ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
3609 ext4_ext_get_actual_len(&newex), 0); 3612 ext4_ext_get_actual_len(&newex), fb_flags);
3610 goto out2; 3613 goto out2;
3611 } 3614 }
3612 3615
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 21bb2f61e50..29272de3023 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1021,8 +1021,12 @@ got:
1021 if (IS_DIRSYNC(inode)) 1021 if (IS_DIRSYNC(inode))
1022 ext4_handle_sync(handle); 1022 ext4_handle_sync(handle);
1023 if (insert_inode_locked(inode) < 0) { 1023 if (insert_inode_locked(inode) < 0) {
1024 err = -EINVAL; 1024 /*
1025 goto fail_drop; 1025 * Likely a bitmap corruption causing inode to be allocated
1026 * twice.
1027 */
1028 err = -EIO;
1029 goto fail;
1026 } 1030 }
1027 spin_lock(&sbi->s_next_gen_lock); 1031 spin_lock(&sbi->s_next_gen_lock);
1028 inode->i_generation = sbi->s_next_generation++; 1032 inode->i_generation = sbi->s_next_generation++;
@@ -1189,7 +1193,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1189 if (!bitmap_bh) 1193 if (!bitmap_bh)
1190 continue; 1194 continue;
1191 1195
1192 x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); 1196 x = ext4_count_free(bitmap_bh->b_data,
1197 EXT4_INODES_PER_GROUP(sb) / 8);
1193 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", 1198 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
1194 (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); 1199 (unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
1195 bitmap_count += x; 1200 bitmap_count += x;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c05100..00a5acdd654 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -189,6 +189,9 @@ void ext4_evict_inode(struct inode *inode)
189 int err; 189 int err;
190 190
191 trace_ext4_evict_inode(inode); 191 trace_ext4_evict_inode(inode);
192
193 ext4_ioend_wait(inode);
194
192 if (inode->i_nlink) { 195 if (inode->i_nlink) {
193 truncate_inode_pages(&inode->i_data, 0); 196 truncate_inode_pages(&inode->i_data, 0);
194 goto no_delete; 197 goto no_delete;
@@ -1131,6 +1134,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
1131 used = ei->i_reserved_data_blocks; 1134 used = ei->i_reserved_data_blocks;
1132 } 1135 }
1133 1136
1137 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
1138 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
1139 "with only %d reserved metadata blocks\n", __func__,
1140 inode->i_ino, ei->i_allocated_meta_blocks,
1141 ei->i_reserved_meta_blocks);
1142 WARN_ON(1);
1143 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
1144 }
1145
1134 /* Update per-inode reservations */ 1146 /* Update per-inode reservations */
1135 ei->i_reserved_data_blocks -= used; 1147 ei->i_reserved_data_blocks -= used;
1136 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 1148 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
@@ -1849,6 +1861,8 @@ static int ext4_journalled_write_end(struct file *file,
1849 from = pos & (PAGE_CACHE_SIZE - 1); 1861 from = pos & (PAGE_CACHE_SIZE - 1);
1850 to = from + len; 1862 to = from + len;
1851 1863
1864 BUG_ON(!ext4_handle_valid(handle));
1865
1852 if (copied < len) { 1866 if (copied < len) {
1853 if (!PageUptodate(page)) 1867 if (!PageUptodate(page))
1854 copied = 0; 1868 copied = 0;
@@ -2121,8 +2135,11 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2121 clear_buffer_unwritten(bh); 2135 clear_buffer_unwritten(bh);
2122 } 2136 }
2123 2137
2124 /* skip page if block allocation undone */ 2138 /*
2125 if (buffer_delay(bh) || buffer_unwritten(bh)) 2139 * skip page if block allocation undone and
2140 * block is dirty
2141 */
2142 if (ext4_bh_delay_or_unwritten(NULL, bh))
2126 skip_page = 1; 2143 skip_page = 1;
2127 bh = bh->b_this_page; 2144 bh = bh->b_this_page;
2128 block_start += bh->b_size; 2145 block_start += bh->b_size;
@@ -2148,7 +2165,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2148 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) 2165 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
2149 err = ext4_bio_write_page(&io_submit, page, 2166 err = ext4_bio_write_page(&io_submit, page,
2150 len, mpd->wbc); 2167 len, mpd->wbc);
2151 else 2168 else if (buffer_uninit(page_bufs)) {
2169 ext4_set_bh_endio(page_bufs, inode);
2170 err = block_write_full_page_endio(page,
2171 noalloc_get_block_write,
2172 mpd->wbc, ext4_end_io_buffer_write);
2173 } else
2152 err = block_write_full_page(page, 2174 err = block_write_full_page(page,
2153 noalloc_get_block_write, mpd->wbc); 2175 noalloc_get_block_write, mpd->wbc);
2154 2176
@@ -2564,6 +2586,8 @@ static int __ext4_journalled_writepage(struct page *page,
2564 goto out; 2586 goto out;
2565 } 2587 }
2566 2588
2589 BUG_ON(!ext4_handle_valid(handle));
2590
2567 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, 2591 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
2568 do_journal_get_write_access); 2592 do_journal_get_write_access);
2569 2593
@@ -2663,8 +2687,12 @@ static int ext4_writepage(struct page *page,
2663 * We don't want to do block allocation, so redirty 2687 * We don't want to do block allocation, so redirty
2664 * the page and return. We may reach here when we do 2688 * the page and return. We may reach here when we do
2665 * a journal commit via journal_submit_inode_data_buffers. 2689 * a journal commit via journal_submit_inode_data_buffers.
2666 * We can also reach here via shrink_page_list 2690 * We can also reach here via shrink_page_list but it
 2691 * should never be for direct reclaim, so warn if that
 2692 * happens.
2667 */ 2693 */
2694 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
2695 PF_MEMALLOC);
2668 goto redirty_page; 2696 goto redirty_page;
2669 } 2697 }
2670 if (commit_write) 2698 if (commit_write)
@@ -2741,7 +2769,7 @@ static int write_cache_pages_da(struct address_space *mapping,
2741 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2769 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2742 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2770 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2743 2771
2744 if (wbc->sync_mode == WB_SYNC_ALL) 2772 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2745 tag = PAGECACHE_TAG_TOWRITE; 2773 tag = PAGECACHE_TAG_TOWRITE;
2746 else 2774 else
2747 tag = PAGECACHE_TAG_DIRTY; 2775 tag = PAGECACHE_TAG_DIRTY;
@@ -2973,7 +3001,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2973 } 3001 }
2974 3002
2975retry: 3003retry:
2976 if (wbc->sync_mode == WB_SYNC_ALL) 3004 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2977 tag_pages_for_writeback(mapping, index, end); 3005 tag_pages_for_writeback(mapping, index, end);
2978 3006
2979 while (!ret && wbc->nr_to_write > 0) { 3007 while (!ret && wbc->nr_to_write > 0) {
@@ -3093,7 +3121,7 @@ static int ext4_nonda_switch(struct super_block *sb)
3093 * start pushing delalloc when 1/2 of free blocks are dirty. 3121 * start pushing delalloc when 1/2 of free blocks are dirty.
3094 */ 3122 */
3095 if (free_blocks < 2 * dirty_blocks) 3123 if (free_blocks < 2 * dirty_blocks)
3096 writeback_inodes_sb_if_idle(sb); 3124 writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
3097 3125
3098 return 0; 3126 return 0;
3099} 3127}
@@ -3197,13 +3225,14 @@ static int ext4_da_write_end(struct file *file,
3197 int write_mode = (int)(unsigned long)fsdata; 3225 int write_mode = (int)(unsigned long)fsdata;
3198 3226
3199 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 3227 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
3200 if (ext4_should_order_data(inode)) { 3228 switch (ext4_inode_journal_mode(inode)) {
3229 case EXT4_INODE_ORDERED_DATA_MODE:
3201 return ext4_ordered_write_end(file, mapping, pos, 3230 return ext4_ordered_write_end(file, mapping, pos,
3202 len, copied, page, fsdata); 3231 len, copied, page, fsdata);
3203 } else if (ext4_should_writeback_data(inode)) { 3232 case EXT4_INODE_WRITEBACK_DATA_MODE:
3204 return ext4_writeback_write_end(file, mapping, pos, 3233 return ext4_writeback_write_end(file, mapping, pos,
3205 len, copied, page, fsdata); 3234 len, copied, page, fsdata);
3206 } else { 3235 default:
3207 BUG(); 3236 BUG();
3208 } 3237 }
3209 } 3238 }
@@ -3219,7 +3248,7 @@ static int ext4_da_write_end(struct file *file,
3219 */ 3248 */
3220 3249
3221 new_i_size = pos + copied; 3250 new_i_size = pos + copied;
3222 if (new_i_size > EXT4_I(inode)->i_disksize) { 3251 if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
3223 if (ext4_da_should_update_i_disksize(page, end)) { 3252 if (ext4_da_should_update_i_disksize(page, end)) {
3224 down_write(&EXT4_I(inode)->i_data_sem); 3253 down_write(&EXT4_I(inode)->i_data_sem);
3225 if (new_i_size > EXT4_I(inode)->i_disksize) { 3254 if (new_i_size > EXT4_I(inode)->i_disksize) {
@@ -3495,12 +3524,17 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3495 } 3524 }
3496 3525
3497retry: 3526retry:
3498 if (rw == READ && ext4_should_dioread_nolock(inode)) 3527 if (rw == READ && ext4_should_dioread_nolock(inode)) {
3528 if (unlikely(!list_empty(&ei->i_completed_io_list))) {
3529 mutex_lock(&inode->i_mutex);
3530 ext4_flush_completed_IO(inode);
3531 mutex_unlock(&inode->i_mutex);
3532 }
3499 ret = __blockdev_direct_IO(rw, iocb, inode, 3533 ret = __blockdev_direct_IO(rw, iocb, inode,
3500 inode->i_sb->s_bdev, iov, 3534 inode->i_sb->s_bdev, iov,
3501 offset, nr_segs, 3535 offset, nr_segs,
3502 ext4_get_block, NULL, NULL, 0); 3536 ext4_get_block, NULL, NULL, 0);
3503 else { 3537 } else {
3504 ret = blockdev_direct_IO(rw, iocb, inode, 3538 ret = blockdev_direct_IO(rw, iocb, inode,
3505 inode->i_sb->s_bdev, iov, 3539 inode->i_sb->s_bdev, iov,
3506 offset, nr_segs, 3540 offset, nr_segs,
@@ -3635,8 +3669,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3635 goto out; 3669 goto out;
3636 } 3670 }
3637 3671
3638 io_end->flag = EXT4_IO_END_UNWRITTEN; 3672 /*
3673 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
 3674 * but being more careful is always safe for future changes.
3675 */
3639 inode = io_end->inode; 3676 inode = io_end->inode;
3677 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3678 io_end->flag |= EXT4_IO_END_UNWRITTEN;
3679 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3680 }
3640 3681
3641 /* Add the io_end to per-inode completed io list*/ 3682 /* Add the io_end to per-inode completed io list*/
3642 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 3683 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -3891,18 +3932,25 @@ static const struct address_space_operations ext4_da_aops = {
3891 3932
3892void ext4_set_aops(struct inode *inode) 3933void ext4_set_aops(struct inode *inode)
3893{ 3934{
3894 if (ext4_should_order_data(inode) && 3935 switch (ext4_inode_journal_mode(inode)) {
3895 test_opt(inode->i_sb, DELALLOC)) 3936 case EXT4_INODE_ORDERED_DATA_MODE:
3896 inode->i_mapping->a_ops = &ext4_da_aops; 3937 if (test_opt(inode->i_sb, DELALLOC))
3897 else if (ext4_should_order_data(inode)) 3938 inode->i_mapping->a_ops = &ext4_da_aops;
3898 inode->i_mapping->a_ops = &ext4_ordered_aops; 3939 else
3899 else if (ext4_should_writeback_data(inode) && 3940 inode->i_mapping->a_ops = &ext4_ordered_aops;
3900 test_opt(inode->i_sb, DELALLOC)) 3941 break;
3901 inode->i_mapping->a_ops = &ext4_da_aops; 3942 case EXT4_INODE_WRITEBACK_DATA_MODE:
3902 else if (ext4_should_writeback_data(inode)) 3943 if (test_opt(inode->i_sb, DELALLOC))
3903 inode->i_mapping->a_ops = &ext4_writeback_aops; 3944 inode->i_mapping->a_ops = &ext4_da_aops;
3904 else 3945 else
3946 inode->i_mapping->a_ops = &ext4_writeback_aops;
3947 break;
3948 case EXT4_INODE_JOURNAL_DATA_MODE:
3905 inode->i_mapping->a_ops = &ext4_journalled_aops; 3949 inode->i_mapping->a_ops = &ext4_journalled_aops;
3950 break;
3951 default:
3952 BUG();
3953 }
3906} 3954}
3907 3955
3908/* 3956/*
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 808c554e773..4cbe1c2c996 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -35,7 +35,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
35 handle_t *handle = NULL; 35 handle_t *handle = NULL;
36 int err, migrate = 0; 36 int err, migrate = 0;
37 struct ext4_iloc iloc; 37 struct ext4_iloc iloc;
38 unsigned int oldflags; 38 unsigned int oldflags, mask, i;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!inode_owner_or_capable(inode)) 41 if (!inode_owner_or_capable(inode))
@@ -112,9 +112,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
112 if (err) 112 if (err)
113 goto flags_err; 113 goto flags_err;
114 114
115 flags = flags & EXT4_FL_USER_MODIFIABLE; 115 for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
116 flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; 116 if (!(mask & EXT4_FL_USER_MODIFIABLE))
117 ei->i_flags = flags; 117 continue;
118 if (mask & flags)
119 ext4_set_inode_flag(inode, i);
120 else
121 ext4_clear_inode_flag(inode, i);
122 }
118 123
119 ext4_set_inode_flags(inode); 124 ext4_set_inode_flags(inode);
120 inode->i_ctime = ext4_current_time(inode); 125 inode->i_ctime = ext4_current_time(inode);
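
The per-bit loop replaces a wholesale assignment of ei->i_flags, so each user-modifiable flag is set or cleared through the atomic ext4_set_inode_flag/ext4_clear_inode_flag helpers while unmodifiable bits are never touched. User space drives this through the standard flags ioctls; a hedged sketch toggling one modifiable bit:

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(int argc, char **argv)
{
        int fd, attr;

        if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;
        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) < 0)
                return 1;
        attr |= FS_NOATIME_FL;          /* request one user-modifiable bit */
        if (ioctl(fd, FS_IOC_SETFLAGS, &attr) < 0)
                perror("FS_IOC_SETFLAGS");
        return 0;
}
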
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6ed859d5685..b6adf68a5c0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2528,6 +2528,9 @@ int ext4_mb_release(struct super_block *sb)
2528 struct ext4_sb_info *sbi = EXT4_SB(sb); 2528 struct ext4_sb_info *sbi = EXT4_SB(sb);
2529 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); 2529 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2530 2530
2531 if (sbi->s_proc)
2532 remove_proc_entry("mb_groups", sbi->s_proc);
2533
2531 if (sbi->s_group_info) { 2534 if (sbi->s_group_info) {
2532 for (i = 0; i < ngroups; i++) { 2535 for (i = 0; i < ngroups; i++) {
2533 grinfo = ext4_get_group_info(sb, i); 2536 grinfo = ext4_get_group_info(sb, i);
@@ -2575,8 +2578,6 @@ int ext4_mb_release(struct super_block *sb)
2575 } 2578 }
2576 2579
2577 free_percpu(sbi->s_locality_groups); 2580 free_percpu(sbi->s_locality_groups);
2578 if (sbi->s_proc)
2579 remove_proc_entry("mb_groups", sbi->s_proc);
2580 2581
2581 return 0; 2582 return 0;
2582} 2583}
@@ -4583,6 +4584,7 @@ do_more:
4583 */ 4584 */
4584 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4585 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4585 if (!new_entry) { 4586 if (!new_entry) {
4587 ext4_mb_unload_buddy(&e4b);
4586 err = -ENOMEM; 4588 err = -ENOMEM;
4587 goto error_return; 4589 goto error_return;
4588 } 4590 }
@@ -4637,7 +4639,7 @@ do_more:
4637 } 4639 }
4638 ext4_mark_super_dirty(sb); 4640 ext4_mark_super_dirty(sb);
4639error_return: 4641error_return:
4640 if (freed) 4642 if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4641 dquot_free_block(inode, freed); 4643 dquot_free_block(inode, freed);
4642 brelse(bitmap_bh); 4644 brelse(bitmap_bh);
4643 ext4_std_error(sb, err); 4645 ext4_std_error(sb, err);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b7721f5..3d36d5a1e19 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1589,7 +1589,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1589 dxtrace(dx_show_index("node", frames[1].entries)); 1589 dxtrace(dx_show_index("node", frames[1].entries));
1590 dxtrace(dx_show_index("node", 1590 dxtrace(dx_show_index("node",
1591 ((struct dx_node *) bh2->b_data)->entries)); 1591 ((struct dx_node *) bh2->b_data)->entries));
1592 err = ext4_handle_dirty_metadata(handle, inode, bh2); 1592 err = ext4_handle_dirty_metadata(handle, dir, bh2);
1593 if (err) 1593 if (err)
1594 goto journal_error; 1594 goto journal_error;
1595 brelse (bh2); 1595 brelse (bh2);
@@ -1615,7 +1615,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1615 if (err) 1615 if (err)
1616 goto journal_error; 1616 goto journal_error;
1617 } 1617 }
1618 err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); 1618 err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
1619 if (err) { 1619 if (err) {
1620 ext4_std_error(inode->i_sb, err); 1620 ext4_std_error(inode->i_sb, err);
1621 goto cleanup; 1621 goto cleanup;
@@ -1866,7 +1866,7 @@ retry:
1866 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1866 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1867 inode->i_nlink = 2; 1867 inode->i_nlink = 2;
1868 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); 1868 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1869 err = ext4_handle_dirty_metadata(handle, dir, dir_block); 1869 err = ext4_handle_dirty_metadata(handle, inode, dir_block);
1870 if (err) 1870 if (err)
1871 goto out_clear_inode; 1871 goto out_clear_inode;
1872 err = ext4_mark_inode_dirty(handle, inode); 1872 err = ext4_mark_inode_dirty(handle, inode);
@@ -2264,9 +2264,11 @@ static int ext4_symlink(struct inode *dir,
2264 /* 2264 /*
2265 * For non-fast symlinks, we just allocate inode and put it on 2265 * For non-fast symlinks, we just allocate inode and put it on
2266 * orphan list in the first transaction => we need bitmap, 2266 * orphan list in the first transaction => we need bitmap,
2267 * group descriptor, sb, inode block, quota blocks. 2267 * group descriptor, sb, inode block, quota blocks, and
2268 * possibly selinux xattr blocks.
2268 */ 2269 */
2269 credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); 2270 credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2271 EXT4_XATTR_TRANS_BLOCKS;
2270 } else { 2272 } else {
2271 /* 2273 /*
2272 * Fast symlink. We have to add entry to directory 2274 * Fast symlink. We have to add entry to directory
@@ -2538,7 +2540,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2538 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2540 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2539 cpu_to_le32(new_dir->i_ino); 2541 cpu_to_le32(new_dir->i_ino);
2540 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2542 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2541 retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2543 retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
2542 if (retval) { 2544 if (retval) {
2543 ext4_std_error(old_dir->i_sb, retval); 2545 ext4_std_error(old_dir->i_sb, retval);
2544 goto end_rename; 2546 goto end_rename;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7bb8f76d470..d99d74aca8a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -142,7 +142,23 @@ static void ext4_end_io_work(struct work_struct *work)
142 unsigned long flags; 142 unsigned long flags;
143 int ret; 143 int ret;
144 144
145 mutex_lock(&inode->i_mutex); 145 if (!mutex_trylock(&inode->i_mutex)) {
146 /*
147 * Requeue the work instead of waiting so that the work
148 * items queued after this can be processed.
149 */
150 queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
151 /*
 152 * To prevent the ext4-dio-unwritten thread from repeatedly
 153 * requeueing end_io requests and occupying the cpu for too long,
154 * yield the cpu if it sees an end_io request that has already
155 * been requeued.
156 */
157 if (io->flag & EXT4_IO_END_QUEUED)
158 yield();
159 io->flag |= EXT4_IO_END_QUEUED;
160 return;
161 }
146 ret = ext4_end_io_nolock(io); 162 ret = ext4_end_io_nolock(io);
147 if (ret < 0) { 163 if (ret < 0) {
148 mutex_unlock(&inode->i_mutex); 164 mutex_unlock(&inode->i_mutex);
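
The trylock-and-requeue logic keeps the shared workqueue thread from sleeping on i_mutex: a contended item goes back on the queue so later items still run, and a second failed attempt yields the CPU. A hedged generic sketch of the pattern (my_item and my_wq are hypothetical):

extern struct workqueue_struct *my_wq;

struct my_item {
        struct work_struct work;
        struct mutex lock;
        bool requeued;
};

static void my_work_fn(struct work_struct *work)
{
        struct my_item *it = container_of(work, struct my_item, work);

        if (!mutex_trylock(&it->lock)) {
                queue_work(my_wq, &it->work);   /* retry later, don't block */
                if (it->requeued)
                        yield();                /* be fair if we keep losing */
                it->requeued = true;
                return;
        }
        /* ... process under the lock ... */
        mutex_unlock(&it->lock);
}
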
@@ -338,8 +354,10 @@ submit_and_retry:
338 if ((io_end->num_io_pages >= MAX_IO_PAGES) && 354 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
339 (io_end->pages[io_end->num_io_pages-1] != io_page)) 355 (io_end->pages[io_end->num_io_pages-1] != io_page))
340 goto submit_and_retry; 356 goto submit_and_retry;
341 if (buffer_uninit(bh)) 357 if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
342 io->io_end->flag |= EXT4_IO_END_UNWRITTEN; 358 io_end->flag |= EXT4_IO_END_UNWRITTEN;
359 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
360 }
343 io->io_end->size += bh->b_size; 361 io->io_end->size += bh->b_size;
344 io->io_next_block++; 362 io->io_next_block++;
345 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 363 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -387,6 +405,18 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
387 405
388 block_end = block_start + blocksize; 406 block_end = block_start + blocksize;
389 if (block_start >= len) { 407 if (block_start >= len) {
408 /*
409 * Comments copied from block_write_full_page_endio:
410 *
411 * The page straddles i_size. It must be zeroed out on
412 * each and every writepage invocation because it may
413 * be mmapped. "A file is mapped in multiples of the
414 * page size. For a file that is not a multiple of
415 * the page size, the remaining memory is zeroed when
416 * mapped, and writes to that region are not written
417 * out to the file."
418 */
419 zero_user_segment(page, block_start, block_end);
390 clear_buffer_dirty(bh); 420 clear_buffer_dirty(bh);
391 set_buffer_uptodate(bh); 421 set_buffer_uptodate(bh);
392 continue; 422 continue;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9ea71aa864b..489d406c0d7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -433,6 +433,7 @@ void __ext4_error(struct super_block *sb, const char *function,
433 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", 433 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
434 sb->s_id, function, line, current->comm, &vaf); 434 sb->s_id, function, line, current->comm, &vaf);
435 va_end(args); 435 va_end(args);
436 save_error_info(sb, function, line);
436 437
437 ext4_handle_error(sb); 438 ext4_handle_error(sb);
438} 439}
@@ -859,6 +860,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
859 ei->i_reserved_meta_blocks = 0; 860 ei->i_reserved_meta_blocks = 0;
860 ei->i_allocated_meta_blocks = 0; 861 ei->i_allocated_meta_blocks = 0;
861 ei->i_da_metadata_calc_len = 0; 862 ei->i_da_metadata_calc_len = 0;
863 ei->i_da_metadata_calc_last_lblock = 0;
862 spin_lock_init(&(ei->i_block_reservation_lock)); 864 spin_lock_init(&(ei->i_block_reservation_lock));
863#ifdef CONFIG_QUOTA 865#ifdef CONFIG_QUOTA
864 ei->i_reserved_quota = 0; 866 ei->i_reserved_quota = 0;
@@ -892,7 +894,6 @@ static void ext4_i_callback(struct rcu_head *head)
892 894
893static void ext4_destroy_inode(struct inode *inode) 895static void ext4_destroy_inode(struct inode *inode)
894{ 896{
895 ext4_ioend_wait(inode);
896 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 897 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
897 ext4_msg(inode->i_sb, KERN_ERR, 898 ext4_msg(inode->i_sb, KERN_ERR,
898 "Inode %lu (%p): orphan list check failed!", 899 "Inode %lu (%p): orphan list check failed!",
@@ -1114,9 +1115,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1114 seq_puts(seq, ",block_validity"); 1115 seq_puts(seq, ",block_validity");
1115 1116
1116 if (!test_opt(sb, INIT_INODE_TABLE)) 1117 if (!test_opt(sb, INIT_INODE_TABLE))
1117 seq_puts(seq, ",noinit_inode_table"); 1118 seq_puts(seq, ",noinit_itable");
1118 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1119 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
1119 seq_printf(seq, ",init_inode_table=%u", 1120 seq_printf(seq, ",init_itable=%u",
1120 (unsigned) sbi->s_li_wait_mult); 1121 (unsigned) sbi->s_li_wait_mult);
1121 1122
1122 ext4_show_quota_options(seq, sb); 1123 ext4_show_quota_options(seq, sb);
@@ -1292,8 +1293,7 @@ enum {
1292 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1293 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1293 Opt_inode_readahead_blks, Opt_journal_ioprio, 1294 Opt_inode_readahead_blks, Opt_journal_ioprio,
1294 Opt_dioread_nolock, Opt_dioread_lock, 1295 Opt_dioread_nolock, Opt_dioread_lock,
1295 Opt_discard, Opt_nodiscard, 1296 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1296 Opt_init_inode_table, Opt_noinit_inode_table,
1297}; 1297};
1298 1298
1299static const match_table_t tokens = { 1299static const match_table_t tokens = {
@@ -1366,9 +1366,9 @@ static const match_table_t tokens = {
1366 {Opt_dioread_lock, "dioread_lock"}, 1366 {Opt_dioread_lock, "dioread_lock"},
1367 {Opt_discard, "discard"}, 1367 {Opt_discard, "discard"},
1368 {Opt_nodiscard, "nodiscard"}, 1368 {Opt_nodiscard, "nodiscard"},
1369 {Opt_init_inode_table, "init_itable=%u"}, 1369 {Opt_init_itable, "init_itable=%u"},
1370 {Opt_init_inode_table, "init_itable"}, 1370 {Opt_init_itable, "init_itable"},
1371 {Opt_noinit_inode_table, "noinit_itable"}, 1371 {Opt_noinit_itable, "noinit_itable"},
1372 {Opt_err, NULL}, 1372 {Opt_err, NULL},
1373}; 1373};
1374 1374
@@ -1845,7 +1845,7 @@ set_qf_format:
1845 case Opt_dioread_lock: 1845 case Opt_dioread_lock:
1846 clear_opt(sb, DIOREAD_NOLOCK); 1846 clear_opt(sb, DIOREAD_NOLOCK);
1847 break; 1847 break;
1848 case Opt_init_inode_table: 1848 case Opt_init_itable:
1849 set_opt(sb, INIT_INODE_TABLE); 1849 set_opt(sb, INIT_INODE_TABLE);
1850 if (args[0].from) { 1850 if (args[0].from) {
1851 if (match_int(&args[0], &option)) 1851 if (match_int(&args[0], &option))
@@ -1856,7 +1856,7 @@ set_qf_format:
1856 return 0; 1856 return 0;
1857 sbi->s_li_wait_mult = option; 1857 sbi->s_li_wait_mult = option;
1858 break; 1858 break;
1859 case Opt_noinit_inode_table: 1859 case Opt_noinit_itable:
1860 clear_opt(sb, INIT_INODE_TABLE); 1860 clear_opt(sb, INIT_INODE_TABLE);
1861 break; 1861 break;
1862 default: 1862 default:
@@ -1959,17 +1959,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
1959 struct ext4_group_desc *gdp = NULL; 1959 struct ext4_group_desc *gdp = NULL;
1960 ext4_group_t flex_group_count; 1960 ext4_group_t flex_group_count;
1961 ext4_group_t flex_group; 1961 ext4_group_t flex_group;
1962 int groups_per_flex = 0; 1962 unsigned int groups_per_flex = 0;
1963 size_t size; 1963 size_t size;
1964 int i; 1964 int i;
1965 1965
1966 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1966 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1967 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1967 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
1968
1969 if (groups_per_flex < 2) {
1970 sbi->s_log_groups_per_flex = 0; 1968 sbi->s_log_groups_per_flex = 0;
1971 return 1; 1969 return 1;
1972 } 1970 }
1971 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1973 1972
1974 /* We allocate both existing and potentially added groups */ 1973 /* We allocate both existing and potentially added groups */
1975 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1974 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
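
Validating s_log_groups_per_flex before computing 1 << shift matters because a shift count of 32 or more, which a corrupted superblock could supply, is undefined behavior in C; the single range check also rejects the degenerate zero case. A hedged distillation:

static unsigned int groups_per_flex(unsigned int log)
{
        /* 1 << log is undefined for log >= 32; reject corrupt values first */
        if (log < 1 || log > 31)
                return 0;       /* caller treats 0 as "flex_bg disabled" */
        return 1U << log;
}
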
@@ -3621,7 +3620,8 @@ no_journal:
3621 goto failed_mount4; 3620 goto failed_mount4;
3622 } 3621 }
3623 3622
3624 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); 3623 if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
3624 sb->s_flags |= MS_RDONLY;
3625 3625
3626 /* determine the minimum size of new large inodes, if present */ 3626 /* determine the minimum size of new large inodes, if present */
3627 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 3627 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc9725..c2865cc3101 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -487,18 +487,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
487 ext4_free_blocks(handle, inode, bh, 0, 1, 487 ext4_free_blocks(handle, inode, bh, 0, 1,
488 EXT4_FREE_BLOCKS_METADATA | 488 EXT4_FREE_BLOCKS_METADATA |
489 EXT4_FREE_BLOCKS_FORGET); 489 EXT4_FREE_BLOCKS_FORGET);
490 unlock_buffer(bh);
490 } else { 491 } else {
491 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 492 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
493 if (ce)
494 mb_cache_entry_release(ce);
495 unlock_buffer(bh);
492 error = ext4_handle_dirty_metadata(handle, inode, bh); 496 error = ext4_handle_dirty_metadata(handle, inode, bh);
493 if (IS_SYNC(inode)) 497 if (IS_SYNC(inode))
494 ext4_handle_sync(handle); 498 ext4_handle_sync(handle);
495 dquot_free_block(inode, 1); 499 dquot_free_block(inode, 1);
496 ea_bdebug(bh, "refcount now=%d; releasing", 500 ea_bdebug(bh, "refcount now=%d; releasing",
497 le32_to_cpu(BHDR(bh)->h_refcount)); 501 le32_to_cpu(BHDR(bh)->h_refcount));
498 if (ce)
499 mb_cache_entry_release(ce);
500 } 502 }
501 unlock_buffer(bh);
502out: 503out:
503 ext4_std_error(inode->i_sb, error); 504 ext4_std_error(inode->i_sb, error);
504 return; 505 return;
@@ -820,8 +821,14 @@ inserted:
820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 821 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 822 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
822 823
824 /*
825 * take i_data_sem because we will test
826 * i_delalloc_reserved_flag in ext4_mb_new_blocks
827 */
828 down_read((&EXT4_I(inode)->i_data_sem));
823 block = ext4_new_meta_blocks(handle, inode, goal, 0, 829 block = ext4_new_meta_blocks(handle, inode, goal, 0,
824 NULL, &error); 830 NULL, &error);
831 up_read((&EXT4_I(inode)->i_data_sem));
825 if (error) 832 if (error)
826 goto cleanup; 833 goto cleanup;
827 834
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4ad64732cbc..dc563788fa8 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -754,6 +754,13 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
754 return ret; 754 return ret;
755} 755}
756 756
757static int fat_ioctl_volume_id(struct inode *dir)
758{
759 struct super_block *sb = dir->i_sb;
760 struct msdos_sb_info *sbi = MSDOS_SB(sb);
761 return sbi->vol_id;
762}
763
757static long fat_dir_ioctl(struct file *filp, unsigned int cmd, 764static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
758 unsigned long arg) 765 unsigned long arg)
759{ 766{
@@ -770,6 +777,8 @@ static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
770 short_only = 0; 777 short_only = 0;
771 both = 1; 778 both = 1;
772 break; 779 break;
780 case VFAT_IOCTL_GET_VOLUME_ID:
781 return fat_ioctl_volume_id(inode);
773 default: 782 default:
774 return fat_generic_ioctl(filp, cmd, arg); 783 return fat_generic_ioctl(filp, cmd, arg);
775 } 784 }
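
Because the handler returns the ID directly, user space receives the volume ID as the ioctl return value rather than through a pointer argument. A hedged sketch, assuming VFAT_IOCTL_GET_VOLUME_ID is exported via linux/msdos_fs.h:

#include <fcntl.h>
#include <linux/msdos_fs.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(int argc, char **argv)
{
        long id;
        int fd;

        if (argc < 2 || (fd = open(argv[1], O_RDONLY | O_DIRECTORY)) < 0)
                return 1;               /* argv[1]: a directory on the vfat mount */
        id = ioctl(fd, VFAT_IOCTL_GET_VOLUME_ID);
        if (id == -1)
                perror("ioctl");
        else
                printf("volume id: %08lX\n", id);
        return 0;
}
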
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 8276cc282de..74fe5d3ed01 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -78,6 +78,7 @@ struct msdos_sb_info {
78 const void *dir_ops; /* Opaque; default directory operations */ 78 const void *dir_ops; /* Opaque; default directory operations */
79 int dir_per_block; /* dir entries per block */ 79 int dir_per_block; /* dir entries per block */
80 int dir_per_block_bits; /* log2(dir_per_block) */ 80 int dir_per_block_bits; /* log2(dir_per_block) */
81 unsigned long vol_id; /* volume ID */
81 82
82 int fatent_shift; 83 int fatent_shift;
83 struct fatent_operations *fatent_ops; 84 struct fatent_operations *fatent_ops;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cb8d8391ac0..9836839e0eb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1245,6 +1245,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1245 struct inode *root_inode = NULL, *fat_inode = NULL; 1245 struct inode *root_inode = NULL, *fat_inode = NULL;
1246 struct buffer_head *bh; 1246 struct buffer_head *bh;
1247 struct fat_boot_sector *b; 1247 struct fat_boot_sector *b;
1248 struct fat_boot_bsx *bsx;
1248 struct msdos_sb_info *sbi; 1249 struct msdos_sb_info *sbi;
1249 u16 logical_sector_size; 1250 u16 logical_sector_size;
1250 u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors; 1251 u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
@@ -1390,6 +1391,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1390 goto out_fail; 1391 goto out_fail;
1391 } 1392 }
1392 1393
1394 bsx = (struct fat_boot_bsx *)(bh->b_data + FAT32_BSX_OFFSET);
1395
1393 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; 1396 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
1394 if (!IS_FSINFO(fsinfo)) { 1397 if (!IS_FSINFO(fsinfo)) {
1395 fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: " 1398 fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
@@ -1405,8 +1408,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1405 } 1408 }
1406 1409
1407 brelse(fsinfo_bh); 1410 brelse(fsinfo_bh);
1411 } else {
1412 bsx = (struct fat_boot_bsx *)(bh->b_data + FAT16_BSX_OFFSET);
1408 } 1413 }
1409 1414
 1415 /* interpret volume ID as a little-endian 32-bit integer */
1416 sbi->vol_id = (((u32)bsx->vol_id[0]) | ((u32)bsx->vol_id[1] << 8) |
1417 ((u32)bsx->vol_id[2] << 16) | ((u32)bsx->vol_id[3] << 24));
1418
1410 sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry); 1419 sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry);
1411 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; 1420 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
1412 1421
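A user-space caller reaches the new code path by opening the mount's root directory and issuing the ioctl; the 32-bit serial assembled above comes back as the ioctl's return value. A minimal sketch follows (not part of the patch; it assumes a kernel carrying this change exports VFAT_IOCTL_GET_VOLUME_ID through <linux/msdos_fs.h>, and /mnt/vfat is a hypothetical mount point):

    /* vol_id.c - print a FAT volume's serial number via the new ioctl.
     * Assumes VFAT_IOCTL_GET_VOLUME_ID is defined by patched kernel headers.
     */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/msdos_fs.h>

    int main(int argc, char **argv)
    {
        const char *path = argc > 1 ? argv[1] : "/mnt/vfat";
        int fd = open(path, O_RDONLY | O_DIRECTORY);
        int id;

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* the volume ID is carried in the ioctl's return value */
        id = ioctl(fd, VFAT_IOCTL_GET_VOLUME_ID, NULL);
        if (id == -1) {
            perror("ioctl");
            close(fd);
            return 1;
        }
        /* conventional XXXX-XXXX rendering of the 32-bit serial */
        printf("%04X-%04X\n", ((unsigned)id >> 16) & 0xffff, (unsigned)id & 0xffff);
        close(fd);
        return 0;
    }

One design caveat of returning the ID in the return value: a serial of 0xFFFFFFFF is indistinguishable from -1/error, so careful callers should clear and re-check errno around the call.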
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e..cf6f4345ceb 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>

-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
 {
 	int cur = *cnt;

@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
 		if (signal_pending(current))
 			break;
 	}
+	return cur == *cnt ? -ERESTARTSYS : 0;
 }

 static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			 * seen a writer */
 			filp->f_version = pipe->w_counter;
 		} else {
-			wait_for_partner(inode, &pipe->w_counter);
-			if(signal_pending(current))
+			if (wait_for_partner(inode, &pipe->w_counter))
 				goto err_rd;
 		}
 	}
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 		wake_up_partner(inode);

 		if (!pipe->readers) {
-			wait_for_partner(inode, &pipe->r_counter);
-			if (signal_pending(current))
+			if (wait_for_partner(inode, &pipe->r_counter))
 				goto err_wr;
 		}
 		break;
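The fifo.c change moves the signal check into the helper: wait_for_partner() now reports -ERESTARTSYS itself when the partner counter never advanced, instead of having both call sites probe signal_pending() after the fact. Behaviour at the syscall boundary is unchanged and can be observed from user space; the stand-alone sketch below (not part of the patch, hypothetical FIFO path) shows a blocking FIFO open failing with EINTR when a signal arrives and no partner ever does:

    /* fifo_eintr.c - demonstrate an interrupted blocking FIFO open.
     * The handler is installed without SA_RESTART, so the kernel's
     * -ERESTARTSYS surfaces as EINTR instead of restarting open().
     */
    #include <stdio.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <signal.h>
    #include <unistd.h>
    #include <sys/stat.h>

    static void on_alarm(int sig) { (void)sig; /* just interrupt open() */ }

    int main(void)
    {
        const char *path = "/tmp/demo_fifo";   /* hypothetical path */
        struct sigaction sa = { .sa_handler = on_alarm };
        int fd;

        mkfifo(path, 0600);
        sigaction(SIGALRM, &sa, NULL);
        alarm(2);                       /* no writer will come; interrupt us */

        fd = open(path, O_RDONLY);      /* blocks in fifo_open() */
        if (fd < 0 && errno == EINTR)
            printf("open interrupted by signal, as expected\n");
        else if (fd >= 0)
            close(fd);
        unlink(path);
        return 0;
    }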
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0f015a0468d..73c3992b2bb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -35,15 +35,29 @@
 struct wb_writeback_work {
 	long nr_pages;
 	struct super_block *sb;
+	unsigned long *older_than_this;
 	enum writeback_sync_modes sync_mode;
+	unsigned int tagged_writepages:1;
 	unsigned int for_kupdate:1;
 	unsigned int range_cyclic:1;
 	unsigned int for_background:1;
+	enum wb_reason reason;		/* why was writeback initiated? */

 	struct list_head list;		/* pending work list */
 	struct completion *done;	/* set if the caller waits */
 };

+const char *wb_reason_name[] = {
+	[WB_REASON_BACKGROUND]		= "background",
+	[WB_REASON_TRY_TO_FREE_PAGES]	= "try_to_free_pages",
+	[WB_REASON_SYNC]		= "sync",
+	[WB_REASON_PERIODIC]		= "periodic",
+	[WB_REASON_LAPTOP_TIMER]	= "laptop_timer",
+	[WB_REASON_FREE_MORE_MEM]	= "free_more_memory",
+	[WB_REASON_FS_FREE_SPACE]	= "fs_free_space",
+	[WB_REASON_FORKER_THREAD]	= "forker_thread"
+};
+
 /*
  * Include the creation of the trace points after defining the
  * wb_writeback_work structure so that the definition remains local to this
@@ -113,7 +127,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,

 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-		      bool range_cyclic)
+		      bool range_cyclic, enum wb_reason reason)
 {
 	struct wb_writeback_work *work;

@@ -133,6 +147,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	work->sync_mode	= WB_SYNC_NONE;
 	work->nr_pages	= nr_pages;
 	work->range_cyclic = range_cyclic;
+	work->reason	= reason;

 	bdi_queue_work(bdi, work);
 }
@@ -148,9 +163,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  * completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+			enum wb_reason reason)
 {
-	__bdi_start_writeback(bdi, nr_pages, true);
+	__bdi_start_writeback(bdi, nr_pages, true, reason);
 }

 /**
@@ -180,12 +196,13 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
  */
 void inode_wb_list_del(struct inode *inode)
 {
-	spin_lock(&inode_wb_list_lock);
+	struct backing_dev_info *bdi = inode_to_bdi(inode);
+
+	spin_lock(&bdi->wb.list_lock);
 	list_del_init(&inode->i_wb_list);
-	spin_unlock(&inode_wb_list_lock);
+	spin_unlock(&bdi->wb.list_lock);
 }

-
 /*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
@@ -195,11 +212,9 @@ void inode_wb_list_del(struct inode *inode)
  * the case then the inode must have been redirtied while it was being written
  * out and we don't reset its dirtied_when.
  */
-static void redirty_tail(struct inode *inode)
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
-	assert_spin_locked(&inode_wb_list_lock);
+	assert_spin_locked(&wb->list_lock);
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;

@@ -213,11 +228,9 @@ static void redirty_tail(struct inode *inode)
 /*
  * requeue inode for re-scanning after bdi->b_io list is exhausted.
  */
-static void requeue_io(struct inode *inode)
+static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
-	assert_spin_locked(&inode_wb_list_lock);
+	assert_spin_locked(&wb->list_lock);
 	list_move(&inode->i_wb_list, &wb->b_more_io);
 }

@@ -225,7 +238,7 @@ static void inode_sync_complete(struct inode *inode)
 {
 	/*
 	 * Prevent speculative execution through
-	 * spin_unlock(&inode_wb_list_lock);
+	 * spin_unlock(&wb->list_lock);
 	 */

 	smp_mb();
@@ -250,31 +263,33 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 /*
  * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
  */
-static void move_expired_inodes(struct list_head *delaying_queue,
+static int move_expired_inodes(struct list_head *delaying_queue,
 			       struct list_head *dispatch_queue,
-				unsigned long *older_than_this)
+			       struct wb_writeback_work *work)
 {
 	LIST_HEAD(tmp);
 	struct list_head *pos, *node;
 	struct super_block *sb = NULL;
 	struct inode *inode;
 	int do_sb_sort = 0;
+	int moved = 0;

 	while (!list_empty(delaying_queue)) {
 		inode = wb_inode(delaying_queue->prev);
-		if (older_than_this &&
-		    inode_dirtied_after(inode, *older_than_this))
+		if (work->older_than_this &&
+		    inode_dirtied_after(inode, *work->older_than_this))
 			break;
 		if (sb && sb != inode->i_sb)
 			do_sb_sort = 1;
 		sb = inode->i_sb;
 		list_move(&inode->i_wb_list, &tmp);
+		moved++;
 	}

 	/* just one sb in list, splice to dispatch_queue and we're done */
 	if (!do_sb_sort) {
 		list_splice(&tmp, dispatch_queue);
-		return;
+		goto out;
 	}

@@ -286,6 +301,8 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 			list_move(&inode->i_wb_list, dispatch_queue);
 		}
 	}
+out:
+	return moved;
 }

 /*
@@ -299,11 +316,13 @@ static void move_expired_inodes(struct list_head *delaying_queue,
  *                    |
  *                    +--> dequeue for IO
  */
-static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 {
-	assert_spin_locked(&inode_wb_list_lock);
+	int moved;
+	assert_spin_locked(&wb->list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
-	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
+	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
+	trace_writeback_queue_io(wb, work, moved);
 }

 static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -316,7 +335,8 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
 /*
  * Wait for writeback on an inode to complete.
  */
-static void inode_wait_for_writeback(struct inode *inode)
+static void inode_wait_for_writeback(struct inode *inode,
+				     struct bdi_writeback *wb)
 {
 	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
 	wait_queue_head_t *wqh;
@@ -324,15 +344,15 @@ static void inode_wait_for_writeback(struct inode *inode)
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	while (inode->i_state & I_SYNC) {
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_wb_list_lock);
+		spin_unlock(&wb->list_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_wb_list_lock);
+		spin_lock(&wb->list_lock);
 		spin_lock(&inode->i_lock);
 	}
 }

 /*
- * Write out an inode's dirty pages.  Called under inode_wb_list_lock and
+ * Write out an inode's dirty pages.  Called under wb->list_lock and
  * inode->i_lock.  Either the caller has an active reference on the inode or
  * the inode has I_WILL_FREE set.
  *
@@ -343,13 +363,15 @@ static void inode_wait_for_writeback(struct inode *inode)
  * livelocks, etc.
  */
 static int
-writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+		       struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
+	long nr_to_write = wbc->nr_to_write;
 	unsigned dirty;
 	int ret;

-	assert_spin_locked(&inode_wb_list_lock);
+	assert_spin_locked(&wb->list_lock);
 	assert_spin_locked(&inode->i_lock);

 	if (!atomic_read(&inode->i_count))
@@ -367,14 +389,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		 * completed a full scan of b_io.
 		 */
 		if (wbc->sync_mode != WB_SYNC_ALL) {
-			requeue_io(inode);
+			requeue_io(inode, wb);
+			trace_writeback_single_inode_requeue(inode, wbc,
+							     nr_to_write);
 			return 0;
 		}

 		/*
 		 * It's a data-integrity sync.  We must wait.
 		 */
-		inode_wait_for_writeback(inode);
+		inode_wait_for_writeback(inode, wb);
 	}

 	BUG_ON(inode->i_state & I_SYNC);
@@ -383,7 +407,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	inode->i_state |= I_SYNC;
 	inode->i_state &= ~I_DIRTY_PAGES;
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_wb_list_lock);
+	spin_unlock(&wb->list_lock);

 	ret = do_writepages(mapping, wbc);

@@ -414,10 +438,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			ret = err;
 	}

-	spin_lock(&inode_wb_list_lock);
+	spin_lock(&wb->list_lock);
 	spin_lock(&inode->i_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
+		/*
+		 * Sync livelock prevention. Each inode is tagged and synced in
+		 * one shot. If still dirty, it will be redirty_tail()'ed below.
+		 * Update the dirty time to prevent enqueue and sync it again.
+		 */
+		if ((inode->i_state & I_DIRTY) &&
+		    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
+			inode->dirtied_when = jiffies;
+
 		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 			/*
 			 * We didn't write back all the pages.  nfs_writepages()
@@ -428,7 +461,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			/*
 			 * slice used up: queue for next turn
 			 */
-			requeue_io(inode);
+			requeue_io(inode, wb);
 		} else {
 			/*
 			 * Writeback blocked by something other than
@@ -437,7 +470,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 * retrying writeback of the dirty page/inode
 			 * that cannot be performed immediately.
 			 */
-			redirty_tail(inode);
+			redirty_tail(inode, wb);
 		}
 	} else if (inode->i_state & I_DIRTY) {
 		/*
@@ -446,7 +479,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		 * submission or metadata updates after data IO
 		 * completion.
 		 */
-		redirty_tail(inode);
+		redirty_tail(inode, wb);
 	} else {
 		/*
 		 * The inode is clean.  At this point we either have
@@ -457,33 +490,39 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		}
 	}
 	inode_sync_complete(inode);
+	trace_writeback_single_inode(inode, wbc, nr_to_write);
 	return ret;
 }

-/*
- * For background writeback the caller does not have the sb pinned
- * before calling writeback. So make sure that we do pin it, so it doesn't
- * go away while we are writing inodes from it.
- */
-static bool pin_sb_for_writeback(struct super_block *sb)
+static long writeback_chunk_size(struct backing_dev_info *bdi,
+				 struct wb_writeback_work *work)
 {
-	spin_lock(&sb_lock);
-	if (list_empty(&sb->s_instances)) {
-		spin_unlock(&sb_lock);
-		return false;
-	}
+	long pages;

-	sb->s_count++;
-	spin_unlock(&sb_lock);
-
-	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root)
-			return true;
-		up_read(&sb->s_umount);
+	/*
+	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+	 * here avoids calling into writeback_inodes_wb() more than once.
+	 *
+	 * The intended call sequence for WB_SYNC_ALL writeback is:
+	 *
+	 * wb_writeback()
+	 *     writeback_sb_inodes()       <== called only once
+	 *         write_cache_pages()     <== called once for each inode
+	 *             (quickly) tag currently dirty pages
+	 *             (maybe slowly) sync all tagged pages
+	 */
+	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
+		pages = LONG_MAX;
+	else {
+		pages = min(bdi->avg_write_bandwidth / 2,
+			    global_dirty_limit / DIRTY_SCOPE);
+		pages = min(pages, work->nr_pages);
+		pages = round_down(pages + MIN_WRITEBACK_PAGES,
+				   MIN_WRITEBACK_PAGES);
 	}

-	put_super(sb);
-	return false;
+	return pages;
 }

 /*
@@ -493,24 +532,36 @@ static bool pin_sb_for_writeback(struct super_block *sb)
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
  *
- * Return 1, if the caller writeback routine should be
- * interrupted. Otherwise return 0.
+ * Return the number of pages and/or inodes written.
  */
-static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
-		struct writeback_control *wbc, bool only_this_sb)
+static long writeback_sb_inodes(struct super_block *sb,
+				struct bdi_writeback *wb,
+				struct wb_writeback_work *work)
 {
+	struct writeback_control wbc = {
+		.sync_mode		= work->sync_mode,
+		.tagged_writepages	= work->tagged_writepages,
+		.for_kupdate		= work->for_kupdate,
+		.for_background		= work->for_background,
+		.range_cyclic		= work->range_cyclic,
+		.range_start		= 0,
+		.range_end		= LLONG_MAX,
+	};
+	unsigned long start_time = jiffies;
+	long write_chunk;
+	long wrote = 0;  /* count both pages and inodes */
+
 	while (!list_empty(&wb->b_io)) {
-		long pages_skipped;
 		struct inode *inode = wb_inode(wb->b_io.prev);

 		if (inode->i_sb != sb) {
-			if (only_this_sb) {
+			if (work->sb) {
 				/*
 				 * We only want to write back data for this
 				 * superblock, move all inodes not belonging
 				 * to it back onto the dirty list.
 				 */
-				redirty_tail(inode);
+				redirty_tail(inode, wb);
 				continue;
 			}

@@ -519,7 +570,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 			 * Bounce back to the caller to unpin this and
 			 * pin the next superblock.
 			 */
-			return 0;
+			break;
 		}

 		/*
@@ -530,104 +581,124 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
 			spin_unlock(&inode->i_lock);
-			requeue_io(inode);
+			redirty_tail(inode, wb);
 			continue;
 		}
-
-		/*
-		 * Was this inode dirtied after sync_sb_inodes was called?
-		 * This keeps sync from extra jobs and livelock.
-		 */
-		if (inode_dirtied_after(inode, wbc->wb_start)) {
-			spin_unlock(&inode->i_lock);
-			return 1;
-		}
-
 		__iget(inode);
+		write_chunk = writeback_chunk_size(wb->bdi, work);
+		wbc.nr_to_write = write_chunk;
+		wbc.pages_skipped = 0;
+
+		writeback_single_inode(inode, wb, &wbc);

-		pages_skipped = wbc->pages_skipped;
-		writeback_single_inode(inode, wbc);
-		if (wbc->pages_skipped != pages_skipped) {
+		work->nr_pages -= write_chunk - wbc.nr_to_write;
+		wrote += write_chunk - wbc.nr_to_write;
+		if (!(inode->i_state & I_DIRTY))
+			wrote++;
+		if (wbc.pages_skipped) {
 			/*
 			 * writeback is not making progress due to locked
 			 * buffers.  Skip this inode for now.
 			 */
-			redirty_tail(inode);
+			redirty_tail(inode, wb);
 		}
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_wb_list_lock);
+		spin_unlock(&wb->list_lock);
 		iput(inode);
 		cond_resched();
-		spin_lock(&inode_wb_list_lock);
-		if (wbc->nr_to_write <= 0) {
-			wbc->more_io = 1;
-			return 1;
+		spin_lock(&wb->list_lock);
+		/*
+		 * bail out to wb_writeback() often enough to check
+		 * background threshold and other termination conditions.
+		 */
+		if (wrote) {
+			if (time_is_before_jiffies(start_time + HZ / 10UL))
+				break;
+			if (work->nr_pages <= 0)
+				break;
 		}
-		if (!list_empty(&wb->b_more_io))
-			wbc->more_io = 1;
 	}
-	/* b_io is empty */
-	return 1;
+	return wrote;
 }

-void writeback_inodes_wb(struct bdi_writeback *wb,
-		struct writeback_control *wbc)
+static long __writeback_inodes_wb(struct bdi_writeback *wb,
+				  struct wb_writeback_work *work)
 {
-	int ret = 0;
-
-	if (!wbc->wb_start)
-		wbc->wb_start = jiffies; /* livelock avoidance */
-	spin_lock(&inode_wb_list_lock);
-	if (!wbc->for_kupdate || list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
+	unsigned long start_time = jiffies;
+	long wrote = 0;

 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
 		struct super_block *sb = inode->i_sb;

-		if (!pin_sb_for_writeback(sb)) {
-			requeue_io(inode);
+		if (!grab_super_passive(sb)) {
+			/*
+			 * grab_super_passive() may fail consistently due to
+			 * s_umount being grabbed by someone else. Don't use
+			 * requeue_io() to avoid busy retrying the inode/sb.
+			 */
+			redirty_tail(inode, wb);
 			continue;
 		}
-		ret = writeback_sb_inodes(sb, wb, wbc, false);
+		wrote += writeback_sb_inodes(sb, wb, work);
 		drop_super(sb);

-		if (ret)
-			break;
+		/* refer to the same tests at the end of writeback_sb_inodes */
+		if (wrote) {
+			if (time_is_before_jiffies(start_time + HZ / 10UL))
+				break;
+			if (work->nr_pages <= 0)
+				break;
+		}
 	}
-	spin_unlock(&inode_wb_list_lock);
 	/* Leave any unwritten inodes on b_io */
+	return wrote;
 }

-static void __writeback_inodes_sb(struct super_block *sb,
-		struct bdi_writeback *wb, struct writeback_control *wbc)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+				enum wb_reason reason)
 {
-	WARN_ON(!rwsem_is_locked(&sb->s_umount));
+	struct wb_writeback_work work = {
+		.nr_pages	= nr_pages,
+		.sync_mode	= WB_SYNC_NONE,
+		.range_cyclic	= 1,
+		.reason		= reason,
+	};

-	spin_lock(&inode_wb_list_lock);
-	if (!wbc->for_kupdate || list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
-	writeback_sb_inodes(sb, wb, wbc, true);
-	spin_unlock(&inode_wb_list_lock);
-}
+	spin_lock(&wb->list_lock);
+	if (list_empty(&wb->b_io))
+		queue_io(wb, &work);
+	__writeback_inodes_wb(wb, &work);
+	spin_unlock(&wb->list_lock);

-/*
- * The maximum number of pages to writeout in a single bdi flush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES     1024
+	return nr_pages - work.nr_pages;
+}

-static inline bool over_bground_thresh(void)
+static bool over_bground_thresh(struct backing_dev_info *bdi)
 {
 	unsigned long background_thresh, dirty_thresh;

 	global_dirty_limits(&background_thresh, &dirty_thresh);

-	return (global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_UNSTABLE_NFS) > background_thresh);
+	if (global_page_state(NR_FILE_DIRTY) +
+	    global_page_state(NR_UNSTABLE_NFS) > background_thresh)
+		return true;
+
+	if (bdi_stat(bdi, BDI_RECLAIMABLE) >
+				bdi_dirty_limit(bdi, background_thresh))
+		return true;
+
+	return false;
+}
+
+/*
+ * Called under wb->list_lock. If there are multiple wb per bdi,
+ * only the flusher working on the first wb should do it.
+ */
+static void wb_update_bandwidth(struct bdi_writeback *wb,
+				unsigned long start_time)
+{
+	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
 }

 /*
@@ -648,47 +719,16 @@ static inline bool over_bground_thresh(void)
 static long wb_writeback(struct bdi_writeback *wb,
 			 struct wb_writeback_work *work)
 {
-	struct writeback_control wbc = {
-		.sync_mode		= work->sync_mode,
-		.older_than_this	= NULL,
-		.for_kupdate		= work->for_kupdate,
-		.for_background		= work->for_background,
-		.range_cyclic		= work->range_cyclic,
-	};
+	unsigned long wb_start = jiffies;
+	long nr_pages = work->nr_pages;
 	unsigned long oldest_jif;
-	long wrote = 0;
-	long write_chunk;
 	struct inode *inode;
+	long progress;

-	if (wbc.for_kupdate) {
-		wbc.older_than_this = &oldest_jif;
-		oldest_jif = jiffies -
-				msecs_to_jiffies(dirty_expire_interval * 10);
-	}
-	if (!wbc.range_cyclic) {
-		wbc.range_start = 0;
-		wbc.range_end = LLONG_MAX;
-	}
+	oldest_jif = jiffies;
+	work->older_than_this = &oldest_jif;

-	/*
-	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
-	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
-	 * here avoids calling into writeback_inodes_wb() more than once.
-	 *
-	 * The intended call sequence for WB_SYNC_ALL writeback is:
-	 *
-	 * wb_writeback()
-	 *     __writeback_inodes_sb()     <== called only once
-	 *         write_cache_pages()     <== called once for each inode
-	 *             (quickly) tag currently dirty pages
-	 *             (maybe slowly) sync all tagged pages
-	 */
-	if (wbc.sync_mode == WB_SYNC_NONE)
-		write_chunk = MAX_WRITEBACK_PAGES;
-	else
-		write_chunk = LONG_MAX;
-
-	wbc.wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&wb->list_lock);
 	for (;;) {
 		/*
 		 * Stop writeback when nr_pages has been consumed
@@ -710,55 +750,57 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 * For background writeout, stop when we are below the
 		 * background dirty threshold
 		 */
-		if (work->for_background && !over_bground_thresh())
+		if (work->for_background && !over_bground_thresh(wb->bdi))
 			break;

-		wbc.more_io = 0;
-		wbc.nr_to_write = write_chunk;
-		wbc.pages_skipped = 0;
+		if (work->for_kupdate) {
+			oldest_jif = jiffies -
+				msecs_to_jiffies(dirty_expire_interval * 10);
+			work->older_than_this = &oldest_jif;
+		}

-		trace_wbc_writeback_start(&wbc, wb->bdi);
+		trace_writeback_start(wb->bdi, work);
+		if (list_empty(&wb->b_io))
+			queue_io(wb, work);
 		if (work->sb)
-			__writeback_inodes_sb(work->sb, wb, &wbc);
+			progress = writeback_sb_inodes(work->sb, wb, work);
 		else
-			writeback_inodes_wb(wb, &wbc);
-		trace_wbc_writeback_written(&wbc, wb->bdi);
+			progress = __writeback_inodes_wb(wb, work);
+		trace_writeback_written(wb->bdi, work);

-		work->nr_pages -= write_chunk - wbc.nr_to_write;
-		wrote += write_chunk - wbc.nr_to_write;
+		wb_update_bandwidth(wb, wb_start);

 		/*
-		 * If we consumed everything, see if we have more
+		 * Did we write something? Try for more
+		 *
+		 * Dirty inodes are moved to b_io for writeback in batches.
+		 * The completion of the current batch does not necessarily
+		 * mean the overall work is done. So we keep looping as long
+		 * as made some progress on cleaning pages or inodes.
 		 */
-		if (wbc.nr_to_write <= 0)
+		if (progress)
 			continue;
 		/*
-		 * Didn't write everything and we don't have more IO, bail
+		 * No more inodes for IO, bail
 		 */
-		if (!wbc.more_io)
+		if (list_empty(&wb->b_more_io))
 			break;
 		/*
-		 * Did we write something? Try for more
-		 */
-		if (wbc.nr_to_write < write_chunk)
-			continue;
-		/*
 		 * Nothing written. Wait for some inode to
 		 * become available for writeback. Otherwise
 		 * we'll just busyloop.
 		 */
-		spin_lock(&inode_wb_list_lock);
 		if (!list_empty(&wb->b_more_io))  {
+			trace_writeback_wait(wb->bdi, work);
 			inode = wb_inode(wb->b_more_io.prev);
-			trace_wbc_writeback_wait(&wbc, wb->bdi);
 			spin_lock(&inode->i_lock);
-			inode_wait_for_writeback(inode);
+			inode_wait_for_writeback(inode, wb);
 			spin_unlock(&inode->i_lock);
 		}
-		spin_unlock(&inode_wb_list_lock);
 	}
+	spin_unlock(&wb->list_lock);

-	return wrote;
+	return nr_pages - work->nr_pages;
 }

 /*
@@ -792,13 +834,14 @@ static unsigned long get_nr_dirty_pages(void)

 static long wb_check_background_flush(struct bdi_writeback *wb)
 {
-	if (over_bground_thresh()) {
+	if (over_bground_thresh(wb->bdi)) {

 		struct wb_writeback_work work = {
 			.nr_pages	= LONG_MAX,
 			.sync_mode	= WB_SYNC_NONE,
 			.for_background	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_REASON_BACKGROUND,
 		};

 		return wb_writeback(wb, &work);
@@ -832,6 +875,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 			.sync_mode	= WB_SYNC_NONE,
 			.for_kupdate	= 1,
 			.range_cyclic	= 1,
+			.reason		= WB_REASON_PERIODIC,
 		};

 		return wb_writeback(wb, &work);
@@ -950,7 +994,7 @@ int bdi_writeback_thread(void *data)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages)
+void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 {
 	struct backing_dev_info *bdi;

@@ -963,7 +1007,7 @@ void wakeup_flusher_threads(long nr_pages)
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-		__bdi_start_writeback(bdi, nr_pages, false);
+		__bdi_start_writeback(bdi, nr_pages, false, reason);
 	}
 	rcu_read_unlock();
 }
@@ -1089,10 +1133,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		}

 		spin_unlock(&inode->i_lock);
-		spin_lock(&inode_wb_list_lock);
+		spin_lock(&bdi->wb.list_lock);
 		inode->dirtied_when = jiffies;
 		list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
-		spin_unlock(&inode_wb_list_lock);
+		spin_unlock(&bdi->wb.list_lock);

 		if (wakeup_bdi)
 			bdi_wakeup_thread_delayed(bdi);
@@ -1184,14 +1228,18 @@ static void wait_sb_inodes(struct super_block *sb)
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
+void writeback_inodes_sb_nr(struct super_block *sb,
+			    unsigned long nr,
+			    enum wb_reason reason)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
-		.done		= &done,
-		.nr_pages	= nr,
+		.tagged_writepages = 1,
+		.done		= &done,
+		.nr_pages	= nr,
+		.reason		= reason,
 	};

 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1208,9 +1256,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 {
-	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);

@@ -1221,11 +1269,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb)
+int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, reason);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1242,11 +1290,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  * Returns 1 if writeback was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-				   unsigned long nr)
+				   unsigned long nr,
+				   enum wb_reason reason)
 {
 	if (!writeback_in_progress(sb->s_bdi)) {
 		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr);
+		writeback_inodes_sb_nr(sb, nr, reason);
 		up_read(&sb->s_umount);
 		return 1;
 	} else
@@ -1270,6 +1319,7 @@ void sync_inodes_sb(struct super_block *sb)
 		.nr_pages	= LONG_MAX,
 		.range_cyclic	= 0,
 		.done		= &done,
+		.reason		= WB_REASON_SYNC,
 	};

 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1293,6 +1343,7 @@ EXPORT_SYMBOL(sync_inodes_sb);
  */
 int write_inode_now(struct inode *inode, int sync)
 {
+	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	int ret;
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
@@ -1305,11 +1356,11 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;

 	might_sleep();
-	spin_lock(&inode_wb_list_lock);
+	spin_lock(&wb->list_lock);
 	spin_lock(&inode->i_lock);
-	ret = writeback_single_inode(inode, &wbc);
+	ret = writeback_single_inode(inode, wb, &wbc);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_wb_list_lock);
+	spin_unlock(&wb->list_lock);
 	if (sync)
 		inode_sync_wait(inode);
 	return ret;
@@ -1329,13 +1380,14 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
+	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	int ret;

-	spin_lock(&inode_wb_list_lock);
+	spin_lock(&wb->list_lock);
 	spin_lock(&inode->i_lock);
-	ret = writeback_single_inode(inode, wbc);
+	ret = writeback_single_inode(inode, wb, wbc);
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&inode_wb_list_lock);
+	spin_unlock(&wb->list_lock);
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);
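The writeback_chunk_size() policy introduced above is the heart of the nr_to_write rework: integrity syncs get an effectively unlimited chunk, while WB_SYNC_NONE work is sized from the device's measured write bandwidth, capped by the global dirty limit and the remaining work, then rounded to a multiple of MIN_WRITEBACK_PAGES. A stand-alone sketch of that arithmetic follows (the constants are illustrative stand-ins, not the kernel's exact values):

    /* chunk_size.c - the WB_SYNC_NONE chunk sizing, lifted into plain C. */
    #include <stdio.h>
    #include <limits.h>

    #define MIN_WRITEBACK_PAGES 1024   /* ~4MB of 4K pages, illustrative */
    #define DIRTY_SCOPE         8

    static long min_l(long a, long b) { return a < b ? a : b; }
    static long round_down_l(long x, long m) { return x - (x % m); }

    static long writeback_chunk_size(long avg_write_bandwidth,
                                     long global_dirty_limit,
                                     long nr_pages, int integrity_sync)
    {
        long pages;

        if (integrity_sync)        /* WB_SYNC_ALL or tagged_writepages */
            return LONG_MAX;       /* one big pass, no re-queueing */

        /* half the device bandwidth, bounded by the dirty scope... */
        pages = min_l(avg_write_bandwidth / 2,
                      global_dirty_limit / DIRTY_SCOPE);
        /* ...and by how much work is actually left */
        pages = min_l(pages, nr_pages);
        return round_down_l(pages + MIN_WRITEBACK_PAGES,
                            MIN_WRITEBACK_PAGES);
    }

    int main(void)
    {
        /* e.g. ~100MB/s disk: roughly 25600 4K pages per second */
        printf("async:     %ld\n", writeback_chunk_size(25600, 40000, 8192, 0));
        printf("integrity: %ld\n", writeback_chunk_size(25600, 40000, 8192, 1));
        return 0;
    }

The effect is that a slow device gets small slices (so one inode cannot hold I_SYNC for ages and stall throttled writers), while a fast device is allowed bigger, more efficient chunks.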
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 640fc229df1..c858b5c8320 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,6 +19,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
+#include <linux/freezer.h>

 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
@@ -258,10 +259,14 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 	forget->forget_one.nlookup = nlookup;

 	spin_lock(&fc->lock);
-	fc->forget_list_tail->next = forget;
-	fc->forget_list_tail = forget;
-	wake_up(&fc->waitq);
-	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+	if (fc->connected) {
+		fc->forget_list_tail->next = forget;
+		fc->forget_list_tail = forget;
+		wake_up(&fc->waitq);
+		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+	} else {
+		kfree(forget);
+	}
 	spin_unlock(&fc->lock);
 }

@@ -383,7 +388,10 @@ __acquires(fc->lock)
 	 * Wait it out.
 	 */
 	spin_unlock(&fc->lock);
-	wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
+
+	while (req->state != FUSE_REQ_FINISHED)
+		wait_event_freezable(req->waitq,
+				     req->state == FUSE_REQ_FINISHED);
 	spin_lock(&fc->lock);

 	if (!req->aborted)
@@ -1358,6 +1366,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
 	if (outarg.namelen > FUSE_NAME_MAX)
 		goto err;

+	err = -EINVAL;
+	if (size != sizeof(outarg) + outarg.namelen + 1)
+		goto err;
+
 	name.name = buf;
 	name.len = outarg.namelen;
 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
@@ -1504,7 +1516,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	else if (outarg->offset + num > file_size)
 		num = file_size - outarg->offset;

-	while (num) {
+	while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
 		struct page *page;
 		unsigned int this_num;

@@ -1518,6 +1530,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,

 		num -= this_num;
 		total_len += this_num;
+		index++;
 	}
 	req->misc.retrieve_in.offset = outarg->offset;
 	req->misc.retrieve_in.size = total_len;
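The request_wait_answer() change swaps wait_event() for wait_event_freezable() so a task blocked on a FUSE reply no longer blocks system suspend; because a freeze/thaw cycle wakes the sleeper with a fake signal (returning -ERESTARTSYS), the wait must be retried in a loop until the request really finishes. A module-sized sketch of that pattern follows (hypothetical names, not the FUSE code; API details assumed from kernels of this era):

    /* freeze_wait_demo.c - the retried freezable-wait pattern. */
    #include <linux/module.h>
    #include <linux/kthread.h>
    #include <linux/freezer.h>
    #include <linux/wait.h>

    static int demo_done;
    static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
    static struct task_struct *demo_task;

    static int demo_thread(void *unused)
    {
        set_freezable();        /* opt this kthread in to the freezer */
        /*
         * wait_event_freezable() returns -ERESTARTSYS when the freezer
         * wakes us with a fake signal, so loop until the condition is
         * really true -- the same shape as the fuse_dev change above.
         */
        while (!demo_done && !kthread_should_stop())
            wait_event_freezable(demo_waitq,
                                 demo_done || kthread_should_stop());
        return 0;
    }

    static int __init demo_init(void)
    {
        demo_task = kthread_run(demo_thread, NULL, "freeze-demo");
        return IS_ERR(demo_task) ? PTR_ERR(demo_task) : 0;
    }

    static void __exit demo_exit(void)
    {
        demo_done = 1;
        wake_up(&demo_waitq);
        kthread_stop(demo_task);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");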
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d5016071459..c04a025c677 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -858,6 +858,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 	if (stat) {
 		generic_fillattr(inode, stat);
 		stat->mode = fi->orig_i_mode;
+		stat->ino = fi->orig_ino;
 	}
 }

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 82a66466a24..79fca8dfa3e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1710,7 +1710,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
 	size_t n;
 	u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;

-	for (n = 0; n < count; n++) {
+	for (n = 0; n < count; n++, iov++) {
 		if (iov->iov_len > (size_t) max)
 			return -ENOMEM;
 		max -= iov->iov_len;
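The one-character fuse_verify_ioctl_iov() fix is easy to miss: without iov++ the loop re-validates iov[0] count times, so an oversized later entry slips through the check. A stand-alone before/after comparison (using an illustrative cap instead of FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT):

    /* iov_check.c - demonstrate the stuck-pointer bug and the fix. */
    #include <stdio.h>
    #include <sys/uio.h>

    #define MAX_BYTES 4096    /* illustrative cap, not the kernel's value */

    static int verify_buggy(struct iovec *iov, size_t count)
    {
        size_t n, max = MAX_BYTES;

        for (n = 0; n < count; n++) {        /* iov never advances! */
            if (iov->iov_len > max)
                return -1;
            max -= iov->iov_len;
        }
        return 0;
    }

    static int verify_fixed(struct iovec *iov, size_t count)
    {
        size_t n, max = MAX_BYTES;

        for (n = 0; n < count; n++, iov++) { /* check every entry */
            if (iov->iov_len > max)
                return -1;
            max -= iov->iov_len;
        }
        return 0;
    }

    int main(void)
    {
        struct iovec iov[2] = {
            { .iov_base = NULL, .iov_len = 16 },
            { .iov_base = NULL, .iov_len = 100000 },  /* oversized */
        };

        printf("buggy: %d (oversized entry missed)\n", verify_buggy(iov, 2));
        printf("fixed: %d (oversized entry caught)\n", verify_fixed(iov, 2));
        return 0;
    }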
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b788becada7..f6215501097 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -82,6 +82,9 @@ struct fuse_inode {
 	    preserve the original mode */
 	mode_t orig_i_mode;

+	/** 64 bit inode number */
+	u64 orig_ino;
+
 	/** Version of last attribute change */
 	u64 attr_version;

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 38f84cd48b6..69a1e0f04f4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -91,6 +91,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	fi->nlookup = 0;
 	fi->attr_version = 0;
 	fi->writectr = 0;
+	fi->orig_ino = 0;
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
 	INIT_LIST_HEAD(&fi->writepages);
@@ -140,6 +141,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
 	return 0;
 }

+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static ino_t fuse_squash_ino(u64 ino64)
+{
+	ino_t ino = (ino_t) ino64;
+	if (sizeof(ino_t) < sizeof(u64))
+		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
+	return ino;
+}
+
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 				   u64 attr_valid)
 {
@@ -149,7 +162,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 	fi->attr_version = ++fc->attr_version;
 	fi->i_time = attr_valid;

-	inode->i_ino = attr->ino;
+	inode->i_ino = fuse_squash_ino(attr->ino);
 	inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 	inode->i_nlink = attr->nlink;
 	inode->i_uid = attr->uid;
@@ -175,6 +188,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 	fi->orig_i_mode = inode->i_mode;
 	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
 		inode->i_mode &= ~S_ISVTX;
+
+	fi->orig_ino = attr->ino;
 }

 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
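fuse_squash_ino() folds the top half of a 64-bit inode number into the bottom half with XOR rather than simply truncating, so servers that allocate inode numbers in distinct high ranges still yield distinct 32-bit i_ino values (the unsquashed original is preserved in orig_ino for stat()). The fold, extracted into a runnable user-space form with demo_ino_t standing in for a 32-bit ino_t:

    /* squash_ino.c - the XOR-fold used by fuse_squash_ino(). */
    #include <stdio.h>
    #include <stdint.h>

    typedef uint32_t demo_ino_t;   /* stand-in for a 32-bit ino_t */

    static demo_ino_t squash_ino(uint64_t ino64)
    {
        demo_ino_t ino = (demo_ino_t)ino64;

        /* fold the high 32 bits into the low 32 bits */
        if (sizeof(demo_ino_t) < sizeof(uint64_t))
            ino ^= ino64 >> (sizeof(uint64_t) - sizeof(demo_ino_t)) * 8;
        return ino;
    }

    int main(void)
    {
        uint64_t ino64 = 0x123456789abcdef0ULL;

        /* 0x9abcdef0 ^ 0x12345678 = 0x88888888 */
        printf("%llx -> %x\n", (unsigned long long)ino64,
               (unsigned)squash_ino(ino64));
        return 0;
    }

On a 64-bit kernel (where ino_t is already 64 bits) the size test is false at compile time and the value passes through unchanged.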
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2a77071fb7b..fa780e66691 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1018,13 +1018,13 @@ hostdata_error:
 	fsname++;
 	if (lm->lm_mount == NULL) {
 		fs_info(sdp, "Now mounting FS...\n");
-		complete(&sdp->sd_locking_init);
+		complete_all(&sdp->sd_locking_init);
 		return 0;
 	}
 	ret = lm->lm_mount(sdp, fsname);
 	if (ret == 0)
 		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
-	complete(&sdp->sd_locking_init);
+	complete_all(&sdp->sd_locking_init);
 	return ret;
 }

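The gfs2 change matters because complete() releases exactly one waiter per call, while sd_locking_init is a one-shot "locking is initialised" event that any number of tasks may wait on; complete_all() latches the completion so every present and future waiter proceeds. A module-sized sketch of the difference (hypothetical names; a real module would also make sure the waiters have exited before unload):

    /* completion_demo.c - complete() vs complete_all(). */
    #include <linux/module.h>
    #include <linux/kthread.h>
    #include <linux/completion.h>
    #include <linux/delay.h>

    static DECLARE_COMPLETION(init_done);

    static int waiter(void *data)
    {
        wait_for_completion(&init_done);
        pr_info("waiter %ld released\n", (long)data);
        return 0;
    }

    static int __init demo_init(void)
    {
        long i;

        for (i = 0; i < 3; i++)
            kthread_run(waiter, (void *)i, "waiter%ld", i);
        msleep(100);
        /*
         * complete(&init_done) here would release only ONE waiter;
         * complete_all() releases all three, plus any later arrival.
         */
        complete_all(&init_done);
        return 0;
    }

    static void __exit demo_exit(void) { }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");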
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 3ebc437736f..1cbdeea1db4 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -46,11 +46,26 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
 	case HFS_EXT_CNID:
 		hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
 				    mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz));
+		if (HFS_I(tree->inode)->alloc_blocks >
+		    HFS_I(tree->inode)->first_blocks) {
+			printk(KERN_ERR "hfs: invalid btree extent records\n");
+			unlock_new_inode(tree->inode);
+			goto free_inode;
+		}
+
 		tree->inode->i_mapping->a_ops = &hfs_btree_aops;
 		break;
 	case HFS_CAT_CNID:
 		hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize,
 				    mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz));
+
+		if (!HFS_I(tree->inode)->first_blocks) {
+			printk(KERN_ERR "hfs: invalid btree extent records "
+								"(0 size).\n");
+			unlock_new_inode(tree->inode);
+			goto free_inode;
+		}
+
 		tree->inode->i_mapping->a_ops = &hfs_btree_aops;
 		break;
 	default:
@@ -59,11 +74,6 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
 	}
 	unlock_new_inode(tree->inode);

-	if (!HFS_I(tree->inode)->first_blocks) {
-		printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n");
-		goto free_inode;
-	}
-
 	mapping = tree->inode->i_mapping;
 	page = read_mapping_page(mapping, 0, NULL);
 	if (IS_ERR(page))
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index e673a88b8ae..b1ce4c7ad3f 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -40,6 +40,8 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)

 	src = in->name;
 	srclen = in->len;
+	if (srclen > HFS_NAMELEN)
+		srclen = HFS_NAMELEN;
 	dst = out;
 	dstlen = HFS_MAX_NAMELEN;
 	if (nls_io) {
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index b4ba1b31933..408073ae7a2 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -360,6 +360,10 @@ int hfsplus_rename_cat(u32 cnid,
 	err = hfs_brec_find(&src_fd);
 	if (err)
 		goto out;
+	if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
+		err = -EIO;
+		goto out;
+	}

 	hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
 				src_fd.entrylength);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4df5059c25d..159f5ebf519 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -146,6 +146,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		filp->f_pos++;
 		/* fall through */
 	case 1:
+		if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+			err = -EIO;
+			goto out;
+		}
+
 		hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
 			fd.entrylength);
 		if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
@@ -177,6 +182,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			err = -EIO;
 			goto out;
 		}
+
+		if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+			err = -EIO;
+			goto out;
+		}
+
 		hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
 			fd.entrylength);
 		type = be16_to_cpu(entry.type);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d6857523336..4e7f64b705d 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -13,6 +13,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 #include "hfsplus_raw.h"

 #define DBG_BNODE_REFS		0x00000001
@@ -110,7 +111,9 @@ struct hfsplus_vh;
 struct hfs_btree;

 struct hfsplus_sb_info {
+	void *s_vhdr_buf;
 	struct hfsplus_vh *s_vhdr;
+	void *s_backup_vhdr_buf;
 	struct hfsplus_vh *s_backup_vhdr;
 	struct hfs_btree *ext_tree;
 	struct hfs_btree *cat_tree;
@@ -258,6 +261,15 @@ struct hfsplus_readdir_data {
 	struct hfsplus_cat_key key;
 };

+/*
+ * Find minimum acceptable I/O size for an hfsplus sb.
+ */
+static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
+{
+	return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
+		     HFSPLUS_SECTOR_SIZE);
+}
+
 #define hfs_btree_open hfsplus_btree_open
 #define hfs_btree_close hfsplus_btree_close
 #define hfs_btree_write hfsplus_btree_write
@@ -436,8 +448,8 @@ int hfsplus_compare_dentry(const struct dentry *parent,
 /* wrapper.c */
 int hfsplus_read_wrapper(struct super_block *);
 int hfs_part_find(struct super_block *, sector_t *, sector_t *);
-int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
-		void *data, int rw);
+int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
+		void *buf, void **data, int rw);

 /* time macros */
 #define __hfsp_mt2ut(t)		(be32_to_cpu(t) - 2082844800U)
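hfsplus_min_io_size() exists because HFS+ addresses the medium in 512-byte sectors while the underlying block device may refuse I/O smaller than its logical block size (4096 bytes on 4Kn drives); the helper simply picks the larger of the two as the unit of I/O, which is why callers below allocate and pass a separate, possibly larger buffer. The same computation in stand-alone form:

    /* min_io.c - what hfsplus_min_io_size() computes. */
    #include <stdio.h>

    #define HFSPLUS_SECTOR_SIZE 512

    static unsigned short min_io_size(unsigned short logical_block_size)
    {
        /* equivalent of max_t(unsigned short, lbs, HFSPLUS_SECTOR_SIZE) */
        return logical_block_size > HFSPLUS_SECTOR_SIZE ?
                logical_block_size : HFSPLUS_SECTOR_SIZE;
    }

    int main(void)
    {
        printf("512-byte sectors: %u\n", min_io_size(512));   /* -> 512  */
        printf("4K native disk:   %u\n", min_io_size(4096));  /* -> 4096 */
        return 0;
    }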
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 40ad88c12c6..eb355d81e27 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -88,11 +88,12 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
88 return -ENOENT; 88 return -ENOENT;
89} 89}
90 90
91static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, 91static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
92 sector_t *part_start, sector_t *part_size) 92 struct new_pmap *pm, sector_t *part_start, sector_t *part_size)
93{ 93{
94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
95 int size = be32_to_cpu(pm->pmMapBlkCnt); 95 int size = be32_to_cpu(pm->pmMapBlkCnt);
96 int buf_size = hfsplus_min_io_size(sb);
96 int res; 97 int res;
97 int i = 0; 98 int i = 0;
98 99
@@ -107,11 +108,14 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
107 if (++i >= size) 108 if (++i >= size)
108 return -ENOENT; 109 return -ENOENT;
109 110
110 res = hfsplus_submit_bio(sb->s_bdev, 111 pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE);
111 *part_start + HFS_PMAP_BLK + i, 112 if ((u8 *)pm - (u8 *)buf >= buf_size) {
112 pm, READ); 113 res = hfsplus_submit_bio(sb,
113 if (res) 114 *part_start + HFS_PMAP_BLK + i,
114 return res; 115 buf, (void **)&pm, READ);
116 if (res)
117 return res;
118 }
115 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); 119 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
116 120
117 return -ENOENT; 121 return -ENOENT;
@@ -124,15 +128,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
124int hfs_part_find(struct super_block *sb, 128int hfs_part_find(struct super_block *sb,
125 sector_t *part_start, sector_t *part_size) 129 sector_t *part_start, sector_t *part_size)
126{ 130{
127 void *data; 131 void *buf, *data;
128 int res; 132 int res;
129 133
130 data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 134 buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
131 if (!data) 135 if (!buf)
132 return -ENOMEM; 136 return -ENOMEM;
133 137
134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, 138 res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
135 data, READ); 139 buf, &data, READ);
136 if (res) 140 if (res)
137 goto out; 141 goto out;
138 142
@@ -141,13 +145,13 @@ int hfs_part_find(struct super_block *sb,
141 res = hfs_parse_old_pmap(sb, data, part_start, part_size); 145 res = hfs_parse_old_pmap(sb, data, part_start, part_size);
142 break; 146 break;
143 case HFS_NEW_PMAP_MAGIC: 147 case HFS_NEW_PMAP_MAGIC:
144 res = hfs_parse_new_pmap(sb, data, part_start, part_size); 148 res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size);
145 break; 149 break;
146 default: 150 default:
147 res = -ENOENT; 151 res = -ENOENT;
148 break; 152 break;
149 } 153 }
150out: 154out:
151 kfree(data); 155 kfree(buf);
152 return res; 156 return res;
153} 157}
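
The reworked hfs_parse_new_pmap() walks partition-map entries in 512-byte steps inside a buffer of hfsplus_min_io_size(sb) bytes and only resubmits a bio once the pointer runs off the end of that buffer. A minimal, runnable userspace sketch of the pointer arithmetic, with a 4096-byte minimum I/O size assumed and the bio submission stubbed out:

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SIZE 512
#define MIN_IO_SIZE 4096        /* assumed hfsplus_min_io_size(sb) */

int main(void)
{
        uint8_t buf[MIN_IO_SIZE];
        uint8_t *pm = buf;      /* current partition-map entry */

        for (int i = 1; i < 12; i++) {
                pm += SECTOR_SIZE;      /* entries are one sector apart */
                if (pm - buf >= MIN_IO_SIZE) {
                        /* here hfsplus_submit_bio() would refill buf and
                         * hand back, via its out-pointer, the address of
                         * the requested sector; simplified to buf here */
                        printf("entry %2d: buffer exhausted, resubmit bio\n", i);
                        pm = buf;
                } else {
                        printf("entry %2d: in buffer at offset %td\n",
                               i, pm - buf);
                }
        }
        return 0;
}
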
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 84a47b709f5..c3a76fd25f2 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -197,17 +197,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
197 write_backup = 1; 197 write_backup = 1;
198 } 198 }
199 199
200 error2 = hfsplus_submit_bio(sb->s_bdev, 200 error2 = hfsplus_submit_bio(sb,
201 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, 201 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
202 sbi->s_vhdr, WRITE_SYNC); 202 sbi->s_vhdr_buf, NULL, WRITE_SYNC);
203 if (!error) 203 if (!error)
204 error = error2; 204 error = error2;
205 if (!write_backup) 205 if (!write_backup)
206 goto out; 206 goto out;
207 207
208 error2 = hfsplus_submit_bio(sb->s_bdev, 208 error2 = hfsplus_submit_bio(sb,
209 sbi->part_start + sbi->sect_count - 2, 209 sbi->part_start + sbi->sect_count - 2,
210 sbi->s_backup_vhdr, WRITE_SYNC); 210 sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
211 if (!error) 211 if (!error)
212 error2 = error; 212 error2 = error;
213out: 213out:
@@ -251,8 +251,8 @@ static void hfsplus_put_super(struct super_block *sb)
251 hfs_btree_close(sbi->ext_tree); 251 hfs_btree_close(sbi->ext_tree);
252 iput(sbi->alloc_file); 252 iput(sbi->alloc_file);
253 iput(sbi->hidden_dir); 253 iput(sbi->hidden_dir);
254 kfree(sbi->s_vhdr); 254 kfree(sbi->s_vhdr_buf);
255 kfree(sbi->s_backup_vhdr); 255 kfree(sbi->s_backup_vhdr_buf);
256 unload_nls(sbi->nls); 256 unload_nls(sbi->nls);
257 kfree(sb->s_fs_info); 257 kfree(sb->s_fs_info);
258 sb->s_fs_info = NULL; 258 sb->s_fs_info = NULL;
@@ -508,8 +508,8 @@ out_close_cat_tree:
508out_close_ext_tree: 508out_close_ext_tree:
509 hfs_btree_close(sbi->ext_tree); 509 hfs_btree_close(sbi->ext_tree);
510out_free_vhdr: 510out_free_vhdr:
511 kfree(sbi->s_vhdr); 511 kfree(sbi->s_vhdr_buf);
512 kfree(sbi->s_backup_vhdr); 512 kfree(sbi->s_backup_vhdr_buf);
513out_unload_nls: 513out_unload_nls:
514 unload_nls(sbi->nls); 514 unload_nls(sbi->nls);
515 unload_nls(nls); 515 unload_nls(nls);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 4ac88ff79aa..aac1563174e 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -31,25 +31,67 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
31 complete(bio->bi_private); 31 complete(bio->bi_private);
32} 32}
33 33
34int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, 34/*
35 void *data, int rw) 35 * hfsplus_submit_bio - Perform block I/O
36 * @sb: super block of volume for I/O
37 * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
38 * @buf: buffer for I/O
39 * @data: output pointer for location of requested data
40 * @rw: direction of I/O
41 *
42 * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
43 * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
44 * @data will return a pointer to the start of the requested sector,
45 * which may not be the same location as @buf.
46 *
47 * If @sector is not aligned to the bdev logical block size it will
48 * be rounded down. For writes this means that @buf should contain data
49 * that starts at the rounded-down address. As long as the data was
50 * read using hfsplus_submit_bio() and the same buffer is used things
51 * will work correctly.
52 */
53int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
54 void *buf, void **data, int rw)
36{ 55{
37 DECLARE_COMPLETION_ONSTACK(wait); 56 DECLARE_COMPLETION_ONSTACK(wait);
38 struct bio *bio; 57 struct bio *bio;
39 int ret = 0; 58 int ret = 0;
59 u64 io_size;
60 loff_t start;
61 int offset;
62
63 /*
64 * Align sector to hardware sector size and find offset. We
65 * assume that io_size is a power of two, which _should_
66 * be true.
67 */
68 io_size = hfsplus_min_io_size(sb);
69 start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
70 offset = start & (io_size - 1);
71 sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
40 72
41 bio = bio_alloc(GFP_NOIO, 1); 73 bio = bio_alloc(GFP_NOIO, 1);
42 bio->bi_sector = sector; 74 bio->bi_sector = sector;
43 bio->bi_bdev = bdev; 75 bio->bi_bdev = sb->s_bdev;
44 bio->bi_end_io = hfsplus_end_io_sync; 76 bio->bi_end_io = hfsplus_end_io_sync;
45 bio->bi_private = &wait; 77 bio->bi_private = &wait;
46 78
47 /* 79 if (!(rw & WRITE) && data)
48 * We always submit one sector at a time, so bio_add_page must not fail. 80 *data = (u8 *)buf + offset;
49 */ 81
50 if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, 82 while (io_size > 0) {
51 offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) 83 unsigned int page_offset = offset_in_page(buf);
52 BUG(); 84 unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
85 io_size);
86
87 ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
88 if (ret != len) {
89 ret = -EIO;
90 goto out;
91 }
92 io_size -= len;
93 buf = (u8 *)buf + len;
94 }
53 95
54 submit_bio(rw, bio); 96 submit_bio(rw, bio);
55 wait_for_completion(&wait); 97 wait_for_completion(&wait);
@@ -57,8 +99,9 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
57 if (!bio_flagged(bio, BIO_UPTODATE)) 99 if (!bio_flagged(bio, BIO_UPTODATE))
58 ret = -EIO; 100 ret = -EIO;
59 101
102out:
60 bio_put(bio); 103 bio_put(bio);
61 return ret; 104 return ret < 0 ? ret : 0;
62} 105}
63 106
64static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) 107static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
@@ -147,17 +190,17 @@ int hfsplus_read_wrapper(struct super_block *sb)
147 } 190 }
148 191
149 error = -ENOMEM; 192 error = -ENOMEM;
150 sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 193 sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
151 if (!sbi->s_vhdr) 194 if (!sbi->s_vhdr_buf)
152 goto out; 195 goto out;
153 sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 196 sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
154 if (!sbi->s_backup_vhdr) 197 if (!sbi->s_backup_vhdr_buf)
155 goto out_free_vhdr; 198 goto out_free_vhdr;
156 199
157reread: 200reread:
158 error = hfsplus_submit_bio(sb->s_bdev, 201 error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
159 part_start + HFSPLUS_VOLHEAD_SECTOR, 202 sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
160 sbi->s_vhdr, READ); 203 READ);
161 if (error) 204 if (error)
162 goto out_free_backup_vhdr; 205 goto out_free_backup_vhdr;
163 206
@@ -186,9 +229,9 @@ reread:
186 goto reread; 229 goto reread;
187 } 230 }
188 231
189 error = hfsplus_submit_bio(sb->s_bdev, 232 error = hfsplus_submit_bio(sb, part_start + part_size - 2,
190 part_start + part_size - 2, 233 sbi->s_backup_vhdr_buf,
191 sbi->s_backup_vhdr, READ); 234 (void **)&sbi->s_backup_vhdr, READ);
192 if (error) 235 if (error)
193 goto out_free_backup_vhdr; 236 goto out_free_backup_vhdr;
194 237
@@ -232,9 +275,9 @@ reread:
232 return 0; 275 return 0;
233 276
234out_free_backup_vhdr: 277out_free_backup_vhdr:
235 kfree(sbi->s_backup_vhdr); 278 kfree(sbi->s_backup_vhdr_buf);
236out_free_vhdr: 279out_free_vhdr:
237 kfree(sbi->s_vhdr); 280 kfree(sbi->s_vhdr_buf);
238out: 281out:
239 return error; 282 return error;
240} 283}
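
The new alignment logic in hfsplus_submit_bio() rounds the requested 512-byte sector down to the device's minimum I/O unit and reports where the caller's data actually starts inside the buffer. A runnable worked example of that math, assuming a 4096-byte minimum I/O size:

#include <stdio.h>
#include <stdint.h>

#define HFSPLUS_SECTOR_SHIFT 9          /* 512-byte sectors */

int main(void)
{
        uint64_t io_size = 4096;        /* assumed hfsplus_min_io_size(sb) */
        uint64_t sector = 21;           /* requested 512-byte sector */
        uint64_t start = sector << HFSPLUS_SECTOR_SHIFT;
        uint64_t offset = start & (io_size - 1);  /* offset inside io unit */

        /* round the sector down to an io_size boundary */
        sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);

        /* prints "aligned sector 16, data at buffer offset 2560" */
        printf("aligned sector %llu, data at buffer offset %llu\n",
               (unsigned long long)sector, (unsigned long long)offset);
        return 0;
}
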
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 85c098a499f..9d71c95b193 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -16,6 +16,7 @@
16#include <linux/statfs.h> 16#include <linux/statfs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/pid_namespace.h> 18#include <linux/pid_namespace.h>
19#include <linux/namei.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include "os.h" 21#include "os.h"
21 22
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7aafeb8fa30..6327a069d83 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -238,17 +238,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
238 loff_t isize; 238 loff_t isize;
239 ssize_t retval = 0; 239 ssize_t retval = 0;
240 240
241 mutex_lock(&inode->i_mutex);
242
243 /* validate length */ 241 /* validate length */
244 if (len == 0) 242 if (len == 0)
245 goto out; 243 goto out;
246 244
247 isize = i_size_read(inode);
248 if (!isize)
249 goto out;
250
251 end_index = (isize - 1) >> huge_page_shift(h);
252 for (;;) { 245 for (;;) {
253 struct page *page; 246 struct page *page;
254 unsigned long nr, ret; 247 unsigned long nr, ret;
@@ -256,18 +249,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
256 249
257 /* nr is the maximum number of bytes to copy from this page */ 250 /* nr is the maximum number of bytes to copy from this page */
258 nr = huge_page_size(h); 251 nr = huge_page_size(h);
252 isize = i_size_read(inode);
253 if (!isize)
254 goto out;
255 end_index = (isize - 1) >> huge_page_shift(h);
259 if (index >= end_index) { 256 if (index >= end_index) {
260 if (index > end_index) 257 if (index > end_index)
261 goto out; 258 goto out;
262 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 259 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
263 if (nr <= offset) { 260 if (nr <= offset)
264 goto out; 261 goto out;
265 }
266 } 262 }
267 nr = nr - offset; 263 nr = nr - offset;
268 264
269 /* Find the page */ 265 /* Find the page */
270 page = find_get_page(mapping, index); 266 page = find_lock_page(mapping, index);
271 if (unlikely(page == NULL)) { 267 if (unlikely(page == NULL)) {
272 /* 268 /*
273 * We have a HOLE, zero out the user-buffer for the 269 * We have a HOLE, zero out the user-buffer for the
@@ -279,17 +275,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
279 else 275 else
280 ra = 0; 276 ra = 0;
281 } else { 277 } else {
278 unlock_page(page);
279
282 /* 280 /*
283 * We have the page, copy it to user space buffer. 281 * We have the page, copy it to user space buffer.
284 */ 282 */
285 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 283 ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
286 ret = ra; 284 ret = ra;
285 page_cache_release(page);
287 } 286 }
288 if (ra < 0) { 287 if (ra < 0) {
289 if (retval == 0) 288 if (retval == 0)
290 retval = ra; 289 retval = ra;
291 if (page)
292 page_cache_release(page);
293 goto out; 290 goto out;
294 } 291 }
295 292
@@ -299,16 +296,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
299 index += offset >> huge_page_shift(h); 296 index += offset >> huge_page_shift(h);
300 offset &= ~huge_page_mask(h); 297 offset &= ~huge_page_mask(h);
301 298
302 if (page)
303 page_cache_release(page);
304
305 /* short read or no more work */ 299 /* short read or no more work */
306 if ((ret != nr) || (len == 0)) 300 if ((ret != nr) || (len == 0))
307 break; 301 break;
308 } 302 }
309out: 303out:
310 *ppos = ((loff_t)index << huge_page_shift(h)) + offset; 304 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
311 mutex_unlock(&inode->i_mutex);
312 return retval; 305 return retval;
313} 306}
314 307
@@ -575,7 +568,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
575} 568}
576 569
577static int hugetlbfs_migrate_page(struct address_space *mapping, 570static int hugetlbfs_migrate_page(struct address_space *mapping,
578 struct page *newpage, struct page *page) 571 struct page *newpage, struct page *page,
572 enum migrate_mode mode)
579{ 573{
580 int rc; 574 int rc;
581 575
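
Dropping i_mutex means the file size can change while hugetlbfs_read() runs, which is why the hunks above re-sample i_size_read() on every pass and pin each page with find_lock_page() before copying. A runnable userspace analogue of the per-iteration revalidation (names illustrative, not kernel API):

#include <stdio.h>

static long isize = 5;                  /* stands in for i_size_read() */

static void concurrent_truncate(long n) { isize = n; }

int main(void)
{
        for (long index = 0; ; index++) {
                long size = isize;      /* revalidated each iteration */
                if (index >= size) {
                        printf("stop at index %ld, size is now %ld\n",
                               index, size);
                        break;
                }
                printf("copy page %ld of %ld\n", index, size);
                if (index == 1)
                        concurrent_truncate(3); /* racing truncate */
        }
        return 0;
}
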
diff --git a/fs/inode.c b/fs/inode.c
index dbf0e760c0a..7aa468a4b19 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -37,7 +37,7 @@
37 * inode_lru, inode->i_lru 37 * inode_lru, inode->i_lru
38 * inode_sb_list_lock protects: 38 * inode_sb_list_lock protects:
39 * sb->s_inodes, inode->i_sb_list 39 * sb->s_inodes, inode->i_sb_list
40 * inode_wb_list_lock protects: 40 * bdi->wb.list_lock protects:
41 * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list 41 * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
42 * inode_hash_lock protects: 42 * inode_hash_lock protects:
43 * inode_hashtable, inode->i_hash 43 * inode_hashtable, inode->i_hash
@@ -48,7 +48,7 @@
48 * inode->i_lock 48 * inode->i_lock
49 * inode_lru_lock 49 * inode_lru_lock
50 * 50 *
51 * inode_wb_list_lock 51 * bdi->wb.list_lock
52 * inode->i_lock 52 * inode->i_lock
53 * 53 *
54 * inode_hash_lock 54 * inode_hash_lock
@@ -68,7 +68,6 @@ static LIST_HEAD(inode_lru);
68static DEFINE_SPINLOCK(inode_lru_lock); 68static DEFINE_SPINLOCK(inode_lru_lock);
69 69
70__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); 70__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
71__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
72 71
73/* 72/*
74 * iprune_sem provides exclusion between the icache shrinking and the 73 * iprune_sem provides exclusion between the icache shrinking and the
diff --git a/fs/internal.h b/fs/internal.h
index b29c46e4e32..c905f59dbbe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,6 +97,7 @@ extern struct file *get_empty_filp(void);
97 * super.c 97 * super.c
98 */ 98 */
99extern int do_remount_sb(struct super_block *, int, void *, int); 99extern int do_remount_sb(struct super_block *, int, void *, int);
100extern bool grab_super_passive(struct super_block *sb);
100extern void __put_super(struct super_block *sb); 101extern void __put_super(struct super_block *sb);
101extern void put_super(struct super_block *sb); 102extern void put_super(struct super_block *sb);
102extern struct dentry *mount_fs(struct file_system_type *, 103extern struct dentry *mount_fs(struct file_system_type *,
@@ -135,3 +136,5 @@ extern void inode_wb_list_del(struct inode *inode);
135extern int get_nr_dirty_inodes(void); 136extern int get_nr_dirty_inodes(void);
136extern void evict_inodes(struct super_block *); 137extern void evict_inodes(struct super_block *);
137extern int invalidate_inodes(struct super_block *, bool); 138extern int invalidate_inodes(struct super_block *, bool);
139
140extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 7da2a06508e..95a6c2b04e0 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -30,7 +30,7 @@
30 30
31int set_task_ioprio(struct task_struct *task, int ioprio) 31int set_task_ioprio(struct task_struct *task, int ioprio)
32{ 32{
33 int err; 33 int err, i;
34 struct io_context *ioc; 34 struct io_context *ioc;
35 const struct cred *cred = current_cred(), *tcred; 35 const struct cred *cred = current_cred(), *tcred;
36 36
@@ -60,12 +60,17 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
60 err = -ENOMEM; 60 err = -ENOMEM;
61 break; 61 break;
62 } 62 }
63 /* let other ioc users see the new values */
64 smp_wmb();
63 task->io_context = ioc; 65 task->io_context = ioc;
64 } while (1); 66 } while (1);
65 67
66 if (!err) { 68 if (!err) {
67 ioc->ioprio = ioprio; 69 ioc->ioprio = ioprio;
68 ioc->ioprio_changed = 1; 70 /* make sure schedulers see the new ioprio value */
71 wmb();
72 for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
73 set_bit(i, ioc->ioprio_changed);
69 } 74 }
70 75
71 task_unlock(task); 76 task_unlock(task);
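
The ordering these barriers enforce is publish-then-flag: the new ioprio value must be visible before any scheduler observes its changed bit. A runnable C11 analogue of that pairing, with release/acquire standing in for the kernel's wmb()/rmb(); the scheduler-side reader here is an illustration, not the in-tree elevator code:

#include <stdio.h>
#include <stdatomic.h>

static int ioprio;                      /* payload published by the writer */
static atomic_int ioprio_changed;       /* stands in for one changed bit */

static void set_prio(int prio)
{
        ioprio = prio;
        /* release ordering plays the role of wmb() in set_task_ioprio() */
        atomic_store_explicit(&ioprio_changed, 1, memory_order_release);
}

static void scheduler_poll(void)
{
        if (atomic_exchange_explicit(&ioprio_changed, 0, memory_order_acquire))
                printf("scheduler sees new ioprio %d\n", ioprio);
}

int main(void)
{
        set_prio(4);
        scheduler_poll();
        return 0;
}
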
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index e2d4285fbe9..9f36384e2e8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1131,6 +1131,14 @@ static int journal_get_superblock(journal_t *journal)
1131 goto out; 1131 goto out;
1132 } 1132 }
1133 1133
1134 if (be32_to_cpu(sb->s_first) == 0 ||
1135 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1136 printk(KERN_WARNING
1137 "JBD: Invalid start block of journal: %u\n",
1138 be32_to_cpu(sb->s_first));
1139 goto out;
1140 }
1141
1134 return 0; 1142 return 0;
1135 1143
1136out: 1144out:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979821a..36c2e800e73 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -683,7 +683,7 @@ start_journal_io:
683 if (commit_transaction->t_need_data_flush && 683 if (commit_transaction->t_need_data_flush &&
684 (journal->j_fs_dev != journal->j_dev) && 684 (journal->j_fs_dev != journal->j_dev) &&
685 (journal->j_flags & JBD2_BARRIER)) 685 (journal->j_flags & JBD2_BARRIER))
686 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 686 blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
687 687
688 /* Done it all: now write the commit record asynchronously. */ 688 /* Done it all: now write the commit record asynchronously. */
689 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 689 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -819,7 +819,7 @@ wait_for_iobuf:
819 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 819 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
820 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && 820 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
821 journal->j_flags & JBD2_BARRIER) { 821 journal->j_flags & JBD2_BARRIER) {
822 blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); 822 blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
823 } 823 }
824 824
825 if (err) 825 if (err)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0dfa5b598e6..40c5fb73e9c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1251,6 +1251,14 @@ static int journal_get_superblock(journal_t *journal)
1251 goto out; 1251 goto out;
1252 } 1252 }
1253 1253
1254 if (be32_to_cpu(sb->s_first) == 0 ||
1255 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1256 printk(KERN_WARNING
1257 "JBD2: Invalid start block of journal: %u\n",
1258 be32_to_cpu(sb->s_first));
1259 goto out;
1260 }
1261
1254 return 0; 1262 return 0;
1255 1263
1256out: 1264out:
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d7109414cd..9baa39ea6be 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1902,6 +1902,8 @@ zap_buffer_unlocked:
1902 clear_buffer_mapped(bh); 1902 clear_buffer_mapped(bh);
1903 clear_buffer_req(bh); 1903 clear_buffer_req(bh);
1904 clear_buffer_new(bh); 1904 clear_buffer_new(bh);
1905 clear_buffer_delay(bh);
1906 clear_buffer_unwritten(bh);
1905 bh->b_bdev = NULL; 1907 bh->b_bdev = NULL;
1906 return may_free; 1908 return may_free;
1907} 1909}
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 31dce611337..4bbd5211bb3 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -225,8 +225,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
225 return 0; 225 return 0;
226 226
227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n")); 227 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228 spin_lock(&c->erase_completion_lock);
229 mutex_lock(&c->alloc_sem); 228 mutex_lock(&c->alloc_sem);
229 spin_lock(&c->erase_completion_lock);
230 } 230 }
231 231
232 /* First, work out which block we're garbage-collecting */ 232 /* First, work out which block we're garbage-collecting */
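
The swap above exists because mutex_lock() may sleep, which is forbidden while a spinlock is held; the sleeping lock must always be taken first. A runnable pthread analogue of the corrected ordering (build with -pthread; the lock names mirror the jffs2 fields):

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t alloc_sem = PTHREAD_MUTEX_INITIALIZER;
static pthread_spinlock_t erase_completion_lock;

int main(void)
{
        pthread_spin_init(&erase_completion_lock, PTHREAD_PROCESS_PRIVATE);

        pthread_mutex_lock(&alloc_sem);         /* may block: take it first */
        pthread_spin_lock(&erase_completion_lock);
        puts("critical section entered with safe lock order");
        pthread_spin_unlock(&erase_completion_lock);
        pthread_mutex_unlock(&alloc_sem);
        return 0;
}
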
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f848b52c67b..046bb77c601 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -241,7 +241,7 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
241 p = xdr_inline_decode(xdr, 4); 241 p = xdr_inline_decode(xdr, 4);
242 if (unlikely(p == NULL)) 242 if (unlikely(p == NULL))
243 goto out_overflow; 243 goto out_overflow;
244 if (unlikely(*p > nlm4_failed)) 244 if (unlikely(ntohl(*p) > ntohl(nlm4_failed)))
245 goto out_bad_xdr; 245 goto out_bad_xdr;
246 *stat = *p; 246 *stat = *p;
247 return 0; 247 return 0;
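
The NLM status constants are big-endian on the wire, so comparing raw __be32 bit patterns with > is host-order dependent; both sides must go through ntohl() first. A runnable demonstration on a little-endian host (5 stands in for nlm4_failed and 256 for a bogus status; the real enum values are not reproduced here):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
        uint32_t limit = htonl(5);      /* stand-in for nlm4_failed */
        uint32_t wire  = htonl(256);    /* garbage status from the peer */

        /* old check: compares big-endian bit patterns in host order */
        printf("raw compare rejects it:   %s\n",
               wire > limit ? "yes" : "no (the bug)");

        /* fixed check: convert both sides before comparing */
        printf("ntohl compare rejects it: %s\n",
               ntohl(wire) > ntohl(limit) ? "yes" : "no");
        return 0;
}
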
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 180ac34feb9..36057cedac6 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -236,7 +236,7 @@ static int decode_nlm_stat(struct xdr_stream *xdr,
236 p = xdr_inline_decode(xdr, 4); 236 p = xdr_inline_decode(xdr, 4);
237 if (unlikely(p == NULL)) 237 if (unlikely(p == NULL))
238 goto out_overflow; 238 goto out_overflow;
239 if (unlikely(*p > nlm_lck_denied_grace_period)) 239 if (unlikely(ntohl(*p) > ntohl(nlm_lck_denied_grace_period)))
240 goto out_enum; 240 goto out_enum;
241 *stat = *p; 241 *stat = *p;
242 return 0; 242 return 0;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abfff9d7979..1743064cd0c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -440,7 +440,7 @@ static int param_set_##name(const char *val, struct kernel_param *kp) \
440 __typeof__(type) num = which_strtol(val, &endp, 0); \ 440 __typeof__(type) num = which_strtol(val, &endp, 0); \
441 if (endp == val || *endp || num < (min) || num > (max)) \ 441 if (endp == val || *endp || num < (min) || num > (max)) \
442 return -EINVAL; \ 442 return -EINVAL; \
443 *((int *) kp->arg) = num; \ 443 *((type *) kp->arg) = num; \
444 return 0; \ 444 return 0; \
445} 445}
446 446
diff --git a/fs/locks.c b/fs/locks.c
index b286539d547..35388d524c7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -315,7 +315,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
315 return 0; 315 return 0;
316} 316}
317 317
318static int assign_type(struct file_lock *fl, int type) 318static int assign_type(struct file_lock *fl, long type)
319{ 319{
320 switch (type) { 320 switch (type) {
321 case F_RDLCK: 321 case F_RDLCK:
@@ -452,7 +452,7 @@ static const struct lock_manager_operations lease_manager_ops = {
452/* 452/*
453 * Initialize a lease, use the default lock manager operations 453 * Initialize a lease, use the default lock manager operations
454 */ 454 */
455static int lease_init(struct file *filp, int type, struct file_lock *fl) 455static int lease_init(struct file *filp, long type, struct file_lock *fl)
456 { 456 {
457 if (assign_type(fl, type) != 0) 457 if (assign_type(fl, type) != 0)
458 return -EINVAL; 458 return -EINVAL;
@@ -470,7 +470,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
470} 470}
471 471
472/* Allocate a file_lock initialised to this type of lease */ 472/* Allocate a file_lock initialised to this type of lease */
473static struct file_lock *lease_alloc(struct file *filp, int type) 473static struct file_lock *lease_alloc(struct file *filp, long type)
474{ 474{
475 struct file_lock *fl = locks_alloc_lock(); 475 struct file_lock *fl = locks_alloc_lock();
476 int error = -ENOMEM; 476 int error = -ENOMEM;
diff --git a/fs/namei.c b/fs/namei.c
index 14ab8d3f2f0..16bda6cd602 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139static char *getname_flags(const char __user * filename, int flags) 139static char *getname_flags(const char __user *filename, int flags, int *empty)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,6 +147,8 @@ static char *getname_flags(const char __user * filename, int flags)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 if (retval == -ENOENT && empty)
151 *empty = 1;
150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { 152 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 __putname(tmp); 153 __putname(tmp);
152 result = ERR_PTR(retval); 154 result = ERR_PTR(retval);
@@ -159,7 +161,7 @@ static char *getname_flags(const char __user * filename, int flags)
159 161
160char *getname(const char __user * filename) 162char *getname(const char __user * filename)
161{ 163{
162 return getname_flags(filename, 0); 164 return getname_flags(filename, 0, 0);
163} 165}
164 166
165#ifdef CONFIG_AUDITSYSCALL 167#ifdef CONFIG_AUDITSYSCALL
@@ -779,17 +781,20 @@ static int follow_automount(struct path *path, unsigned flags,
779 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) 781 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
780 return -EISDIR; /* we actually want to stop here */ 782 return -EISDIR; /* we actually want to stop here */
781 783
782 /* We want to mount if someone is trying to open/create a file of any 784 /* We don't want to mount if someone's just doing a stat -
783 * type under the mountpoint, wants to traverse through the mountpoint 785 * unless they're stat'ing a directory and appended a '/' to
784 * or wants to open the mounted directory. 786 * the name.
785 * 787 *
786 * We don't want to mount if someone's just doing a stat and they've 788 * We do, however, want to mount if someone wants to open or
787 * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and 789 * create a file of any type under the mountpoint, wants to
788 * appended a '/' to the name. 790 * traverse through the mountpoint or wants to open the
791 * mounted directory. Also, autofs may mark negative dentries
792 * as being automount points. These will need the attentions
793 * of the daemon to instantiate them before they can be used.
789 */ 794 */
790 if (!(flags & LOOKUP_FOLLOW) && 795 if (!(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
791 !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | 796 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
792 LOOKUP_OPEN | LOOKUP_CREATE))) 797 path->dentry->d_inode)
793 return -EISDIR; 798 return -EISDIR;
794 799
795 current->total_link_count++; 800 current->total_link_count++;
@@ -905,7 +910,7 @@ static int follow_managed(struct path *path, unsigned flags)
905 mntput(path->mnt); 910 mntput(path->mnt);
906 if (ret == -EISDIR) 911 if (ret == -EISDIR)
907 ret = 0; 912 ret = 0;
908 return ret; 913 return ret < 0 ? ret : need_mntput;
909} 914}
910 915
911int follow_down_one(struct path *path) 916int follow_down_one(struct path *path)
@@ -953,6 +958,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
953 break; 958 break;
954 path->mnt = mounted; 959 path->mnt = mounted;
955 path->dentry = mounted->mnt_root; 960 path->dentry = mounted->mnt_root;
961 nd->flags |= LOOKUP_JUMPED;
956 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 962 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
957 /* 963 /*
958 * Update the inode too. We don't need to re-check the 964 * Update the inode too. We don't need to re-check the
@@ -1227,6 +1233,8 @@ retry:
1227 path_put_conditional(path, nd); 1233 path_put_conditional(path, nd);
1228 return err; 1234 return err;
1229 } 1235 }
1236 if (err)
1237 nd->flags |= LOOKUP_JUMPED;
1230 *inode = path->dentry->d_inode; 1238 *inode = path->dentry->d_inode;
1231 return 0; 1239 return 0;
1232} 1240}
@@ -1747,11 +1755,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1747 return __lookup_hash(&this, base, NULL); 1755 return __lookup_hash(&this, base, NULL);
1748} 1756}
1749 1757
1750int user_path_at(int dfd, const char __user *name, unsigned flags, 1758int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1751 struct path *path) 1759 struct path *path, int *empty)
1752{ 1760{
1753 struct nameidata nd; 1761 struct nameidata nd;
1754 char *tmp = getname_flags(name, flags); 1762 char *tmp = getname_flags(name, flags, empty);
1755 int err = PTR_ERR(tmp); 1763 int err = PTR_ERR(tmp);
1756 if (!IS_ERR(tmp)) { 1764 if (!IS_ERR(tmp)) {
1757 1765
@@ -1765,6 +1773,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1765 return err; 1773 return err;
1766} 1774}
1767 1775
1776int user_path_at(int dfd, const char __user *name, unsigned flags,
1777 struct path *path)
1778{
1779 return user_path_at_empty(dfd, name, flags, path, 0);
1780}
1781
1768static int user_path_parent(int dfd, const char __user *path, 1782static int user_path_parent(int dfd, const char __user *path,
1769 struct nameidata *nd, char **name) 1783 struct nameidata *nd, char **name)
1770{ 1784{
@@ -2095,7 +2109,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2095 /* sayonara */ 2109 /* sayonara */
2096 error = complete_walk(nd); 2110 error = complete_walk(nd);
2097 if (error) 2111 if (error)
2098 return ERR_PTR(-ECHILD); 2112 return ERR_PTR(error);
2099 2113
2100 error = -ENOTDIR; 2114 error = -ENOTDIR;
2101 if (nd->flags & LOOKUP_DIRECTORY) { 2115 if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2107,6 +2121,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2107 } 2121 }
2108 2122
2109 /* create side of things */ 2123 /* create side of things */
2124 /*
2125 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been
2126 * cleared when we got to the last component we are about to look up
2127 */
2110 error = complete_walk(nd); 2128 error = complete_walk(nd);
2111 if (error) 2129 if (error)
2112 return ERR_PTR(error); 2130 return ERR_PTR(error);
@@ -2175,6 +2193,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2175 if (error < 0) 2193 if (error < 0)
2176 goto exit_dput; 2194 goto exit_dput;
2177 2195
2196 if (error)
2197 nd->flags |= LOOKUP_JUMPED;
2198
2178 error = -ENOENT; 2199 error = -ENOENT;
2179 if (!path->dentry->d_inode) 2200 if (!path->dentry->d_inode)
2180 goto exit_dput; 2201 goto exit_dput;
@@ -2184,6 +2205,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2184 2205
2185 path_to_nameidata(path, nd); 2206 path_to_nameidata(path, nd);
2186 nd->inode = path->dentry->d_inode; 2207 nd->inode = path->dentry->d_inode;
2208 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
2209 error = complete_walk(nd);
2210 if (error)
2211 return ERR_PTR(error);
2187 error = -EISDIR; 2212 error = -EISDIR;
2188 if (S_ISDIR(nd->inode->i_mode)) 2213 if (S_ISDIR(nd->inode->i_mode))
2189 goto exit; 2214 goto exit;
@@ -2582,6 +2607,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2582 if (!dir->i_op->rmdir) 2607 if (!dir->i_op->rmdir)
2583 return -EPERM; 2608 return -EPERM;
2584 2609
2610 dget(dentry);
2585 mutex_lock(&dentry->d_inode->i_mutex); 2611 mutex_lock(&dentry->d_inode->i_mutex);
2586 2612
2587 error = -EBUSY; 2613 error = -EBUSY;
@@ -2602,6 +2628,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2602 2628
2603out: 2629out:
2604 mutex_unlock(&dentry->d_inode->i_mutex); 2630 mutex_unlock(&dentry->d_inode->i_mutex);
2631 dput(dentry);
2605 if (!error) 2632 if (!error)
2606 d_delete(dentry); 2633 d_delete(dentry);
2607 return error; 2634 return error;
@@ -3005,6 +3032,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3005 if (error) 3032 if (error)
3006 return error; 3033 return error;
3007 3034
3035 dget(new_dentry);
3008 if (target) 3036 if (target)
3009 mutex_lock(&target->i_mutex); 3037 mutex_lock(&target->i_mutex);
3010 3038
@@ -3025,6 +3053,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
3025out: 3053out:
3026 if (target) 3054 if (target)
3027 mutex_unlock(&target->i_mutex); 3055 mutex_unlock(&target->i_mutex);
3056 dput(new_dentry);
3028 if (!error) 3057 if (!error)
3029 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 3058 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3030 d_move(old_dentry,new_dentry); 3059 d_move(old_dentry,new_dentry);
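
user_path_at_empty() lets a syscall distinguish "the path was empty" from other lookup failures instead of losing that information inside getname_flags(). A hedged kernel-context sketch of a caller; the surrounding syscall shape and the operate_on_fd() helper are assumptions, not code from this tree:

struct path path;
int empty = 0;
int error = user_path_at_empty(dfd, filename, lookup_flags, &path, &empty);

if (error && empty && (flag & AT_EMPTY_PATH)) {
        /* ""-path case: act on the file behind dfd itself */
        error = operate_on_fd(dfd);     /* assumed helper */
}
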
diff --git a/fs/namespace.c b/fs/namespace.c
index fe59bd145d2..b3d8f51c6fa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1048,15 +1048,12 @@ static int show_mountinfo(struct seq_file *m, void *v)
1048 if (err) 1048 if (err)
1049 goto out; 1049 goto out;
1050 seq_putc(m, ' '); 1050 seq_putc(m, ' ');
1051 seq_path_root(m, &mnt_path, &root, " \t\n\\"); 1051
1052 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { 1052 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
1053 /* 1053 err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
1054 * Mountpoint is outside root, discard that one. Ugly, 1054 if (err)
1055 * but less so than trying to do that in iterator in a 1055 goto out;
1056 * race-free way (due to renames). 1056
1057 */
1058 return SEQ_SKIP;
1059 }
1060 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); 1057 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
1061 show_mnt_opts(m, mnt); 1058 show_mnt_opts(m, mnt);
1062 1059
@@ -1109,6 +1106,7 @@ static int show_vfsstat(struct seq_file *m, void *v)
1109 1106
1110 /* device */ 1107 /* device */
1111 if (mnt->mnt_sb->s_op->show_devname) { 1108 if (mnt->mnt_sb->s_op->show_devname) {
1109 seq_puts(m, "device ");
1112 err = mnt->mnt_sb->s_op->show_devname(m, mnt); 1110 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1113 } else { 1111 } else {
1114 if (mnt->mnt_devname) { 1112 if (mnt->mnt_devname) {
@@ -1246,8 +1244,9 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1246 list_del_init(&p->mnt_expire); 1244 list_del_init(&p->mnt_expire);
1247 list_del_init(&p->mnt_list); 1245 list_del_init(&p->mnt_list);
1248 __touch_mnt_namespace(p->mnt_ns); 1246 __touch_mnt_namespace(p->mnt_ns);
1247 if (p->mnt_ns)
1248 __mnt_make_shortterm(p);
1249 p->mnt_ns = NULL; 1249 p->mnt_ns = NULL;
1250 __mnt_make_shortterm(p);
1251 list_del_init(&p->mnt_child); 1250 list_del_init(&p->mnt_child);
1252 if (p->mnt_parent != p) { 1251 if (p->mnt_parent != p) {
1253 p->mnt_parent->mnt_ghosts++; 1252 p->mnt_parent->mnt_ghosts++;
@@ -1757,7 +1756,7 @@ static int do_loopback(struct path *path, char *old_name,
1757 return err; 1756 return err;
1758 if (!old_name || !*old_name) 1757 if (!old_name || !*old_name)
1759 return -EINVAL; 1758 return -EINVAL;
1760 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); 1759 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
1761 if (err) 1760 if (err)
1762 return err; 1761 return err;
1763 1762
@@ -2724,3 +2723,8 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
2724 return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); 2723 return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
2725} 2724}
2726EXPORT_SYMBOL_GPL(kern_mount_data); 2725EXPORT_SYMBOL_GPL(kern_mount_data);
2726
2727bool our_mnt(struct vfsmount *mnt)
2728{
2729 return check_mnt(mnt);
2730}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b257383bb56..07df5f1d85e 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
38struct cb_process_state { 38struct cb_process_state {
39 __be32 drc_status; 39 __be32 drc_status;
40 struct nfs_client *clp; 40 struct nfs_client *clp;
41 int slotid;
41}; 42};
42 43
43struct cb_compound_hdr_arg { 44struct cb_compound_hdr_arg {
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall(
166 void *dummy, struct cb_process_state *cps); 167 void *dummy, struct cb_process_state *cps);
167 168
168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); 169extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
169extern void nfs4_cb_take_slot(struct nfs_client *clp);
170 170
171struct cb_devicenotifyitem { 171struct cb_devicenotifyitem {
172 uint32_t cbd_notify_type; 172 uint32_t cbd_notify_type;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb..b5c826e17b6 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -324,7 +324,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
324 dprintk("%s enter. slotid %d seqid %d\n", 324 dprintk("%s enter. slotid %d seqid %d\n",
325 __func__, args->csa_slotid, args->csa_sequenceid); 325 __func__, args->csa_slotid, args->csa_sequenceid);
326 326
327 if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) 327 if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
328 return htonl(NFS4ERR_BADSLOT); 328 return htonl(NFS4ERR_BADSLOT);
329 329
330 slot = tbl->slots + args->csa_slotid; 330 slot = tbl->slots + args->csa_slotid;
@@ -333,7 +333,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
333 /* Normal */ 333 /* Normal */
334 if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { 334 if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
335 slot->seq_nr++; 335 slot->seq_nr++;
336 return htonl(NFS4_OK); 336 goto out_ok;
337 } 337 }
338 338
339 /* Replay */ 339 /* Replay */
@@ -352,11 +352,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
352 /* Wraparound */ 352 /* Wraparound */
353 if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { 353 if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
354 slot->seq_nr = 1; 354 slot->seq_nr = 1;
355 return htonl(NFS4_OK); 355 goto out_ok;
356 } 356 }
357 357
358 /* Misordered request */ 358 /* Misordered request */
359 return htonl(NFS4ERR_SEQ_MISORDERED); 359 return htonl(NFS4ERR_SEQ_MISORDERED);
360out_ok:
361 tbl->highest_used_slotid = args->csa_slotid;
362 return htonl(NFS4_OK);
360} 363}
361 364
362/* 365/*
@@ -418,26 +421,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
418 struct cb_sequenceres *res, 421 struct cb_sequenceres *res,
419 struct cb_process_state *cps) 422 struct cb_process_state *cps)
420{ 423{
424 struct nfs4_slot_table *tbl;
421 struct nfs_client *clp; 425 struct nfs_client *clp;
422 int i; 426 int i;
423 __be32 status = htonl(NFS4ERR_BADSESSION); 427 __be32 status = htonl(NFS4ERR_BADSESSION);
424 428
425 cps->clp = NULL;
426
427 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); 429 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
428 if (clp == NULL) 430 if (clp == NULL)
429 goto out; 431 goto out;
430 432
433 tbl = &clp->cl_session->bc_slot_table;
434
435 spin_lock(&tbl->slot_tbl_lock);
431 /* state manager is resetting the session */ 436 /* state manager is resetting the session */
432 if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { 437 if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
433 status = NFS4ERR_DELAY; 438 spin_unlock(&tbl->slot_tbl_lock);
439 status = htonl(NFS4ERR_DELAY);
440 /* Return NFS4ERR_BADSESSION if we're draining the session
441 * in order to reset it.
442 */
443 if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
444 status = htonl(NFS4ERR_BADSESSION);
434 goto out; 445 goto out;
435 } 446 }
436 447
437 status = validate_seqid(&clp->cl_session->bc_slot_table, args); 448 status = validate_seqid(&clp->cl_session->bc_slot_table, args);
449 spin_unlock(&tbl->slot_tbl_lock);
438 if (status) 450 if (status)
439 goto out; 451 goto out;
440 452
453 cps->slotid = args->csa_slotid;
454
441 /* 455 /*
442 * Check for pending referring calls. If a match is found, a 456 * Check for pending referring calls. If a match is found, a
443 * related callback was received before the response to the original 457 * related callback was received before the response to the original
@@ -454,7 +468,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
454 res->csr_slotid = args->csa_slotid; 468 res->csr_slotid = args->csa_slotid;
455 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 469 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
456 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 470 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
457 nfs4_cb_take_slot(clp);
458 471
459out: 472out:
460 cps->clp = clp; /* put in nfs4_callback_compound */ 473 cps->clp = clp; /* put in nfs4_callback_compound */
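
validate_seqid() accepts exactly seq_nr + 1 as the next request and treats sequenceid 1 after a wrapped 32-bit counter as the explicit wraparound case. A runnable distillation of those acceptance rules:

#include <stdio.h>
#include <stdint.h>

static const char *check(uint32_t seq_nr, uint32_t csa_sequenceid)
{
        if (csa_sequenceid == seq_nr + 1)       /* normal progression */
                return "normal";
        if (csa_sequenceid == seq_nr)           /* retransmission */
                return "replay";
        if (csa_sequenceid == 1 && seq_nr + 1 == 0)
                return "wraparound";            /* 0xffffffff -> 1 */
        return "misordered";
}

int main(void)
{
        printf("%s\n", check(7, 8));            /* normal */
        printf("%s\n", check(7, 7));            /* replay */
        printf("%s\n", check(0xffffffffu, 1));  /* wraparound */
        printf("%s\n", check(7, 9));            /* misordered */
        return 0;
}
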
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c6c86a77e04..918ad647afe 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
754 * Let the state manager know callback processing done. 754 * Let the state manager know callback processing done.
755 * A single slot, so highest used slotid is either 0 or -1 755 * A single slot, so highest used slotid is either 0 or -1
756 */ 756 */
757 tbl->highest_used_slotid--; 757 tbl->highest_used_slotid = -1;
758 nfs4_check_drain_bc_complete(session); 758 nfs4_check_drain_bc_complete(session);
759 spin_unlock(&tbl->slot_tbl_lock); 759 spin_unlock(&tbl->slot_tbl_lock);
760} 760}
761 761
762static void nfs4_cb_free_slot(struct nfs_client *clp) 762static void nfs4_cb_free_slot(struct cb_process_state *cps)
763{ 763{
764 if (clp && clp->cl_session) 764 if (cps->slotid != -1)
765 nfs4_callback_free_slot(clp->cl_session); 765 nfs4_callback_free_slot(cps->clp->cl_session);
766}
767
768/* A single slot, so highest used slotid is either 0 or -1 */
769void nfs4_cb_take_slot(struct nfs_client *clp)
770{
771 struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
772
773 spin_lock(&tbl->slot_tbl_lock);
774 tbl->highest_used_slotid++;
775 BUG_ON(tbl->highest_used_slotid != 0);
776 spin_unlock(&tbl->slot_tbl_lock);
777} 766}
778 767
779#else /* CONFIG_NFS_V4_1 */ 768#else /* CONFIG_NFS_V4_1 */
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
784 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 773 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
785} 774}
786 775
787static void nfs4_cb_free_slot(struct nfs_client *clp) 776static void nfs4_cb_free_slot(struct cb_process_state *cps)
788{ 777{
789} 778}
790#endif /* CONFIG_NFS_V4_1 */ 779#endif /* CONFIG_NFS_V4_1 */
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
866 struct cb_process_state cps = { 855 struct cb_process_state cps = {
867 .drc_status = 0, 856 .drc_status = 0,
868 .clp = NULL, 857 .clp = NULL,
858 .slotid = -1,
869 }; 859 };
870 unsigned int nops = 0; 860 unsigned int nops = 0;
871 861
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
906 896
907 *hdr_res.status = status; 897 *hdr_res.status = status;
908 *hdr_res.nops = htonl(nops); 898 *hdr_res.nops = htonl(nops);
909 nfs4_cb_free_slot(cps.clp); 899 nfs4_cb_free_slot(&cps);
910 nfs_put_client(cps.clp); 900 nfs_put_client(cps.clp);
911 dprintk("%s: done, status = %u\n", __func__, ntohl(status)); 901 dprintk("%s: done, status = %u\n", __func__, ntohl(status));
912 return rpc_success; 902 return rpc_success;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2aec37..ecabbd8f6ee 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
398 return err; 398 return err;
399} 399}
400 400
401static void nfs_mark_return_delegation(struct nfs_delegation *delegation) 401static void nfs_mark_return_delegation(struct nfs_server *server,
402 struct nfs_delegation *delegation)
402{ 403{
403 struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
404
405 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 404 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
406 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 405 set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
407} 406}
408 407
409/** 408/**
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
441 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) 440 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
442 continue; 441 continue;
443 if (delegation->type & flags) 442 if (delegation->type & flags)
444 nfs_mark_return_delegation(delegation); 443 nfs_mark_return_delegation(server, delegation);
445 } 444 }
446} 445}
447 446
@@ -467,6 +466,17 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
467 nfs4_schedule_state_manager(clp); 466 nfs4_schedule_state_manager(clp);
468} 467}
469 468
469void nfs_remove_bad_delegation(struct inode *inode)
470{
471 struct nfs_delegation *delegation;
472
473 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
474 if (delegation) {
475 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
476 nfs_free_delegation(delegation);
477 }
478}
479
470/** 480/**
471 * nfs_expire_all_delegation_types 481 * nfs_expire_all_delegation_types
472 * @clp: client to process 482 * @clp: client to process
@@ -508,7 +518,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
508 list_for_each_entry_rcu(delegation, &server->delegations, super_list) { 518 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
509 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 519 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
510 continue; 520 continue;
511 nfs_mark_return_delegation(delegation); 521 nfs_mark_return_delegation(server, delegation);
512 } 522 }
513} 523}
514 524
@@ -539,7 +549,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
539int nfs_async_inode_return_delegation(struct inode *inode, 549int nfs_async_inode_return_delegation(struct inode *inode,
540 const nfs4_stateid *stateid) 550 const nfs4_stateid *stateid)
541{ 551{
542 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 552 struct nfs_server *server = NFS_SERVER(inode);
553 struct nfs_client *clp = server->nfs_client;
543 struct nfs_delegation *delegation; 554 struct nfs_delegation *delegation;
544 555
545 rcu_read_lock(); 556 rcu_read_lock();
@@ -549,7 +560,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
549 rcu_read_unlock(); 560 rcu_read_unlock();
550 return -ENOENT; 561 return -ENOENT;
551 } 562 }
552 nfs_mark_return_delegation(delegation); 563 nfs_mark_return_delegation(server, delegation);
553 rcu_read_unlock(); 564 rcu_read_unlock();
554 565
555 nfs_delegation_run_state_manager(clp); 566 nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index d9322e490c5..691a7960918 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -45,6 +45,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
45void nfs_handle_cb_pathdown(struct nfs_client *clp); 45void nfs_handle_cb_pathdown(struct nfs_client *clp);
46int nfs_client_return_marked_delegations(struct nfs_client *clp); 46int nfs_client_return_marked_delegations(struct nfs_client *clp);
47int nfs_delegations_present(struct nfs_client *clp); 47int nfs_delegations_present(struct nfs_client *clp);
48void nfs_remove_bad_delegation(struct inode *inode);
48 49
49void nfs_delegation_mark_reclaim(struct nfs_client *clp); 50void nfs_delegation_mark_reclaim(struct nfs_client *clp);
50void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 51void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ededdbd0db3..462a0060173 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -134,18 +134,19 @@ const struct inode_operations nfs4_dir_inode_operations = {
134 134
135#endif /* CONFIG_NFS_V4 */ 135#endif /* CONFIG_NFS_V4 */
136 136
137static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred) 137static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
138{ 138{
139 struct nfs_open_dir_context *ctx; 139 struct nfs_open_dir_context *ctx;
140 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 140 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
141 if (ctx != NULL) { 141 if (ctx != NULL) {
142 ctx->duped = 0; 142 ctx->duped = 0;
143 ctx->attr_gencount = NFS_I(dir)->attr_gencount;
143 ctx->dir_cookie = 0; 144 ctx->dir_cookie = 0;
144 ctx->dup_cookie = 0; 145 ctx->dup_cookie = 0;
145 ctx->cred = get_rpccred(cred); 146 ctx->cred = get_rpccred(cred);
146 } else 147 return ctx;
147 ctx = ERR_PTR(-ENOMEM); 148 }
148 return ctx; 149 return ERR_PTR(-ENOMEM);
149} 150}
150 151
151static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) 152static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
@@ -173,7 +174,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
173 cred = rpc_lookup_cred(); 174 cred = rpc_lookup_cred();
174 if (IS_ERR(cred)) 175 if (IS_ERR(cred))
175 return PTR_ERR(cred); 176 return PTR_ERR(cred);
176 ctx = alloc_nfs_open_dir_context(cred); 177 ctx = alloc_nfs_open_dir_context(inode, cred);
177 if (IS_ERR(ctx)) { 178 if (IS_ERR(ctx)) {
178 res = PTR_ERR(ctx); 179 res = PTR_ERR(ctx);
179 goto out; 180 goto out;
@@ -323,7 +324,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
323{ 324{
324 loff_t diff = desc->file->f_pos - desc->current_index; 325 loff_t diff = desc->file->f_pos - desc->current_index;
325 unsigned int index; 326 unsigned int index;
326 struct nfs_open_dir_context *ctx = desc->file->private_data;
327 327
328 if (diff < 0) 328 if (diff < 0)
329 goto out_eof; 329 goto out_eof;
@@ -336,7 +336,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
336 index = (unsigned int)diff; 336 index = (unsigned int)diff;
337 *desc->dir_cookie = array->array[index].cookie; 337 *desc->dir_cookie = array->array[index].cookie;
338 desc->cache_entry_index = index; 338 desc->cache_entry_index = index;
339 ctx->duped = 0;
340 return 0; 339 return 0;
341out_eof: 340out_eof:
342 desc->eof = 1; 341 desc->eof = 1;
@@ -349,14 +348,33 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
349 int i; 348 int i;
350 loff_t new_pos; 349 loff_t new_pos;
351 int status = -EAGAIN; 350 int status = -EAGAIN;
352 struct nfs_open_dir_context *ctx = desc->file->private_data;
353 351
354 for (i = 0; i < array->size; i++) { 352 for (i = 0; i < array->size; i++) {
355 if (array->array[i].cookie == *desc->dir_cookie) { 353 if (array->array[i].cookie == *desc->dir_cookie) {
354 struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode);
355 struct nfs_open_dir_context *ctx = desc->file->private_data;
356
356 new_pos = desc->current_index + i; 357 new_pos = desc->current_index + i;
357 if (new_pos < desc->file->f_pos) { 358 if (ctx->attr_gencount != nfsi->attr_gencount
359 || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) {
360 ctx->duped = 0;
361 ctx->attr_gencount = nfsi->attr_gencount;
362 } else if (new_pos < desc->file->f_pos) {
363 if (ctx->duped > 0
364 && ctx->dup_cookie == *desc->dir_cookie) {
365 if (printk_ratelimit()) {
366 pr_notice("NFS: directory %s/%s contains a readdir loop. "
367 "Please contact your server vendor. "
368 "Offending cookie: %llu\n",
369 desc->file->f_dentry->d_parent->d_name.name,
370 desc->file->f_dentry->d_name.name,
371 *desc->dir_cookie);
372 }
373 status = -ELOOP;
374 goto out;
375 }
358 ctx->dup_cookie = *desc->dir_cookie; 376 ctx->dup_cookie = *desc->dir_cookie;
359 ctx->duped = 1; 377 ctx->duped = -1;
360 } 378 }
361 desc->file->f_pos = new_pos; 379 desc->file->f_pos = new_pos;
362 desc->cache_entry_index = i; 380 desc->cache_entry_index = i;
@@ -368,6 +386,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
368 if (*desc->dir_cookie == array->last_cookie) 386 if (*desc->dir_cookie == array->last_cookie)
369 desc->eof = 1; 387 desc->eof = 1;
370 } 388 }
389out:
371 return status; 390 return status;
372} 391}
373 392
@@ -740,19 +759,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
740 struct nfs_cache_array *array = NULL; 759 struct nfs_cache_array *array = NULL;
741 struct nfs_open_dir_context *ctx = file->private_data; 760 struct nfs_open_dir_context *ctx = file->private_data;
742 761
743 if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
744 if (printk_ratelimit()) {
745 pr_notice("NFS: directory %s/%s contains a readdir loop. "
746 "Please contact your server vendor. "
747 "Offending cookie: %llu\n",
748 file->f_dentry->d_parent->d_name.name,
749 file->f_dentry->d_name.name,
750 *desc->dir_cookie);
751 }
752 res = -ELOOP;
753 goto out;
754 }
755
756 array = nfs_readdir_get_array(desc->page); 762 array = nfs_readdir_get_array(desc->page);
757 if (IS_ERR(array)) { 763 if (IS_ERR(array)) {
758 res = PTR_ERR(array); 764 res = PTR_ERR(array);
@@ -774,6 +780,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
774 *desc->dir_cookie = array->array[i+1].cookie; 780 *desc->dir_cookie = array->array[i+1].cookie;
775 else 781 else
776 *desc->dir_cookie = array->last_cookie; 782 *desc->dir_cookie = array->last_cookie;
783 if (ctx->duped != 0)
784 ctx->duped = 1;
777 } 785 }
778 if (array->eof_index >= 0) 786 if (array->eof_index >= 0)
779 desc->eof = 1; 787 desc->eof = 1;
@@ -805,6 +813,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
805 struct page *page = NULL; 813 struct page *page = NULL;
806 int status; 814 int status;
807 struct inode *inode = desc->file->f_path.dentry->d_inode; 815 struct inode *inode = desc->file->f_path.dentry->d_inode;
816 struct nfs_open_dir_context *ctx = desc->file->private_data;
808 817
809 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 818 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
810 (unsigned long long)*desc->dir_cookie); 819 (unsigned long long)*desc->dir_cookie);
@@ -818,6 +827,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
818 desc->page_index = 0; 827 desc->page_index = 0;
819 desc->last_cookie = *desc->dir_cookie; 828 desc->last_cookie = *desc->dir_cookie;
820 desc->page = page; 829 desc->page = page;
830 ctx->duped = 0;
821 831
822 status = nfs_readdir_xdr_to_array(desc, page, inode); 832 status = nfs_readdir_xdr_to_array(desc, page, inode);
823 if (status < 0) 833 if (status < 0)
@@ -1448,12 +1458,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1448 res = NULL; 1458 res = NULL;
1449 goto out; 1459 goto out;
1450 /* This turned out not to be a regular file */ 1460 /* This turned out not to be a regular file */
1461 case -EISDIR:
1451 case -ENOTDIR: 1462 case -ENOTDIR:
1452 goto no_open; 1463 goto no_open;
1453 case -ELOOP: 1464 case -ELOOP:
1454 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1465 if (!(nd->intent.open.flags & O_NOFOLLOW))
1455 goto no_open; 1466 goto no_open;
1456 /* case -EISDIR: */
1457 /* case -EINVAL: */ 1467 /* case -EINVAL: */
1458 default: 1468 default:
1459 res = ERR_CAST(inode); 1469 res = ERR_CAST(inode);
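
After these hunks, ctx->duped is effectively a three-state flag rather than a boolean. A short summary of the states as they read from the code above (the enum names are editorial, not from the tree):

enum {
        DUPED_NONE  =  0,  /* no rewind recorded; loop check disarmed     */
        DUPED_ARMED = -1,  /* f_pos moved backwards; dup_cookie recorded  */
        DUPED_SEEN  =  1,  /* entries emitted since the rewind; seeing    */
                           /* dup_cookie again now reports -ELOOP         */
};
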
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2f093ed1698..6c6e2c46122 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -493,8 +493,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
493 493
494 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 494 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
495 495
496 /* Only do I/O if gfp is a superset of GFP_KERNEL */ 496 /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not
497 if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { 497 * doing this memory reclaim for a fs-related allocation.
498 */
499 if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL &&
500 !(current->flags & PF_FSTRANS)) {
498 int how = FLUSH_SYNC; 501 int how = FLUSH_SYNC;
499 502
500 /* Don't let kswapd deadlock waiting for OOM RPC calls */ 503 /* Don't let kswapd deadlock waiting for OOM RPC calls */
@@ -887,3 +890,35 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
887 file->f_path.dentry->d_name.name, arg); 890 file->f_path.dentry->d_name.name, arg);
888 return -EINVAL; 891 return -EINVAL;
889} 892}
893
894#ifdef CONFIG_NFS_V4
895static int
896nfs4_file_open(struct inode *inode, struct file *filp)
897{
898 /*
899 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
900 * this point, then something is very wrong
901 */
902 dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
903 return -ENOTDIR;
904}
905
906const struct file_operations nfs4_file_operations = {
907 .llseek = nfs_file_llseek,
908 .read = do_sync_read,
909 .write = do_sync_write,
910 .aio_read = nfs_file_read,
911 .aio_write = nfs_file_write,
912 .mmap = nfs_file_mmap,
913 .open = nfs4_file_open,
914 .flush = nfs_file_flush,
915 .release = nfs_file_release,
916 .fsync = nfs_file_fsync,
917 .lock = nfs_lock,
918 .flock = nfs_flock,
919 .splice_read = nfs_file_splice_read,
920 .splice_write = nfs_file_splice_write,
921 .check_flags = nfs_check_flags,
922 .setlease = nfs_setlease,
923};
924#endif /* CONFIG_NFS_V4 */
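
The superset test in nfs_release_page() works because (gfp & GFP_KERNEL) == GFP_KERNEL is true only when every bit of GFP_KERNEL is present in gfp. A runnable demonstration with illustrative flag values (the real kernel constants differ):

#include <stdio.h>

#define __GFP_WAIT 0x01
#define __GFP_IO   0x02
#define __GFP_FS   0x04
#define GFP_NOFS   (__GFP_WAIT | __GFP_IO)
#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)

static int superset_of_kernel(unsigned int gfp)
{
        return (gfp & GFP_KERNEL) == GFP_KERNEL;  /* all bits required */
}

int main(void)
{
        printf("GFP_KERNEL: %d\n", superset_of_kernel(GFP_KERNEL)); /* 1 */
        printf("GFP_NOFS:   %d\n", superset_of_kernel(GFP_NOFS));   /* 0 */
        return 0;
}
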
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f4850deb27..c48f9f6ad72 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 */ 291 */
292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; 292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
293 if (S_ISREG(inode->i_mode)) { 293 if (S_ISREG(inode->i_mode)) {
294 inode->i_fop = &nfs_file_operations; 294 inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
295 inode->i_data.a_ops = &nfs_file_aops; 295 inode->i_data.a_ops = &nfs_file_aops;
296 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; 296 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
297 } else if (S_ISDIR(inode->i_mode)) { 297 } else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347a2da..4f10d8188ab 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -315,7 +315,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
315 315
316#ifdef CONFIG_MIGRATION 316#ifdef CONFIG_MIGRATION
317extern int nfs_migrate_page(struct address_space *, 317extern int nfs_migrate_page(struct address_space *,
318 struct page *, struct page *); 318 struct page *, struct page *, enum migrate_mode);
319#else 319#else
320#define nfs_migrate_page NULL 320#define nfs_migrate_page NULL
321#endif 321#endif
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 38053d823eb..771741f1479 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
853 .dentry_ops = &nfs_dentry_operations, 853 .dentry_ops = &nfs_dentry_operations,
854 .dir_inode_ops = &nfs3_dir_inode_operations, 854 .dir_inode_ops = &nfs3_dir_inode_operations,
855 .file_inode_ops = &nfs3_file_inode_operations, 855 .file_inode_ops = &nfs3_file_inode_operations,
856 .file_ops = &nfs_file_operations,
856 .getroot = nfs3_proc_get_root, 857 .getroot = nfs3_proc_get_root,
857 .getattr = nfs3_proc_getattr, 858 .getattr = nfs3_proc_getattr,
858 .setattr = nfs3_proc_setattr, 859 .setattr = nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c4a69833dd0..e1c1365ba83 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -209,6 +209,7 @@ struct nfs4_exception {
209 long timeout; 209 long timeout;
210 int retry; 210 int retry;
211 struct nfs4_state *state; 211 struct nfs4_state *state;
212 struct inode *inode;
212}; 213};
213 214
214struct nfs4_state_recovery_ops { 215struct nfs4_state_recovery_ops {
@@ -344,6 +345,8 @@ extern void nfs4_put_open_state(struct nfs4_state *);
344extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 345extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
345extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 346extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
346extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 347extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
348extern void nfs_inode_find_state_and_recover(struct inode *inode,
349 const nfs4_stateid *stateid);
347extern void nfs4_schedule_lease_recovery(struct nfs_client *); 350extern void nfs4_schedule_lease_recovery(struct nfs_client *);
348extern void nfs4_schedule_state_manager(struct nfs_client *); 351extern void nfs4_schedule_state_manager(struct nfs_client *);
349extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); 352extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03abcd04..75af81211e4 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -170,7 +170,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
170 170
171 pnfs_set_layoutcommit(wdata); 171 pnfs_set_layoutcommit(wdata);
172 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, 172 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
173 (unsigned long) wdata->lseg->pls_end_pos); 173 (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
174} 174}
175 175
176/* 176/*
@@ -428,6 +428,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
428 428
429 dprintk("--> %s\n", __func__); 429 dprintk("--> %s\n", __func__);
430 430
431 /* FIXME: remove this check when layout segment support is added */
432 if (lgr->range.offset != 0 ||
433 lgr->range.length != NFS4_MAX_UINT64) {
434 dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
435 __func__);
436 goto out;
437 }
438
431 if (fl->pattern_offset > lgr->range.offset) { 439 if (fl->pattern_offset > lgr->range.offset) {
432 dprintk("%s pattern_offset %lld too large\n", 440 dprintk("%s pattern_offset %lld too large\n",
433 __func__, fl->pattern_offset); 441 __func__, fl->pattern_offset);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5879b23e0c9..b7a7e5fe401 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -94,6 +94,8 @@ static int nfs4_map_errors(int err)
94 case -NFS4ERR_BADOWNER: 94 case -NFS4ERR_BADOWNER:
95 case -NFS4ERR_BADNAME: 95 case -NFS4ERR_BADNAME:
96 return -EINVAL; 96 return -EINVAL;
97 case -NFS4ERR_SHARE_DENIED:
98 return -EACCES;
97 default: 99 default:
98 dprintk("%s could not handle NFSv4 error %d\n", 100 dprintk("%s could not handle NFSv4 error %d\n",
99 __func__, -err); 101 __func__, -err);
@@ -254,15 +256,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
254{ 256{
255 struct nfs_client *clp = server->nfs_client; 257 struct nfs_client *clp = server->nfs_client;
256 struct nfs4_state *state = exception->state; 258 struct nfs4_state *state = exception->state;
259 struct inode *inode = exception->inode;
257 int ret = errorcode; 260 int ret = errorcode;
258 261
259 exception->retry = 0; 262 exception->retry = 0;
260 switch(errorcode) { 263 switch(errorcode) {
261 case 0: 264 case 0:
262 return 0; 265 return 0;
266 case -NFS4ERR_OPENMODE:
267 if (nfs_have_delegation(inode, FMODE_READ)) {
268 nfs_inode_return_delegation(inode);
269 exception->retry = 1;
270 return 0;
271 }
272 if (state == NULL)
273 break;
274 nfs4_schedule_stateid_recovery(server, state);
275 goto wait_on_recovery;
276 case -NFS4ERR_DELEG_REVOKED:
263 case -NFS4ERR_ADMIN_REVOKED: 277 case -NFS4ERR_ADMIN_REVOKED:
264 case -NFS4ERR_BAD_STATEID: 278 case -NFS4ERR_BAD_STATEID:
265 case -NFS4ERR_OPENMODE: 279 if (state != NULL)
280 nfs_remove_bad_delegation(state->inode);
266 if (state == NULL) 281 if (state == NULL)
267 break; 282 break;
268 nfs4_schedule_stateid_recovery(server, state); 283 nfs4_schedule_stateid_recovery(server, state);
@@ -1305,8 +1320,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1305 * The show must go on: exit, but mark the 1320 * The show must go on: exit, but mark the
1306 * stateid as needing recovery. 1321 * stateid as needing recovery.
1307 */ 1322 */
1323 case -NFS4ERR_DELEG_REVOKED:
1308 case -NFS4ERR_ADMIN_REVOKED: 1324 case -NFS4ERR_ADMIN_REVOKED:
1309 case -NFS4ERR_BAD_STATEID: 1325 case -NFS4ERR_BAD_STATEID:
1326 nfs_inode_find_state_and_recover(state->inode,
1327 stateid);
1310 nfs4_schedule_stateid_recovery(server, state); 1328 nfs4_schedule_stateid_recovery(server, state);
1311 case -EKEYEXPIRED: 1329 case -EKEYEXPIRED:
1312 /* 1330 /*
@@ -1755,6 +1773,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1755 nfs_setattr_update_inode(state->inode, sattr); 1773 nfs_setattr_update_inode(state->inode, sattr);
1756 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); 1774 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
1757 } 1775 }
1776 nfs_revalidate_inode(server, state->inode);
1758 nfs4_opendata_put(opendata); 1777 nfs4_opendata_put(opendata);
1759 nfs4_put_state_owner(sp); 1778 nfs4_put_state_owner(sp);
1760 *res = state; 1779 *res = state;
@@ -1862,7 +1881,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1862 struct nfs4_state *state) 1881 struct nfs4_state *state)
1863{ 1882{
1864 struct nfs_server *server = NFS_SERVER(inode); 1883 struct nfs_server *server = NFS_SERVER(inode);
1865 struct nfs4_exception exception = { }; 1884 struct nfs4_exception exception = {
1885 .state = state,
1886 .inode = inode,
1887 };
1866 int err; 1888 int err;
1867 do { 1889 do {
1868 err = nfs4_handle_exception(server, 1890 err = nfs4_handle_exception(server,
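Seeding the exception with both the state and the inode lets nfs4_handle_exception() return a read delegation and request a retry on -NFS4ERR_OPENMODE instead of failing the setattr outright. The surrounding do/while retry idiom, reduced to a standalone model (the error value and the handler body are stand-ins):

    #include <stdio.h>

    #define NFS4ERR_OPENMODE 10038   /* stand-in value */

    struct nfs4_exception { int retry; int have_delegation; };

    /* Returns 0 and sets ->retry when the caller should try again. */
    static int handle_exception(int err, struct nfs4_exception *exc)
    {
            exc->retry = 0;
            if (err == 0)
                    return 0;
            if (err == -NFS4ERR_OPENMODE && exc->have_delegation) {
                    exc->have_delegation = 0;  /* delegation returned */
                    exc->retry = 1;
                    return 0;
            }
            return err;              /* fatal: give up */
    }

    int main(void)
    {
            struct nfs4_exception exc = { .have_delegation = 1 };
            int attempts = 0, err;

            do {
                    /* first RPC fails with OPENMODE, second succeeds */
                    int rpc = (attempts++ == 0) ? -NFS4ERR_OPENMODE : 0;
                    err = handle_exception(rpc, &exc);
            } while (exc.retry);

            printf("finished after %d attempts, err=%d\n", attempts, err);
            return 0;
    }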
@@ -3678,8 +3700,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3678 if (task->tk_status >= 0) 3700 if (task->tk_status >= 0)
3679 return 0; 3701 return 0;
3680 switch(task->tk_status) { 3702 switch(task->tk_status) {
3703 case -NFS4ERR_DELEG_REVOKED:
3681 case -NFS4ERR_ADMIN_REVOKED: 3704 case -NFS4ERR_ADMIN_REVOKED:
3682 case -NFS4ERR_BAD_STATEID: 3705 case -NFS4ERR_BAD_STATEID:
3706 if (state != NULL)
3707 nfs_remove_bad_delegation(state->inode);
3683 case -NFS4ERR_OPENMODE: 3708 case -NFS4ERR_OPENMODE:
3684 if (state == NULL) 3709 if (state == NULL)
3685 break; 3710 break;
@@ -4402,7 +4427,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4402static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) 4427static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
4403{ 4428{
4404 struct nfs_server *server = NFS_SERVER(state->inode); 4429 struct nfs_server *server = NFS_SERVER(state->inode);
4405 struct nfs4_exception exception = { }; 4430 struct nfs4_exception exception = {
4431 .inode = state->inode,
4432 };
4406 int err; 4433 int err;
4407 4434
4408 do { 4435 do {
@@ -4420,7 +4447,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4420static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) 4447static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
4421{ 4448{
4422 struct nfs_server *server = NFS_SERVER(state->inode); 4449 struct nfs_server *server = NFS_SERVER(state->inode);
4423 struct nfs4_exception exception = { }; 4450 struct nfs4_exception exception = {
4451 .inode = state->inode,
4452 };
4424 int err; 4453 int err;
4425 4454
4426 err = nfs4_set_lock_state(state, request); 4455 err = nfs4_set_lock_state(state, request);
@@ -4484,7 +4513,10 @@ out:
4484 4513
4485static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4514static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4486{ 4515{
4487 struct nfs4_exception exception = { }; 4516 struct nfs4_exception exception = {
4517 .state = state,
4518 .inode = state->inode,
4519 };
4488 int err; 4520 int err;
4489 4521
4490 do { 4522 do {
@@ -4529,6 +4561,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
4529 4561
4530 if (state == NULL) 4562 if (state == NULL)
4531 return -ENOLCK; 4563 return -ENOLCK;
4564 /*
4565 * Don't rely on the VFS having checked the file open mode,
4566 * since it won't do this for flock() locks.
4567 */
4568 switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) {
4569 case F_RDLCK:
4570 if (!(filp->f_mode & FMODE_READ))
4571 return -EBADF;
4572 break;
4573 case F_WRLCK:
4574 if (!(filp->f_mode & FMODE_WRITE))
4575 return -EBADF;
4576 }
4577
4532 do { 4578 do {
4533 status = nfs4_proc_setlk(state, cmd, request); 4579 status = nfs4_proc_setlk(state, cmd, request);
4534 if ((status != -EAGAIN) || IS_SETLK(cmd)) 4580 if ((status != -EAGAIN) || IS_SETLK(cmd))
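The VFS checks the lock type against the file's open mode for POSIX locks but not for flock() locks, so the lock path has to repeat the check itself. A standalone reduction of the new switch; the FMODE_READ/FMODE_WRITE values are stand-ins, and the absence of a break after F_WRLCK mirrors the patch (F_UNLCK always falls through as permitted):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>

    #define FMODE_READ  0x1
    #define FMODE_WRITE 0x2

    static int check_lock_mode(int fl_type, unsigned f_mode)
    {
            switch (fl_type & (F_RDLCK | F_WRLCK | F_UNLCK)) {
            case F_RDLCK:
                    if (!(f_mode & FMODE_READ))
                            return -EBADF;
                    break;
            case F_WRLCK:
                    if (!(f_mode & FMODE_WRITE))
                            return -EBADF;
            }
            return 0;
    }

    int main(void)
    {
            printf("read lock on write-only fd: %d\n",
                   check_lock_mode(F_RDLCK, FMODE_WRITE));
            printf("write lock on read-write fd: %d\n",
                   check_lock_mode(F_WRLCK, FMODE_READ | FMODE_WRITE));
            return 0;
    }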
@@ -4577,6 +4623,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4577 * The show must go on: exit, but mark the 4623 * The show must go on: exit, but mark the
4578 * stateid as needing recovery. 4624 * stateid as needing recovery.
4579 */ 4625 */
4626 case -NFS4ERR_DELEG_REVOKED:
4580 case -NFS4ERR_ADMIN_REVOKED: 4627 case -NFS4ERR_ADMIN_REVOKED:
4581 case -NFS4ERR_BAD_STATEID: 4628 case -NFS4ERR_BAD_STATEID:
4582 case -NFS4ERR_OPENMODE: 4629 case -NFS4ERR_OPENMODE:
@@ -5850,9 +5897,15 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5850static void nfs4_layoutcommit_release(void *calldata) 5897static void nfs4_layoutcommit_release(void *calldata)
5851{ 5898{
5852 struct nfs4_layoutcommit_data *data = calldata; 5899 struct nfs4_layoutcommit_data *data = calldata;
5900 struct pnfs_layout_segment *lseg, *tmp;
5853 5901
5854 /* Matched by references in pnfs_set_layoutcommit */ 5902 /* Matched by references in pnfs_set_layoutcommit */
5855 put_lseg(data->lseg); 5903 list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
5904 list_del_init(&lseg->pls_lc_list);
5905 if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
5906 &lseg->pls_flags))
5907 put_lseg(lseg);
5908 }
5856 put_rpccred(data->cred); 5909 put_rpccred(data->cred);
5857 kfree(data); 5910 kfree(data);
5858} 5911}
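The release path now drops exactly one reference per listed segment, gated by test_and_clear_bit() so that a reference can never be put twice even if the flag is raced. The same own-the-flag-then-put idiom, modeled with C11 atomics in place of the kernel bitops (a sketch, not the kernel API):

    #include <stdatomic.h>
    #include <stdio.h>

    #define LSEG_LAYOUTCOMMIT (1u << 0)

    struct lseg {
            atomic_uint flags;
            atomic_int  refcount;
    };

    /* Drop the layoutcommit reference only if we owned the flag. */
    static void layoutcommit_release(struct lseg *l)
    {
            unsigned old = atomic_fetch_and(&l->flags, ~LSEG_LAYOUTCOMMIT);
            if (old & LSEG_LAYOUTCOMMIT)
                    atomic_fetch_sub(&l->refcount, 1);
    }

    int main(void)
    {
            struct lseg l;
            atomic_init(&l.flags, LSEG_LAYOUTCOMMIT);
            atomic_init(&l.refcount, 2);

            layoutcommit_release(&l);   /* drops the reference */
            layoutcommit_release(&l);   /* no-op: flag already clear */
            printf("refcount=%d\n", atomic_load(&l.refcount));  /* 1 */
            return 0;
    }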
@@ -6002,6 +6055,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6002 .dentry_ops = &nfs4_dentry_operations, 6055 .dentry_ops = &nfs4_dentry_operations,
6003 .dir_inode_ops = &nfs4_dir_inode_operations, 6056 .dir_inode_ops = &nfs4_dir_inode_operations,
6004 .file_inode_ops = &nfs4_file_inode_operations, 6057 .file_inode_ops = &nfs4_file_inode_operations,
6058 .file_ops = &nfs4_file_operations,
6005 .getroot = nfs4_proc_get_root, 6059 .getroot = nfs4_proc_get_root,
6006 .getattr = nfs4_proc_getattr, 6060 .getattr = nfs4_proc_getattr,
6007 .setattr = nfs4_proc_setattr, 6061 .setattr = nfs4_proc_setattr,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e97dd219f84..c6e2769f65b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1069,6 +1069,33 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
1069 nfs4_schedule_state_manager(clp); 1069 nfs4_schedule_state_manager(clp);
1070} 1070}
1071 1071
1072void nfs_inode_find_state_and_recover(struct inode *inode,
1073 const nfs4_stateid *stateid)
1074{
1075 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1076 struct nfs_inode *nfsi = NFS_I(inode);
1077 struct nfs_open_context *ctx;
1078 struct nfs4_state *state;
1079 bool found = false;
1080
1081 spin_lock(&inode->i_lock);
1082 list_for_each_entry(ctx, &nfsi->open_files, list) {
1083 state = ctx->state;
1084 if (state == NULL)
1085 continue;
1086 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
1087 continue;
1088 if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
1089 continue;
1090 nfs4_state_mark_reclaim_nograce(clp, state);
1091 found = true;
1092 }
1093 spin_unlock(&inode->i_lock);
1094 if (found)
1095 nfs4_schedule_state_manager(clp);
1096}
1097
1098
1072static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1099static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1073{ 1100{
1074 struct inode *inode = state->inode; 1101 struct inode *inode = state->inode;
@@ -1519,16 +1546,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1519{ 1546{
1520 if (!flags) 1547 if (!flags)
1521 return; 1548 return;
1522 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1549 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1523 nfs41_handle_server_reboot(clp); 1550 nfs41_handle_server_reboot(clp);
1524 else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1551 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1525 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | 1552 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
1526 SEQ4_STATUS_ADMIN_STATE_REVOKED | 1553 SEQ4_STATUS_ADMIN_STATE_REVOKED |
1527 SEQ4_STATUS_LEASE_MOVED)) 1554 SEQ4_STATUS_LEASE_MOVED))
1528 nfs41_handle_state_revoked(clp); 1555 nfs41_handle_state_revoked(clp);
1529 else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1556 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1530 nfs41_handle_recallable_state_revoked(clp); 1557 nfs41_handle_recallable_state_revoked(clp);
1531 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1558 if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1532 SEQ4_STATUS_BACKCHANNEL_FAULT | 1559 SEQ4_STATUS_BACKCHANNEL_FAULT |
1533 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1560 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1534 nfs41_handle_cb_path_down(clp); 1561 nfs41_handle_cb_path_down(clp);
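Converting the else-if chain to independent tests matters because a single SEQ4 reply may carry several status bits at once; previously only the first matching condition was handled and the rest were silently dropped. A reduced model of the difference (flag values and handler bodies are illustrative):

    #include <stdio.h>

    #define SEQ4_REBOOT  (1u << 0)
    #define SEQ4_REVOKED (1u << 1)
    #define SEQ4_CB_DOWN (1u << 2)

    static void handle_flags(unsigned flags)
    {
            if (!flags)
                    return;
            /* Independent tests: every reported condition is acted on. */
            if (flags & SEQ4_REBOOT)
                    printf("  recover from server reboot\n");
            if (flags & SEQ4_REVOKED)
                    printf("  recover revoked state\n");
            if (flags & SEQ4_CB_DOWN)
                    printf("  repair backchannel\n");
    }

    int main(void)
    {
            /* With else-if, the reboot branch would have eaten both bits. */
            handle_flags(SEQ4_REBOOT | SEQ4_CB_DOWN);
            return 0;
    }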
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6e8f3b9a1d..fc97fd5399a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1888,7 +1888,7 @@ encode_layoutcommit(struct xdr_stream *xdr,
1888 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); 1888 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
1889 /* Only whole file layouts */ 1889 /* Only whole file layouts */
1890 p = xdr_encode_hyper(p, 0); /* offset */ 1890 p = xdr_encode_hyper(p, 0); /* offset */
1891 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */ 1891 p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */
1892 *p++ = cpu_to_be32(0); /* reclaim */ 1892 *p++ = cpu_to_be32(0); /* reclaim */
1893 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); 1893 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
1894 *p++ = cpu_to_be32(1); /* newoffset = TRUE */ 1894 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3f10e..75fe694d78d 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write)
479 for (i = 0; i < ios->numdevs; i++) { 479 for (i = 0; i < ios->numdevs; i++) {
480 struct osd_sense_info osi; 480 struct osd_sense_info osi;
481 struct osd_request *or = ios->per_dev[i].or; 481 struct osd_request *or = ios->per_dev[i].or;
482 unsigned dev;
483 int ret; 482 int ret;
484 483
485 if (!or) 484 if (!or)
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write)
500 499
501 continue; /* we recovered */ 500 continue; /* we recovered */
502 } 501 }
503 dev = ios->per_dev[i].dev; 502 objlayout_io_set_result(&ios->ol_state, i,
504 objlayout_io_set_result(&ios->ol_state, dev, 503 &ios->layout->comps[i].oc_object_id,
505 &ios->layout->comps[dev].oc_object_id,
506 osd_pri_2_pnfs_err(osi.osd_err_pri), 504 osd_pri_2_pnfs_err(osi.osd_err_pri),
507 ios->per_dev[i].offset, 505 ios->per_dev[i].offset,
508 ios->per_dev[i].length, 506 ios->per_dev[i].length,
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
589} 587}
590 588
591static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, 589static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
592 unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, 590 unsigned pgbase, struct _objio_per_comp *per_dev, int len,
593 gfp_t gfp_flags) 591 gfp_t gfp_flags)
594{ 592{
595 unsigned pg = *cur_pg; 593 unsigned pg = *cur_pg;
594 int cur_len = len;
596 struct request_queue *q = 595 struct request_queue *q =
597 osd_request_queue(_io_od(ios, per_dev->dev)); 596 osd_request_queue(_io_od(ios, per_dev->dev));
598 597
599 per_dev->length += cur_len;
600
601 if (per_dev->bio == NULL) { 598 if (per_dev->bio == NULL) {
602 unsigned stripes = ios->layout->num_comps / 599 unsigned pages_in_stripe = ios->layout->group_width *
603 ios->layout->mirrors_p1;
604 unsigned pages_in_stripe = stripes *
605 (ios->layout->stripe_unit / PAGE_SIZE); 600 (ios->layout->stripe_unit / PAGE_SIZE);
606 unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / 601 unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
607 stripes; 602 ios->layout->group_width;
608 603
609 if (BIO_MAX_PAGES_KMALLOC < bio_size) 604 if (BIO_MAX_PAGES_KMALLOC < bio_size)
610 bio_size = BIO_MAX_PAGES_KMALLOC; 605 bio_size = BIO_MAX_PAGES_KMALLOC;
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
632 } 627 }
633 BUG_ON(cur_len); 628 BUG_ON(cur_len);
634 629
630 per_dev->length += len;
635 *cur_pg = pg; 631 *cur_pg = pg;
636 return 0; 632 return 0;
637} 633}
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
650 int ret = 0; 646 int ret = 0;
651 647
652 while (length) { 648 while (length) {
653 struct _objio_per_comp *per_dev = &ios->per_dev[dev]; 649 struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
654 unsigned cur_len, page_off = 0; 650 unsigned cur_len, page_off = 0;
655 651
656 if (!per_dev->length) { 652 if (!per_dev->length) {
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
670 cur_len = stripe_unit; 666 cur_len = stripe_unit;
671 } 667 }
672 668
673 if (max_comp < dev) 669 if (max_comp < dev - first_dev)
674 max_comp = dev; 670 max_comp = dev - first_dev;
675 } else { 671 } else {
676 cur_len = stripe_unit; 672 cur_len = stripe_unit;
677 } 673 }
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
806 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; 802 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
807 unsigned dev = per_dev->dev; 803 unsigned dev = per_dev->dev;
808 struct pnfs_osd_object_cred *cred = 804 struct pnfs_osd_object_cred *cred =
809 &ios->layout->comps[dev]; 805 &ios->layout->comps[cur_comp];
810 struct osd_obj_id obj = { 806 struct osd_obj_id obj = {
811 .partition = cred->oc_object_id.oid_partition_id, 807 .partition = cred->oc_object_id.oid_partition_id,
812 .id = cred->oc_object_id.oid_object_id, 808 .id = cred->oc_object_id.oid_object_id,
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
904 for (; cur_comp < last_comp; ++cur_comp, ++dev) { 900 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
905 struct osd_request *or = NULL; 901 struct osd_request *or = NULL;
906 struct pnfs_osd_object_cred *cred = 902 struct pnfs_osd_object_cred *cred =
907 &ios->layout->comps[dev]; 903 &ios->layout->comps[cur_comp];
908 struct osd_obj_id obj = { 904 struct osd_obj_id obj = {
909 .partition = cred->oc_object_id.oid_partition_id, 905 .partition = cred->oc_object_id.oid_partition_id,
910 .id = cred->oc_object_id.oid_object_id, 906 .id = cred->oc_object_id.oid_object_id,
@@ -1010,7 +1006,8 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
1010static struct pnfs_layoutdriver_type objlayout_type = { 1006static struct pnfs_layoutdriver_type objlayout_type = {
1011 .id = LAYOUT_OSD2_OBJECTS, 1007 .id = LAYOUT_OSD2_OBJECTS,
1012 .name = "LAYOUT_OSD2_OBJECTS", 1008 .name = "LAYOUT_OSD2_OBJECTS",
1013 .flags = PNFS_LAYOUTRET_ON_SETATTR, 1009 .flags = PNFS_LAYOUTRET_ON_SETATTR |
1010 PNFS_LAYOUTRET_ON_ERROR,
1014 1011
1015 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 1012 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
1016 .free_layout_hdr = objlayout_free_layout_hdr, 1013 .free_layout_hdr = objlayout_free_layout_hdr,
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 1d06f8e2ade..fefa1224aff 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -294,9 +294,11 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
294 dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); 294 dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
295 rdata = state->rpcdata; 295 rdata = state->rpcdata;
296 rdata->task.tk_status = status; 296 rdata->task.tk_status = status;
297 if (status >= 0) { 297 if (likely(status >= 0)) {
298 rdata->res.count = status; 298 rdata->res.count = status;
299 rdata->res.eof = eof; 299 rdata->res.eof = eof;
300 } else {
301 rdata->pnfs_error = status;
300 } 302 }
301 objlayout_iodone(state); 303 objlayout_iodone(state);
302 /* must not use state after this point */ 304 /* must not use state after this point */
@@ -380,15 +382,17 @@ objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
380 wdata = state->rpcdata; 382 wdata = state->rpcdata;
381 state->status = status; 383 state->status = status;
382 wdata->task.tk_status = status; 384 wdata->task.tk_status = status;
383 if (status >= 0) { 385 if (likely(status >= 0)) {
384 wdata->res.count = status; 386 wdata->res.count = status;
385 wdata->verf.committed = state->committed; 387 wdata->verf.committed = state->committed;
386 dprintk("%s: Return status %d committed %d\n", 388 dprintk("%s: Return status %d committed %d\n",
387 __func__, wdata->task.tk_status, 389 __func__, wdata->task.tk_status,
388 wdata->verf.committed); 390 wdata->verf.committed);
389 } else 391 } else {
392 wdata->pnfs_error = status;
390 dprintk("%s: Return status %d\n", 393 dprintk("%s: Return status %d\n",
391 __func__, wdata->task.tk_status); 394 __func__, wdata->task.tk_status);
395 }
392 objlayout_iodone(state); 396 objlayout_iodone(state);
393 /* must not use state after this point */ 397 /* must not use state after this point */
394 398
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index 16fc758e912..b3918f7ac34 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
170 p = _osd_xdr_decode_data_map(p, &layout->olo_map); 170 p = _osd_xdr_decode_data_map(p, &layout->olo_map);
171 layout->olo_comps_index = be32_to_cpup(p++); 171 layout->olo_comps_index = be32_to_cpup(p++);
172 layout->olo_num_comps = be32_to_cpup(p++); 172 layout->olo_num_comps = be32_to_cpup(p++);
173 dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
174 layout->olo_comps_index, layout->olo_num_comps);
175
173 iter->total_comps = layout->olo_num_comps; 176 iter->total_comps = layout->olo_num_comps;
174 return 0; 177 return 0;
175} 178}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 29c0ca7fc34..99518872f42 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -189,6 +189,7 @@ static void
189pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 189pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
190{ 190{
191 struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; 191 struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
192 put_rpccred(lo->plh_lc_cred);
192 return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); 193 return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
193} 194}
194 195
@@ -223,6 +224,7 @@ static void
223init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) 224init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
224{ 225{
225 INIT_LIST_HEAD(&lseg->pls_list); 226 INIT_LIST_HEAD(&lseg->pls_list);
227 INIT_LIST_HEAD(&lseg->pls_lc_list);
226 atomic_set(&lseg->pls_refcount, 1); 228 atomic_set(&lseg->pls_refcount, 1);
227 smp_mb(); 229 smp_mb();
228 set_bit(NFS_LSEG_VALID, &lseg->pls_flags); 230 set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
@@ -805,7 +807,9 @@ out:
805} 807}
806 808
807static struct pnfs_layout_hdr * 809static struct pnfs_layout_hdr *
808alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) 810alloc_init_layout_hdr(struct inode *ino,
811 struct nfs_open_context *ctx,
812 gfp_t gfp_flags)
809{ 813{
810 struct pnfs_layout_hdr *lo; 814 struct pnfs_layout_hdr *lo;
811 815
@@ -817,11 +821,14 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
817 INIT_LIST_HEAD(&lo->plh_segs); 821 INIT_LIST_HEAD(&lo->plh_segs);
818 INIT_LIST_HEAD(&lo->plh_bulk_recall); 822 INIT_LIST_HEAD(&lo->plh_bulk_recall);
819 lo->plh_inode = ino; 823 lo->plh_inode = ino;
824 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
820 return lo; 825 return lo;
821} 826}
822 827
823static struct pnfs_layout_hdr * 828static struct pnfs_layout_hdr *
824pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) 829pnfs_find_alloc_layout(struct inode *ino,
830 struct nfs_open_context *ctx,
831 gfp_t gfp_flags)
825{ 832{
826 struct nfs_inode *nfsi = NFS_I(ino); 833 struct nfs_inode *nfsi = NFS_I(ino);
827 struct pnfs_layout_hdr *new = NULL; 834 struct pnfs_layout_hdr *new = NULL;
@@ -836,7 +843,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
836 return nfsi->layout; 843 return nfsi->layout;
837 } 844 }
838 spin_unlock(&ino->i_lock); 845 spin_unlock(&ino->i_lock);
839 new = alloc_init_layout_hdr(ino, gfp_flags); 846 new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
840 spin_lock(&ino->i_lock); 847 spin_lock(&ino->i_lock);
841 848
842 if (likely(nfsi->layout == NULL)) /* Won the race? */ 849 if (likely(nfsi->layout == NULL)) /* Won the race? */
@@ -928,7 +935,7 @@ pnfs_update_layout(struct inode *ino,
928 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 935 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
929 return NULL; 936 return NULL;
930 spin_lock(&ino->i_lock); 937 spin_lock(&ino->i_lock);
931 lo = pnfs_find_alloc_layout(ino, gfp_flags); 938 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
932 if (lo == NULL) { 939 if (lo == NULL) {
933 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); 940 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
934 goto out_unlock; 941 goto out_unlock;
@@ -973,7 +980,8 @@ pnfs_update_layout(struct inode *ino,
973 arg.offset -= pg_offset; 980 arg.offset -= pg_offset;
974 arg.length += pg_offset; 981 arg.length += pg_offset;
975 } 982 }
976 arg.length = PAGE_CACHE_ALIGN(arg.length); 983 if (arg.length != NFS4_MAX_UINT64)
984 arg.length = PAGE_CACHE_ALIGN(arg.length);
977 985
978 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 986 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
979 if (!lseg && first) { 987 if (!lseg && first) {
@@ -1111,6 +1119,14 @@ pnfs_ld_write_done(struct nfs_write_data *data)
1111 data->mds_ops->rpc_release(data); 1119 data->mds_ops->rpc_release(data);
1112 return 0; 1120 return 0;
1113 } 1121 }
1122 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
1123 PNFS_LAYOUTRET_ON_ERROR) {
1124 /* Don't layoutcommit on error; the server will need
1125 * to perform file recovery.
1126 */
1127 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
1128 pnfs_return_layout(data->inode);
1129 }
1114 1130
1115 dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, 1131 dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
1116 data->pnfs_error); 1132 data->pnfs_error);
@@ -1159,6 +1175,10 @@ pnfs_ld_read_done(struct nfs_read_data *data)
1159 return 0; 1175 return 0;
1160 } 1176 }
1161 1177
1178 if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
1179 PNFS_LAYOUTRET_ON_ERROR)
1180 pnfs_return_layout(data->inode);
1181
1162 dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, 1182 dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
1163 data->pnfs_error); 1183 data->pnfs_error);
1164 status = nfs_initiate_read(data, NFS_CLIENT(data->inode), 1184 status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
@@ -1195,16 +1215,17 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1195} 1215}
1196 1216
1197/* 1217/*
1198 * Currently there is only one (whole file) write lseg. 1218 * There can be multiple RW segments.
1199 */ 1219 */
1200static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) 1220static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1201{ 1221{
1202 struct pnfs_layout_segment *lseg, *rv = NULL; 1222 struct pnfs_layout_segment *lseg;
1203 1223
1204 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) 1224 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
1205 if (lseg->pls_range.iomode == IOMODE_RW) 1225 if (lseg->pls_range.iomode == IOMODE_RW &&
1206 rv = lseg; 1226 test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
1207 return rv; 1227 list_add(&lseg->pls_lc_list, listp);
1228 }
1208} 1229}
1209 1230
1210void 1231void
@@ -1216,17 +1237,19 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1216 1237
1217 spin_lock(&nfsi->vfs_inode.i_lock); 1238 spin_lock(&nfsi->vfs_inode.i_lock);
1218 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1239 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1219 /* references matched in nfs4_layoutcommit_release */
1220 get_lseg(wdata->lseg);
1221 wdata->lseg->pls_lc_cred =
1222 get_rpccred(wdata->args.context->state->owner->so_cred);
1223 mark_as_dirty = true; 1240 mark_as_dirty = true;
1224 dprintk("%s: Set layoutcommit for inode %lu ", 1241 dprintk("%s: Set layoutcommit for inode %lu ",
1225 __func__, wdata->inode->i_ino); 1242 __func__, wdata->inode->i_ino);
1226 } 1243 }
1227 if (end_pos > wdata->lseg->pls_end_pos) 1244 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
1228 wdata->lseg->pls_end_pos = end_pos; 1245 /* references matched in nfs4_layoutcommit_release */
1246 get_lseg(wdata->lseg);
1247 }
1248 if (end_pos > nfsi->layout->plh_lwb)
1249 nfsi->layout->plh_lwb = end_pos;
1229 spin_unlock(&nfsi->vfs_inode.i_lock); 1250 spin_unlock(&nfsi->vfs_inode.i_lock);
1251 dprintk("%s: lseg %p end_pos %llu\n",
1252 __func__, wdata->lseg, nfsi->layout->plh_lwb);
1230 1253
1231 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one 1254 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1232 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ 1255 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
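The per-lseg pls_end_pos becomes a single plh_lwb ("last write byte") watermark on the layout header: writes only ever push it forward under the inode lock, and pnfs_layoutcommit_inode() consumes and resets it when building the LAYOUTCOMMIT arguments. The accounting in isolation, with locking elided (the plh_lwb name follows the patch; the helper functions are illustrative):

    #include <stdio.h>

    struct layout_hdr { long long plh_lwb; };

    /* Record the furthest byte written under this layout. */
    static void set_layoutcommit(struct layout_hdr *lo, long long end_pos)
    {
            if (end_pos > lo->plh_lwb)
                    lo->plh_lwb = end_pos;
    }

    /* Consume the watermark when sending a LAYOUTCOMMIT. */
    static long long layoutcommit_take(struct layout_hdr *lo)
    {
            long long end_pos = lo->plh_lwb;

            lo->plh_lwb = 0;
            return end_pos;
    }

    int main(void)
    {
            struct layout_hdr lo = { 0 };

            set_layoutcommit(&lo, 4096);
            set_layoutcommit(&lo, 1024);          /* does not regress */
            printf("end_pos=%lld\n", layoutcommit_take(&lo));
            return 0;
    }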
@@ -1248,8 +1271,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1248{ 1271{
1249 struct nfs4_layoutcommit_data *data; 1272 struct nfs4_layoutcommit_data *data;
1250 struct nfs_inode *nfsi = NFS_I(inode); 1273 struct nfs_inode *nfsi = NFS_I(inode);
1251 struct pnfs_layout_segment *lseg;
1252 struct rpc_cred *cred;
1253 loff_t end_pos; 1274 loff_t end_pos;
1254 int status = 0; 1275 int status = 0;
1255 1276
@@ -1266,30 +1287,25 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1266 goto out; 1287 goto out;
1267 } 1288 }
1268 1289
1290 INIT_LIST_HEAD(&data->lseg_list);
1269 spin_lock(&inode->i_lock); 1291 spin_lock(&inode->i_lock);
1270 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1292 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1271 spin_unlock(&inode->i_lock); 1293 spin_unlock(&inode->i_lock);
1272 kfree(data); 1294 kfree(data);
1273 goto out; 1295 goto out;
1274 } 1296 }
1275 /*
1276 * Currently only one (whole file) write lseg which is referenced
1277 * in pnfs_set_layoutcommit and will be found.
1278 */
1279 lseg = pnfs_list_write_lseg(inode);
1280 1297
1281 end_pos = lseg->pls_end_pos; 1298 pnfs_list_write_lseg(inode, &data->lseg_list);
1282 cred = lseg->pls_lc_cred; 1299
1283 lseg->pls_end_pos = 0; 1300 end_pos = nfsi->layout->plh_lwb;
1284 lseg->pls_lc_cred = NULL; 1301 nfsi->layout->plh_lwb = 0;
1285 1302
1286 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, 1303 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
1287 sizeof(nfsi->layout->plh_stateid.data)); 1304 sizeof(nfsi->layout->plh_stateid.data));
1288 spin_unlock(&inode->i_lock); 1305 spin_unlock(&inode->i_lock);
1289 1306
1290 data->args.inode = inode; 1307 data->args.inode = inode;
1291 data->lseg = lseg; 1308 data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
1292 data->cred = cred;
1293 nfs_fattr_init(&data->fattr); 1309 nfs_fattr_init(&data->fattr);
1294 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; 1310 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1295 data->res.fattr = &data->fattr; 1311 data->res.fattr = &data->fattr;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96bf4e6f45b..bb8b3247f29 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -36,16 +36,16 @@
36enum { 36enum {
37 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 37 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
38 NFS_LSEG_ROC, /* roc bit received from server */ 38 NFS_LSEG_ROC, /* roc bit received from server */
39 NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */
39}; 40};
40 41
41struct pnfs_layout_segment { 42struct pnfs_layout_segment {
42 struct list_head pls_list; 43 struct list_head pls_list;
44 struct list_head pls_lc_list;
43 struct pnfs_layout_range pls_range; 45 struct pnfs_layout_range pls_range;
44 atomic_t pls_refcount; 46 atomic_t pls_refcount;
45 unsigned long pls_flags; 47 unsigned long pls_flags;
46 struct pnfs_layout_hdr *pls_layout; 48 struct pnfs_layout_hdr *pls_layout;
47 struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */
48 loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
49}; 49};
50 50
51enum pnfs_try_status { 51enum pnfs_try_status {
@@ -68,6 +68,7 @@ enum {
68enum layoutdriver_policy_flags { 68enum layoutdriver_policy_flags {
69 /* Should the pNFS client commit and return the layout upon a setattr */ 69 /* Should the pNFS client commit and return the layout upon a setattr */
70 PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, 70 PNFS_LAYOUTRET_ON_SETATTR = 1 << 0,
71 PNFS_LAYOUTRET_ON_ERROR = 1 << 1,
71}; 72};
72 73
73struct nfs4_deviceid_node; 74struct nfs4_deviceid_node;
@@ -124,6 +125,8 @@ struct pnfs_layout_hdr {
124 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ 125 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
125 u32 plh_barrier; /* ignore lower seqids */ 126 u32 plh_barrier; /* ignore lower seqids */
126 unsigned long plh_flags; 127 unsigned long plh_flags;
128 loff_t plh_lwb; /* last write byte for layoutcommit */
129 struct rpc_cred *plh_lc_cred; /* layoutcommit cred */
127 struct inode *plh_inode; 130 struct inode *plh_inode;
128}; 131};
129 132
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b8535d7..f48125da198 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
710 .dentry_ops = &nfs_dentry_operations, 710 .dentry_ops = &nfs_dentry_operations,
711 .dir_inode_ops = &nfs_dir_inode_operations, 711 .dir_inode_ops = &nfs_dir_inode_operations,
712 .file_inode_ops = &nfs_file_inode_operations, 712 .file_inode_ops = &nfs_file_inode_operations,
713 .file_ops = &nfs_file_operations,
713 .getroot = nfs_proc_get_root, 714 .getroot = nfs_proc_get_root,
714 .getattr = nfs_proc_getattr, 715 .getattr = nfs_proc_getattr,
715 .setattr = nfs_proc_setattr, 716 .setattr = nfs_proc_setattr,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce40e5c568b..8e7b61d5829 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -904,10 +904,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
904 data->auth_flavor_len = 1; 904 data->auth_flavor_len = 1;
905 data->version = version; 905 data->version = version;
906 data->minorversion = 0; 906 data->minorversion = 0;
907 security_init_mnt_opts(&data->lsm_opts);
907 } 908 }
908 return data; 909 return data;
909} 910}
910 911
912static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
913{
914 if (data) {
915 kfree(data->client_address);
916 kfree(data->mount_server.hostname);
917 kfree(data->nfs_server.export_path);
918 kfree(data->nfs_server.hostname);
919 kfree(data->fscache_uniq);
920 security_free_mnt_opts(&data->lsm_opts);
921 kfree(data);
922 }
923}
924
911/* 925/*
912 * Sanity-check a server address provided by the mount command. 926 * Sanity-check a server address provided by the mount command.
913 * 927 *
@@ -2218,9 +2232,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2218 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); 2232 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2219 mntfh = nfs_alloc_fhandle(); 2233 mntfh = nfs_alloc_fhandle();
2220 if (data == NULL || mntfh == NULL) 2234 if (data == NULL || mntfh == NULL)
2221 goto out_free_fh; 2235 goto out;
2222
2223 security_init_mnt_opts(&data->lsm_opts);
2224 2236
2225 /* Validate the mount data */ 2237 /* Validate the mount data */
2226 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); 2238 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
@@ -2232,8 +2244,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2232#ifdef CONFIG_NFS_V4 2244#ifdef CONFIG_NFS_V4
2233 if (data->version == 4) { 2245 if (data->version == 4) {
2234 mntroot = nfs4_try_mount(flags, dev_name, data); 2246 mntroot = nfs4_try_mount(flags, dev_name, data);
2235 kfree(data->client_address);
2236 kfree(data->nfs_server.export_path);
2237 goto out; 2247 goto out;
2238 } 2248 }
2239#endif /* CONFIG_NFS_V4 */ 2249#endif /* CONFIG_NFS_V4 */
@@ -2284,13 +2294,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2284 s->s_flags |= MS_ACTIVE; 2294 s->s_flags |= MS_ACTIVE;
2285 2295
2286out: 2296out:
2287 kfree(data->nfs_server.hostname); 2297 nfs_free_parsed_mount_data(data);
2288 kfree(data->mount_server.hostname);
2289 kfree(data->fscache_uniq);
2290 security_free_mnt_opts(&data->lsm_opts);
2291out_free_fh:
2292 nfs_free_fhandle(mntfh); 2298 nfs_free_fhandle(mntfh);
2293 kfree(data);
2294 return mntroot; 2299 return mntroot;
2295 2300
2296out_err_nosb: 2301out_err_nosb:
@@ -2613,9 +2618,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2613 2618
2614 mntfh = nfs_alloc_fhandle(); 2619 mntfh = nfs_alloc_fhandle();
2615 if (data == NULL || mntfh == NULL) 2620 if (data == NULL || mntfh == NULL)
2616 goto out_free_fh; 2621 goto out;
2617
2618 security_init_mnt_opts(&data->lsm_opts);
2619 2622
2620 /* Get a volume representation */ 2623 /* Get a volume representation */
2621 server = nfs4_create_server(data, mntfh); 2624 server = nfs4_create_server(data, mntfh);
@@ -2663,13 +2666,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2663 2666
2664 s->s_flags |= MS_ACTIVE; 2667 s->s_flags |= MS_ACTIVE;
2665 2668
2666 security_free_mnt_opts(&data->lsm_opts);
2667 nfs_free_fhandle(mntfh); 2669 nfs_free_fhandle(mntfh);
2668 return mntroot; 2670 return mntroot;
2669 2671
2670out: 2672out:
2671 security_free_mnt_opts(&data->lsm_opts);
2672out_free_fh:
2673 nfs_free_fhandle(mntfh); 2673 nfs_free_fhandle(mntfh);
2674 return ERR_PTR(error); 2674 return ERR_PTR(error);
2675 2675
@@ -2694,11 +2694,15 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2694 char *root_devname; 2694 char *root_devname;
2695 size_t len; 2695 size_t len;
2696 2696
2697 len = strlen(hostname) + 3; 2697 len = strlen(hostname) + 5;
2698 root_devname = kmalloc(len, GFP_KERNEL); 2698 root_devname = kmalloc(len, GFP_KERNEL);
2699 if (root_devname == NULL) 2699 if (root_devname == NULL)
2700 return ERR_PTR(-ENOMEM); 2700 return ERR_PTR(-ENOMEM);
2701 snprintf(root_devname, len, "%s:/", hostname); 2701 /* Does hostname needs to be enclosed in brackets? */
2702 if (strchr(hostname, ':'))
2703 snprintf(root_devname, len, "[%s]:/", hostname);
2704 else
2705 snprintf(root_devname, len, "%s:/", hostname);
2702 root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); 2706 root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
2703 kfree(root_devname); 2707 kfree(root_devname);
2704 return root_mnt; 2708 return root_mnt;
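Bracketing is needed because an IPv6 server address itself contains ':', which would otherwise be ambiguous with the host/path separator in the devname; the length also grows from +3 to +5 to make room for the two brackets. A standalone version of the formatting:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Build "host:/" or "[host]:/" as the mount path does. */
    static char *root_devname(const char *hostname)
    {
            size_t len = strlen(hostname) + 5;  /* "[", "]", ":", "/", NUL */
            char *buf = malloc(len);

            if (!buf)
                    return NULL;
            if (strchr(hostname, ':'))
                    snprintf(buf, len, "[%s]:/", hostname);
            else
                    snprintf(buf, len, "%s:/", hostname);
            return buf;
    }

    int main(void)
    {
            char *a = root_devname("server.example.com");
            char *b = root_devname("fd00::1");

            printf("%s\n%s\n", a, b);
            free(a);
            free(b);
            return 0;
    }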
@@ -2793,7 +2797,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2793 goto out_put_mnt_ns; 2797 goto out_put_mnt_ns;
2794 2798
2795 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2799 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2796 export_path, LOOKUP_FOLLOW, nd); 2800 export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, nd);
2797 2801
2798 nfs_referral_loop_unprotect(); 2802 nfs_referral_loop_unprotect();
2799 put_mnt_ns(ns_private); 2803 put_mnt_ns(ns_private);
@@ -2855,7 +2859,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2855 2859
2856 data = nfs_alloc_parsed_mount_data(4); 2860 data = nfs_alloc_parsed_mount_data(4);
2857 if (data == NULL) 2861 if (data == NULL)
2858 goto out_free_data; 2862 goto out;
2859 2863
2860 /* Validate the mount data */ 2864 /* Validate the mount data */
2861 error = nfs4_validate_mount_data(raw_data, data, dev_name); 2865 error = nfs4_validate_mount_data(raw_data, data, dev_name);
@@ -2869,12 +2873,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2869 error = PTR_ERR(res); 2873 error = PTR_ERR(res);
2870 2874
2871out: 2875out:
2872 kfree(data->client_address); 2876 nfs_free_parsed_mount_data(data);
2873 kfree(data->nfs_server.export_path);
2874 kfree(data->nfs_server.hostname);
2875 kfree(data->fscache_uniq);
2876out_free_data:
2877 kfree(data);
2878 dprintk("<-- nfs4_mount() = %d%s\n", error, 2877 dprintk("<-- nfs4_mount() = %d%s\n", error,
2879 error != 0 ? " [error]" : ""); 2878 error != 0 ? " [error]" : "");
2880 return res; 2879 return res;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 72716805968..d5ef7e933b7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -428,7 +428,6 @@ static void
428nfs_mark_request_dirty(struct nfs_page *req) 428nfs_mark_request_dirty(struct nfs_page *req)
429{ 429{
430 __set_page_dirty_nobuffers(req->wb_page); 430 __set_page_dirty_nobuffers(req->wb_page);
431 __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
432} 431}
433 432
434#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 433#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -762,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page,
762 status = nfs_writepage_setup(ctx, page, offset, count); 761 status = nfs_writepage_setup(ctx, page, offset, count);
763 if (status < 0) 762 if (status < 0)
764 nfs_set_pageerror(page); 763 nfs_set_pageerror(page);
764 else
765 __set_page_dirty_nobuffers(page);
765 766
766 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", 767 dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
767 status, (long long)i_size_read(inode)); 768 status, (long long)i_size_read(inode));
@@ -1525,6 +1526,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1525 int flags = FLUSH_SYNC; 1526 int flags = FLUSH_SYNC;
1526 int ret = 0; 1527 int ret = 0;
1527 1528
1529 /* no commits means nothing needs to be done */
1530 if (!nfsi->ncommit)
1531 return ret;
1532
1528 if (wbc->sync_mode == WB_SYNC_NONE) { 1533 if (wbc->sync_mode == WB_SYNC_NONE) {
1529 /* Don't commit yet if this is a non-blocking flush and there 1534 /* Don't commit yet if this is a non-blocking flush and there
1530 * are a lot of outstanding writes for this mapping. 1535 * are a lot of outstanding writes for this mapping.
@@ -1566,8 +1571,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1566 int status; 1571 int status;
1567 bool sync = true; 1572 bool sync = true;
1568 1573
1569 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || 1574 if (wbc->sync_mode == WB_SYNC_NONE || wbc->for_background)
1570 wbc->for_background)
1571 sync = false; 1575 sync = false;
1572 1576
1573 status = pnfs_layoutcommit_inode(inode, sync); 1577 status = pnfs_layoutcommit_inode(inode, sync);
@@ -1657,36 +1661,22 @@ out_error:
1657 1661
1658#ifdef CONFIG_MIGRATION 1662#ifdef CONFIG_MIGRATION
1659int nfs_migrate_page(struct address_space *mapping, struct page *newpage, 1663int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1660 struct page *page) 1664 struct page *page, enum migrate_mode mode)
1661{ 1665{
1662 struct nfs_page *req; 1666 /*
1663 int ret; 1667 * If PagePrivate is set, then the page is currently associated with
1668 * an in-progress read or write request. Don't try to migrate it.
1669 *
1670 * FIXME: we could do this in principle, but we'll need a way to ensure
1671 * that we can safely release the inode reference while holding
1672 * the page lock.
1673 */
1674 if (PagePrivate(page))
1675 return -EBUSY;
1664 1676
1665 nfs_fscache_release_page(page, GFP_KERNEL); 1677 nfs_fscache_release_page(page, GFP_KERNEL);
1666 1678
1667 req = nfs_find_and_lock_request(page, false); 1679 return migrate_page(mapping, newpage, page, mode);
1668 ret = PTR_ERR(req);
1669 if (IS_ERR(req))
1670 goto out;
1671
1672 ret = migrate_page(mapping, newpage, page);
1673 if (!req)
1674 goto out;
1675 if (ret)
1676 goto out_unlock;
1677 page_cache_get(newpage);
1678 spin_lock(&mapping->host->i_lock);
1679 req->wb_page = newpage;
1680 SetPagePrivate(newpage);
1681 set_page_private(newpage, (unsigned long)req);
1682 ClearPagePrivate(page);
1683 set_page_private(page, 0);
1684 spin_unlock(&mapping->host->i_lock);
1685 page_cache_release(page);
1686out_unlock:
1687 nfs_clear_page_tag_locked(req);
1688out:
1689 return ret;
1690} 1680}
1691#endif 1681#endif
1692 1682
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b9566e46219..4b470f6043e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -88,7 +88,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
88 struct svc_expkey key; 88 struct svc_expkey key;
89 struct svc_expkey *ek = NULL; 89 struct svc_expkey *ek = NULL;
90 90
91 if (mesg[mlen-1] != '\n') 91 if (mlen < 1 || mesg[mlen-1] != '\n')
92 return -EINVAL; 92 return -EINVAL;
93 mesg[mlen-1] = 0; 93 mesg[mlen-1] = 0;
94 94
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 08c6e36ab2e..43f46cd9ede 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -803,13 +803,13 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
803 return p; 803 return p;
804} 804}
805 805
806static int 806static __be32
807compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, 807compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
808 const char *name, int namlen) 808 const char *name, int namlen)
809{ 809{
810 struct svc_export *exp; 810 struct svc_export *exp;
811 struct dentry *dparent, *dchild; 811 struct dentry *dparent, *dchild;
812 int rv = 0; 812 __be32 rv = nfserr_noent;
813 813
814 dparent = cd->fh.fh_dentry; 814 dparent = cd->fh.fh_dentry;
815 exp = cd->fh.fh_export; 815 exp = cd->fh.fh_export;
@@ -817,26 +817,20 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
817 if (isdotent(name, namlen)) { 817 if (isdotent(name, namlen)) {
818 if (namlen == 2) { 818 if (namlen == 2) {
819 dchild = dget_parent(dparent); 819 dchild = dget_parent(dparent);
820 if (dchild == dparent) { 820 /* filesystem root - cannot return filehandle for ".." */
821 /* filesystem root - cannot return filehandle for ".." */ 821 if (dchild == dparent)
822 dput(dchild); 822 goto out;
823 return -ENOENT;
824 }
825 } else 823 } else
826 dchild = dget(dparent); 824 dchild = dget(dparent);
827 } else 825 } else
828 dchild = lookup_one_len(name, dparent, namlen); 826 dchild = lookup_one_len(name, dparent, namlen);
829 if (IS_ERR(dchild)) 827 if (IS_ERR(dchild))
830 return -ENOENT; 828 return rv;
831 rv = -ENOENT;
832 if (d_mountpoint(dchild)) 829 if (d_mountpoint(dchild))
833 goto out; 830 goto out;
834 rv = fh_compose(fhp, exp, dchild, &cd->fh);
835 if (rv)
836 goto out;
837 if (!dchild->d_inode) 831 if (!dchild->d_inode)
838 goto out; 832 goto out;
839 rv = 0; 833 rv = fh_compose(fhp, exp, dchild, &cd->fh);
840out: 834out:
841 dput(dchild); 835 dput(dchild);
842 return rv; 836 return rv;
@@ -845,7 +839,7 @@ out:
845static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) 839static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
846{ 840{
847 struct svc_fh fh; 841 struct svc_fh fh;
848 int err; 842 __be32 err;
849 843
850 fh_init(&fh, NFS3_FHSIZE); 844 fh_init(&fh, NFS3_FHSIZE);
851 err = compose_entry_fh(cd, &fh, name, namlen); 845 err = compose_entry_fh(cd, &fh, name, namlen);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3a6dbd70b34..d06a02c1b1a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -156,6 +156,8 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
156 !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) 156 !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
157 return nfserr_inval; 157 return nfserr_inval;
158 158
159 accmode |= NFSD_MAY_READ_IF_EXEC;
160
159 if (open->op_share_access & NFS4_SHARE_ACCESS_READ) 161 if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
160 accmode |= NFSD_MAY_READ; 162 accmode |= NFSD_MAY_READ;
161 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 163 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
@@ -682,7 +684,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
682 readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); 684 readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion);
683 readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); 685 readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion);
684 686
685 if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || 687 if ((cookie == 1) || (cookie == 2) ||
686 (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) 688 (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
687 return nfserr_bad_cookie; 689 return nfserr_bad_cookie;
688 690
@@ -810,6 +812,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
810 struct nfsd4_setattr *setattr) 812 struct nfsd4_setattr *setattr)
811{ 813{
812 __be32 status = nfs_ok; 814 __be32 status = nfs_ok;
815 int err;
813 816
814 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { 817 if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
815 nfs4_lock_state(); 818 nfs4_lock_state();
@@ -821,9 +824,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
821 return status; 824 return status;
822 } 825 }
823 } 826 }
824 status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt); 827 err = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
825 if (status) 828 if (err)
826 return status; 829 return nfserrno(err);
827 status = nfs_ok; 830 status = nfs_ok;
828 831
829 status = check_attr_support(rqstp, cstate, setattr->sa_bmval, 832 status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
@@ -921,7 +924,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
921 count = 4 + (verify->ve_attrlen >> 2); 924 count = 4 + (verify->ve_attrlen >> 2);
922 buf = kmalloc(count << 2, GFP_KERNEL); 925 buf = kmalloc(count << 2, GFP_KERNEL);
923 if (!buf) 926 if (!buf)
924 return nfserr_resource; 927 return nfserr_jukebox;
925 928
926 status = nfsd4_encode_fattr(&cstate->current_fh, 929 status = nfsd4_encode_fattr(&cstate->current_fh,
927 cstate->current_fh.fh_export, 930 cstate->current_fh.fh_export,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ffb59ef6f82..be268148170 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -88,7 +88,7 @@ nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
88 struct xdr_netobj cksum; 88 struct xdr_netobj cksum;
89 struct hash_desc desc; 89 struct hash_desc desc;
90 struct scatterlist sg; 90 struct scatterlist sg;
91 __be32 status = nfserr_resource; 91 __be32 status = nfserr_jukebox;
92 92
93 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", 93 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
94 clname->len, clname->data); 94 clname->len, clname->data);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e98f3c2e949..92f7eb7c586 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -188,8 +188,15 @@ static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
188static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) 188static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
189{ 189{
190 if (atomic_dec_and_test(&fp->fi_access[oflag])) { 190 if (atomic_dec_and_test(&fp->fi_access[oflag])) {
191 nfs4_file_put_fd(fp, O_RDWR);
192 nfs4_file_put_fd(fp, oflag); 191 nfs4_file_put_fd(fp, oflag);
192 /*
193 * It's also safe to get rid of the RDWR open *if*
194 * we no longer have need of the other kind of access
195 * or if we already have the other kind of open:
196 */
197 if (fp->fi_fds[1-oflag]
198 || atomic_read(&fp->fi_access[1 - oflag]) == 0)
199 nfs4_file_put_fd(fp, O_RDWR);
193 } 200 }
194} 201}
195 202
@@ -381,14 +388,6 @@ static int nfs4_access_to_omode(u32 access)
381 BUG(); 388 BUG();
382} 389}
383 390
384static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp)
385{
386 unsigned int access;
387
388 set_access(&access, stp->st_access_bmap);
389 return nfs4_access_to_omode(access);
390}
391
392static void unhash_generic_stateid(struct nfs4_stateid *stp) 391static void unhash_generic_stateid(struct nfs4_stateid *stp)
393{ 392{
394 list_del(&stp->st_hash); 393 list_del(&stp->st_hash);
@@ -398,11 +397,14 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp)
398 397
399static void free_generic_stateid(struct nfs4_stateid *stp) 398static void free_generic_stateid(struct nfs4_stateid *stp)
400{ 399{
401 int oflag; 400 int i;
402 401
403 if (stp->st_access_bmap) { 402 if (stp->st_access_bmap) {
404 oflag = nfs4_access_bmap_to_omode(stp); 403 for (i = 1; i < 4; i++) {
405 nfs4_file_put_access(stp->st_file, oflag); 404 if (test_bit(i, &stp->st_access_bmap))
405 nfs4_file_put_access(stp->st_file,
406 nfs4_access_to_omode(i));
407 }
406 } 408 }
407 put_nfs4_file(stp->st_file); 409 put_nfs4_file(stp->st_file);
408 kmem_cache_free(stateid_slab, stp); 410 kmem_cache_free(stateid_slab, stp);
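The old code collapsed st_access_bmap into a single open mode and put it once, leaking a reference whenever a stateid held READ and WRITE as two separate bits rather than the combined BOTH bit. Looping over bits 1..3 puts each held access individually. A self-contained model, assuming the NFS4_SHARE_ACCESS_{READ,WRITE,BOTH} = 1,2,3 encoding implied by the patch:

    #include <stdio.h>

    enum { ACCESS_READ = 1, ACCESS_WRITE = 2, ACCESS_BOTH = 3 };

    static const char *omode_name(int access)
    {
            switch (access) {
            case ACCESS_READ:  return "O_RDONLY";
            case ACCESS_WRITE: return "O_WRONLY";
            default:           return "O_RDWR";
            }
    }

    /* Put one file-access reference per bit held in the bitmap. */
    static void put_all_access(unsigned long access_bmap)
    {
            int i;

            for (i = 1; i < 4; i++)
                    if (access_bmap & (1ul << i))
                            printf("  put %s access\n", omode_name(i));
    }

    int main(void)
    {
            /* READ and WRITE taken separately: two puts are owed. */
            put_all_access((1ul << ACCESS_READ) | (1ul << ACCESS_WRITE));
            return 0;
    }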
@@ -1908,7 +1910,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1908 * of 5 bullet points, labeled as CASE0 - CASE4 below. 1910 * of 5 bullet points, labeled as CASE0 - CASE4 below.
1909 */ 1911 */
1910 unconf = find_unconfirmed_client_by_str(dname, strhashval); 1912 unconf = find_unconfirmed_client_by_str(dname, strhashval);
1911 status = nfserr_resource; 1913 status = nfserr_jukebox;
1912 if (!conf) { 1914 if (!conf) {
1913 /* 1915 /*
1914 * RFC 3530 14.2.33 CASE 4: 1916 * RFC 3530 14.2.33 CASE 4:
@@ -2337,15 +2339,6 @@ out:
2337 return ret; 2339 return ret;
2338} 2340}
2339 2341
2340static inline void
2341nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2342{
2343 if (share_access & NFS4_SHARE_ACCESS_WRITE)
2344 nfs4_file_put_access(fp, O_WRONLY);
2345 if (share_access & NFS4_SHARE_ACCESS_READ)
2346 nfs4_file_put_access(fp, O_RDONLY);
2347}
2348
2349static void nfsd_break_one_deleg(struct nfs4_delegation *dp) 2342static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2350{ 2343{
2351 /* We're assuming the state code never drops its reference 2344 /* We're assuming the state code never drops its reference
@@ -2454,7 +2447,7 @@ renew:
 	if (open->op_stateowner == NULL) {
 		sop = alloc_init_open_stateowner(strhashval, clp, open);
 		if (sop == NULL)
-			return nfserr_resource;
+			return nfserr_jukebox;
 		open->op_stateowner = sop;
 	}
 	list_del_init(&sop->so_close_lru);
@@ -2556,12 +2549,18 @@ static inline int nfs4_access_to_access(u32 nfs4_access)
 	return flags;
 }
 
-static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file
-*fp, struct svc_fh *cur_fh, u32 nfs4_access)
+static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+		struct svc_fh *cur_fh, struct nfsd4_open *open)
 {
 	__be32 status;
-	int oflag = nfs4_access_to_omode(nfs4_access);
-	int access = nfs4_access_to_access(nfs4_access);
+	int oflag = nfs4_access_to_omode(open->op_share_access);
+	int access = nfs4_access_to_access(open->op_share_access);
+
+	/* CLAIM_DELEGATE_CUR is used in response to a broken lease;
+	 * allowing it to break the lease and return EAGAIN leaves the
+	 * client unable to make progress in returning the delegation */
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+		access |= NFSD_MAY_NOT_BREAK_LEASE;
 
 	if (!fp->fi_fds[oflag]) {
 		status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
@@ -2584,9 +2583,9 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 
 	stp = nfs4_alloc_stateid();
 	if (stp == NULL)
-		return nfserr_resource;
+		return nfserr_jukebox;
 
-	status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open->op_share_access);
+	status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
 	if (status) {
 		kmem_cache_free(stateid_slab, stp);
 		return status;
@@ -2619,14 +2618,14 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
 
 	new_access = !test_bit(op_share_access, &stp->st_access_bmap);
 	if (new_access) {
-		status = nfs4_get_vfs_file(rqstp, fp, cur_fh, op_share_access);
+		status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
 		if (status)
 			return status;
 	}
 	status = nfsd4_truncate(rqstp, cur_fh, open);
 	if (status) {
 		if (new_access) {
-			int oflag = nfs4_access_to_omode(new_access);
+			int oflag = nfs4_access_to_omode(op_share_access);
 			nfs4_file_put_access(fp, oflag);
 		}
 		return status;
@@ -2815,7 +2814,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	status = nfserr_bad_stateid;
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
 		goto out;
-	status = nfserr_resource;
+	status = nfserr_jukebox;
 	fp = alloc_init_file(ino);
 	if (fp == NULL)
 		goto out;
@@ -3384,18 +3383,16 @@ out:
 	return status;
 }
 
-
-/*
- * unset all bits in union bitmap (bmap) that
- * do not exist in share (from successful OPEN_DOWNGRADE)
- */
-static void
-reset_union_bmap_access(unsigned long access, unsigned long *bmap)
+static inline void nfs4_file_downgrade(struct nfs4_stateid *stp, unsigned int to_access)
 {
 	int i;
+
 	for (i = 1; i < 4; i++) {
-		if ((i & access) != i)
-			__clear_bit(i, bmap);
+		if (test_bit(i, &stp->st_access_bmap)
+				&& ((i & to_access) != i)) {
+			nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(i));
+			__clear_bit(i, &stp->st_access_bmap);
+		}
 	}
 }
 
@@ -3416,7 +3413,6 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 {
 	__be32 status;
 	struct nfs4_stateid *stp;
-	unsigned int share_access;
 
 	dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
 			(int)cstate->current_fh.fh_dentry->d_name.len,
@@ -3425,6 +3421,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 	if (!access_valid(od->od_share_access, cstate->minorversion)
 			|| !deny_valid(od->od_share_deny))
 		return nfserr_inval;
+	/* We don't yet support WANT bits: */
+	od->od_share_access &= NFS4_SHARE_ACCESS_MASK;
 
 	nfs4_lock_state();
 	if ((status = nfs4_preprocess_seqid_op(cstate,
@@ -3445,10 +3443,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 			stp->st_deny_bmap, od->od_share_deny);
 		goto out;
 	}
-	set_access(&share_access, stp->st_access_bmap);
-	nfs4_file_downgrade(stp->st_file, share_access & ~od->od_share_access);
+	nfs4_file_downgrade(stp, od->od_share_access);
 
-	reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
 	reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
 
 	update_stateid(&stp->st_stateid);
@@ -3854,7 +3850,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	/* XXX: Do we need to check for duplicate stateowners on
 	 * the same file, or should they just be allowed (and
 	 * create new stateids)? */
-	status = nfserr_resource;
+	status = nfserr_jukebox;
 	lock_sop = alloc_init_lock_stateowner(strhashval,
 			open_sop->so_client, open_stp, lock);
 	if (lock_sop == NULL)
@@ -3938,9 +3934,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	case (EDEADLK):
 		status = nfserr_deadlock;
 		break;
 	default:
 		dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
-		status = nfserr_resource;
+		status = nfserrno(err);
 		break;
 	}
 out:
@@ -3960,16 +3956,14 @@ out:
  * vfs_test_lock. (Arguably perhaps test_lock should be done with an
  * inode operation.)
  */
-static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
+static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
 	struct file *file;
-	int err;
-
-	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
-	if (err)
-		return err;
-	err = vfs_test_lock(file, lock);
-	nfsd_close(file);
+	__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+	if (!err) {
+		err = nfserrno(vfs_test_lock(file, lock));
+		nfsd_close(file);
+	}
 	return err;
 }
 
@@ -3982,7 +3976,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct inode *inode;
 	struct file_lock file_lock;
-	int error;
 	__be32 status;
 
 	if (locks_in_grace())
@@ -4034,12 +4027,10 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	nfs4_transform_lock_offset(&file_lock);
 
-	status = nfs_ok;
-	error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
-	if (error) {
-		status = nfserrno(error);
+	status = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
+	if (status)
 		goto out;
-	}
+
 	if (file_lock.fl_type != F_UNLCK) {
 		status = nfserr_denied;
 		nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 99018110321..f91d58990b6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1548,6 +1548,18 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
 	\
 	save = resp->p;
 
+static bool seqid_mutating_err(__be32 err)
+{
+	/* rfc 3530 section 8.1.5: */
+	return	err != nfserr_stale_clientid &&
+		err != nfserr_stale_stateid &&
+		err != nfserr_bad_stateid &&
+		err != nfserr_bad_seqid &&
+		err != nfserr_bad_xdr &&
+		err != nfserr_resource &&
+		err != nfserr_nofilehandle;
+}
+
 /*
  * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This
  * is where sequence id's are incremented, and the replay cache is filled.
@@ -1998,7 +2010,7 @@ out_acl:
 	if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32(1);
+		WRITE32(0);
 	}
 	if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
 		if ((buflen -= 4) < 0)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6bd2f3c21f2..858c7baea2d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -447,12 +447,6 @@ struct nfs4_stateid {
 #define WR_STATE	        0x00000020
 #define CLOSE_STATE             0x00000040
 
-#define seqid_mutating_err(err)			\
-	(((err) != nfserr_stale_clientid) &&	\
-	((err) != nfserr_bad_seqid) &&		\
-	((err) != nfserr_stale_stateid) &&	\
-	((err) != nfserr_bad_stateid))
-
 struct nfsd4_compound_state;
 
 extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index fd0acca5370..acf88aea211 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2114,7 +2114,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 
 	/* Allow read access to binaries even when mode 111 */
 	if (err == -EACCES && S_ISREG(inode->i_mode) &&
-	     acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
+	    (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
+	     acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
 		err = inode_permission(inode, MAY_EXEC);
 
 	return err? nfserrno(err) : 0;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index e0bbac04d1d..a22e40e2786 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -25,6 +25,7 @@
 #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
 #define NFSD_MAY_NOT_BREAK_LEASE 512
 #define NFSD_MAY_BYPASS_GSS	1024
+#define NFSD_MAY_READ_IF_EXEC	2048
 
 #define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
 #define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 08a07a218d2..57ceaf33d17 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
 	while (!list_empty(head)) {
 		ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
 		list_del_init(&ii->i_dirty);
+		truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+		nilfs_btnode_cache_clear(&ii->i_btnode_cache);
 		iput(&ii->vfs_inode);
 	}
 }
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 41d6743d303..0d1c9bdbb79 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -182,7 +182,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
 	if (copy_from_user(&cpmode, argp, sizeof(cpmode)))
 		goto out;
 
-	down_read(&inode->i_sb->s_umount);
+	mutex_lock(&nilfs->ns_snapshot_mount_mutex);
 
 	nilfs_transaction_begin(inode->i_sb, &ti, 0);
 	ret = nilfs_cpfile_change_cpmode(
@@ -192,7 +192,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
 	else
 		nilfs_transaction_commit(inode->i_sb); /* never fails */
 
-	up_read(&inode->i_sb->s_umount);
+	mutex_unlock(&nilfs->ns_snapshot_mount_mutex);
 out:
 	mnt_drop_write(filp->f_path.mnt);
 	return ret;
@@ -842,6 +842,19 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	case FS_IOC32_GETVERSION:
 		cmd = FS_IOC_GETVERSION;
 		break;
+	case NILFS_IOCTL_CHANGE_CPMODE:
+	case NILFS_IOCTL_DELETE_CHECKPOINT:
+	case NILFS_IOCTL_GET_CPINFO:
+	case NILFS_IOCTL_GET_CPSTAT:
+	case NILFS_IOCTL_GET_SUINFO:
+	case NILFS_IOCTL_GET_SUSTAT:
+	case NILFS_IOCTL_GET_VINFO:
+	case NILFS_IOCTL_GET_BDESCS:
+	case NILFS_IOCTL_CLEAN_SEGMENTS:
+	case NILFS_IOCTL_SYNC:
+	case NILFS_IOCTL_RESIZE:
+	case NILFS_IOCTL_SET_ALLOC_RANGE:
+		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bb24ab6c282..6f24e67162c 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
 		if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
 			continue;
 		list_del_init(&ii->i_dirty);
+		truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+		nilfs_btnode_cache_clear(&ii->i_btnode_cache);
 		iput(&ii->vfs_inode);
 	}
 }
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8351c44a732..97bfbddd9fc 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -951,6 +951,8 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
 	struct nilfs_root *root;
 	int ret;
 
+	mutex_lock(&nilfs->ns_snapshot_mount_mutex);
+
 	down_read(&nilfs->ns_segctor_sem);
 	ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno);
 	up_read(&nilfs->ns_segctor_sem);
@@ -975,6 +977,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
 	ret = nilfs_get_root_dentry(s, root, root_dentry);
 	nilfs_put_root(root);
 out:
+	mutex_unlock(&nilfs->ns_snapshot_mount_mutex);
 	return ret;
 }
 
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d3271409437..1c98f5394de 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -76,6 +76,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	nilfs->ns_bdev = bdev;
 	atomic_set(&nilfs->ns_ndirtyblks, 0);
 	init_rwsem(&nilfs->ns_sem);
+	mutex_init(&nilfs->ns_snapshot_mount_mutex);
 	INIT_LIST_HEAD(&nilfs->ns_dirty_files);
 	INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
 	spin_lock_init(&nilfs->ns_inode_lock);
@@ -515,6 +516,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
 		brelse(sbh[1]);
 		sbh[1] = NULL;
 		sbp[1] = NULL;
+		valid[1] = 0;
 		swp = 0;
 	}
 	if (!valid[swp]) {
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 9992b11312f..de7435f0ef5 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -47,6 +47,7 @@ enum {
  * @ns_flags: flags
  * @ns_bdev: block device
  * @ns_sem: semaphore for shared states
+ * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts
  * @ns_sbh: buffer heads of on-disk super blocks
  * @ns_sbp: pointers to super block data
  * @ns_sbwtime: previous write time of super block
@@ -99,6 +100,7 @@ struct the_nilfs {
 
 	struct block_device    *ns_bdev;
 	struct rw_semaphore	ns_sem;
+	struct mutex		ns_snapshot_mount_mutex;
 
 	/*
 	 * used for
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 252ab1f6452..42ed195771f 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -135,9 +135,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 
-	/* 1 from caller and 1 for being on i_list/g_list */
-	BUG_ON(atomic_read(&mark->refcnt) < 2);
-
 	spin_lock(&group->mark_lock);
 
 	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
@@ -182,6 +179,11 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	iput(inode);
 
 	/*
+	 * We don't necessarily have a ref on mark from caller so the above iput
+	 * may have already destroyed it. Don't touch from now on.
+	 */
+
+	/*
 	 * it's possible that this group tried to destroy itself, but this
 	 * this mark was simultaneously being freed by inode. If that's the
 	 * case, we finish freeing the group here.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ed553c60de8..76c8165deed 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1134,7 +1134,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 	}
 
 	el = path_leaf_el(path);
-	rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
+	rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
 
 	ocfs2_adjust_rightmost_records(handle, et, path, rec);
 
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ebfd3825f12..15d29ccefd4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1036,14 +1036,14 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
 
 	tmp_el = left_path->p_node[subtree_root].el;
 	blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
-	for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
+	for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) {
 		if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
 			*cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
 			break;
 		}
 	}
 
-	BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
+	BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec));
 
 out:
 	ocfs2_free_path(left_path);
@@ -1468,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
 
 	trace_ocfs2_divide_leaf_refcount_block(
 		(unsigned long long)ref_leaf_bh->b_blocknr,
-		le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
+		le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
 
 	/*
 	 * XXX: Improvement later.
@@ -2411,7 +2411,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 			rb = (struct ocfs2_refcount_block *)
 				prev_bh->b_data;
 
-			if (le64_to_cpu(rb->rf_records.rl_used) +
+			if (le16_to_cpu(rb->rf_records.rl_used) +
 			    recs_add >
 			    le16_to_cpu(rb->rf_records.rl_count))
 				ref_blocks++;
@@ -2476,7 +2476,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 	if (prev_bh) {
 		rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
 
-		if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
+		if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
 		    le16_to_cpu(rb->rf_records.rl_count))
 			ref_blocks++;
 
@@ -3629,7 +3629,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
 	 * one will split a refcount rec, so totally we need
 	 * clusters * 2 new refcount rec.
 	 */
-	if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
+	if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
 	    le16_to_cpu(rb->rf_records.rl_count))
 		ref_blocks++;
 
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ba5d97e4a73..f169da4624f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -600,7 +600,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle,
 		ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
 					  cluster_ac->ac_bh,
 					  le64_to_cpu(rec->e_blkno),
-					  le32_to_cpu(rec->e_leaf_clusters));
+					  le16_to_cpu(rec->e_leaf_clusters));
 		if (ret)
 			mlog_errno(ret);
 		/* Try all the clusters to free */
@@ -1628,7 +1628,7 @@ static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
 {
 	unsigned int bpc = le16_to_cpu(cl->cl_bpc);
 	unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
-	unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
+	unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
 
 	if (res->sr_bit_offset < bitoff)
 		return 0;
diff --git a/fs/open.c b/fs/open.c
index b52cf013ffa..7e18c4d6e1f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -396,10 +396,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 {
 	struct file *file;
 	struct inode *inode;
-	int error;
+	int error, fput_needed;
 
 	error = -EBADF;
-	file = fget(fd);
+	file = fget_raw_light(fd, &fput_needed);
 	if (!file)
 		goto out;
 
@@ -413,7 +413,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 	if (!error)
 		set_fs_pwd(current->fs, &file->f_path);
 out_putf:
-	fput(file);
+	fput_light(file, fput_needed);
 out:
 	return error;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c..811960a5ef6 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -366,10 +366,21 @@ static void part_release(struct device *dev)
 	kfree(p);
 }
 
+static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct hd_struct *part = dev_to_part(dev);
+
+	add_uevent_var(env, "PARTN=%u", part->partno);
+	if (part->info && part->info->volname[0])
+		add_uevent_var(env, "PARTNAME=%s", part->info->volname);
+	return 0;
+}
+
 struct device_type part_type = {
 	.name		= "partition",
 	.groups		= part_attr_groups,
 	.release	= part_release,
+	.uevent		= part_uevent,
 };
 
 static void delete_partition_rcu_cb(struct rcu_head *head)
@@ -539,17 +550,11 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
 	}
 }
 
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 {
-	struct parsed_partitions *state = NULL;
 	struct disk_part_iter piter;
 	struct hd_struct *part;
-	int p, highest, res;
-rescan:
-	if (state && !IS_ERR(state)) {
-		kfree(state);
-		state = NULL;
-	}
+	int res;
 
 	if (bdev->bd_part_count)
 		return -EBUSY;
@@ -562,6 +567,24 @@ rescan:
 		delete_partition(disk, part->partno);
 	disk_part_iter_exit(&piter);
 
+	return 0;
+}
+
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+	struct parsed_partitions *state = NULL;
+	struct hd_struct *part;
+	int p, highest, res;
+rescan:
+	if (state && !IS_ERR(state)) {
+		kfree(state);
+		state = NULL;
+	}
+
+	res = drop_partitions(disk, bdev);
+	if (res)
+		return res;
+
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
 	check_disk_size_change(disk, bdev);
@@ -665,6 +688,26 @@ rescan:
 	return 0;
 }
 
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+	int res;
+
+	if (!bdev->bd_invalidated)
+		return 0;
+
+	res = drop_partitions(disk, bdev);
+	if (res)
+		return res;
+
+	set_capacity(disk, 0);
+	check_disk_size_change(disk, bdev);
+	bdev->bd_invalidated = 0;
+	/* tell userspace that the media / partition table may have changed */
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+
+	return 0;
+}
+
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
diff --git a/fs/pipe.c b/fs/pipe.c
index da42f7db50d..0499a96287a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -345,6 +345,16 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.get = generic_pipe_buf_get,
 };
 
+static const struct pipe_buf_operations packet_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = anon_pipe_buf_release,
+	.steal = generic_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
 static ssize_t
 pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	   unsigned long nr_segs, loff_t pos)
@@ -406,6 +416,13 @@ redo:
 			ret += chars;
 			buf->offset += chars;
 			buf->len -= chars;
+
+			/* Was it a packet buffer? Clean up and exit */
+			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
+				total_len = chars;
+				buf->len = 0;
+			}
+
 			if (!buf->len) {
 				buf->ops = NULL;
 				ops->release(pipe, buf);
@@ -458,6 +475,11 @@ redo:
 	return ret;
 }
 
+static inline int is_packetized(struct file *file)
+{
+	return (file->f_flags & O_DIRECT) != 0;
+}
+
 static ssize_t
 pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 	    unsigned long nr_segs, loff_t ppos)
@@ -592,6 +614,11 @@ redo2:
 			buf->ops = &anon_pipe_buf_ops;
 			buf->offset = 0;
 			buf->len = chars;
+			buf->flags = 0;
+			if (is_packetized(filp)) {
+				buf->ops = &packet_pipe_buf_ops;
+				buf->flags = PIPE_BUF_FLAG_PACKET;
+			}
 			pipe->nrbufs = ++bufs;
 			pipe->tmp_page = NULL;
 
@@ -1012,7 +1039,7 @@ struct file *create_write_pipe(int flags)
 		goto err_dentry;
 	f->f_mapping = inode->i_mapping;
 
-	f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
+	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 	f->f_version = 0;
 
 	return f;
@@ -1056,7 +1083,7 @@ int do_pipe_flags(int *fd, int flags)
 	int error;
 	int fdw, fdr;
 
-	if (flags & ~(O_CLOEXEC | O_NONBLOCK))
+	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
 		return -EINVAL;
 
 	fw = create_write_pipe(flags);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fc5bc276769..600faf5ba2f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -133,6 +133,12 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
+/* ANDROID is for special files in /proc. */
+#define ANDROID(NAME, MODE, OTYPE)			\
+	NOD(NAME, (S_IFREG|(MODE)),			\
+		&proc_##OTYPE##_inode_operations,	\
+		&proc_##OTYPE##_operations, {})
+
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -194,65 +200,7 @@ static int proc_root_link(struct inode *inode, struct path *path)
 	return result;
 }
 
-static struct mm_struct *__check_mem_permission(struct task_struct *task)
-{
-	struct mm_struct *mm;
-
-	mm = get_task_mm(task);
-	if (!mm)
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * A task can always look at itself, in case it chooses
-	 * to use system calls instead of load instructions.
-	 */
-	if (task == current)
-		return mm;
-
-	/*
-	 * If current is actively ptrace'ing, and would also be
-	 * permitted to freshly attach with ptrace now, permit it.
-	 */
-	if (task_is_stopped_or_traced(task)) {
-		int match;
-		rcu_read_lock();
-		match = (tracehook_tracer_task(task) == current);
-		rcu_read_unlock();
-		if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
-			return mm;
-	}
-
-	/*
-	 * No one else is allowed.
-	 */
-	mmput(mm);
-	return ERR_PTR(-EPERM);
-}
-
-/*
- * If current may access user memory in @task return a reference to the
- * corresponding mm, otherwise ERR_PTR.
- */
-static struct mm_struct *check_mem_permission(struct task_struct *task)
-{
-	struct mm_struct *mm;
-	int err;
-
-	/*
-	 * Avoid racing if task exec's as we might get a new mm but validate
-	 * against old credentials.
-	 */
-	err = mutex_lock_killable(&task->signal->cred_guard_mutex);
-	if (err)
-		return ERR_PTR(err);
-
-	mm = __check_mem_permission(task);
-	mutex_unlock(&task->signal->cred_guard_mutex);
-
-	return mm;
-}
-
-struct mm_struct *mm_for_maps(struct task_struct *task)
+static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 {
 	struct mm_struct *mm;
 	int err;
@@ -263,7 +211,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
 
 	mm = get_task_mm(task);
 	if (mm && mm != current->mm &&
-			!ptrace_may_access(task, PTRACE_MODE_READ)) {
+			!ptrace_may_access(task, PTRACE_MODE_READ) &&
+			!capable(CAP_SYS_RESOURCE)) {
 		mmput(mm);
 		mm = ERR_PTR(-EACCES);
 	}
@@ -272,6 +221,11 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
 	return mm;
 }
 
+struct mm_struct *mm_for_maps(struct task_struct *task)
+{
+	return mm_access(task, PTRACE_MODE_READ);
+}
+
 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
 {
 	int res = 0;
@@ -816,38 +770,46 @@ static const struct file_operations proc_single_file_operations = {
 
 static int mem_open(struct inode* inode, struct file* file)
 {
-	file->private_data = (void*)((long)current->self_exec_id);
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct mm_struct *mm;
+
+	if (!task)
+		return -ESRCH;
+
+	mm = mm_access(task, PTRACE_MODE_ATTACH);
+	put_task_struct(task);
+
+	if (IS_ERR(mm))
+		return PTR_ERR(mm);
+
+	if (mm) {
+		/* ensure this mm_struct can't be freed */
+		atomic_inc(&mm->mm_count);
+		/* but do not pin its memory */
+		mmput(mm);
+	}
+
 	/* OK to pass negative loff_t, we can catch out-of-range */
 	file->f_mode |= FMODE_UNSIGNED_OFFSET;
+	file->private_data = mm;
+
 	return 0;
 }
 
 static ssize_t mem_read(struct file * file, char __user * buf,
 			size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	int ret;
 	char *page;
 	unsigned long src = *ppos;
-	int ret = -ESRCH;
-	struct mm_struct *mm;
+	struct mm_struct *mm = file->private_data;
 
-	if (!task)
-		goto out_no_task;
+	if (!mm)
+		return 0;
 
-	ret = -ENOMEM;
 	page = (char *)__get_free_page(GFP_TEMPORARY);
 	if (!page)
-		goto out;
-
-	mm = check_mem_permission(task);
-	ret = PTR_ERR(mm);
-	if (IS_ERR(mm))
-		goto out_free;
-
-	ret = -EIO;
-
-	if (file->private_data != (void*)((long)current->self_exec_id))
-		goto out_put;
+		return -ENOMEM;
 
 	ret = 0;
 
@@ -874,42 +836,28 @@ static ssize_t mem_read(struct file * file, char __user * buf,
 	}
 	*ppos = src;
 
-out_put:
-	mmput(mm);
-out_free:
 	free_page((unsigned long) page);
-out:
-	put_task_struct(task);
-out_no_task:
 	return ret;
 }
 
+#define mem_write NULL
+
+#ifndef mem_write
+/* This is a security hazard */
 static ssize_t mem_write(struct file * file, const char __user *buf,
 			 size_t count, loff_t *ppos)
 {
 	int copied;
 	char *page;
-	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
 	unsigned long dst = *ppos;
-	struct mm_struct *mm;
+	struct mm_struct *mm = file->private_data;
 
-	copied = -ESRCH;
-	if (!task)
-		goto out_no_task;
+	if (!mm)
+		return 0;
 
-	copied = -ENOMEM;
 	page = (char *)__get_free_page(GFP_TEMPORARY);
 	if (!page)
-		goto out_task;
-
-	mm = check_mem_permission(task);
-	copied = PTR_ERR(mm);
-	if (IS_ERR(mm))
-		goto out_free;
-
-	copied = -EIO;
-	if (file->private_data != (void *)((long)current->self_exec_id))
-		goto out_mm;
+		return -ENOMEM;
 
 	copied = 0;
 	while (count > 0) {
@@ -933,15 +881,10 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
 	}
 	*ppos = dst;
 
-out_mm:
-	mmput(mm);
-out_free:
 	free_page((unsigned long) page);
-out_task:
-	put_task_struct(task);
-out_no_task:
 	return copied;
 }
+#endif
 
 loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 {
@@ -959,11 +902,20 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 	return file->f_pos;
 }
 
+static int mem_release(struct inode *inode, struct file *file)
+{
+	struct mm_struct *mm = file->private_data;
+	if (mm)
+		mmdrop(mm);
+	return 0;
+}
+
 static const struct file_operations proc_mem_operations = {
 	.llseek		= mem_lseek,
 	.read		= mem_read,
 	.write		= mem_write,
 	.open		= mem_open,
+	.release	= mem_release,
 };
 
 static ssize_t environ_read(struct file *file, char __user *buf,
@@ -1141,6 +1093,39 @@ out:
 	return err < 0 ? err : count;
 }
 
+static int oom_adjust_permission(struct inode *inode, int mask,
+				 unsigned int flags)
+{
+	uid_t uid;
+	struct task_struct *p;
+
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
+	p = get_proc_task(inode);
+	if(p) {
+		uid = task_uid(p);
+		put_task_struct(p);
+	}
+
+	/*
+	 * System Server (uid == 1000) is granted access to oom_adj of all
+	 * android applications (uid > 10000) as and services (uid >= 1000)
+	 */
+	if (p && (current_fsuid() == 1000) && (uid >= 1000)) {
+		if (inode->i_mode >> 6 & mask) {
+			return 0;
+		}
+	}
+
+	/* Fall back to default. */
+	return generic_permission(inode, mask, flags, NULL);
+}
+
+static const struct inode_operations proc_oom_adjust_inode_operations = {
+	.permission	= oom_adjust_permission,
+};
+
 static const struct file_operations proc_oom_adjust_operations = {
 	.read		= oom_adjust_read,
 	.write		= oom_adjust_write,
@@ -1920,6 +1905,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (FD_ISSET(fd, fdt->close_on_exec))
+				f_flags |= O_CLOEXEC;
+
 			if (path) {
 				*path = file->f_path;
 				path_get(&file->f_path);
@@ -1929,7 +1922,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 					   "pos:\t%lli\n"
 					   "flags:\t0%o\n",
 					   (long long) file->f_pos,
-					   file->f_flags);
+					   f_flags);
 			spin_unlock(&files->file_lock);
 			put_files_struct(files);
 			return 0;
@@ -2707,9 +2700,16 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 {
 	struct task_io_accounting acct = task->ioac;
 	unsigned long flags;
+	int result;
 
-	if (!ptrace_may_access(task, PTRACE_MODE_READ))
-		return -EACCES;
+	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (result)
+		return result;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+		result = -EACCES;
+		goto out_unlock;
+	}
 
 	if (whole && lock_task_sighand(task, &flags)) {
 		struct task_struct *t = task;
@@ -2720,7 +2720,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 
 		unlock_task_sighand(task, &flags);
 	}
-	return sprintf(buffer,
+	result = sprintf(buffer,
 			"rchar: %llu\n"
 			"wchar: %llu\n"
 			"syscr: %llu\n"
@@ -2735,6 +2735,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 			(unsigned long long)acct.read_bytes,
 			(unsigned long long)acct.write_bytes,
 			(unsigned long long)acct.cancelled_write_bytes);
+out_unlock:
+	mutex_unlock(&task->signal->cred_guard_mutex);
+	return result;
 }
 
 static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
@@ -2829,7 +2832,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
 	INF("oom_score",  S_IRUGO, proc_oom_score),
-	REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
+	ANDROID("oom_adj",S_IRUGO|S_IWUSR, oom_adjust),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
 	REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index ed257d14156..a96282781f9 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -131,12 +131,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(i.freeswap),
 		K(global_page_state(NR_FILE_DIRTY)),
 		K(global_page_state(NR_WRITEBACK)),
-		K(global_page_state(NR_ANON_PAGES)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		K(global_page_state(NR_ANON_PAGES)
 		  + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
-		  HPAGE_PMD_NR
+		  HPAGE_PMD_NR),
+#else
+		K(global_page_state(NR_ANON_PAGES)),
 #endif
-		  ),
 		K(global_page_state(NR_FILE_MAPPED)),
 		K(global_page_state(NR_SHMEM)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f702ac..d6c078ea148 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -54,7 +54,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
 	ei->ns_ops    = ns_ops;
 	ei->ns	      = ns;
 
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (pid_revalidate(dentry, NULL))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 25b6a887adb..55a1f494711 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -407,6 +407,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	} else {
 		spin_unlock(&walk->mm->page_table_lock);
 	}
+
+	if (pmd_trans_unstable(pmd))
+		return 0;
 	/*
 	 * The mmap_sem held all the way back in m_start() is what
 	 * keeps khugepaged out of here and from collapsing things
@@ -505,6 +508,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	struct page *page;
 
 	split_huge_page_pmd(walk->mm, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -516,6 +521,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!page)
 			continue;
 
+		if (PageReserved(page))
+			continue;
+
 		/* Clear accessed and referenced bits. */
 		ptep_test_and_clear_young(vma, addr, pte);
 		ClearPageReferenced(page);
@@ -665,6 +673,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	int err = 0;
 
 	split_huge_page_pmd(walk->mm, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
@@ -877,30 +887,54 @@ struct numa_maps_private {
 	struct numa_maps md;
 };
 
-static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
+static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
+			unsigned long nr_pages)
 {
 	int count = page_mapcount(page);
 
-	md->pages++;
+	md->pages += nr_pages;
 	if (pte_dirty || PageDirty(page))
-		md->dirty++;
+		md->dirty += nr_pages;
 
 	if (PageSwapCache(page))
-		md->swapcache++;
+		md->swapcache += nr_pages;
 
 	if (PageActive(page) || PageUnevictable(page))
-		md->active++;
+		md->active += nr_pages;
 
 	if (PageWriteback(page))
-		md->writeback++;
+		md->writeback += nr_pages;
 
 	if (PageAnon(page))
-		md->anon++;
+		md->anon += nr_pages;
 
 	if (count > md->mapcount_max)
 		md->mapcount_max = count;
 
-	md->node[page_to_nid(page)]++;
+	md->node[page_to_nid(page)] += nr_pages;
+}
+
+static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
+		unsigned long addr)
+{
+	struct page *page;
+	int nid;
+
+	if (!pte_present(pte))
+		return NULL;
+
+	page = vm_normal_page(vma, addr, pte);
+	if (!page)
+		return NULL;
+
+	if (PageReserved(page))
+		return NULL;
+
+	nid = page_to_nid(page);
+	if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
+		return NULL;
+
+	return page;
 }
 
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
@@ -912,26 +946,34 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	md = walk->private;
-	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-	do {
-		struct page *page;
-		int nid;
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(md->vma->anon_vma, pmd);
+		} else {
+			pte_t huge_pte = *(pte_t *)pmd;
+			struct page *page;
 
-		if (!pte_present(*pte))
-			continue;
+			page = can_gather_numa_stats(huge_pte, md->vma, addr);
+			if (page)
+				gather_stats(page, md, pte_dirty(huge_pte),
+						HPAGE_PMD_SIZE/PAGE_SIZE);
+			spin_unlock(&walk->mm->page_table_lock);
+			return 0;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
 
-		page = vm_normal_page(md->vma, addr, *pte);
+	if (pmd_trans_unstable(pmd))
+		return 0;
+	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	do {
+		struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
 		if (!page)
 			continue;
-
-		if (PageReserved(page))
-			continue;
-
-		nid = page_to_nid(page);
-		if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
-			continue;
-
-		gather_stats(page, md, pte_dirty(*pte));
+		gather_stats(page, md, pte_dirty(*pte), 1);
 
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(orig_pte, ptl);
@@ -952,7 +994,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 		return 0;
 
 	md = walk->private;
-	gather_stats(page, md, pte_dirty(*pte));
+	gather_stats(page, md, pte_dirty(*pte), 1);
 	return 0;
 }
 
@@ -1009,6 +1051,9 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " stack");
 	}
 
+	if (is_vm_hugetlb_page(vma))
+		seq_printf(m, " huge");
+
 	walk_page_range(vma->vm_start, vma->vm_end, &walk);
 
 	if (!md->pages)
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d45605..29166ecd03a 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
 	struct timespec uptime;
 	struct timespec idle;
+	cputime64_t idletime;
+	u64 nsec;
+	u32 rem;
 	int i;
-	cputime_t idletime = cputime_zero;
 
+	idletime = 0;
 	for_each_possible_cpu(i)
 		idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);
-	cputime_to_timespec(idletime, &idle);
+	nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
+	idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+	idle.tv_nsec = rem;
 	seq_printf(m, "%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
 			(uptime.tv_nsec / (NSEC_PER_SEC / 100)),
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b34bdb25490..4bae57fc603 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		/* caller already holds s_umount */
 		if (sb->s_flags & MS_RDONLY)
 			return -EROFS;
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_REASON_SYNC);
 		return 0;
 	default:
 		return -EINVAL;
@@ -355,7 +355,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
 	 * resolution (think about autofs) and thus deadlocks could arise.
 	 */
 	if (cmds == Q_QUOTAON) {
-		ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path);
+		ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
 		if (ret)
 			pathp = ERR_PTR(ret);
 		else
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index fbb0b478a34..d5378d02858 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 
 		/* prevent the page from being discarded on memory pressure */
 		SetPageDirty(page);
+		SetPageUptodate(page);
 
 		unlock_page(page);
 		put_page(page);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aa91089162c..f19dfbf6000 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -453,16 +453,20 @@ int remove_save_link(struct inode *inode, int truncate)
 static void reiserfs_kill_sb(struct super_block *s)
 {
 	if (REISERFS_SB(s)) {
-		if (REISERFS_SB(s)->xattr_root) {
-			d_invalidate(REISERFS_SB(s)->xattr_root);
-			dput(REISERFS_SB(s)->xattr_root);
-			REISERFS_SB(s)->xattr_root = NULL;
-		}
-		if (REISERFS_SB(s)->priv_root) {
-			d_invalidate(REISERFS_SB(s)->priv_root);
-			dput(REISERFS_SB(s)->priv_root);
-			REISERFS_SB(s)->priv_root = NULL;
-		}
+		/*
+		 * Force any pending inode evictions to occur now. Any
+		 * inodes to be removed that have extended attributes
+		 * associated with them need to clean them up before
+		 * we can release the extended attribute root dentries.
+		 * shrink_dcache_for_umount will BUG if we don't release
+		 * those before it's called so ->put_super is too late.
+		 */
+		shrink_dcache_sb(s);
+
+		dput(REISERFS_SB(s)->xattr_root);
+		REISERFS_SB(s)->xattr_root = NULL;
+		dput(REISERFS_SB(s)->priv_root);
+		REISERFS_SB(s)->priv_root = NULL;
 	}
 
 	kill_block_super(s);
@@ -1164,7 +1168,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names,
 		kfree(REISERFS_SB(s)->s_qf_names[i]);
 		REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
 	}
-	REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+	if (*qfmt)
+		REISERFS_SB(s)->s_jquota_fmt = *qfmt;
 }
 #endif
 
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e78c9..dba43c3ea3a 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -449,8 +449,6 @@ EXPORT_SYMBOL(seq_path);
 
 /*
  * Same as seq_path, but relative to supplied root.
- *
- * root may be changed, see __d_path().
  */
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		  char *esc)
@@ -463,6 +461,8 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		char *p;
 
 		p = __d_path(path, root, buf, size);
+		if (!p)
+			return SEQ_SKIP;
 		res = PTR_ERR(p);
 		if (!IS_ERR(p)) {
 			char *end = mangle_path(buf, p, esc);
@@ -474,7 +474,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 	}
 	seq_commit(m, res);
 
-	return res < 0 ? res : 0;
+	return res < 0 && res != -ENAMETOOLONG ? res : 0;
 }
 
 /*
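The two seq_path_root() hunks encode a three-way contract for __d_path(): NULL now means "not reachable from this root, skip the record", an ERR_PTR-encoded errno still means a hard failure, and -ENAMETOOLONG is swallowed at the end. A self-contained userspace model of that contract (ERR_PTR/IS_ERR helpers re-implemented here purely for illustration):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define SEQ_SKIP 1

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-4095;
}

static int emit_path(char *p)
{
	long res;

	if (!p)				/* unreachable from root: skip it */
		return SEQ_SKIP;
	res = PTR_ERR(p);
	if (!IS_ERR(p)) {
		printf("%s\n", p);	/* stands in for mangle_path()    */
		res = 0;
	}
	return res < 0 && res != -ENAMETOOLONG ? (int)res : 0;
}

int main(void)
{
	char buf[] = "/mnt/data";

	printf("%d\n", emit_path(buf));			/* 0          */
	printf("%d\n", emit_path(NULL));		/* SEQ_SKIP   */
	printf("%d\n", emit_path(ERR_PTR(-ENAMETOOLONG))); /* 0, swallowed */
	return 0;
}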
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451d..7ae2a574cb2 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
 
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+	wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+	/*
+	 * The lockless check can race with remove_wait_queue() in progress,
+	 * but in this case its caller should run under rcu_read_lock() and
+	 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+	 */
+	if (likely(!waitqueue_active(wqh)))
+		return;
+
+	/* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+	wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
 struct signalfd_ctx {
 	sigset_t sigmask;
 };
diff --git a/fs/splice.c b/fs/splice.c
index aa866d30969..6d0dfb89c75 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
 #include <linux/uio.h>
 #include <linux/security.h>
 #include <linux/gfp.h>
+#include <linux/socket.h>
 
 /*
  * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -132,7 +133,7 @@ error:
 	return err;
 }
 
-static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
+const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
 	.map = generic_pipe_buf_map,
 	.unmap = generic_pipe_buf_unmap,
@@ -264,7 +265,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 	return ret;
 }
 
-static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 {
 	page_cache_release(spd->pages[i]);
 }
@@ -691,7 +692,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 	if (!likely(file->f_op && file->f_op->sendpage))
 		return -EINVAL;
 
-	more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+	more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
+	if (sd->len < sd->total_len)
+		more |= MSG_SENDPAGE_NOTLAST;
 	return file->f_op->sendpage(file, buf->page, buf->offset,
 				    sd->len, &pos, more);
 }
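The sendpage hunk stops collapsing two distinct facts into one boolean: "the caller asked for more data" (MSG_MORE) and "this is not the last chunk of the current splice" (MSG_SENDPAGE_NOTLAST). A small userspace sketch of the flag computation (flag values mirror common definitions but are illustrative here):

#include <stddef.h>
#include <stdio.h>

#define SPLICE_F_MORE		0x04
#define MSG_MORE		0x8000
#define MSG_SENDPAGE_NOTLAST	0x20000

static int sendpage_flags(unsigned int sd_flags, size_t len, size_t total_len)
{
	int more = (sd_flags & SPLICE_F_MORE) ? MSG_MORE : 0;

	if (len < total_len)	/* more chunks of this request follow */
		more |= MSG_SENDPAGE_NOTLAST;
	return more;
}

int main(void)
{
	printf("%#x\n", sendpage_flags(SPLICE_F_MORE, 4096, 4096)); /* MSG_MORE      */
	printf("%#x\n", sendpage_flags(0, 4096, 8192));		    /* NOTLAST only  */
	return 0;
}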
diff --git a/fs/stat.c b/fs/stat.c
index 961039121cb..02a606141b8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -296,15 +296,16 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
 {
 	struct path path;
 	int error;
+	int empty = 0;
 
 	if (bufsiz <= 0)
 		return -EINVAL;
 
-	error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
+	error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
 	if (!error) {
 		struct inode *inode = path.dentry->d_inode;
 
-		error = -EINVAL;
+		error = empty ? -ENOENT : -EINVAL;
 		if (inode->i_op->readlink) {
 			error = security_inode_readlink(path.dentry);
 			if (!error) {
diff --git a/fs/statfs.c b/fs/statfs.c
index 8244924dec5..9cf04a11896 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -76,7 +76,7 @@ EXPORT_SYMBOL(vfs_statfs);
 int user_statfs(const char __user *pathname, struct kstatfs *st)
 {
 	struct path path;
-	int error = user_path(pathname, &path);
+	int error = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
 	if (!error) {
 		error = vfs_statfs(&path, st);
 		path_put(&path);
diff --git a/fs/super.c b/fs/super.c
index ab3d672db0d..e2cf58cad6a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -241,17 +241,48 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 }
 
 /*
+ * grab_super_passive - acquire a passive reference
+ * @s: reference we are trying to grab
+ *
+ * Tries to acquire a passive reference. This is used in places where we
+ * cannot take an active reference but we need to ensure that the
+ * superblock does not go away while we are working on it. It returns
+ * false if a reference was not gained, and returns true with the s_umount
+ * lock held in read mode if a reference is gained. On successful return,
+ * the caller must drop the s_umount lock and the passive reference when
+ * done.
+ */
+bool grab_super_passive(struct super_block *sb)
+{
+	spin_lock(&sb_lock);
+	if (list_empty(&sb->s_instances)) {
+		spin_unlock(&sb_lock);
+		return false;
+	}
+
+	sb->s_count++;
+	spin_unlock(&sb_lock);
+
+	if (down_read_trylock(&sb->s_umount)) {
+		if (sb->s_root)
+			return true;
+		up_read(&sb->s_umount);
+	}
+
+	put_super(sb);
+	return false;
+}
+
+/*
  * Superblock locking. We really ought to get rid of these two.
  */
 void lock_super(struct super_block * sb)
 {
-	get_fs_excl();
 	mutex_lock(&sb->s_lock);
 }
 
 void unlock_super(struct super_block * sb)
 {
-	put_fs_excl();
 	mutex_unlock(&sb->s_lock);
 }
 
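A rough userspace rendition of the passive-reference pattern grab_super_passive() implements - bump a refcount under a lock so the object cannot be freed, trylock for read, then re-verify the object is still live before using it. pthreads stand-ins throughout; the s_instances check is omitted for brevity, so this is a simplified model, not the kernel routine:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct sb {
	pthread_mutex_t lock;		/* plays the role of sb_lock   */
	pthread_rwlock_t umount;	/* plays the role of s_umount  */
	int count;			/* plays the role of s_count   */
	bool alive;			/* plays the role of s_root    */
};

static bool grab_passive(struct sb *sb)
{
	pthread_mutex_lock(&sb->lock);
	sb->count++;			/* passive ref blocks freeing  */
	pthread_mutex_unlock(&sb->lock);

	if (pthread_rwlock_tryrdlock(&sb->umount) == 0) {
		if (sb->alive)
			return true;	/* caller unlocks and drops ref */
		pthread_rwlock_unlock(&sb->umount);
	}

	pthread_mutex_lock(&sb->lock);
	sb->count--;			/* put_super() equivalent      */
	pthread_mutex_unlock(&sb->lock);
	return false;
}

int main(void)
{
	struct sb sb = { PTHREAD_MUTEX_INITIALIZER,
			 PTHREAD_RWLOCK_INITIALIZER, 0, true };

	if (grab_passive(&sb)) {
		printf("got passive ref, count=%d\n", sb.count);
		pthread_rwlock_unlock(&sb.umount);
		sb.count--;
	}
	return 0;
}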
@@ -822,7 +853,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 	} else {
 		char b[BDEVNAME_SIZE];
 
-		s->s_flags = flags | MS_NOSEC;
+		s->s_flags = flags;
 		s->s_mode = mode;
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(bdev));
@@ -1009,6 +1040,8 @@ int freeze_super(struct super_block *sb)
 		printk(KERN_ERR
 			"VFS:Filesystem freeze failed\n");
 		sb->s_frozen = SB_UNFROZEN;
+		smp_wmb();
+		wake_up(&sb->s_wait_unfrozen);
 		deactivate_locked_super(sb);
 		return ret;
 	}
diff --git a/fs/sync.c b/fs/sync.c
index c38ec163da6..2385e128a35 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -18,6 +18,9 @@
 #include <linux/backing-dev.h>
 #include "internal.h"
 
+bool fsync_enabled = true;
+module_param(fsync_enabled, bool, 0755);
+
 #define VALID_FLAGS	(SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
@@ -43,7 +46,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-		writeback_inodes_sb(sb);
+		writeback_inodes_sb(sb, WB_REASON_SYNC);
 
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
@@ -98,7 +101,7 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
-	wakeup_flusher_threads(0);
+	wakeup_flusher_threads(0, WB_REASON_SYNC);
 	sync_filesystems(0);
 	sync_filesystems(1);
 	if (unlikely(laptop_mode))
@@ -139,6 +142,9 @@ SYSCALL_DEFINE1(syncfs, int, fd)
 	int ret;
 	int fput_needed;
 
+	if (!fsync_enabled)
+		return 0;
+
 	file = fget_light(fd, &fput_needed);
 	if (!file)
 		return -EBADF;
@@ -168,6 +174,9 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 	struct address_space *mapping = file->f_mapping;
 	int err, ret;
 
+	if (!fsync_enabled)
+		return 0;
+
 	if (!file->f_op || !file->f_op->fsync) {
 		ret = -EINVAL;
 		goto out;
@@ -200,6 +209,9 @@ EXPORT_SYMBOL(vfs_fsync_range);
  */
 int vfs_fsync(struct file *file, int datasync)
 {
+	if (!fsync_enabled)
+		return 0;
+
 	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync);
@@ -209,6 +221,9 @@ static int do_fsync(unsigned int fd, int datasync)
 	struct file *file;
 	int ret = -EBADF;
 
+	if (!fsync_enabled)
+		return 0;
+
 	file = fget(fd);
 	if (file) {
 		ret = vfs_fsync(file, datasync);
@@ -219,11 +234,17 @@ static int do_fsync(unsigned int fd, int datasync)
 
 SYSCALL_DEFINE1(fsync, unsigned int, fd)
 {
+	if (!fsync_enabled)
+		return 0;
+
 	return do_fsync(fd, 0);
 }
 
 SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
 {
+	if (!fsync_enabled)
+		return 0;
+
 	return do_fsync(fd, 1);
 }
 
@@ -237,6 +258,9 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
  */
 int generic_write_sync(struct file *file, loff_t pos, loff_t count)
 {
+	if (!fsync_enabled)
+		return 0;
+
 	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
 		return 0;
 	return vfs_fsync_range(file, pos, pos + count - 1,
@@ -301,6 +325,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 	int fput_needed;
 	umode_t i_mode;
 
+	if (!fsync_enabled)
+		return 0;
+
 	ret = -EINVAL;
 	if (flags & ~VALID_FLAGS)
 		goto out;
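Note that fsync_enabled is not a mainline interface: with the knob off, every durability entry point in this file returns success without writing anything, so data a program believes is on disk can be lost across a crash. If built as above, module_param() would surface the knob in sysfs; a userspace probe might look like this (the /sys path is an assumption derived from the file name, not verified):

#include <stdio.h>

int main(void)
{
	char v = '?';
	FILE *f = fopen("/sys/module/sync/parameters/fsync_enabled", "r");

	if (f) {
		v = (char)fgetc(f);	/* 'Y' or 'N' when the knob exists */
		fclose(f);
	}
	printf("fsync_enabled: %c\n", v);
	return 0;
}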
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0a12eb89cd3..a494413e486 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
 	void *old_secdata;
 	size_t old_secdata_len;
 
-	iattrs = sd->s_iattr;
-	if (!iattrs)
-		iattrs = sysfs_init_inode_attrs(sd);
-	if (!iattrs)
-		return -ENOMEM;
+	if (!sd->s_iattr) {
+		sd->s_iattr = sysfs_init_inode_attrs(sd);
+		if (!sd->s_iattr)
+			return -ENOMEM;
+	}
 
+	iattrs = sd->s_iattr;
 	old_secdata = iattrs->ia_secdata;
 	old_secdata_len = iattrs->ia_secdata_len;
 
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 315de66e52b..bc4f94b2870 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -63,7 +63,7 @@
 static void shrink_liability(struct ubifs_info *c, int nr_to_write)
 {
 	down_read(&c->vfs_sb->s_umount);
-	writeback_inodes_sb(c->vfs_sb);
+	writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
 	up_read(&c->vfs_sb->s_umount);
 }
 
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index a811ac4a26b..fd75b635dae 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -121,20 +121,21 @@ const char *dbg_key_str1(const struct ubifs_info *c,
 			const union ubifs_key *key);
 
 /*
- * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
- * macros.
+ * TODO: these macros are now broken because there is no locking around them
+ * and we use a global buffer for the key string. This means that in case of
+ * concurrent execution we will end up with incorrect and messy key strings.
  */
 #define DBGKEY(key) dbg_key_str0(c, (key))
 #define DBGKEY1(key) dbg_key_str1(c, (key))
 
-#define ubifs_dbg_msg(type, fmt, ...) do {                        \
-	spin_lock(&dbg_lock);                                     \
-	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
-	spin_unlock(&dbg_lock);                                   \
-} while (0)
+#define ubifs_dbg_msg(type, fmt, ...) \
+	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
 
 /* Just debugging messages not related to any specific UBIFS subsystem */
-#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
+#define dbg_msg(fmt, ...) \
+	printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
+	       __func__, ##__VA_ARGS__)
+
 /* General messages */
 #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
 /* Additional journal messages */
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index c606f010e8d..1250016bfb9 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -715,8 +715,12 @@ static int fixup_free_space(struct ubifs_info *c)
 		lnum = ubifs_next_log_lnum(c, lnum);
 	}
 
-	/* Fixup the current log head */
-	err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+	/*
+	 * Fixup the log head which contains only a CS node at the
+	 * beginning.
+	 */
+	err = fixup_leb(c, c->lhead_lnum,
+			ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
 	if (err)
 		goto out;
 
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2a346bb1d9f..3438b000041 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		err = udf_expand_file_adinicb(inode);
 		if (err) {
 			udf_debug("udf_expand_adinicb: err=%d\n", err);
-			up_write(&iinfo->i_data_sem);
 			return err;
 		}
 	} else {
@@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			iinfo->i_lenAlloc = pos + count;
 		else
 			iinfo->i_lenAlloc = inode->i_size;
+		up_write(&iinfo->i_data_sem);
 		}
-	}
+	} else
 		up_write(&iinfo->i_data_sem);
 
 	retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
 	if (retval > 0)
@@ -201,12 +201,10 @@ out:
 static int udf_release_file(struct inode *inode, struct file *filp)
 {
 	if (filp->f_mode & FMODE_WRITE) {
-		mutex_lock(&inode->i_mutex);
 		down_write(&UDF_I(inode)->i_data_sem);
 		udf_discard_prealloc(inode);
 		udf_truncate_tail_extent(inode);
 		up_write(&UDF_I(inode)->i_data_sem);
-		mutex_unlock(&inode->i_mutex);
 	}
 	return 0;
 }
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d1358ed80c..262050f2eb6 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -145,6 +145,12 @@ const struct address_space_operations udf_aops = {
 	.bmap			= udf_bmap,
 };
 
+/*
+ * Expand file stored in ICB to a normal one-block-file
+ *
+ * This function requires i_data_sem for writing and releases it.
+ * This function requires i_mutex held
+ */
 int udf_expand_file_adinicb(struct inode *inode)
 {
 	struct page *page;
@@ -163,9 +169,15 @@ int udf_expand_file_adinicb(struct inode *inode)
 		iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
 		/* from now on we have normal address_space methods */
 		inode->i_data.a_ops = &udf_aops;
+		up_write(&iinfo->i_data_sem);
 		mark_inode_dirty(inode);
 		return 0;
 	}
+	/*
+	 * Release i_data_sem so that we can lock a page - page lock ranks
+	 * above i_data_sem. i_mutex still protects us against file changes.
+	 */
+	up_write(&iinfo->i_data_sem);
 
 	page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
 	if (!page)
@@ -181,6 +193,7 @@ int udf_expand_file_adinicb(struct inode *inode)
 		SetPageUptodate(page);
 		kunmap(page);
 	}
+	down_write(&iinfo->i_data_sem);
 	memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00,
 	       iinfo->i_lenAlloc);
 	iinfo->i_lenAlloc = 0;
@@ -190,17 +203,20 @@ int udf_expand_file_adinicb(struct inode *inode)
 	iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
 	/* from now on we have normal address_space methods */
 	inode->i_data.a_ops = &udf_aops;
+	up_write(&iinfo->i_data_sem);
 	err = inode->i_data.a_ops->writepage(page, &udf_wbc);
 	if (err) {
 		/* Restore everything back so that we don't lose data... */
 		lock_page(page);
 		kaddr = kmap(page);
+		down_write(&iinfo->i_data_sem);
 		memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
 		       inode->i_size);
 		kunmap(page);
 		unlock_page(page);
 		iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
 		inode->i_data.a_ops = &udf_adinicb_aops;
+		up_write(&iinfo->i_data_sem);
 	}
 	page_cache_release(page);
 	mark_inode_dirty(inode);
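The hunks above enforce a lock ordering: the page lock ranks above i_data_sem, so i_data_sem is dropped before lock_page()/find_or_create_page() and retaken afterwards, with i_mutex still guarding against concurrent file changes. A minimal userspace rendition of that discipline with two mutexes standing in for the kernel locks (names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER; /* ranks first  */
static pthread_mutex_t data_sem  = PTHREAD_MUTEX_INITIALIZER; /* ranks second */

static void expand(void)
{
	pthread_mutex_lock(&data_sem);
	/* ... in-ICB fast path would return here ... */
	pthread_mutex_unlock(&data_sem);  /* drop before taking page_lock */

	pthread_mutex_lock(&page_lock);	  /* find_or_create_page() analogue */
	pthread_mutex_lock(&data_sem);	  /* retake in the correct order    */
	/* ... move data, switch address_space ops ... */
	pthread_mutex_unlock(&data_sem);
	pthread_mutex_unlock(&page_lock);
}

int main(void)
{
	expand();
	puts("no lock inversion");
	return 0;
}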
@@ -1105,10 +1121,9 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 		if (bsize <
 		    (udf_file_entry_alloc_offset(inode) + newsize)) {
 			err = udf_expand_file_adinicb(inode);
-			if (err) {
-				up_write(&iinfo->i_data_sem);
+			if (err)
 				return err;
-			}
+			down_write(&iinfo->i_data_sem);
 		} else
 			iinfo->i_lenAlloc = newsize;
 	}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7b27b063ff6..a8e867ae11c 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -56,6 +56,7 @@
 #include <linux/seq_file.h>
 #include <linux/bitmap.h>
 #include <linux/crc-itu-t.h>
+#include <linux/log2.h>
 #include <asm/byteorder.h>
 
 #include "udf_sb.h"
@@ -1244,16 +1245,65 @@ out_bh:
 	return ret;
 }
 
+static int udf_load_sparable_map(struct super_block *sb,
+				 struct udf_part_map *map,
+				 struct sparablePartitionMap *spm)
+{
+	uint32_t loc;
+	uint16_t ident;
+	struct sparingTable *st;
+	struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing;
+	int i;
+	struct buffer_head *bh;
+
+	map->s_partition_type = UDF_SPARABLE_MAP15;
+	sdata->s_packet_len = le16_to_cpu(spm->packetLength);
+	if (!is_power_of_2(sdata->s_packet_len)) {
+		udf_error(sb, __func__, "error loading logical volume descriptor: "
+			"Invalid packet length %u\n",
+			(unsigned)sdata->s_packet_len);
+		return -EIO;
+	}
+	if (spm->numSparingTables > 4) {
+		udf_error(sb, __func__, "error loading logical volume descriptor: "
+			"Too many sparing tables (%d)\n",
+			(int)spm->numSparingTables);
+		return -EIO;
+	}
+
+	for (i = 0; i < spm->numSparingTables; i++) {
+		loc = le32_to_cpu(spm->locSparingTable[i]);
+		bh = udf_read_tagged(sb, loc, loc, &ident);
+		if (!bh)
+			continue;
+
+		st = (struct sparingTable *)bh->b_data;
+		if (ident != 0 ||
+		    strncmp(st->sparingIdent.ident, UDF_ID_SPARING,
+			    strlen(UDF_ID_SPARING)) ||
+		    sizeof(*st) + le16_to_cpu(st->reallocationTableLen) >
+			    sb->s_blocksize) {
+			brelse(bh);
+			continue;
+		}
+
+		sdata->s_spar_map[i] = bh;
+	}
+	map->s_partition_func = udf_get_pblock_spar15;
+	return 0;
+}
+
 static int udf_load_logicalvol(struct super_block *sb, sector_t block,
 			       struct kernel_lb_addr *fileset)
 {
 	struct logicalVolDesc *lvd;
-	int i, j, offset;
+	int i, offset;
 	uint8_t type;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct genericPartitionMap *gpm;
 	uint16_t ident;
 	struct buffer_head *bh;
+	unsigned int table_len;
 	int ret = 0;
 
 	bh = udf_read_tagged(sb, block, block, &ident);
@@ -1261,15 +1311,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
 		return 1;
 	BUG_ON(ident != TAG_IDENT_LVD);
 	lvd = (struct logicalVolDesc *)bh->b_data;
-
-	i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
-	if (i != 0) {
-		ret = i;
+	table_len = le32_to_cpu(lvd->mapTableLength);
+	if (sizeof(*lvd) + table_len > sb->s_blocksize) {
+		udf_error(sb, __func__, "error loading logical volume descriptor: "
+			"Partition table too long (%u > %lu)\n", table_len,
+			sb->s_blocksize - sizeof(*lvd));
 		goto out_bh;
 	}
 
+	ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
+	if (ret)
+		goto out_bh;
+
 	for (i = 0, offset = 0;
-	     i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength);
+	     i < sbi->s_partitions && offset < table_len;
 	     i++, offset += gpm->partitionMapLength) {
 		struct udf_part_map *map = &sbi->s_partmaps[i];
 		gpm = (struct genericPartitionMap *)
@@ -1304,38 +1359,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
 		} else if (!strncmp(upm2->partIdent.ident,
 					UDF_ID_SPARABLE,
 					strlen(UDF_ID_SPARABLE))) {
-			uint32_t loc;
-			struct sparingTable *st;
-			struct sparablePartitionMap *spm =
-				(struct sparablePartitionMap *)gpm;
-
-			map->s_partition_type = UDF_SPARABLE_MAP15;
-			map->s_type_specific.s_sparing.s_packet_len =
-					le16_to_cpu(spm->packetLength);
-			for (j = 0; j < spm->numSparingTables; j++) {
-				struct buffer_head *bh2;
-
-				loc = le32_to_cpu(
-						spm->locSparingTable[j]);
-				bh2 = udf_read_tagged(sb, loc, loc,
-						     &ident);
-				map->s_type_specific.s_sparing.
-						s_spar_map[j] = bh2;
-
-				if (bh2 == NULL)
-					continue;
-
-				st = (struct sparingTable *)bh2->b_data;
-				if (ident != 0 || strncmp(
-						st->sparingIdent.ident,
-						UDF_ID_SPARING,
-						strlen(UDF_ID_SPARING))) {
-					brelse(bh2);
-					map->s_type_specific.s_sparing.
-						s_spar_map[j] = NULL;
-				}
-			}
-			map->s_partition_func = udf_get_pblock_spar15;
+			if (udf_load_sparable_map(sb, map,
+			    (struct sparablePartitionMap *)gpm) < 0)
+				goto out_bh;
 		} else if (!strncmp(upm2->partIdent.ident,
 					UDF_ID_METADATA,
 					strlen(UDF_ID_METADATA))) {
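The factored-out sparable-map loader treats every on-disk field as untrusted: the packet length must be a power of two, the sparing-table count is capped, and each table must fit inside a block. A compact userspace version of those checks (limits mirror the hunk but are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_power_of_2(uint32_t n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

static int validate_spm(uint16_t packet_len, uint8_t num_tables,
			uint32_t table_bytes, uint32_t block_size)
{
	if (!is_power_of_2(packet_len))
		return -1;		/* -EIO in the kernel code */
	if (num_tables > 4)
		return -1;
	if (table_bytes > block_size)
		return -1;
	return 0;
}

int main(void)
{
	printf("%d\n", validate_spm(32, 2, 512, 2048));	/* 0: sane           */
	printf("%d\n", validate_spm(33, 2, 512, 2048));	/* -1: bad length    */
	printf("%d\n", validate_spm(32, 9, 512, 2048));	/* -1: table count   */
	return 0;
}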
@@ -1830,6 +1856,12 @@ static void udf_close_lvid(struct super_block *sb)
 			le16_to_cpu(lvid->descTag.descCRCLength)));
 
 	lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
+	/*
+	 * We set buffer uptodate unconditionally here to avoid spurious
+	 * warnings from mark_buffer_dirty() when previous EIO has marked
+	 * the buffer as !uptodate
+	 */
+	set_buffer_uptodate(bh);
 	mark_buffer_dirty(bh);
 	sbi->s_lvid_dirty = 0;
 	mutex_unlock(&sbi->s_alloc_mutex);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 39f4f809bb6..f86e0348786 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -39,9 +39,11 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
 	struct posix_acl_entry *acl_e;
 	struct posix_acl *acl;
 	struct xfs_acl_entry *ace;
-	int count, i;
+	unsigned int count, i;
 
 	count = be32_to_cpu(aclp->acl_cnt);
+	if (count > XFS_ACL_MAX_ENTRIES)
+		return ERR_PTR(-EFSCORRUPTED);
 
 	acl = posix_acl_alloc(count, GFP_KERNEL);
 	if (!acl)
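Same defensive idea as in the UDF hunks: an on-disk count is attacker-controlled, so it is range-checked before it sizes an allocation, and it is kept unsigned so a negative value cannot slip past the comparison. A userspace sketch (MAX_ENTRIES plays the role of XFS_ACL_MAX_ENTRIES; the value is illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ENTRIES 25

struct entry { uint32_t tag, id, perm; };

static struct entry *entries_from_disk(uint32_t count)
{
	if (count > MAX_ENTRIES)	/* reject corrupt/hostile images   */
		return NULL;		/* ERR_PTR(-EFSCORRUPTED) upstream */
	return calloc(count, sizeof(struct entry));
}

int main(void)
{
	struct entry *e = entries_from_disk(0xffffffffu);

	printf("%s\n", e ? "allocated" : "rejected");	/* rejected */
	free(e);
	return 0;
}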
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 50a7d5fb3b7..36d6ee44386 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -346,7 +346,6 @@ extern struct list_head *xfs_get_buftarg_list(void);
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
 
-#define xfs_binval(buftarg)		xfs_flush_buftarg(buftarg, 1)
 #define XFS_bflush(buftarg)		xfs_flush_buftarg(buftarg, 1)
 
 #endif	/* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 244e797dae3..572494faf26 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -68,7 +68,7 @@ xfs_trim_extents(
 	 * Look up the longest btree in the AGF and start with it.
 	 */
 	error = xfs_alloc_lookup_le(cur, 0,
-			XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+			be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
 	if (error)
 		goto out_del_cursor;
 
@@ -84,7 +84,7 @@ xfs_trim_extents(
 		if (error)
 			goto out_del_cursor;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
-		ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+		ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
 
 		/*
 		 * Too small?  Give up.
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f4f878fc008..fed3f3c878c 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -98,22 +98,22 @@ xfs_fs_encode_fh(
 	switch (fileid_type) {
 	case FILEID_INO32_GEN_PARENT:
 		spin_lock(&dentry->d_lock);
-		fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+		fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
 		fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
 		spin_unlock(&dentry->d_lock);
 		/*FALLTHRU*/
 	case FILEID_INO32_GEN:
-		fid->i32.ino = inode->i_ino;
+		fid->i32.ino = XFS_I(inode)->i_ino;
 		fid->i32.gen = inode->i_generation;
 		break;
 	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
 		spin_lock(&dentry->d_lock);
-		fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+		fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
 		fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
 		spin_unlock(&dentry->d_lock);
 		/*FALLTHRU*/
 	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-		fid64->ino = inode->i_ino;
+		fid64->ino = XFS_I(inode)->i_ino;
 		fid64->gen = inode->i_generation;
 		break;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 7f782af286b..b679198dcc0 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -309,7 +309,19 @@ xfs_file_aio_read(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if (unlikely(ioflags & IO_ISDIRECT)) {
+	/*
+	 * Locking is a bit tricky here. If we take an exclusive lock
+	 * for direct IO, we effectively serialise all new concurrent
+	 * read IO to this file and block it behind IO that is currently in
+	 * progress because IO in progress holds the IO lock shared. We only
+	 * need to hold the lock exclusive to blow away the page cache, so
+	 * only take lock exclusively if the page cache needs invalidation.
+	 * This allows the normal direct IO case of no page cache pages to
+	 * proceed concurrently without serialisation.
+	 */
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
 		if (inode->i_mapping->nrpages) {
@@ -322,8 +334,7 @@ xfs_file_aio_read(
 			}
 		}
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-	} else
-		xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	}
 
 	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
 
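The shape of the read-side locking dance above: start shared, and only when the page cache must be invalidated trade up to exclusive, invalidate, then drop back to shared. A userspace pseudo-form with a pthreads rwlock; note pthreads has no atomic downgrade, so this sketch re-acquires, whereas the kernel's xfs_rw_ilock_demote() does it without a window:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;
static int cached_pages = 3;

static void direct_read(bool direct)
{
	pthread_rwlock_rdlock(&iolock);
	if (direct && cached_pages) {
		pthread_rwlock_unlock(&iolock);
		pthread_rwlock_wrlock(&iolock);	/* exclusive: flush cache */
		if (cached_pages)		/* recheck after re-lock  */
			cached_pages = 0;
		pthread_rwlock_unlock(&iolock);	/* "demote" (with a window) */
		pthread_rwlock_rdlock(&iolock);
	}
	/* ... issue the read while holding the lock shared ... */
	pthread_rwlock_unlock(&iolock);
}

int main(void)
{
	direct_read(true);
	printf("cached_pages=%d\n", cached_pages);	/* 0 */
	return 0;
}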
@@ -658,6 +669,7 @@ xfs_file_aio_write_checks(
 	xfs_fsize_t		new_size;
 	int			error = 0;
 
+	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
 	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
 	if (error) {
 		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
@@ -749,14 +761,24 @@ xfs_file_dio_aio_write(
 		*iolock = XFS_IOLOCK_EXCL;
 	else
 		*iolock = XFS_IOLOCK_SHARED;
-	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+	xfs_rw_ilock(ip, *iolock);
 
 	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
 	if (ret)
 		return ret;
 
+	/*
+	 * Recheck if there are cached pages that need invalidate after we got
+	 * the iolock to protect against other threads adding new pages while
+	 * we were waiting for the iolock.
+	 */
+	if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
+		xfs_rw_iunlock(ip, *iolock);
+		*iolock = XFS_IOLOCK_EXCL;
+		xfs_rw_ilock(ip, *iolock);
+	}
+
 	if (mapping->nrpages) {
-		WARN_ON(*iolock != XFS_IOLOCK_EXCL);
 		ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
 							FI_REMAPF_LOCKED);
 		if (ret)
@@ -801,7 +823,7 @@ xfs_file_buffered_aio_write(
 	size_t			count = ocount;
 
 	*iolock = XFS_IOLOCK_EXCL;
-	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+	xfs_rw_ilock(ip, *iolock);
 
 	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
 	if (ret)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d44d92cd12b..f5b697bf39f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -69,9 +69,8 @@ xfs_synchronize_times(
 }
 
 /*
- * If the linux inode is valid, mark it dirty.
- * Used when committing a dirty inode into a transaction so that
- * the inode will get written back by the linux code
+ * If the linux inode is valid, mark it dirty, else mark the dirty state
+ * in the XFS inode to make sure we pick it up when reclaiming the inode.
  */
 void
 xfs_mark_inode_dirty_sync(
@@ -81,6 +80,10 @@ xfs_mark_inode_dirty_sync(
 
 	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
 		mark_inode_dirty_sync(inode);
+	else {
+		barrier();
+		ip->i_update_core = 1;
+	}
 }
 
 void
@@ -91,6 +94,11 @@ xfs_mark_inode_dirty(
 
 	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
 		mark_inode_dirty(inode);
+	else {
+		barrier();
+		ip->i_update_core = 1;
+	}
+
 }
 
 /*
@@ -456,7 +464,7 @@ xfs_vn_getattr(
 	trace_xfs_getattr(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
+		return -XFS_ERROR(EIO);
 
 	stat->size = XFS_ISIZE(ip);
 	stat->dev = inode->i_sb->s_dev;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8633521b3b2..87315168538 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,8 @@
 #include <linux/ctype.h>
 #include <linux/writeback.h>
 #include <linux/capability.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
 #include <linux/list_sort.h>
 
 #include <asm/page.h>
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a1a881e68a9..e6ac98c112e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -871,43 +871,6 @@ xfs_fs_dirty_inode(
 }
 
 STATIC int
-xfs_log_inode(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
-	int			error;
-
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		/* we need to return with the lock held shared */
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		return error;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Note - it's possible that we might have pushed ourselves out of the
-	 * way during trans_reserve which would flush the inode. But there's
-	 * no guarantee that the inode buffer has actually gone out yet (it's
-	 * delwri). Plus the buffer could be pinned anyway if it's part of
-	 * an inode in another recent transaction. So we play it safe and
-	 * fire off the transaction anyway.
-	 */
-	xfs_trans_ijoin(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_commit(tp, 0);
-	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
-	return error;
-}
-
-STATIC int
 xfs_fs_write_inode(
 	struct inode		*inode,
 	struct writeback_control *wbc)
@@ -919,9 +882,9 @@ xfs_fs_write_inode(
 	trace_xfs_write_inode(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
+		return -XFS_ERROR(EIO);
 
-	if (wbc->sync_mode == WB_SYNC_ALL) {
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
 		/*
 		 * Make sure the inode has made it into the log. Instead
 		 * of forcing it all the way to stable storage using a
@@ -930,13 +893,14 @@ xfs_fs_write_inode(
 		 * of synchronous log forces dramatically.
 		 */
 		xfs_ioend_wait(ip);
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		if (ip->i_update_core) {
-			error = xfs_log_inode(ip);
-			if (error)
-				goto out_unlock;
-		}
+		error = xfs_log_dirty_inode(ip, NULL, 0);
+		if (error)
+			goto out;
+		return 0;
 	} else {
+		if (!ip->i_update_core)
+			return 0;
+
 		/*
 		 * We make this non-blocking if the inode is contended, return
 		 * EAGAIN to indicate to the caller that they did not succeed.
@@ -1412,37 +1376,35 @@ xfs_fs_fill_super(
 	sb->s_time_gran = 1;
 	set_posix_acl_flag(sb);
 
-	error = xfs_syncd_init(mp);
-	if (error)
-		goto out_filestream_unmount;
-
 	xfs_inode_shrinker_register(mp);
 
 	error = xfs_mountfs(mp);
 	if (error)
-		goto out_syncd_stop;
+		goto out_filestream_unmount;
+
+	error = xfs_syncd_init(mp);
+	if (error)
+		goto out_unmount;
 
 	root = igrab(VFS_I(mp->m_rootip));
 	if (!root) {
 		error = ENOENT;
-		goto fail_unmount;
+		goto out_syncd_stop;
 	}
 	if (is_bad_inode(root)) {
 		error = EINVAL;
-		goto fail_vnrele;
+		goto out_syncd_stop;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
 		error = ENOMEM;
-		goto fail_vnrele;
+		goto out_iput;
 	}
 
 	return 0;
 
- out_syncd_stop:
-	xfs_inode_shrinker_unregister(mp);
-	xfs_syncd_stop(mp);
  out_filestream_unmount:
+	xfs_inode_shrinker_unregister(mp);
 	xfs_filestream_unmount(mp);
  out_free_sb:
 	xfs_freesb(mp);
@@ -1456,17 +1418,12 @@ xfs_fs_fill_super(
  out:
 	return -error;
 
- fail_vnrele:
-	if (sb->s_root) {
-		dput(sb->s_root);
-		sb->s_root = NULL;
-	} else {
-		iput(root);
-	}
-
- fail_unmount:
-	xfs_inode_shrinker_unregister(mp);
+ out_iput:
+	iput(root);
+ out_syncd_stop:
 	xfs_syncd_stop(mp);
+ out_unmount:
+	xfs_inode_shrinker_unregister(mp);
 
 	/*
 	 * Blow away any referenced inode in the filestreams cache.
@@ -1667,24 +1624,13 @@ xfs_init_workqueues(void)
 	 */
 	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
 	if (!xfs_syncd_wq)
-		goto out;
-
-	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
-	if (!xfs_ail_wq)
-		goto out_destroy_syncd;
-
+		return -ENOMEM;
 	return 0;
-
-out_destroy_syncd:
-	destroy_workqueue(xfs_syncd_wq);
-out:
-	return -ENOMEM;
 }
 
 STATIC void
 xfs_destroy_workqueues(void)
 {
-	destroy_workqueue(xfs_ail_wq);
 	destroy_workqueue(xfs_syncd_wq);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 8ecad5ff9f9..2f277a04d67 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
 	return xfs_bwrite(mp, bp);
 }
 
+int
+xfs_log_dirty_inode(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	if (!ip->i_update_core)
+		return 0;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	return xfs_trans_commit(tp, 0);
+}
+
 /*
  * When remounting a filesystem read-only or freezing the filesystem, we have
  * two phases to execute. This first phase is syncing the data before we
@@ -365,6 +391,17 @@ xfs_quiesce_data(
 
 	/* push and block till complete */
 	xfs_sync_data(mp, SYNC_WAIT);
+
+	/*
+	 * Log all pending size and timestamp updates.  The vfs writeback
+	 * code is supposed to do this, but due to its overaggressive
+	 * livelock detection it will skip inodes where appending writes
+	 * were written out in the first non-blocking sync phase if their
+	 * completion took long enough that it happened after taking the
+	 * timestamp for the cut-off in the blocking phase.
+	 */
+	xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
+
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
@@ -772,6 +809,17 @@ restart:
 	if (!xfs_iflock_nowait(ip)) {
 		if (!(sync_mode & SYNC_WAIT))
 			goto out;
+
+		/*
+		 * If we only have a single dirty inode in a cluster there is
+		 * a fair chance that the AIL push may have pushed it into
+		 * the buffer, but xfsbufd won't touch it until 30 seconds
+		 * from now, and thus we will lock up here.
+		 *
+		 * Promote the inode buffer to the front of the delwri list
+		 * and wake up xfsbufd now.
+		 */
+		xfs_promote_inode(ip);
 		xfs_iflock(ip);
 	}
 
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e3a6ad27415..ef5b2ce4298 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -42,6 +42,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
 
 void xfs_flush_inodes(struct xfs_inode *ip);
 
+int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
+
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 9e0e2fa3f2c..8126fc2ea63 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait(
  * search the buffer cache can be a time consuming thing, and AIL lock is a
  * spinlock.
  */
-STATIC void
+STATIC bool
 xfs_qm_dquot_logitem_pushbuf(
 	struct xfs_log_item	*lip)
 {
 	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
 	struct xfs_dquot	*dqp = qlip->qli_dquot;
 	struct xfs_buf		*bp;
+	bool			ret = true;
 
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
@@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf(
 	if (completion_done(&dqp->q_flush) ||
 	    !(lip->li_flags & XFS_LI_IN_AIL)) {
 		xfs_dqunlock(dqp);
-		return;
+		return true;
 	}
 
 	bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
 			dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
 	xfs_dqunlock(dqp);
 	if (!bp)
-		return;
+		return true;
 	if (XFS_BUF_ISDELAYWRITE(bp))
 		xfs_buf_delwri_promote(bp);
+	if (XFS_BUF_ISPINNED(bp))
+		ret = false;
 	xfs_buf_relse(bp);
+	return ret;
 }
 
 /*
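After this change the ->pushbuf handlers report back to their caller: true appears to mean "pushed, or nothing left to do", while false means "the buffer is pinned, force the log before it can be written". A tiny model of that contract (names and the surrounding protocol are illustrative, inferred from the hunk):

#include <stdbool.h>
#include <stdio.h>

struct buf { bool delwri, pinned; };

static bool pushbuf(struct buf *bp)
{
	if (bp->delwri) {
		/* promote to the front of the delwri list (no-op here) */
	}
	return !bp->pinned;	/* false: caller should force the log */
}

int main(void)
{
	struct buf clean  = { true, false };
	struct buf pinned = { true, true };

	printf("%d %d\n", pushbuf(&clean), pushbuf(&pinned));	/* 1 0 */
	return 0;
}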
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index b94dace4e78..e70c7fc95e2 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -714,7 +714,8 @@ xfs_qm_dqattach_one(
 	 * disk and we didn't ask it to allocate;
 	 * ESRCH if quotas got turned off suddenly.
 	 */
-	error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+	error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+			     doalloc | XFS_QMOPT_DOWARN, &dqp);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01d2072fb6d..99d4011602e 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -822,17 +822,9 @@ xfs_attr_inactive(xfs_inode_t *dp)
822 error = xfs_attr_root_inactive(&trans, dp); 822 error = xfs_attr_root_inactive(&trans, dp);
823 if (error) 823 if (error)
824 goto out; 824 goto out;
825 /* 825
826 * signal synchronous inactive transactions unless this 826 error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, 0);
827 * is a synchronous mount filesystem in which case we 827 if (error)
828 * know that we're here because we've been called out of
829 * xfs_inactive which means that the last reference is gone
830 * and the unlink transaction has already hit the disk so
831 * async inactive transactions are safe.
832 */
833 if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
834 (!(mp->m_flags & XFS_MOUNT_WSYNC)
835 ? 1 : 0))))
836 goto out; 828 goto out;
837 829
838 /* 830 /*
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 71e90dc2aeb..f49ecf2e7d3 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -110,6 +110,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
110/* 110/*
111 * Query whether the requested number of additional bytes of extended 111 * Query whether the requested number of additional bytes of extended
112 * attribute space will be able to fit inline. 112 * attribute space will be able to fit inline.
113 *
113 * Returns zero if not, else the di_forkoff fork offset to be used in the 114 * Returns zero if not, else the di_forkoff fork offset to be used in the
114 * literal area for attribute data once the new bytes have been added. 115 * literal area for attribute data once the new bytes have been added.
115 * 116 *
@@ -122,7 +123,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
122 int offset; 123 int offset;
123 int minforkoff; /* lower limit on valid forkoff locations */ 124 int minforkoff; /* lower limit on valid forkoff locations */
124 int maxforkoff; /* upper limit on valid forkoff locations */ 125 int maxforkoff; /* upper limit on valid forkoff locations */
125 int dsize; 126 int dsize;
126 xfs_mount_t *mp = dp->i_mount; 127 xfs_mount_t *mp = dp->i_mount;
127 128
128 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ 129 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -136,47 +137,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
136 return (offset >= minforkoff) ? minforkoff : 0; 137 return (offset >= minforkoff) ? minforkoff : 0;
137 } 138 }
138 139
139 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { 140 /*
140 if (bytes <= XFS_IFORK_ASIZE(dp)) 141 * If the requested numbers of bytes is smaller or equal to the
141 return dp->i_d.di_forkoff; 142 * current attribute fork size we can always proceed.
143 *
144 * Note that if_bytes in the data fork might actually be larger than
145 * the current data fork size is due to delalloc extents. In that
146 * case either the extent count will go down when they are converted
147 * to real extents, or the delalloc conversion will take care of the
148 * literal area rebalancing.
149 */
150 if (bytes <= XFS_IFORK_ASIZE(dp))
151 return dp->i_d.di_forkoff;
152
153 /*
154 * For attr2 we can try to move the forkoff if there is space in the
155 * literal area, but for the old format we are done if there is no
156 * space in the fixed attribute fork.
157 */
158 if (!(mp->m_flags & XFS_MOUNT_ATTR2))
142 return 0; 159 return 0;
143 }
144 160
145 dsize = dp->i_df.if_bytes; 161 dsize = dp->i_df.if_bytes;
146 162
147 switch (dp->i_d.di_format) { 163 switch (dp->i_d.di_format) {
148 case XFS_DINODE_FMT_EXTENTS: 164 case XFS_DINODE_FMT_EXTENTS:
149 /* 165 /*
150 * If there is no attr fork and the data fork is extents, 166 * If there is no attr fork and the data fork is extents,
151 * determine if creating the default attr fork will result 167 * determine if creating the default attr fork will result
152 * in the extents form migrating to btree. If so, the 168 * in the extents form migrating to btree. If so, the
153 * minimum offset only needs to be the space required for 169 * minimum offset only needs to be the space required for
154 * the btree root. 170 * the btree root.
155 */ 171 */
156 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > 172 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
157 xfs_default_attroffset(dp)) 173 xfs_default_attroffset(dp))
158 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); 174 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
159 break; 175 break;
160
161 case XFS_DINODE_FMT_BTREE: 176 case XFS_DINODE_FMT_BTREE:
162 /* 177 /*
163 * If have data btree then keep forkoff if we have one, 178 * If we have a data btree then keep forkoff if we have one,
164 * otherwise we are adding a new attr, so then we set 179 * otherwise we are adding a new attr, so then we set
165 * minforkoff to where the btree root can finish so we have 180 * minforkoff to where the btree root can finish so we have
166 * plenty of room for attrs 181 * plenty of room for attrs
167 */ 182 */
168 if (dp->i_d.di_forkoff) { 183 if (dp->i_d.di_forkoff) {
169 if (offset < dp->i_d.di_forkoff) 184 if (offset < dp->i_d.di_forkoff)
170 return 0; 185 return 0;
171 else 186 return dp->i_d.di_forkoff;
172 return dp->i_d.di_forkoff; 187 }
173 } else 188 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
174 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
175 break; 189 break;
176 } 190 }
177 191
178 /* 192 /*
179 * A data fork btree root must have space for at least 193 * A data fork btree root must have space for at least
180 * MINDBTPTRS key/ptr pairs if the data fork is small or empty. 194 * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
181 */ 195 */
182 minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); 196 minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
@@ -186,10 +200,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
186 maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); 200 maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
187 maxforkoff = maxforkoff >> 3; /* rounded down */ 201 maxforkoff = maxforkoff >> 3; /* rounded down */
188 202
189 if (offset >= minforkoff && offset < maxforkoff)
190 return offset;
191 if (offset >= maxforkoff) 203 if (offset >= maxforkoff)
192 return maxforkoff; 204 return maxforkoff;
205 if (offset >= minforkoff)
206 return offset;
193 return 0; 207 return 0;
194} 208}
195 209
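The clamp at the end of the hunk above was simplified: testing the upper bound first lets each case fall through without compound conditions. A minimal userspace sketch of the resulting bounds logic, with illustrative values rather than the kernel's types:

#include <assert.h>

/* Model of the forkoff clamp: test the upper bound first so an
 * oversized offset is clamped to maxforkoff rather than accepted. */
static int clamp_forkoff(int offset, int minforkoff, int maxforkoff)
{
	if (offset >= maxforkoff)
		return maxforkoff;	/* too large: clamp */
	if (offset >= minforkoff)
		return offset;		/* fits: keep as-is */
	return 0;			/* no room to move the fork offset */
}

int main(void)
{
	assert(clamp_forkoff(10, 4, 8) == 8);	/* clamped to the maximum */
	assert(clamp_forkoff(6, 4, 8) == 6);	/* within range, kept */
	assert(clamp_forkoff(2, 4, 8) == 0);	/* below minimum, rejected */
	return 0;
}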
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e546a33214c..a175933a7f4 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3785,19 +3785,11 @@ xfs_bmap_compute_maxlevels(
3785 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 3785 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
3786 * caller. Frees all the extents that need freeing, which must be done 3786 * caller. Frees all the extents that need freeing, which must be done
3787 * last due to locking considerations. We never free any extents in 3787 * last due to locking considerations. We never free any extents in
3788 * the first transaction. This is to allow the caller to make the first 3788 * the first transaction.
3789 * transaction a synchronous one so that the pointers to the data being
3790 * broken in this transaction will be permanent before the data is actually
3791 * freed. This is necessary to prevent blocks from being reallocated
3792 * and written to before the free and reallocation are actually permanent.
3793 * We do not just make the first transaction synchronous here, because
3794 * there are more efficient ways to gain the same protection in some cases
3795 * (see the file truncation code).
3796 * 3789 *
3797 * Return 1 if the given transaction was committed and a new one 3790 * Return 1 if the given transaction was committed and a new one
3798 * started, and 0 otherwise in the committed parameter. 3791 * started, and 0 otherwise in the committed parameter.
3799 */ 3792 */
3800/*ARGSUSED*/
3801int /* error */ 3793int /* error */
3802xfs_bmap_finish( 3794xfs_bmap_finish(
3803 xfs_trans_t **tp, /* transaction pointer addr */ 3795 xfs_trans_t **tp, /* transaction pointer addr */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7b7e005e3dc..7888a756307 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -632,7 +632,7 @@ xfs_buf_item_push(
632 * the xfsbufd to get this buffer written. We have to unlock the buffer 632 * the xfsbufd to get this buffer written. We have to unlock the buffer
633 * to allow the xfsbufd to write it, too. 633 * to allow the xfsbufd to write it, too.
634 */ 634 */
635STATIC void 635STATIC bool
636xfs_buf_item_pushbuf( 636xfs_buf_item_pushbuf(
637 struct xfs_log_item *lip) 637 struct xfs_log_item *lip)
638{ 638{
@@ -646,6 +646,7 @@ xfs_buf_item_pushbuf(
646 646
647 xfs_buf_delwri_promote(bp); 647 xfs_buf_delwri_promote(bp);
648 xfs_buf_relse(bp); 648 xfs_buf_relse(bp);
649 return true;
649} 650}
650 651
651STATIC void 652STATIC void
@@ -1022,7 +1023,6 @@ xfs_buf_iodone_callbacks(
1022 XFS_BUF_UNDELAYWRITE(bp); 1023 XFS_BUF_UNDELAYWRITE(bp);
1023 1024
1024 trace_xfs_buf_error_relse(bp, _RET_IP_); 1025 trace_xfs_buf_error_relse(bp, _RET_IP_);
1025 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1026 1026
1027do_callbacks: 1027do_callbacks:
1028 xfs_buf_do_callbacks(bp); 1028 xfs_buf_do_callbacks(bp);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3631783b2b5..ca752f05c31 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -356,9 +356,20 @@ xfs_iget_cache_miss(
356 BUG(); 356 BUG();
357 } 357 }
358 358
359 spin_lock(&pag->pag_ici_lock); 359 /*
360 * These values must be set before inserting the inode into the radix
361 * tree as the moment it is inserted a concurrent lookup (allowed by the
362 * RCU locking mechanism) can find it and that lookup must see that this
363 * is an inode currently under construction (i.e. that XFS_INEW is set).
364 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
365 * memory barrier that ensures this detection works correctly at lookup
366 * time.
367 */
368 ip->i_udquot = ip->i_gdquot = NULL;
369 xfs_iflags_set(ip, XFS_INEW);
360 370
361 /* insert the new inode */ 371 /* insert the new inode */
372 spin_lock(&pag->pag_ici_lock);
362 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 373 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
363 if (unlikely(error)) { 374 if (unlikely(error)) {
364 WARN_ON(error != -EEXIST); 375 WARN_ON(error != -EEXIST);
@@ -366,11 +377,6 @@ xfs_iget_cache_miss(
366 error = EAGAIN; 377 error = EAGAIN;
367 goto out_preload_end; 378 goto out_preload_end;
368 } 379 }
369
370 /* These values _must_ be set before releasing the radix tree lock! */
371 ip->i_udquot = ip->i_gdquot = NULL;
372 xfs_iflags_set(ip, XFS_INEW);
373
374 spin_unlock(&pag->pag_ici_lock); 380 spin_unlock(&pag->pag_ici_lock);
375 radix_tree_preload_end(); 381 radix_tree_preload_end();
376 382
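The comment in this hunk is the key to the change: once radix_tree_insert() publishes the inode, an RCU-walk lookup can find it immediately, so XFS_INEW and the dquot pointers must be initialised first, with ip->i_flags_lock providing the ordering. A small pthread sketch of that publish-before-insert pattern; the structure and names are stand-ins, not the kernel API:

#include <pthread.h>
#include <stdbool.h>

struct obj {
	pthread_mutex_t	flags_lock;	/* models ip->i_flags_lock */
	bool		inew;		/* models the XFS_INEW flag */
};

/* Step 1: mark the object "under construction" before publishing it.
 * The lock's release pairs with the acquire in the lookup below, so
 * the flag is guaranteed visible once the object can be found. */
static void mark_under_construction(struct obj *o)
{
	pthread_mutex_lock(&o->flags_lock);
	o->inew = true;
	pthread_mutex_unlock(&o->flags_lock);
}

/* Step 3: a lookup that finds the freshly inserted object takes the
 * same lock and therefore observes the flag set in step 1. */
static bool lookup_sees_under_construction(struct obj *o)
{
	bool inew;

	pthread_mutex_lock(&o->flags_lock);
	inew = o->inew;
	pthread_mutex_unlock(&o->flags_lock);
	return inew;
}

int main(void)
{
	struct obj o = { PTHREAD_MUTEX_INITIALIZER, false };

	mark_under_construction(&o);
	/* Step 2 would be the radix tree insert that publishes &o. */
	return lookup_sees_under_construction(&o) ? 0 : 1;
}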
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a098a20ca63..5715279975c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1528,15 +1528,7 @@ xfs_itruncate_finish(
1528 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1528 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1529 } 1529 }
1530 } 1530 }
1531 } else if (sync) {
1532 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
1533 if (ip->i_d.di_anextents > 0)
1534 xfs_trans_set_sync(ntp);
1535 } 1531 }
1536 ASSERT(fork == XFS_DATA_FORK ||
1537 (fork == XFS_ATTR_FORK &&
1538 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
1539 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
1540 1532
1541 /* 1533 /*
1542 * Since it is possible for space to become allocated beyond 1534 * Since it is possible for space to become allocated beyond
@@ -3099,6 +3091,27 @@ corrupt_out:
3099 return XFS_ERROR(EFSCORRUPTED); 3091 return XFS_ERROR(EFSCORRUPTED);
3100} 3092}
3101 3093
3094void
3095xfs_promote_inode(
3096 struct xfs_inode *ip)
3097{
3098 struct xfs_buf *bp;
3099
3100 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3101
3102 bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
3103 ip->i_imap.im_len, XBF_TRYLOCK);
3104 if (!bp)
3105 return;
3106
3107 if (XFS_BUF_ISDELAYWRITE(bp)) {
3108 xfs_buf_delwri_promote(bp);
3109 wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
3110 }
3111
3112 xfs_buf_relse(bp);
3113}
3114
3102/* 3115/*
3103 * Return a pointer to the extent record at file index idx. 3116 * Return a pointer to the extent record at file index idx.
3104 */ 3117 */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 964cfea7768..28b3596453e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -509,6 +509,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
509void xfs_iext_realloc(xfs_inode_t *, int, int); 509void xfs_iext_realloc(xfs_inode_t *, int, int);
510void xfs_iunpin_wait(xfs_inode_t *); 510void xfs_iunpin_wait(xfs_inode_t *);
511int xfs_iflush(xfs_inode_t *, uint); 511int xfs_iflush(xfs_inode_t *, uint);
512void xfs_promote_inode(struct xfs_inode *);
512void xfs_lock_inodes(xfs_inode_t **, int, uint); 513void xfs_lock_inodes(xfs_inode_t **, int, uint);
513void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 514void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
514 515
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b1e88d56069..391044c62d5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -713,13 +713,14 @@ xfs_inode_item_committed(
713 * marked delayed write. If that's the case, we'll promote it and that will 713 * marked delayed write. If that's the case, we'll promote it and that will
714 * allow the caller to write the buffer by triggering the xfsbufd to run. 714 * allow the caller to write the buffer by triggering the xfsbufd to run.
715 */ 715 */
716STATIC void 716STATIC bool
717xfs_inode_item_pushbuf( 717xfs_inode_item_pushbuf(
718 struct xfs_log_item *lip) 718 struct xfs_log_item *lip)
719{ 719{
720 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 720 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
721 struct xfs_inode *ip = iip->ili_inode; 721 struct xfs_inode *ip = iip->ili_inode;
722 struct xfs_buf *bp; 722 struct xfs_buf *bp;
723 bool ret = true;
723 724
724 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
725 726
@@ -730,7 +731,7 @@ xfs_inode_item_pushbuf(
730 if (completion_done(&ip->i_flush) || 731 if (completion_done(&ip->i_flush) ||
731 !(lip->li_flags & XFS_LI_IN_AIL)) { 732 !(lip->li_flags & XFS_LI_IN_AIL)) {
732 xfs_iunlock(ip, XFS_ILOCK_SHARED); 733 xfs_iunlock(ip, XFS_ILOCK_SHARED);
733 return; 734 return true;
734 } 735 }
735 736
736 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, 737 bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno,
@@ -738,10 +739,13 @@ xfs_inode_item_pushbuf(
738 739
739 xfs_iunlock(ip, XFS_ILOCK_SHARED); 740 xfs_iunlock(ip, XFS_ILOCK_SHARED);
740 if (!bp) 741 if (!bp)
741 return; 742 return true;
742 if (XFS_BUF_ISDELAYWRITE(bp)) 743 if (XFS_BUF_ISDELAYWRITE(bp))
743 xfs_buf_delwri_promote(bp); 744 xfs_buf_delwri_promote(bp);
745 if (XFS_BUF_ISPINNED(bp))
746 ret = false;
744 xfs_buf_relse(bp); 747 xfs_buf_relse(bp);
748 return ret;
745} 749}
746 750
747/* 751/*
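Returning bool from ->iop_pushbuf (see also the xfs_trans_ail.c hunk below) lets the AIL pusher distinguish a buffer it managed to promote from one still pinned in the log, which it must treat as stuck and resolve with a log force. A schematic sketch of that contract, using invented names:

#include <stdbool.h>

struct buf {
	bool delwri;	/* on the delayed-write queue */
	bool pinned;	/* still pinned by the log */
};

/* Returns true if progress was (or can be) made, false if the buffer
 * is pinned and the caller should count the item as stuck and force
 * the log before retrying. */
static bool pushbuf(struct buf *bp)
{
	if (bp->delwri) {
		/* promote to the head of the delayed-write queue */
	}
	return !bp->pinned;
}

int main(void)
{
	struct buf pinned = { .delwri = true, .pinned = true };

	return pushbuf(&pinned) ? 1 : 0;	/* pinned: report "stuck" */
}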
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 04142caedb2..b75fd67ca37 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3159,37 +3159,26 @@ xlog_recover_process_iunlinks(
3159 */ 3159 */
3160 continue; 3160 continue;
3161 } 3161 }
3162 /*
3163 * Unlock the buffer so that it can be acquired in the normal
3164 * course of the transaction to truncate and free each inode.
3165 * Because we are not racing with anyone else here for the AGI
3166 * buffer, we don't even need to hold it locked to read the
 3167 * initial unlinked bucket entries out of the buffer. We keep a
 3168 * buffer reference, though, so that it stays pinned in memory
3169 * while we need the buffer.
3170 */
3162 agi = XFS_BUF_TO_AGI(agibp); 3171 agi = XFS_BUF_TO_AGI(agibp);
3172 xfs_buf_unlock(agibp);
3163 3173
3164 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 3174 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
3165 agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3175 agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3166 while (agino != NULLAGINO) { 3176 while (agino != NULLAGINO) {
3167 /*
3168 * Release the agi buffer so that it can
3169 * be acquired in the normal course of the
3170 * transaction to truncate and free the inode.
3171 */
3172 xfs_buf_relse(agibp);
3173
3174 agino = xlog_recover_process_one_iunlink(mp, 3177 agino = xlog_recover_process_one_iunlink(mp,
3175 agno, agino, bucket); 3178 agno, agino, bucket);
3176
3177 /*
3178 * Reacquire the agibuffer and continue around
3179 * the loop. This should never fail as we know
3180 * the buffer was good earlier on.
3181 */
3182 error = xfs_read_agi(mp, NULL, agno, &agibp);
3183 ASSERT(error == 0);
3184 agi = XFS_BUF_TO_AGI(agibp);
3185 } 3179 }
3186 } 3180 }
3187 3181 xfs_buf_rele(agibp);
3188 /*
3189 * Release the buffer for the current agi so we can
3190 * go on to the next one.
3191 */
3192 xfs_buf_relse(agibp);
3193 } 3182 }
3194 3183
3195 mp->m_dmevmask = mp_dmevmask; 3184 mp->m_dmevmask = mp_dmevmask;
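Rather than releasing and re-reading the AGI buffer on every unlinked-inode iteration, the loop now takes one reference, drops the buffer lock up front, and releases the reference after the whole bucket scan. A simplified userspace model of that hold/unlock pattern (illustrative types, not the xfs_buf API):

#include <pthread.h>

#define NBUCKETS 64

struct buf {
	pthread_mutex_t	lock;		/* models the buffer lock */
	int		ref;		/* models the buffer reference count */
	int		buckets[NBUCKETS];
};

/* Called with bp->lock held, as a read returns a locked buffer. */
static void scan_buckets(struct buf *bp)
{
	bp->ref++;			/* keep the buffer pinned in memory */
	pthread_mutex_unlock(&bp->lock);/* ...but let other users lock it */

	for (int i = 0; i < NBUCKETS; i++) {
		/* process bp->buckets[i]; no relocking or re-reading,
		 * since nothing else mutates the buckets during recovery */
	}

	pthread_mutex_lock(&bp->lock);
	bp->ref--;			/* the xfs_buf_rele() equivalent */
	pthread_mutex_unlock(&bp->lock);
}

int main(void)
{
	struct buf bp = { .lock = PTHREAD_MUTEX_INITIALIZER, .ref = 1 };

	pthread_mutex_lock(&bp.lock);	/* the "read" returns it locked */
	scan_buckets(&bp);
	return bp.ref == 1 ? 0 : 1;
}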
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b49b82363d2..9afdd497369 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,9 +44,6 @@
44#include "xfs_trace.h" 44#include "xfs_trace.h"
45 45
46 46
47STATIC void xfs_unmountfs_wait(xfs_mount_t *);
48
49
50#ifdef HAVE_PERCPU_SB 47#ifdef HAVE_PERCPU_SB
51STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 48STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
52 int); 49 int);
@@ -1507,11 +1504,6 @@ xfs_unmountfs(
1507 */ 1504 */
1508 xfs_log_force(mp, XFS_LOG_SYNC); 1505 xfs_log_force(mp, XFS_LOG_SYNC);
1509 1506
1510 xfs_binval(mp->m_ddev_targp);
1511 if (mp->m_rtdev_targp) {
1512 xfs_binval(mp->m_rtdev_targp);
1513 }
1514
1515 /* 1507 /*
1516 * Unreserve any blocks we have so that when we unmount we don't account 1508 * Unreserve any blocks we have so that when we unmount we don't account
1517 * the reserved free space as used. This is really only necessary for 1509 * the reserved free space as used. This is really only necessary for
@@ -1537,7 +1529,16 @@ xfs_unmountfs(
1537 xfs_warn(mp, "Unable to update superblock counters. " 1529 xfs_warn(mp, "Unable to update superblock counters. "
1538 "Freespace may not be correct on next mount."); 1530 "Freespace may not be correct on next mount.");
1539 xfs_unmountfs_writesb(mp); 1531 xfs_unmountfs_writesb(mp);
1540 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1532
1533 /*
1534 * Make sure all buffers have been flushed and completed before
1535 * unmounting the log.
1536 */
1537 error = xfs_flush_buftarg(mp->m_ddev_targp, 1);
1538 if (error)
1539 xfs_warn(mp, "%d busy buffers during unmount.", error);
1540 xfs_wait_buftarg(mp->m_ddev_targp);
1541
1541 xfs_log_unmount_write(mp); 1542 xfs_log_unmount_write(mp);
1542 xfs_log_unmount(mp); 1543 xfs_log_unmount(mp);
1543 xfs_uuid_unmount(mp); 1544 xfs_uuid_unmount(mp);
@@ -1548,16 +1549,6 @@ xfs_unmountfs(
1548 xfs_free_perag(mp); 1549 xfs_free_perag(mp);
1549} 1550}
1550 1551
1551STATIC void
1552xfs_unmountfs_wait(xfs_mount_t *mp)
1553{
1554 if (mp->m_logdev_targp != mp->m_ddev_targp)
1555 xfs_wait_buftarg(mp->m_logdev_targp);
1556 if (mp->m_rtdev_targp)
1557 xfs_wait_buftarg(mp->m_rtdev_targp);
1558 xfs_wait_buftarg(mp->m_ddev_targp);
1559}
1560
1561int 1552int
1562xfs_fs_writable(xfs_mount_t *mp) 1553xfs_fs_writable(xfs_mount_t *mp)
1563{ 1554{
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b33aa..efc147f0e9b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
1426static inline void 1426static inline void
1427xfs_log_item_batch_insert( 1427xfs_log_item_batch_insert(
1428 struct xfs_ail *ailp, 1428 struct xfs_ail *ailp,
1429 struct xfs_ail_cursor *cur,
1429 struct xfs_log_item **log_items, 1430 struct xfs_log_item **log_items,
1430 int nr_items, 1431 int nr_items,
1431 xfs_lsn_t commit_lsn) 1432 xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
1434 1435
1435 spin_lock(&ailp->xa_lock); 1436 spin_lock(&ailp->xa_lock);
1436 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ 1437 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
1437 xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); 1438 xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
1438 1439
1439 for (i = 0; i < nr_items; i++) 1440 for (i = 0; i < nr_items; i++)
1440 IOP_UNPIN(log_items[i], 0); 1441 IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
1452 * as an iclog write error even though we haven't started any IO yet. Hence in 1453 * as an iclog write error even though we haven't started any IO yet. Hence in
1453 * this case all we need to do is IOP_COMMITTED processing, followed by an 1454 * this case all we need to do is IOP_COMMITTED processing, followed by an
1454 * IOP_UNPIN(aborted) call. 1455 * IOP_UNPIN(aborted) call.
1456 *
1457 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
1458 * at the end of the AIL, the insert cursor avoids the need to walk
1459 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
1460 * call. This saves a lot of needless list walking and is a net win, even
 1461 * though it slightly increases the amount of AIL lock traffic to set it up
1462 * and tear it down.
1455 */ 1463 */
1456void 1464void
1457xfs_trans_committed_bulk( 1465xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
1463#define LOG_ITEM_BATCH_SIZE 32 1471#define LOG_ITEM_BATCH_SIZE 32
1464 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; 1472 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
1465 struct xfs_log_vec *lv; 1473 struct xfs_log_vec *lv;
1474 struct xfs_ail_cursor cur;
1466 int i = 0; 1475 int i = 0;
1467 1476
1477 spin_lock(&ailp->xa_lock);
1478 xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
1479 spin_unlock(&ailp->xa_lock);
1480
1468 /* unpin all the log items */ 1481 /* unpin all the log items */
1469 for (lv = log_vector; lv; lv = lv->lv_next ) { 1482 for (lv = log_vector; lv; lv = lv->lv_next ) {
1470 struct xfs_log_item *lip = lv->lv_item; 1483 struct xfs_log_item *lip = lv->lv_item;
@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
1493 /* 1506 /*
1494 * Not a bulk update option due to unusual item_lsn. 1507 * Not a bulk update option due to unusual item_lsn.
1495 * Push into AIL immediately, rechecking the lsn once 1508 * Push into AIL immediately, rechecking the lsn once
1496 * we have the ail lock. Then unpin the item. 1509 * we have the ail lock. Then unpin the item. This does
1510 * not affect the AIL cursor the bulk insert path is
1511 * using.
1497 */ 1512 */
1498 spin_lock(&ailp->xa_lock); 1513 spin_lock(&ailp->xa_lock);
1499 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) 1514 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
1507 /* Item is a candidate for bulk AIL insert. */ 1522 /* Item is a candidate for bulk AIL insert. */
1508 log_items[i++] = lv->lv_item; 1523 log_items[i++] = lv->lv_item;
1509 if (i >= LOG_ITEM_BATCH_SIZE) { 1524 if (i >= LOG_ITEM_BATCH_SIZE) {
1510 xfs_log_item_batch_insert(ailp, log_items, 1525 xfs_log_item_batch_insert(ailp, &cur, log_items,
1511 LOG_ITEM_BATCH_SIZE, commit_lsn); 1526 LOG_ITEM_BATCH_SIZE, commit_lsn);
1512 i = 0; 1527 i = 0;
1513 } 1528 }
@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
1515 1530
1516 /* make sure we insert the remainder! */ 1531 /* make sure we insert the remainder! */
1517 if (i) 1532 if (i)
1518 xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); 1533 xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
1534
1535 spin_lock(&ailp->xa_lock);
1536 xfs_trans_ail_cursor_done(ailp, &cur);
1537 spin_unlock(&ailp->xa_lock);
1519} 1538}
1520 1539
1521/* 1540/*
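The cursor described above is a standard sorted-list trick: remember where the previous batch was spliced so the next batch at the same commit_lsn inserts in O(1) instead of re-walking the AIL from the tail. A toy linked-list version of the idea, independent of the kernel code:

#include <assert.h>
#include <stddef.h>

struct item { long lsn; struct item *next; };

/* Splice a pre-linked run [first..last] (all at the same lsn) after
 * *cur and advance the cursor to last, so the next run at this lsn
 * inserts without walking the list again. */
static void splice_after(struct item **cur, struct item *first,
			 struct item *last)
{
	last->next = (*cur)->next;
	(*cur)->next = first;
	*cur = last;
}

int main(void)
{
	struct item head = { 0, NULL };
	struct item a = { 5, NULL }, b = { 5, NULL };
	struct item *cur = &head;

	a.next = &b;			/* pre-linked batch at lsn 5 */
	splice_after(&cur, &a, &b);
	assert(head.next == &a && cur == &b);
	return 0;
}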
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 06a9759b635..53597f4db9b 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -350,7 +350,7 @@ typedef struct xfs_item_ops {
350 void (*iop_unlock)(xfs_log_item_t *); 350 void (*iop_unlock)(xfs_log_item_t *);
351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 351 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
352 void (*iop_push)(xfs_log_item_t *); 352 void (*iop_push)(xfs_log_item_t *);
353 void (*iop_pushbuf)(xfs_log_item_t *); 353 bool (*iop_pushbuf)(xfs_log_item_t *);
354 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 354 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
355} xfs_item_ops_t; 355} xfs_item_ops_t;
356 356
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380092c..a4c281bf7a9 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,8 +28,6 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
32
33#ifdef DEBUG 31#ifdef DEBUG
34/* 32/*
35 * Check that the list is sorted as it should be. 33 * Check that the list is sorted as it should be.
@@ -272,9 +270,9 @@ xfs_trans_ail_cursor_clear(
272} 270}
273 271
274/* 272/*
275 * Return the item in the AIL with the current lsn. 273 * Initialise the cursor to the first item in the AIL with the given @lsn.
276 * Return the current tree generation number for use 274 * This searches the list from lowest LSN to highest. Pass a @lsn of zero
277 * in calls to xfs_trans_next_ail(). 275 * to initialise the cursor to the first item in the AIL.
278 */ 276 */
279xfs_log_item_t * 277xfs_log_item_t *
280xfs_trans_ail_cursor_first( 278xfs_trans_ail_cursor_first(
@@ -300,31 +298,97 @@ out:
300} 298}
301 299
302/* 300/*
303 * splice the log item list into the AIL at the given LSN. 301 * Initialise the cursor to the last item in the AIL with the given @lsn.
302 * This searches the list from highest LSN to lowest. If there is no item with
303 * the value of @lsn, then it sets the cursor to the last item with an LSN lower
304 * than @lsn.
305 */
306static struct xfs_log_item *
307__xfs_trans_ail_cursor_last(
308 struct xfs_ail *ailp,
309 xfs_lsn_t lsn)
310{
311 xfs_log_item_t *lip;
312
313 list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
314 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
315 return lip;
316 }
317 return NULL;
318}
319
320/*
321 * Initialise the cursor to the last item in the AIL with the given @lsn.
322 * This searches the list from highest LSN to lowest.
323 */
324struct xfs_log_item *
325xfs_trans_ail_cursor_last(
326 struct xfs_ail *ailp,
327 struct xfs_ail_cursor *cur,
328 xfs_lsn_t lsn)
329{
330 xfs_trans_ail_cursor_init(ailp, cur);
331 cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
332 return cur->item;
333}
334
335/*
336 * splice the log item list into the AIL at the given LSN. We splice to the
337 * tail of the given LSN to maintain insert order for push traversals. The
338 * cursor is optional, allowing repeated updates to the same LSN to avoid
339 * repeated traversals.
304 */ 340 */
305static void 341static void
306xfs_ail_splice( 342xfs_ail_splice(
307 struct xfs_ail *ailp, 343 struct xfs_ail *ailp,
308 struct list_head *list, 344 struct xfs_ail_cursor *cur,
309 xfs_lsn_t lsn) 345 struct list_head *list,
346 xfs_lsn_t lsn)
310{ 347{
311 xfs_log_item_t *next_lip; 348 struct xfs_log_item *lip = cur ? cur->item : NULL;
349 struct xfs_log_item *next_lip;
312 350
313 /* If the list is empty, just insert the item. */ 351 /*
314 if (list_empty(&ailp->xa_ail)) { 352 * Get a new cursor if we don't have a placeholder or the existing one
315 list_splice(list, &ailp->xa_ail); 353 * has been invalidated.
316 return; 354 */
355 if (!lip || (__psint_t)lip & 1) {
356 lip = __xfs_trans_ail_cursor_last(ailp, lsn);
357
358 if (!lip) {
359 /* The list is empty, so just splice and return. */
360 if (cur)
361 cur->item = NULL;
362 list_splice(list, &ailp->xa_ail);
363 return;
364 }
317 } 365 }
318 366
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 367 /*
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) 368 * Our cursor points to the item we want to insert _after_, so we have
321 break; 369 * to update the cursor to point to the end of the list we are splicing
370 * in so that it points to the correct location for the next splice.
371 * i.e. before the splice
372 *
373 * lsn -> lsn -> lsn + x -> lsn + x ...
374 * ^
375 * | cursor points here
376 *
377 * After the splice we have:
378 *
379 * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
380 * ^ ^
381 * | cursor points here | needs to move here
382 *
383 * So we set the cursor to the last item in the list to be spliced
384 * before we execute the splice, resulting in the cursor pointing to
385 * the correct item after the splice occurs.
386 */
387 if (cur) {
388 next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
389 cur->item = next_lip;
322 } 390 }
323 391 list_splice(list, &lip->li_ail);
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328} 392}
329 393
330/* 394/*
@@ -340,16 +404,10 @@ xfs_ail_delete(
340 xfs_trans_ail_cursor_clear(ailp, lip); 404 xfs_trans_ail_cursor_clear(ailp, lip);
341} 405}
342 406
343/* 407static long
344 * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself 408xfsaild_push(
345 * to run at a later time if there is more work to do to complete the push. 409 struct xfs_ail *ailp)
346 */
347STATIC void
348xfs_ail_worker(
349 struct work_struct *work)
350{ 410{
351 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work);
353 xfs_mount_t *mp = ailp->xa_mount; 411 xfs_mount_t *mp = ailp->xa_mount;
354 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 412 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
355 xfs_log_item_t *lip; 413 xfs_log_item_t *lip;
@@ -412,8 +470,13 @@ xfs_ail_worker(
412 470
413 case XFS_ITEM_PUSHBUF: 471 case XFS_ITEM_PUSHBUF:
414 XFS_STATS_INC(xs_push_ail_pushbuf); 472 XFS_STATS_INC(xs_push_ail_pushbuf);
415 IOP_PUSHBUF(lip); 473
416 ailp->xa_last_pushed_lsn = lsn; 474 if (!IOP_PUSHBUF(lip)) {
475 stuck++;
476 flush_log = 1;
477 } else {
478 ailp->xa_last_pushed_lsn = lsn;
479 }
417 push_xfsbufd = 1; 480 push_xfsbufd = 1;
418 break; 481 break;
419 482
@@ -425,7 +488,6 @@ xfs_ail_worker(
425 488
426 case XFS_ITEM_LOCKED: 489 case XFS_ITEM_LOCKED:
427 XFS_STATS_INC(xs_push_ail_locked); 490 XFS_STATS_INC(xs_push_ail_locked);
428 ailp->xa_last_pushed_lsn = lsn;
429 stuck++; 491 stuck++;
430 break; 492 break;
431 493
@@ -486,20 +548,6 @@ out_done:
486 /* We're past our target or empty, so idle */ 548 /* We're past our target or empty, so idle */
487 ailp->xa_last_pushed_lsn = 0; 549 ailp->xa_last_pushed_lsn = 0;
488 550
489 /*
490 * We clear the XFS_AIL_PUSHING_BIT first before checking
491 * whether the target has changed. If the target has changed,
492 * this pushes the requeue race directly onto the result of the
493 * atomic test/set bit, so we are guaranteed that either the
494 * the pusher that changed the target or ourselves will requeue
495 * the work (but not both).
496 */
497 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
498 smp_rmb();
499 if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
500 test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
501 return;
502
503 tout = 50; 551 tout = 50;
504 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 552 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
505 /* 553 /*
@@ -522,9 +570,30 @@ out_done:
522 tout = 20; 570 tout = 20;
523 } 571 }
524 572
525 /* There is more to do, requeue us. */ 573 return tout;
526 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 574}
527 msecs_to_jiffies(tout)); 575
576static int
577xfsaild(
578 void *data)
579{
580 struct xfs_ail *ailp = data;
581 long tout = 0; /* milliseconds */
582
583 while (!kthread_should_stop()) {
584 if (tout && tout <= 20)
585 __set_current_state(TASK_KILLABLE);
586 else
587 __set_current_state(TASK_INTERRUPTIBLE);
588 schedule_timeout(tout ?
589 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
590
591 try_to_freeze();
592
593 tout = xfsaild_push(ailp);
594 }
595
596 return 0;
528} 597}
529 598
530/* 599/*
@@ -559,8 +628,9 @@ xfs_ail_push(
559 */ 628 */
560 smp_wmb(); 629 smp_wmb();
561 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); 630 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
562 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) 631 smp_wmb();
563 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); 632
633 wake_up_process(ailp->xa_task);
564} 634}
565 635
566/* 636/*
@@ -645,6 +715,7 @@ xfs_trans_unlocked_item(
645void 715void
646xfs_trans_ail_update_bulk( 716xfs_trans_ail_update_bulk(
647 struct xfs_ail *ailp, 717 struct xfs_ail *ailp,
718 struct xfs_ail_cursor *cur,
648 struct xfs_log_item **log_items, 719 struct xfs_log_item **log_items,
649 int nr_items, 720 int nr_items,
650 xfs_lsn_t lsn) __releases(ailp->xa_lock) 721 xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +745,7 @@ xfs_trans_ail_update_bulk(
674 list_add(&lip->li_ail, &tmp); 745 list_add(&lip->li_ail, &tmp);
675 } 746 }
676 747
677 xfs_ail_splice(ailp, &tmp, lsn); 748 xfs_ail_splice(ailp, cur, &tmp, lsn);
678 749
679 if (!mlip_changed) { 750 if (!mlip_changed) {
680 spin_unlock(&ailp->xa_lock); 751 spin_unlock(&ailp->xa_lock);
@@ -794,9 +865,18 @@ xfs_trans_ail_init(
794 ailp->xa_mount = mp; 865 ailp->xa_mount = mp;
795 INIT_LIST_HEAD(&ailp->xa_ail); 866 INIT_LIST_HEAD(&ailp->xa_ail);
796 spin_lock_init(&ailp->xa_lock); 867 spin_lock_init(&ailp->xa_lock);
797 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); 868
869 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
870 ailp->xa_mount->m_fsname);
871 if (IS_ERR(ailp->xa_task))
872 goto out_free_ailp;
873
798 mp->m_ail = ailp; 874 mp->m_ail = ailp;
799 return 0; 875 return 0;
876
877out_free_ailp:
878 kmem_free(ailp);
879 return ENOMEM;
800} 880}
801 881
802void 882void
@@ -805,6 +885,6 @@ xfs_trans_ail_destroy(
805{ 885{
806 struct xfs_ail *ailp = mp->m_ail; 886 struct xfs_ail *ailp = mp->m_ail;
807 887
808 cancel_delayed_work_sync(&ailp->xa_work); 888 kthread_stop(ailp->xa_task);
809 kmem_free(ailp); 889 kmem_free(ailp);
810} 890}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9e9a1..fe2e3cbc2f9 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -64,24 +64,19 @@ struct xfs_ail_cursor {
64 */ 64 */
65struct xfs_ail { 65struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct task_struct *xa_task;
67 struct list_head xa_ail; 68 struct list_head xa_ail;
68 xfs_lsn_t xa_target; 69 xfs_lsn_t xa_target;
69 struct xfs_ail_cursor xa_cursors; 70 struct xfs_ail_cursor xa_cursors;
70 spinlock_t xa_lock; 71 spinlock_t xa_lock;
71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn; 72 xfs_lsn_t xa_last_pushed_lsn;
73 unsigned long xa_flags;
74}; 73};
75 74
76#define XFS_AIL_PUSHING_BIT 0
77
78/* 75/*
79 * From xfs_trans_ail.c 76 * From xfs_trans_ail.c
80 */ 77 */
81
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83
84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
79 struct xfs_ail_cursor *cur,
85 struct xfs_log_item **log_items, int nr_items, 80 struct xfs_log_item **log_items, int nr_items,
86 xfs_lsn_t lsn) __releases(ailp->xa_lock); 81 xfs_lsn_t lsn) __releases(ailp->xa_lock);
87static inline void 82static inline void
@@ -90,7 +85,7 @@ xfs_trans_ail_update(
90 struct xfs_log_item *lip, 85 struct xfs_log_item *lip,
91 xfs_lsn_t lsn) __releases(ailp->xa_lock) 86 xfs_lsn_t lsn) __releases(ailp->xa_lock)
92{ 87{
93 xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); 88 xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
94} 89}
95 90
96void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, 91void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +106,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
111void xfs_trans_unlocked_item(struct xfs_ail *, 106void xfs_trans_unlocked_item(struct xfs_ail *,
112 xfs_log_item_t *); 107 xfs_log_item_t *);
113 108
114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 109struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
110 struct xfs_ail_cursor *cur,
111 xfs_lsn_t lsn);
112struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
115 struct xfs_ail_cursor *cur, 113 struct xfs_ail_cursor *cur,
116 xfs_lsn_t lsn); 114 xfs_lsn_t lsn);
117struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, 115struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
118 struct xfs_ail_cursor *cur); 116 struct xfs_ail_cursor *cur);
119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 117void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
120 struct xfs_ail_cursor *cur); 118 struct xfs_ail_cursor *cur);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 619720705bc..59509ae0b27 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -535,7 +535,7 @@ xfs_readlink(
535 char *link) 535 char *link)
536{ 536{
537 xfs_mount_t *mp = ip->i_mount; 537 xfs_mount_t *mp = ip->i_mount;
538 int pathlen; 538 xfs_fsize_t pathlen;
539 int error = 0; 539 int error = 0;
540 540
541 trace_xfs_readlink(ip); 541 trace_xfs_readlink(ip);
@@ -545,13 +545,20 @@ xfs_readlink(
545 545
546 xfs_ilock(ip, XFS_ILOCK_SHARED); 546 xfs_ilock(ip, XFS_ILOCK_SHARED);
547 547
548 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
549 ASSERT(ip->i_d.di_size <= MAXPATHLEN);
550
551 pathlen = ip->i_d.di_size; 548 pathlen = ip->i_d.di_size;
552 if (!pathlen) 549 if (!pathlen)
553 goto out; 550 goto out;
554 551
552 if (pathlen < 0 || pathlen > MAXPATHLEN) {
553 xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
554 __func__, (unsigned long long) ip->i_ino,
555 (long long) pathlen);
556 ASSERT(0);
557 error = XFS_ERROR(EFSCORRUPTED);
558 goto out;
559 }
560
561
555 if (ip->i_df.if_flags & XFS_IFINLINE) { 562 if (ip->i_df.if_flags & XFS_IFINLINE) {
556 memcpy(link, ip->i_df.if_u1.if_data, pathlen); 563 memcpy(link, ip->i_df.if_u1.if_data, pathlen);
557 link[pathlen] = '\0'; 564 link[pathlen] = '\0';
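The hunk replaces DEBUG-only ASSERTs with a runtime check, so production kernels also reject a corrupt on-disk symlink length before memcpy() can overrun the caller's buffer. The same validate-before-use pattern as a standalone sketch (PATH_MAX and the message are stand-ins for MAXPATHLEN and xfs_alert()):

#include <limits.h>
#include <stdio.h>

/* Returns 0 if the on-disk length is usable, -1 if the inode is
 * corrupt; the kernel returns XFS_ERROR(EFSCORRUPTED) here. */
static int check_symlink_len(long long pathlen)
{
	if (pathlen < 0 || pathlen > PATH_MAX) {
		fprintf(stderr, "bad symlink length (%lld)\n", pathlen);
		return -1;
	}
	return 0;
}

int main(void)
{
	return check_symlink_len(-1) == -1 &&
	       check_symlink_len(12) == 0 ? 0 : 1;
}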
diff --git a/fs/yaffs2/Kconfig b/fs/yaffs2/Kconfig
new file mode 100644
index 00000000000..63541405999
--- /dev/null
+++ b/fs/yaffs2/Kconfig
@@ -0,0 +1,161 @@
1#
2# YAFFS file system configurations
3#
4
5config YAFFS_FS
6 tristate "YAFFS2 file system support"
7 default n
8 depends on MTD_BLOCK
9 select YAFFS_YAFFS1
10 select YAFFS_YAFFS2
11 help
12 YAFFS2, or Yet Another Flash Filing System, is a filing system
13 optimised for NAND Flash chips.
14
15 To compile the YAFFS2 file system support as a module, choose M
16 here: the module will be called yaffs2.
17
18 If unsure, say N.
19
20 Further information on YAFFS2 is available at
21 <http://www.aleph1.co.uk/yaffs/>.
22
23config YAFFS_YAFFS1
24 bool "512 byte / page devices"
25 depends on YAFFS_FS
26 default y
27 help
28 Enable YAFFS1 support -- yaffs for 512 byte / page devices
29
30 Not needed for 2K-page devices.
31
32 If unsure, say Y.
33
34config YAFFS_9BYTE_TAGS
35 bool "Use older-style on-NAND data format with pageStatus byte"
36 depends on YAFFS_YAFFS1
37 default n
38 help
39
 40 The older-style on-NAND data format has a "pageStatus" byte to record
41 chunk/page state. This byte is zero when the page is discarded.
42 Choose this option if you have existing on-NAND data using this
 43 format that you need to continue to support. Newly written data
 44 also uses the older-style format. Note: use of this option
45 generally requires that MTD's oob layout be adjusted to use the
46 older-style format. See notes on tags formats and MTD versions
47 in yaffs_mtdif1.c.
48
49 If unsure, say N.
50
51config YAFFS_DOES_ECC
52 bool "Lets Yaffs do its own ECC"
53 depends on YAFFS_FS && YAFFS_YAFFS1 && !YAFFS_9BYTE_TAGS
54 default n
55 help
56 This enables Yaffs to use its own ECC functions instead of using
57 the ones from the generic MTD-NAND driver.
58
59 If unsure, say N.
60
61config YAFFS_ECC_WRONG_ORDER
62 bool "Use the same ecc byte order as Steven Hill's nand_ecc.c"
63 depends on YAFFS_FS && YAFFS_DOES_ECC && !YAFFS_9BYTE_TAGS
64 default n
65 help
66 This makes yaffs_ecc.c use the same ecc byte order as Steven
67 Hill's nand_ecc.c. If not set, then you get the same ecc byte
68 order as SmartMedia.
69
70 If unsure, say N.
71
72config YAFFS_YAFFS2
73 bool "2048 byte (or larger) / page devices"
74 depends on YAFFS_FS
75 default y
76 help
77 Enable YAFFS2 support -- yaffs for >= 2K bytes per page devices
78
79 If unsure, say Y.
80
81config YAFFS_AUTO_YAFFS2
82 bool "Autoselect yaffs2 format"
83 depends on YAFFS_YAFFS2
84 default y
85 help
 86 Without this, you need to explicitly use yaffs2 as the file
87 system type. With this, you can say "yaffs" and yaffs or yaffs2
88 will be used depending on the device page size (yaffs on
89 512-byte page devices, yaffs2 on 2K page devices).
90
91 If unsure, say Y.
92
93config YAFFS_DISABLE_TAGS_ECC
94 bool "Disable YAFFS from doing ECC on tags by default"
95 depends on YAFFS_FS && YAFFS_YAFFS2
96 default n
97 help
98 This defaults Yaffs to using its own ECC calculations on tags instead of
99 just relying on the MTD.
100 This behavior can also be overridden with tags_ecc_on and
101 tags_ecc_off mount options.
102
103 If unsure, say N.
104
105config YAFFS_ALWAYS_CHECK_CHUNK_ERASED
106 bool "Force chunk erase check"
107 depends on YAFFS_FS
108 default n
109 help
110 Normally YAFFS only checks chunks before writing until an erased
111 chunk is found. This helps to detect any partially written
 112 chunks that might have been left behind by power loss.
113
114 Enabling this forces on the test that chunks are erased in flash
115 before writing to them. This takes more time but is potentially
116 a bit more secure.
117
 118 Suggest setting to Y during development to iron out driver
 119 issues, and to N if you want faster writing.
120
121 If unsure, say Y.
122
123config YAFFS_EMPTY_LOST_AND_FOUND
124 bool "Empty lost and found on boot"
125 depends on YAFFS_FS
126 default n
127 help
 128 If this is enabled, then the contents of lost and found are
129 automatically dumped at mount.
130
131 If unsure, say N.
132
133config YAFFS_DISABLE_BLOCK_REFRESHING
134 bool "Disable yaffs2 block refreshing"
135 depends on YAFFS_FS
136 default n
137 help
138 If this is set, then block refreshing is disabled.
139 Block refreshing infrequently refreshes the oldest block in
140 a yaffs2 file system. This mechanism helps to refresh flash to
 141 mitigate data loss. This is particularly useful for MLC flash.
142
143 If unsure, say N.
144
145config YAFFS_DISABLE_BACKGROUND
146 bool "Disable yaffs2 background processing"
147 depends on YAFFS_FS
148 default n
149 help
150 If this is set, then background processing is disabled.
151 Background processing makes many foreground activities faster.
152
153 If unsure, say N.
154
155config YAFFS_XATTR
156 bool "Enable yaffs2 xattr support"
157 depends on YAFFS_FS
158 default y
159 help
160 If this is set then yaffs2 will provide xattr support.
161 If unsure, say Y.
diff --git a/fs/yaffs2/Makefile b/fs/yaffs2/Makefile
new file mode 100644
index 00000000000..e63a28aa3ed
--- /dev/null
+++ b/fs/yaffs2/Makefile
@@ -0,0 +1,17 @@
1#
2# Makefile for the linux YAFFS filesystem routines.
3#
4
5obj-$(CONFIG_YAFFS_FS) += yaffs.o
6
7yaffs-y := yaffs_ecc.o yaffs_vfs.o yaffs_guts.o yaffs_checkptrw.o
8yaffs-y += yaffs_packedtags1.o yaffs_packedtags2.o yaffs_nand.o
9yaffs-y += yaffs_tagscompat.o yaffs_tagsvalidity.o
10yaffs-y += yaffs_mtdif.o yaffs_mtdif1.o yaffs_mtdif2.o
11yaffs-y += yaffs_nameval.o yaffs_attribs.o
12yaffs-y += yaffs_allocator.o
13yaffs-y += yaffs_yaffs1.o
14yaffs-y += yaffs_yaffs2.o
15yaffs-y += yaffs_bitmap.o
16yaffs-y += yaffs_verify.o
17
diff --git a/fs/yaffs2/yaffs_allocator.c b/fs/yaffs2/yaffs_allocator.c
new file mode 100644
index 00000000000..f9cd5becd8f
--- /dev/null
+++ b/fs/yaffs2/yaffs_allocator.c
@@ -0,0 +1,396 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_allocator.h"
15#include "yaffs_guts.h"
16#include "yaffs_trace.h"
17#include "yportenv.h"
18
19#ifdef CONFIG_YAFFS_KMALLOC_ALLOCATOR
20
21void yaffs_deinit_raw_tnodes_and_objs(struct yaffs_dev *dev)
22{
23 dev = dev;
24}
25
26void yaffs_init_raw_tnodes_and_objs(struct yaffs_dev *dev)
27{
28 dev = dev;
29}
30
31struct yaffs_tnode *yaffs_alloc_raw_tnode(struct yaffs_dev *dev)
32{
33 return (struct yaffs_tnode *)kmalloc(dev->tnode_size, GFP_NOFS);
34}
35
36void yaffs_free_raw_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn)
37{
38 dev = dev;
39 kfree(tn);
40}
41
42void yaffs_init_raw_objs(struct yaffs_dev *dev)
43{
44 dev = dev;
45}
46
47void yaffs_deinit_raw_objs(struct yaffs_dev *dev)
48{
49 dev = dev;
50}
51
52struct yaffs_obj *yaffs_alloc_raw_obj(struct yaffs_dev *dev)
53{
54 dev = dev;
 55 return (struct yaffs_obj *)kmalloc(sizeof(struct yaffs_obj), GFP_NOFS);
56}
57
58void yaffs_free_raw_obj(struct yaffs_dev *dev, struct yaffs_obj *obj)
59{
60
61 dev = dev;
62 kfree(obj);
63}
64
65#else
66
67struct yaffs_tnode_list {
68 struct yaffs_tnode_list *next;
69 struct yaffs_tnode *tnodes;
70};
71
72struct yaffs_obj_list {
73 struct yaffs_obj_list *next;
74 struct yaffs_obj *objects;
75};
76
77struct yaffs_allocator {
78 int n_tnodes_created;
79 struct yaffs_tnode *free_tnodes;
80 int n_free_tnodes;
81 struct yaffs_tnode_list *alloc_tnode_list;
82
83 int n_obj_created;
84 struct yaffs_obj *free_objs;
85 int n_free_objects;
86
87 struct yaffs_obj_list *allocated_obj_list;
88};
89
90static void yaffs_deinit_raw_tnodes(struct yaffs_dev *dev)
91{
92
93 struct yaffs_allocator *allocator =
94 (struct yaffs_allocator *)dev->allocator;
95
96 struct yaffs_tnode_list *tmp;
97
98 if (!allocator) {
99 YBUG();
100 return;
101 }
102
103 while (allocator->alloc_tnode_list) {
104 tmp = allocator->alloc_tnode_list->next;
105
106 kfree(allocator->alloc_tnode_list->tnodes);
107 kfree(allocator->alloc_tnode_list);
108 allocator->alloc_tnode_list = tmp;
109
110 }
111
112 allocator->free_tnodes = NULL;
113 allocator->n_free_tnodes = 0;
114 allocator->n_tnodes_created = 0;
115}
116
117static void yaffs_init_raw_tnodes(struct yaffs_dev *dev)
118{
119 struct yaffs_allocator *allocator = dev->allocator;
120
121 if (allocator) {
122 allocator->alloc_tnode_list = NULL;
123 allocator->free_tnodes = NULL;
124 allocator->n_free_tnodes = 0;
125 allocator->n_tnodes_created = 0;
126 } else {
127 YBUG();
128 }
129}
130
131static int yaffs_create_tnodes(struct yaffs_dev *dev, int n_tnodes)
132{
133 struct yaffs_allocator *allocator =
134 (struct yaffs_allocator *)dev->allocator;
135 int i;
136 struct yaffs_tnode *new_tnodes;
137 u8 *mem;
138 struct yaffs_tnode *curr;
139 struct yaffs_tnode *next;
140 struct yaffs_tnode_list *tnl;
141
142 if (!allocator) {
143 YBUG();
144 return YAFFS_FAIL;
145 }
146
147 if (n_tnodes < 1)
148 return YAFFS_OK;
149
150 /* make these things */
151
152 new_tnodes = kmalloc(n_tnodes * dev->tnode_size, GFP_NOFS);
153 mem = (u8 *) new_tnodes;
154
155 if (!new_tnodes) {
156 yaffs_trace(YAFFS_TRACE_ERROR,
157 "yaffs: Could not allocate Tnodes");
158 return YAFFS_FAIL;
159 }
160
161 /* New hookup for wide tnodes */
162 for (i = 0; i < n_tnodes - 1; i++) {
163 curr = (struct yaffs_tnode *)&mem[i * dev->tnode_size];
164 next = (struct yaffs_tnode *)&mem[(i + 1) * dev->tnode_size];
165 curr->internal[0] = next;
166 }
167
168 curr = (struct yaffs_tnode *)&mem[(n_tnodes - 1) * dev->tnode_size];
169 curr->internal[0] = allocator->free_tnodes;
170 allocator->free_tnodes = (struct yaffs_tnode *)mem;
171
172 allocator->n_free_tnodes += n_tnodes;
173 allocator->n_tnodes_created += n_tnodes;
174
175 /* Now add this bunch of tnodes to a list for freeing up.
 176 * NB: if we can't add this to the management list it isn't fatal;
 177 * it just means we can't free this bunch of tnodes later.
178 */
179
180 tnl = kmalloc(sizeof(struct yaffs_tnode_list), GFP_NOFS);
181 if (!tnl) {
182 yaffs_trace(YAFFS_TRACE_ERROR,
183 "Could not add tnodes to management list");
184 return YAFFS_FAIL;
185 } else {
186 tnl->tnodes = new_tnodes;
187 tnl->next = allocator->alloc_tnode_list;
188 allocator->alloc_tnode_list = tnl;
189 }
190
 191 yaffs_trace(YAFFS_TRACE_ALLOCATE, "Tnodes added");
192
193 return YAFFS_OK;
194}
195
196struct yaffs_tnode *yaffs_alloc_raw_tnode(struct yaffs_dev *dev)
197{
198 struct yaffs_allocator *allocator =
199 (struct yaffs_allocator *)dev->allocator;
200 struct yaffs_tnode *tn = NULL;
201
202 if (!allocator) {
203 YBUG();
204 return NULL;
205 }
206
207 /* If there are none left make more */
208 if (!allocator->free_tnodes)
209 yaffs_create_tnodes(dev, YAFFS_ALLOCATION_NTNODES);
210
211 if (allocator->free_tnodes) {
212 tn = allocator->free_tnodes;
213 allocator->free_tnodes = allocator->free_tnodes->internal[0];
214 allocator->n_free_tnodes--;
215 }
216
217 return tn;
218}
219
220/* FreeTnode frees up a tnode and puts it back on the free list */
221void yaffs_free_raw_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn)
222{
223 struct yaffs_allocator *allocator = dev->allocator;
224
225 if (!allocator) {
226 YBUG();
227 return;
228 }
229
230 if (tn) {
231 tn->internal[0] = allocator->free_tnodes;
232 allocator->free_tnodes = tn;
233 allocator->n_free_tnodes++;
234 }
235 dev->checkpoint_blocks_required = 0; /* force recalculation */
236}
237
238static void yaffs_init_raw_objs(struct yaffs_dev *dev)
239{
240 struct yaffs_allocator *allocator = dev->allocator;
241
242 if (allocator) {
243 allocator->allocated_obj_list = NULL;
244 allocator->free_objs = NULL;
245 allocator->n_free_objects = 0;
246 } else {
247 YBUG();
248 }
249}
250
251static void yaffs_deinit_raw_objs(struct yaffs_dev *dev)
252{
253 struct yaffs_allocator *allocator = dev->allocator;
254 struct yaffs_obj_list *tmp;
255
256 if (!allocator) {
257 YBUG();
258 return;
259 }
260
261 while (allocator->allocated_obj_list) {
262 tmp = allocator->allocated_obj_list->next;
263 kfree(allocator->allocated_obj_list->objects);
264 kfree(allocator->allocated_obj_list);
265
266 allocator->allocated_obj_list = tmp;
267 }
268
269 allocator->free_objs = NULL;
270 allocator->n_free_objects = 0;
271 allocator->n_obj_created = 0;
272}
273
274static int yaffs_create_free_objs(struct yaffs_dev *dev, int n_obj)
275{
276 struct yaffs_allocator *allocator = dev->allocator;
277
278 int i;
279 struct yaffs_obj *new_objs;
280 struct yaffs_obj_list *list;
281
282 if (!allocator) {
283 YBUG();
284 return YAFFS_FAIL;
285 }
286
287 if (n_obj < 1)
288 return YAFFS_OK;
289
290 /* make these things */
291 new_objs = kmalloc(n_obj * sizeof(struct yaffs_obj), GFP_NOFS);
292 list = kmalloc(sizeof(struct yaffs_obj_list), GFP_NOFS);
293
294 if (!new_objs || !list) {
295 if (new_objs) {
296 kfree(new_objs);
297 new_objs = NULL;
298 }
299 if (list) {
300 kfree(list);
301 list = NULL;
302 }
303 yaffs_trace(YAFFS_TRACE_ALLOCATE,
304 "Could not allocate more objects");
305 return YAFFS_FAIL;
306 }
307
308 /* Hook them into the free list */
309 for (i = 0; i < n_obj - 1; i++) {
310 new_objs[i].siblings.next =
311 (struct list_head *)(&new_objs[i + 1]);
312 }
313
314 new_objs[n_obj - 1].siblings.next = (void *)allocator->free_objs;
315 allocator->free_objs = new_objs;
316 allocator->n_free_objects += n_obj;
317 allocator->n_obj_created += n_obj;
318
319 /* Now add this bunch of Objects to a list for freeing up. */
320
321 list->objects = new_objs;
322 list->next = allocator->allocated_obj_list;
323 allocator->allocated_obj_list = list;
324
325 return YAFFS_OK;
326}
327
328struct yaffs_obj *yaffs_alloc_raw_obj(struct yaffs_dev *dev)
329{
330 struct yaffs_obj *obj = NULL;
331 struct yaffs_allocator *allocator = dev->allocator;
332
333 if (!allocator) {
334 YBUG();
335 return obj;
336 }
337
338 /* If there are none left make more */
339 if (!allocator->free_objs)
340 yaffs_create_free_objs(dev, YAFFS_ALLOCATION_NOBJECTS);
341
342 if (allocator->free_objs) {
343 obj = allocator->free_objs;
344 allocator->free_objs =
345 (struct yaffs_obj *)(allocator->free_objs->siblings.next);
346 allocator->n_free_objects--;
347 }
348
349 return obj;
350}
351
352void yaffs_free_raw_obj(struct yaffs_dev *dev, struct yaffs_obj *obj)
353{
354
355 struct yaffs_allocator *allocator = dev->allocator;
356
357 if (!allocator)
358 YBUG();
359 else {
360 /* Link into the free list. */
361 obj->siblings.next = (struct list_head *)(allocator->free_objs);
362 allocator->free_objs = obj;
363 allocator->n_free_objects++;
364 }
365}
366
367void yaffs_deinit_raw_tnodes_and_objs(struct yaffs_dev *dev)
368{
369 if (dev->allocator) {
370 yaffs_deinit_raw_tnodes(dev);
371 yaffs_deinit_raw_objs(dev);
372
373 kfree(dev->allocator);
374 dev->allocator = NULL;
375 } else {
376 YBUG();
377 }
378}
379
380void yaffs_init_raw_tnodes_and_objs(struct yaffs_dev *dev)
381{
382 struct yaffs_allocator *allocator;
383
384 if (!dev->allocator) {
385 allocator = kmalloc(sizeof(struct yaffs_allocator), GFP_NOFS);
386 if (allocator) {
387 dev->allocator = allocator;
388 yaffs_init_raw_tnodes(dev);
389 yaffs_init_raw_objs(dev);
390 }
391 } else {
392 YBUG();
393 }
394}
395
396#endif
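The default allocator above applies one pattern twice, once for tnodes and once for objects: allocate nodes in batches, thread them onto an intrusive LIFO free list, and record each batch so teardown can free the memory. A condensed generic version of that pattern, assuming nothing from yaffs:

#include <stdlib.h>

struct node { struct node *next; /* payload would follow */ };
struct batch { struct batch *next; struct node *mem; };

struct pool {
	struct node	*free;		/* intrusive LIFO free list */
	struct batch	*batches;	/* every allocation, for teardown */
};

/* Allocate n nodes at once, chain them, and splice them onto the
 * free list; remember the batch so it can be freed at shutdown. */
static int pool_grow(struct pool *p, int n)
{
	struct node *mem = calloc(n, sizeof(*mem));
	struct batch *b = malloc(sizeof(*b));

	if (!mem || !b) {
		free(mem);
		free(b);
		return -1;
	}
	for (int i = 0; i < n - 1; i++)
		mem[i].next = &mem[i + 1];
	mem[n - 1].next = p->free;
	p->free = mem;

	b->mem = mem;
	b->next = p->batches;
	p->batches = b;
	return 0;
}

int main(void)
{
	struct pool p = { NULL, NULL };
	struct node *n;

	if (pool_grow(&p, 8))
		return 1;
	n = p.free;		/* "allocate": pop the free list */
	p.free = n->next;
	n->next = p.free;	/* "free": push it back on */
	p.free = n;
	while (p.batches) {	/* teardown walks the batch list */
		struct batch *next = p.batches->next;

		free(p.batches->mem);
		free(p.batches);
		p.batches = next;
	}
	return 0;
}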
diff --git a/fs/yaffs2/yaffs_allocator.h b/fs/yaffs2/yaffs_allocator.h
new file mode 100644
index 00000000000..4d5f2aec89f
--- /dev/null
+++ b/fs/yaffs2/yaffs_allocator.h
@@ -0,0 +1,30 @@
1/*
 2 * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_ALLOCATOR_H__
17#define __YAFFS_ALLOCATOR_H__
18
19#include "yaffs_guts.h"
20
21void yaffs_init_raw_tnodes_and_objs(struct yaffs_dev *dev);
22void yaffs_deinit_raw_tnodes_and_objs(struct yaffs_dev *dev);
23
24struct yaffs_tnode *yaffs_alloc_raw_tnode(struct yaffs_dev *dev);
25void yaffs_free_raw_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn);
26
27struct yaffs_obj *yaffs_alloc_raw_obj(struct yaffs_dev *dev);
28void yaffs_free_raw_obj(struct yaffs_dev *dev, struct yaffs_obj *obj);
29
30#endif
diff --git a/fs/yaffs2/yaffs_attribs.c b/fs/yaffs2/yaffs_attribs.c
new file mode 100644
index 00000000000..9b47d376310
--- /dev/null
+++ b/fs/yaffs2/yaffs_attribs.c
@@ -0,0 +1,124 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_guts.h"
15#include "yaffs_attribs.h"
16
17void yaffs_load_attribs(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh)
18{
19 obj->yst_uid = oh->yst_uid;
20 obj->yst_gid = oh->yst_gid;
21 obj->yst_atime = oh->yst_atime;
22 obj->yst_mtime = oh->yst_mtime;
23 obj->yst_ctime = oh->yst_ctime;
24 obj->yst_rdev = oh->yst_rdev;
25}
26
27void yaffs_load_attribs_oh(struct yaffs_obj_hdr *oh, struct yaffs_obj *obj)
28{
29 oh->yst_uid = obj->yst_uid;
30 oh->yst_gid = obj->yst_gid;
31 oh->yst_atime = obj->yst_atime;
32 oh->yst_mtime = obj->yst_mtime;
33 oh->yst_ctime = obj->yst_ctime;
34 oh->yst_rdev = obj->yst_rdev;
35
36}
37
38void yaffs_load_current_time(struct yaffs_obj *obj, int do_a, int do_c)
39{
40 obj->yst_mtime = Y_CURRENT_TIME;
41 if (do_a)
42 obj->yst_atime = obj->yst_mtime;
43 if (do_c)
44 obj->yst_ctime = obj->yst_mtime;
45}
46
47void yaffs_attribs_init(struct yaffs_obj *obj, u32 gid, u32 uid, u32 rdev)
48{
49 yaffs_load_current_time(obj, 1, 1);
50 obj->yst_rdev = rdev;
51 obj->yst_uid = uid;
52 obj->yst_gid = gid;
53}
54
55loff_t yaffs_get_file_size(struct yaffs_obj *obj)
56{
57 YCHAR *alias = NULL;
58 obj = yaffs_get_equivalent_obj(obj);
59
60 switch (obj->variant_type) {
61 case YAFFS_OBJECT_TYPE_FILE:
62 return obj->variant.file_variant.file_size;
63 case YAFFS_OBJECT_TYPE_SYMLINK:
64 alias = obj->variant.symlink_variant.alias;
65 if (!alias)
66 return 0;
67 return strnlen(alias, YAFFS_MAX_ALIAS_LENGTH);
68 default:
69 return 0;
70 }
71}
72
73int yaffs_set_attribs(struct yaffs_obj *obj, struct iattr *attr)
74{
75 unsigned int valid = attr->ia_valid;
76
77 if (valid & ATTR_MODE)
78 obj->yst_mode = attr->ia_mode;
79 if (valid & ATTR_UID)
80 obj->yst_uid = attr->ia_uid;
81 if (valid & ATTR_GID)
82 obj->yst_gid = attr->ia_gid;
83
84 if (valid & ATTR_ATIME)
85 obj->yst_atime = Y_TIME_CONVERT(attr->ia_atime);
86 if (valid & ATTR_CTIME)
87 obj->yst_ctime = Y_TIME_CONVERT(attr->ia_ctime);
88 if (valid & ATTR_MTIME)
89 obj->yst_mtime = Y_TIME_CONVERT(attr->ia_mtime);
90
91 if (valid & ATTR_SIZE)
92 yaffs_resize_file(obj, attr->ia_size);
93
94 yaffs_update_oh(obj, NULL, 1, 0, 0, NULL);
95
96 return YAFFS_OK;
97
98}
99
100int yaffs_get_attribs(struct yaffs_obj *obj, struct iattr *attr)
101{
102 unsigned int valid = 0;
103
104 attr->ia_mode = obj->yst_mode;
105 valid |= ATTR_MODE;
106 attr->ia_uid = obj->yst_uid;
107 valid |= ATTR_UID;
108 attr->ia_gid = obj->yst_gid;
109 valid |= ATTR_GID;
110
111 Y_TIME_CONVERT(attr->ia_atime) = obj->yst_atime;
112 valid |= ATTR_ATIME;
113 Y_TIME_CONVERT(attr->ia_ctime) = obj->yst_ctime;
114 valid |= ATTR_CTIME;
115 Y_TIME_CONVERT(attr->ia_mtime) = obj->yst_mtime;
116 valid |= ATTR_MTIME;
117
118 attr->ia_size = yaffs_get_file_size(obj);
119 valid |= ATTR_SIZE;
120
121 attr->ia_valid = valid;
122
123 return YAFFS_OK;
124}
diff --git a/fs/yaffs2/yaffs_attribs.h b/fs/yaffs2/yaffs_attribs.h
new file mode 100644
index 00000000000..33d541d6944
--- /dev/null
+++ b/fs/yaffs2/yaffs_attribs.h
@@ -0,0 +1,28 @@
1/*
 2 * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_ATTRIBS_H__
17#define __YAFFS_ATTRIBS_H__
18
19#include "yaffs_guts.h"
20
21void yaffs_load_attribs(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh);
22void yaffs_load_attribs_oh(struct yaffs_obj_hdr *oh, struct yaffs_obj *obj);
23void yaffs_attribs_init(struct yaffs_obj *obj, u32 gid, u32 uid, u32 rdev);
24void yaffs_load_current_time(struct yaffs_obj *obj, int do_a, int do_c);
25int yaffs_set_attribs(struct yaffs_obj *obj, struct iattr *attr);
26int yaffs_get_attribs(struct yaffs_obj *obj, struct iattr *attr);
27
28#endif
diff --git a/fs/yaffs2/yaffs_bitmap.c b/fs/yaffs2/yaffs_bitmap.c
new file mode 100644
index 00000000000..7df42cd0066
--- /dev/null
+++ b/fs/yaffs2/yaffs_bitmap.c
@@ -0,0 +1,98 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_bitmap.h"
15#include "yaffs_trace.h"
16/*
17 * Chunk bitmap manipulations
18 */
19
20static inline u8 *yaffs_block_bits(struct yaffs_dev *dev, int blk)
21{
22 if (blk < dev->internal_start_block || blk > dev->internal_end_block) {
23 yaffs_trace(YAFFS_TRACE_ERROR,
24 "BlockBits block %d is not valid",
25 blk);
26 YBUG();
27 }
28 return dev->chunk_bits +
29 (dev->chunk_bit_stride * (blk - dev->internal_start_block));
30}
31
32void yaffs_verify_chunk_bit_id(struct yaffs_dev *dev, int blk, int chunk)
33{
34 if (blk < dev->internal_start_block || blk > dev->internal_end_block ||
35 chunk < 0 || chunk >= dev->param.chunks_per_block) {
36 yaffs_trace(YAFFS_TRACE_ERROR,
37 "Chunk Id (%d:%d) invalid",
38 blk, chunk);
39 YBUG();
40 }
41}
42
43void yaffs_clear_chunk_bits(struct yaffs_dev *dev, int blk)
44{
45 u8 *blk_bits = yaffs_block_bits(dev, blk);
46
47 memset(blk_bits, 0, dev->chunk_bit_stride);
48}
49
50void yaffs_clear_chunk_bit(struct yaffs_dev *dev, int blk, int chunk)
51{
52 u8 *blk_bits = yaffs_block_bits(dev, blk);
53
54 yaffs_verify_chunk_bit_id(dev, blk, chunk);
55
56 blk_bits[chunk / 8] &= ~(1 << (chunk & 7));
57}
58
59void yaffs_set_chunk_bit(struct yaffs_dev *dev, int blk, int chunk)
60{
61 u8 *blk_bits = yaffs_block_bits(dev, blk);
62
63 yaffs_verify_chunk_bit_id(dev, blk, chunk);
64
65 blk_bits[chunk / 8] |= (1 << (chunk & 7));
66}
67
68int yaffs_check_chunk_bit(struct yaffs_dev *dev, int blk, int chunk)
69{
70 u8 *blk_bits = yaffs_block_bits(dev, blk);
71 yaffs_verify_chunk_bit_id(dev, blk, chunk);
72
73 return (blk_bits[chunk / 8] & (1 << (chunk & 7))) ? 1 : 0;
74}
75
76int yaffs_still_some_chunks(struct yaffs_dev *dev, int blk)
77{
78 u8 *blk_bits = yaffs_block_bits(dev, blk);
79 int i;
80 for (i = 0; i < dev->chunk_bit_stride; i++) {
81 if (*blk_bits)
82 return 1;
83 blk_bits++;
84 }
85 return 0;
86}
87
88int yaffs_count_chunk_bits(struct yaffs_dev *dev, int blk)
89{
90 u8 *blk_bits = yaffs_block_bits(dev, blk);
91 int i;
92 int n = 0;
93
94 for (i = 0; i < dev->chunk_bit_stride; i++, blk_bits++)
95 n += hweight8(*blk_bits);
96
97 return n;
98}
diff --git a/fs/yaffs2/yaffs_bitmap.h b/fs/yaffs2/yaffs_bitmap.h
new file mode 100644
index 00000000000..cf9ea58da0d
--- /dev/null
+++ b/fs/yaffs2/yaffs_bitmap.h
@@ -0,0 +1,33 @@
1/*
 2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16/*
17 * Chunk bitmap manipulations
18 */
19
20#ifndef __YAFFS_BITMAP_H__
21#define __YAFFS_BITMAP_H__
22
23#include "yaffs_guts.h"
24
25void yaffs_verify_chunk_bit_id(struct yaffs_dev *dev, int blk, int chunk);
26void yaffs_clear_chunk_bits(struct yaffs_dev *dev, int blk);
27void yaffs_clear_chunk_bit(struct yaffs_dev *dev, int blk, int chunk);
28void yaffs_set_chunk_bit(struct yaffs_dev *dev, int blk, int chunk);
29int yaffs_check_chunk_bit(struct yaffs_dev *dev, int blk, int chunk);
30int yaffs_still_some_chunks(struct yaffs_dev *dev, int blk);
31int yaffs_count_chunk_bits(struct yaffs_dev *dev, int blk);
32
33#endif
diff --git a/fs/yaffs2/yaffs_checkptrw.c b/fs/yaffs2/yaffs_checkptrw.c
new file mode 100644
index 00000000000..4e40f437e65
--- /dev/null
+++ b/fs/yaffs2/yaffs_checkptrw.c
@@ -0,0 +1,415 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_checkptrw.h"
15#include "yaffs_getblockinfo.h"
16
17static int yaffs2_checkpt_space_ok(struct yaffs_dev *dev)
18{
19 int blocks_avail = dev->n_erased_blocks - dev->param.n_reserved_blocks;
20
21 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
22 "checkpt blocks_avail = %d", blocks_avail);
23
24 return (blocks_avail <= 0) ? 0 : 1;
25}
26
27static int yaffs_checkpt_erase(struct yaffs_dev *dev)
28{
29 int i;
30
31 if (!dev->param.erase_fn)
32 return 0;
33 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
34 "checking blocks %d to %d",
35 dev->internal_start_block, dev->internal_end_block);
36
37 for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
38 struct yaffs_block_info *bi = yaffs_get_block_info(dev, i);
39 if (bi->block_state == YAFFS_BLOCK_STATE_CHECKPOINT) {
40 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
41 "erasing checkpt block %d", i);
42
43 dev->n_erasures++;
44
45 if (dev->param.
46 erase_fn(dev,
47 i - dev->block_offset /* realign */ )) {
48 bi->block_state = YAFFS_BLOCK_STATE_EMPTY;
49 dev->n_erased_blocks++;
50 dev->n_free_chunks +=
51 dev->param.chunks_per_block;
52 } else {
53 dev->param.bad_block_fn(dev, i);
54 bi->block_state = YAFFS_BLOCK_STATE_DEAD;
55 }
56 }
57 }
58
59 dev->blocks_in_checkpt = 0;
60
61 return 1;
62}
63
64static void yaffs2_checkpt_find_erased_block(struct yaffs_dev *dev)
65{
66 int i;
67 int blocks_avail = dev->n_erased_blocks - dev->param.n_reserved_blocks;
68 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
69 "allocating checkpt block: erased %d reserved %d avail %d next %d ",
70 dev->n_erased_blocks, dev->param.n_reserved_blocks,
71 blocks_avail, dev->checkpt_next_block);
72
73 if (dev->checkpt_next_block >= 0 &&
74 dev->checkpt_next_block <= dev->internal_end_block &&
75 blocks_avail > 0) {
76
77 for (i = dev->checkpt_next_block; i <= dev->internal_end_block;
78 i++) {
79 struct yaffs_block_info *bi =
80 yaffs_get_block_info(dev, i);
81 if (bi->block_state == YAFFS_BLOCK_STATE_EMPTY) {
82 dev->checkpt_next_block = i + 1;
83 dev->checkpt_cur_block = i;
84 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
85 "allocating checkpt block %d", i);
86 return;
87 }
88 }
89 }
90 yaffs_trace(YAFFS_TRACE_CHECKPOINT, "out of checkpt blocks");
91
92 dev->checkpt_next_block = -1;
93 dev->checkpt_cur_block = -1;
94}
95
96static void yaffs2_checkpt_find_block(struct yaffs_dev *dev)
97{
98 int i;
99 struct yaffs_ext_tags tags;
100
101 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
102 "find next checkpt block: start: blocks %d next %d",
103 dev->blocks_in_checkpt, dev->checkpt_next_block);
104
105 if (dev->blocks_in_checkpt < dev->checkpt_max_blocks)
106 for (i = dev->checkpt_next_block; i <= dev->internal_end_block;
107 i++) {
108 int chunk = i * dev->param.chunks_per_block;
109 int realigned_chunk = chunk - dev->chunk_offset;
110
111 dev->param.read_chunk_tags_fn(dev, realigned_chunk,
112 NULL, &tags);
113 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
114 "find next checkpt block: search: block %d oid %d seq %d eccr %d",
115 i, tags.obj_id, tags.seq_number,
116 tags.ecc_result);
117
118 if (tags.seq_number == YAFFS_SEQUENCE_CHECKPOINT_DATA) {
119 /* Right kind of block */
120 dev->checkpt_next_block = tags.obj_id;
121 dev->checkpt_cur_block = i;
122 dev->checkpt_block_list[dev->
123 blocks_in_checkpt] = i;
124 dev->blocks_in_checkpt++;
125 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
126 "found checkpt block %d", i);
127 return;
128 }
129 }
130
131 yaffs_trace(YAFFS_TRACE_CHECKPOINT, "found no more checkpt blocks");
132
133 dev->checkpt_next_block = -1;
134 dev->checkpt_cur_block = -1;
135}
136
137int yaffs2_checkpt_open(struct yaffs_dev *dev, int writing)
138{
139
140 dev->checkpt_open_write = writing;
141
142 /* Got the functions we need? */
143 if (!dev->param.write_chunk_tags_fn ||
144 !dev->param.read_chunk_tags_fn ||
145 !dev->param.erase_fn || !dev->param.bad_block_fn)
146 return 0;
147
148 if (writing && !yaffs2_checkpt_space_ok(dev))
149 return 0;
150
151 if (!dev->checkpt_buffer)
152 dev->checkpt_buffer =
153 kmalloc(dev->param.total_bytes_per_chunk, GFP_NOFS);
154 if (!dev->checkpt_buffer)
155 return 0;
156
157 dev->checkpt_page_seq = 0;
158 dev->checkpt_byte_count = 0;
159 dev->checkpt_sum = 0;
160 dev->checkpt_xor = 0;
161 dev->checkpt_cur_block = -1;
162 dev->checkpt_cur_chunk = -1;
163 dev->checkpt_next_block = dev->internal_start_block;
164
165 /* Erase all the blocks in the checkpoint area */
166 if (writing) {
167 memset(dev->checkpt_buffer, 0, dev->data_bytes_per_chunk);
168 dev->checkpt_byte_offs = 0;
169 return yaffs_checkpt_erase(dev);
170 } else {
171 int i;
172 /* Set to a value that will kick off a read */
173 dev->checkpt_byte_offs = dev->data_bytes_per_chunk;
 174		/* A checkpoint block list of 1 checkpoint block per 16 blocks
 175		 * is (hopefully) going to be way more than we need */
176 dev->blocks_in_checkpt = 0;
177 dev->checkpt_max_blocks =
178 (dev->internal_end_block - dev->internal_start_block) / 16 +
179 2;
180 dev->checkpt_block_list =
181 kmalloc(sizeof(int) * dev->checkpt_max_blocks, GFP_NOFS);
182 if (!dev->checkpt_block_list)
183 return 0;
184
185 for (i = 0; i < dev->checkpt_max_blocks; i++)
186 dev->checkpt_block_list[i] = -1;
187 }
188
189 return 1;
190}
191
192int yaffs2_get_checkpt_sum(struct yaffs_dev *dev, u32 * sum)
193{
194 u32 composite_sum;
195 composite_sum = (dev->checkpt_sum << 8) | (dev->checkpt_xor & 0xFF);
196 *sum = composite_sum;
197 return 1;
198}
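
/*
 * Worked example with hypothetical values: if checkpt_sum is 0x1234
 * and checkpt_xor is 0xAB, the composite value is
 * (0x1234 << 8) | 0xAB == 0x1234AB. Packing the running sum and the
 * running XOR into one u32 lets a single comparison validate the
 * stream against the checksum stored in the checkpoint itself.
 */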
199
200static int yaffs2_checkpt_flush_buffer(struct yaffs_dev *dev)
201{
202 int chunk;
203 int realigned_chunk;
204
205 struct yaffs_ext_tags tags;
206
207 if (dev->checkpt_cur_block < 0) {
208 yaffs2_checkpt_find_erased_block(dev);
209 dev->checkpt_cur_chunk = 0;
210 }
211
212 if (dev->checkpt_cur_block < 0)
213 return 0;
214
215 tags.is_deleted = 0;
216 tags.obj_id = dev->checkpt_next_block; /* Hint to next place to look */
217 tags.chunk_id = dev->checkpt_page_seq + 1;
218 tags.seq_number = YAFFS_SEQUENCE_CHECKPOINT_DATA;
219 tags.n_bytes = dev->data_bytes_per_chunk;
220 if (dev->checkpt_cur_chunk == 0) {
221 /* First chunk we write for the block? Set block state to
222 checkpoint */
223 struct yaffs_block_info *bi =
224 yaffs_get_block_info(dev, dev->checkpt_cur_block);
225 bi->block_state = YAFFS_BLOCK_STATE_CHECKPOINT;
226 dev->blocks_in_checkpt++;
227 }
228
229 chunk =
230 dev->checkpt_cur_block * dev->param.chunks_per_block +
231 dev->checkpt_cur_chunk;
232
233 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
234 "checkpoint wite buffer nand %d(%d:%d) objid %d chId %d",
235 chunk, dev->checkpt_cur_block, dev->checkpt_cur_chunk,
236 tags.obj_id, tags.chunk_id);
237
238 realigned_chunk = chunk - dev->chunk_offset;
239
240 dev->n_page_writes++;
241
242 dev->param.write_chunk_tags_fn(dev, realigned_chunk,
243 dev->checkpt_buffer, &tags);
244 dev->checkpt_byte_offs = 0;
245 dev->checkpt_page_seq++;
246 dev->checkpt_cur_chunk++;
247 if (dev->checkpt_cur_chunk >= dev->param.chunks_per_block) {
248 dev->checkpt_cur_chunk = 0;
249 dev->checkpt_cur_block = -1;
250 }
251 memset(dev->checkpt_buffer, 0, dev->data_bytes_per_chunk);
252
253 return 1;
254}
255
256int yaffs2_checkpt_wr(struct yaffs_dev *dev, const void *data, int n_bytes)
257{
258 int i = 0;
259 int ok = 1;
260
261 u8 *data_bytes = (u8 *) data;
262
263 if (!dev->checkpt_buffer)
264 return 0;
265
266 if (!dev->checkpt_open_write)
267 return -1;
268
269 while (i < n_bytes && ok) {
270 dev->checkpt_buffer[dev->checkpt_byte_offs] = *data_bytes;
271 dev->checkpt_sum += *data_bytes;
272 dev->checkpt_xor ^= *data_bytes;
273
274 dev->checkpt_byte_offs++;
275 i++;
276 data_bytes++;
277 dev->checkpt_byte_count++;
278
279 if (dev->checkpt_byte_offs < 0 ||
280 dev->checkpt_byte_offs >= dev->data_bytes_per_chunk)
281 ok = yaffs2_checkpt_flush_buffer(dev);
282 }
283
284 return i;
285}
286
287int yaffs2_checkpt_rd(struct yaffs_dev *dev, void *data, int n_bytes)
288{
289 int i = 0;
290 int ok = 1;
291 struct yaffs_ext_tags tags;
292
293 int chunk;
294 int realigned_chunk;
295
296 u8 *data_bytes = (u8 *) data;
297
298 if (!dev->checkpt_buffer)
299 return 0;
300
301 if (dev->checkpt_open_write)
302 return -1;
303
304 while (i < n_bytes && ok) {
305
306 if (dev->checkpt_byte_offs < 0 ||
307 dev->checkpt_byte_offs >= dev->data_bytes_per_chunk) {
308
309 if (dev->checkpt_cur_block < 0) {
310 yaffs2_checkpt_find_block(dev);
311 dev->checkpt_cur_chunk = 0;
312 }
313
314 if (dev->checkpt_cur_block < 0)
315 ok = 0;
316 else {
317 chunk = dev->checkpt_cur_block *
318 dev->param.chunks_per_block +
319 dev->checkpt_cur_chunk;
320
321 realigned_chunk = chunk - dev->chunk_offset;
322
323 dev->n_page_reads++;
324
325 /* read in the next chunk */
326 dev->param.read_chunk_tags_fn(dev,
327 realigned_chunk,
328 dev->
329 checkpt_buffer,
330 &tags);
331
332 if (tags.chunk_id != (dev->checkpt_page_seq + 1)
333 || tags.ecc_result > YAFFS_ECC_RESULT_FIXED
334 || tags.seq_number !=
335 YAFFS_SEQUENCE_CHECKPOINT_DATA)
336 ok = 0;
337
338 dev->checkpt_byte_offs = 0;
339 dev->checkpt_page_seq++;
340 dev->checkpt_cur_chunk++;
341
342 if (dev->checkpt_cur_chunk >=
343 dev->param.chunks_per_block)
344 dev->checkpt_cur_block = -1;
345 }
346 }
347
348 if (ok) {
349 *data_bytes =
350 dev->checkpt_buffer[dev->checkpt_byte_offs];
351 dev->checkpt_sum += *data_bytes;
352 dev->checkpt_xor ^= *data_bytes;
353 dev->checkpt_byte_offs++;
354 i++;
355 data_bytes++;
356 dev->checkpt_byte_count++;
357 }
358 }
359
360 return i;
361}
362
363int yaffs_checkpt_close(struct yaffs_dev *dev)
364{
365
366 if (dev->checkpt_open_write) {
367 if (dev->checkpt_byte_offs != 0)
368 yaffs2_checkpt_flush_buffer(dev);
369 } else if (dev->checkpt_block_list) {
370 int i;
371 for (i = 0;
372 i < dev->blocks_in_checkpt
373 && dev->checkpt_block_list[i] >= 0; i++) {
374 int blk = dev->checkpt_block_list[i];
375 struct yaffs_block_info *bi = NULL;
376 if (dev->internal_start_block <= blk
377 && blk <= dev->internal_end_block)
378 bi = yaffs_get_block_info(dev, blk);
379 if (bi && bi->block_state == YAFFS_BLOCK_STATE_EMPTY)
380 bi->block_state = YAFFS_BLOCK_STATE_CHECKPOINT;
381 else {
 382				/* TODO: this looks odd... */
383 }
384 }
385 kfree(dev->checkpt_block_list);
386 dev->checkpt_block_list = NULL;
387 }
388
389 dev->n_free_chunks -=
390 dev->blocks_in_checkpt * dev->param.chunks_per_block;
391 dev->n_erased_blocks -= dev->blocks_in_checkpt;
392
 393	yaffs_trace(YAFFS_TRACE_CHECKPOINT, "checkpoint byte count %d",
394 dev->checkpt_byte_count);
395
396 if (dev->checkpt_buffer) {
397 /* free the buffer */
398 kfree(dev->checkpt_buffer);
399 dev->checkpt_buffer = NULL;
400 return 1;
401 } else {
402 return 0;
403 }
404}
405
406int yaffs2_checkpt_invalidate_stream(struct yaffs_dev *dev)
407{
408 /* Erase the checkpoint data */
409
410 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
411 "checkpoint invalidate of %d blocks",
412 dev->blocks_in_checkpt);
413
414 return yaffs_checkpt_erase(dev);
415}
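
/*
 * Illustrative sketch, not part of the original patch: the expected
 * calling sequence for the checkpoint stream. Error handling is cut
 * down to early returns and "dev" is assumed to be set up elsewhere;
 * the payload value is hypothetical.
 */
static int yaffs2_checkpt_roundtrip_sketch(struct yaffs_dev *dev)
{
	u32 magic = 0x59414653;
	u32 readback = 0;

	/* Write pass: open for writing, stream bytes, close to flush. */
	if (!yaffs2_checkpt_open(dev, 1))
		return 0;
	if (yaffs2_checkpt_wr(dev, &magic, sizeof(magic)) !=
	    (int)sizeof(magic))
		return 0;
	yaffs_checkpt_close(dev);

	/* Read pass: the same bytes come back in write order. */
	if (!yaffs2_checkpt_open(dev, 0))
		return 0;
	if (yaffs2_checkpt_rd(dev, &readback, sizeof(readback)) !=
	    (int)sizeof(readback))
		return 0;
	yaffs_checkpt_close(dev);

	return readback == magic;
}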
diff --git a/fs/yaffs2/yaffs_checkptrw.h b/fs/yaffs2/yaffs_checkptrw.h
new file mode 100644
index 00000000000..361c6067717
--- /dev/null
+++ b/fs/yaffs2/yaffs_checkptrw.h
@@ -0,0 +1,33 @@
1/*
 2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_CHECKPTRW_H__
17#define __YAFFS_CHECKPTRW_H__
18
19#include "yaffs_guts.h"
20
21int yaffs2_checkpt_open(struct yaffs_dev *dev, int writing);
22
23int yaffs2_checkpt_wr(struct yaffs_dev *dev, const void *data, int n_bytes);
24
25int yaffs2_checkpt_rd(struct yaffs_dev *dev, void *data, int n_bytes);
26
27int yaffs2_get_checkpt_sum(struct yaffs_dev *dev, u32 * sum);
28
29int yaffs_checkpt_close(struct yaffs_dev *dev);
30
31int yaffs2_checkpt_invalidate_stream(struct yaffs_dev *dev);
32
33#endif
diff --git a/fs/yaffs2/yaffs_ecc.c b/fs/yaffs2/yaffs_ecc.c
new file mode 100644
index 00000000000..e95a8069a8c
--- /dev/null
+++ b/fs/yaffs2/yaffs_ecc.c
@@ -0,0 +1,298 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14/*
15 * This code implements the ECC algorithm used in SmartMedia.
16 *
17 * The ECC comprises 22 bits of parity information and is stuffed into 3 bytes.
 18 * The two unused bits are set to 1.
19 * The ECC can correct single bit errors in a 256-byte page of data. Thus, two such ECC
20 * blocks are used on a 512-byte NAND page.
21 *
22 */
23
24/* Table generated by gen-ecc.c
25 * Using a table means we do not have to calculate p1..p4 and p1'..p4'
 26 * for each byte of data. These are instead provided in a table in bits 7..2.
 27 * Bit 0 of each entry indicates whether the entry has odd or even parity, and
 28 * therefore this byte's influence on the line parity.
29 */
30
31#include "yportenv.h"
32
33#include "yaffs_ecc.h"
34
35static const unsigned char column_parity_table[] = {
36 0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69,
37 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00,
38 0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc,
39 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
40 0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0,
41 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
42 0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65,
43 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
44 0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc,
45 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
46 0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59,
47 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
48 0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55,
49 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
50 0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0,
51 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
52 0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0,
53 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
54 0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55,
55 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
56 0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59,
57 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
58 0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc,
59 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
60 0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65,
61 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
62 0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0,
63 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
64 0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc,
65 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
66 0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69,
67 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00,
68};
69
70
71/* Calculate the ECC for a 256-byte block of data */
72void yaffs_ecc_cacl(const unsigned char *data, unsigned char *ecc)
73{
74 unsigned int i;
75
76 unsigned char col_parity = 0;
77 unsigned char line_parity = 0;
78 unsigned char line_parity_prime = 0;
79 unsigned char t;
80 unsigned char b;
81
82 for (i = 0; i < 256; i++) {
83 b = column_parity_table[*data++];
84 col_parity ^= b;
85
86 if (b & 0x01) { /* odd number of bits in the byte */
87 line_parity ^= i;
88 line_parity_prime ^= ~i;
89 }
90 }
91
92 ecc[2] = (~col_parity) | 0x03;
93
94 t = 0;
95 if (line_parity & 0x80)
96 t |= 0x80;
97 if (line_parity_prime & 0x80)
98 t |= 0x40;
99 if (line_parity & 0x40)
100 t |= 0x20;
101 if (line_parity_prime & 0x40)
102 t |= 0x10;
103 if (line_parity & 0x20)
104 t |= 0x08;
105 if (line_parity_prime & 0x20)
106 t |= 0x04;
107 if (line_parity & 0x10)
108 t |= 0x02;
109 if (line_parity_prime & 0x10)
110 t |= 0x01;
111 ecc[1] = ~t;
112
113 t = 0;
114 if (line_parity & 0x08)
115 t |= 0x80;
116 if (line_parity_prime & 0x08)
117 t |= 0x40;
118 if (line_parity & 0x04)
119 t |= 0x20;
120 if (line_parity_prime & 0x04)
121 t |= 0x10;
122 if (line_parity & 0x02)
123 t |= 0x08;
124 if (line_parity_prime & 0x02)
125 t |= 0x04;
126 if (line_parity & 0x01)
127 t |= 0x02;
128 if (line_parity_prime & 0x01)
129 t |= 0x01;
130 ecc[0] = ~t;
131
132#ifdef CONFIG_YAFFS_ECC_WRONG_ORDER
133 /* Swap the bytes into the wrong order */
134 t = ecc[0];
135 ecc[0] = ecc[1];
136 ecc[1] = t;
137#endif
138}
139
140/* Correct the ECC on a 256 byte block of data */
141
142int yaffs_ecc_correct(unsigned char *data, unsigned char *read_ecc,
143 const unsigned char *test_ecc)
144{
145 unsigned char d0, d1, d2; /* deltas */
146
147 d0 = read_ecc[0] ^ test_ecc[0];
148 d1 = read_ecc[1] ^ test_ecc[1];
149 d2 = read_ecc[2] ^ test_ecc[2];
150
151 if ((d0 | d1 | d2) == 0)
152 return 0; /* no error */
153
154 if (((d0 ^ (d0 >> 1)) & 0x55) == 0x55 &&
155 ((d1 ^ (d1 >> 1)) & 0x55) == 0x55 &&
156 ((d2 ^ (d2 >> 1)) & 0x54) == 0x54) {
157 /* Single bit (recoverable) error in data */
158
159 unsigned byte;
160 unsigned bit;
161
162#ifdef CONFIG_YAFFS_ECC_WRONG_ORDER
163 /* swap the bytes to correct for the wrong order */
164 unsigned char t;
165
166 t = d0;
167 d0 = d1;
168 d1 = t;
169#endif
170
171 bit = byte = 0;
172
173 if (d1 & 0x80)
174 byte |= 0x80;
175 if (d1 & 0x20)
176 byte |= 0x40;
177 if (d1 & 0x08)
178 byte |= 0x20;
179 if (d1 & 0x02)
180 byte |= 0x10;
181 if (d0 & 0x80)
182 byte |= 0x08;
183 if (d0 & 0x20)
184 byte |= 0x04;
185 if (d0 & 0x08)
186 byte |= 0x02;
187 if (d0 & 0x02)
188 byte |= 0x01;
189
190 if (d2 & 0x80)
191 bit |= 0x04;
192 if (d2 & 0x20)
193 bit |= 0x02;
194 if (d2 & 0x08)
195 bit |= 0x01;
196
197 data[byte] ^= (1 << bit);
198
199 return 1; /* Corrected the error */
200 }
201
202 if ((hweight8(d0) + hweight8(d1) + hweight8(d2)) == 1) {
 203		/* Recoverable error in ECC */
204
205 read_ecc[0] = test_ecc[0];
206 read_ecc[1] = test_ecc[1];
207 read_ecc[2] = test_ecc[2];
208
209 return 1; /* Corrected the error */
210 }
211
212 /* Unrecoverable error */
213
214 return -1;
215
216}
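
/*
 * Illustrative sketch, not part of the original patch: compute the ECC
 * over a 256-byte page, flip one bit, then let yaffs_ecc_correct()
 * repair it. The buffer contents are hypothetical. Note that the
 * "stored" ECC plays the read_ecc role (as read back from the spare
 * area) and the freshly computed one plays the test_ecc role.
 */
static int yaffs_ecc_roundtrip_sketch(void)
{
	unsigned char page[256] = { 0 };
	unsigned char stored_ecc[3];
	unsigned char computed_ecc[3];

	yaffs_ecc_cacl(page, stored_ecc);	/* ECC of the clean page */

	page[10] ^= 0x04;			/* inject a single-bit error */

	yaffs_ecc_cacl(page, computed_ecc);	/* ECC of the corrupted page */

	/* Returns 0 for no error, 1 for a corrected error, -1 if
	 * unrecoverable; here it flips the damaged bit back. */
	return yaffs_ecc_correct(page, stored_ecc, computed_ecc);
}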
217
218/*
219 * ECCxxxOther does ECC calcs on arbitrary n bytes of data
220 */
221void yaffs_ecc_calc_other(const unsigned char *data, unsigned n_bytes,
222 struct yaffs_ecc_other *ecc_other)
223{
224 unsigned int i;
225
226 unsigned char col_parity = 0;
227 unsigned line_parity = 0;
228 unsigned line_parity_prime = 0;
229 unsigned char b;
230
231 for (i = 0; i < n_bytes; i++) {
232 b = column_parity_table[*data++];
233 col_parity ^= b;
234
235 if (b & 0x01) {
236 /* odd number of bits in the byte */
237 line_parity ^= i;
238 line_parity_prime ^= ~i;
239 }
240
241 }
242
243 ecc_other->col_parity = (col_parity >> 2) & 0x3f;
244 ecc_other->line_parity = line_parity;
245 ecc_other->line_parity_prime = line_parity_prime;
246}
247
248int yaffs_ecc_correct_other(unsigned char *data, unsigned n_bytes,
249 struct yaffs_ecc_other *read_ecc,
250 const struct yaffs_ecc_other *test_ecc)
251{
252 unsigned char delta_col; /* column parity delta */
253 unsigned delta_line; /* line parity delta */
254 unsigned delta_line_prime; /* line parity delta */
255 unsigned bit;
256
257 delta_col = read_ecc->col_parity ^ test_ecc->col_parity;
258 delta_line = read_ecc->line_parity ^ test_ecc->line_parity;
259 delta_line_prime =
260 read_ecc->line_parity_prime ^ test_ecc->line_parity_prime;
261
262 if ((delta_col | delta_line | delta_line_prime) == 0)
263 return 0; /* no error */
264
265 if (delta_line == ~delta_line_prime &&
266 (((delta_col ^ (delta_col >> 1)) & 0x15) == 0x15)) {
267 /* Single bit (recoverable) error in data */
268
269 bit = 0;
270
271 if (delta_col & 0x20)
272 bit |= 0x04;
273 if (delta_col & 0x08)
274 bit |= 0x02;
275 if (delta_col & 0x02)
276 bit |= 0x01;
277
278 if (delta_line >= n_bytes)
279 return -1;
280
281 data[delta_line] ^= (1 << bit);
282
283 return 1; /* corrected */
284 }
285
286 if ((hweight32(delta_line) +
287 hweight32(delta_line_prime) +
288 hweight8(delta_col)) == 1) {
 289		/* Recoverable error in ECC */
290
291 *read_ecc = *test_ecc;
292 return 1; /* corrected */
293 }
294
295 /* Unrecoverable error */
296
297 return -1;
298}
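
/*
 * Illustrative sketch, not part of the original patch: the *_other
 * variants follow the same calc / corrupt / correct pattern for a
 * buffer of arbitrary length, here a hypothetical 64-byte tags area.
 */
static int yaffs_ecc_other_sketch(void)
{
	unsigned char buf[64] = { 0 };
	struct yaffs_ecc_other stored, computed;

	yaffs_ecc_calc_other(buf, sizeof(buf), &stored);
	buf[7] ^= 0x01;		/* inject a single-bit error */
	yaffs_ecc_calc_other(buf, sizeof(buf), &computed);

	/* Returns 1 and restores the flipped bit in buf. */
	return yaffs_ecc_correct_other(buf, sizeof(buf), &stored, &computed);
}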
diff --git a/fs/yaffs2/yaffs_ecc.h b/fs/yaffs2/yaffs_ecc.h
new file mode 100644
index 00000000000..b0c461d699e
--- /dev/null
+++ b/fs/yaffs2/yaffs_ecc.h
@@ -0,0 +1,44 @@
1/*
 2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16/*
17 * This code implements the ECC algorithm used in SmartMedia.
18 *
19 * The ECC comprises 22 bits of parity information and is stuffed into 3 bytes.
 20 * The two unused bits are set to 1.
21 * The ECC can correct single bit errors in a 256-byte page of data. Thus, two such ECC
22 * blocks are used on a 512-byte NAND page.
23 *
24 */
25
26#ifndef __YAFFS_ECC_H__
27#define __YAFFS_ECC_H__
28
29struct yaffs_ecc_other {
30 unsigned char col_parity;
31 unsigned line_parity;
32 unsigned line_parity_prime;
33};
34
35void yaffs_ecc_cacl(const unsigned char *data, unsigned char *ecc);
36int yaffs_ecc_correct(unsigned char *data, unsigned char *read_ecc,
37 const unsigned char *test_ecc);
38
39void yaffs_ecc_calc_other(const unsigned char *data, unsigned n_bytes,
40 struct yaffs_ecc_other *ecc);
41int yaffs_ecc_correct_other(unsigned char *data, unsigned n_bytes,
42 struct yaffs_ecc_other *read_ecc,
43 const struct yaffs_ecc_other *test_ecc);
44#endif
diff --git a/fs/yaffs2/yaffs_getblockinfo.h b/fs/yaffs2/yaffs_getblockinfo.h
new file mode 100644
index 00000000000..d87acbde997
--- /dev/null
+++ b/fs/yaffs2/yaffs_getblockinfo.h
@@ -0,0 +1,35 @@
1/*
 2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_GETBLOCKINFO_H__
17#define __YAFFS_GETBLOCKINFO_H__
18
19#include "yaffs_guts.h"
20#include "yaffs_trace.h"
21
22/* Function to manipulate block info */
23static inline struct yaffs_block_info *yaffs_get_block_info(struct yaffs_dev
24 *dev, int blk)
25{
26 if (blk < dev->internal_start_block || blk > dev->internal_end_block) {
27 yaffs_trace(YAFFS_TRACE_ERROR,
28 "**>> yaffs: get_block_info block %d is not valid",
29 blk);
30 YBUG();
31 }
32 return &dev->block_info[blk - dev->internal_start_block];
33}
34
35#endif
diff --git a/fs/yaffs2/yaffs_guts.c b/fs/yaffs2/yaffs_guts.c
new file mode 100644
index 00000000000..f4ae9deed72
--- /dev/null
+++ b/fs/yaffs2/yaffs_guts.c
@@ -0,0 +1,5164 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yportenv.h"
15#include "yaffs_trace.h"
16
17#include "yaffs_guts.h"
18#include "yaffs_tagsvalidity.h"
19#include "yaffs_getblockinfo.h"
20
21#include "yaffs_tagscompat.h"
22
23#include "yaffs_nand.h"
24
25#include "yaffs_yaffs1.h"
26#include "yaffs_yaffs2.h"
27#include "yaffs_bitmap.h"
28#include "yaffs_verify.h"
29
30#include "yaffs_nand.h"
31#include "yaffs_packedtags2.h"
32
33#include "yaffs_nameval.h"
34#include "yaffs_allocator.h"
35
36#include "yaffs_attribs.h"
37
38/* Note YAFFS_GC_GOOD_ENOUGH must be <= YAFFS_GC_PASSIVE_THRESHOLD */
39#define YAFFS_GC_GOOD_ENOUGH 2
40#define YAFFS_GC_PASSIVE_THRESHOLD 4
41
42#include "yaffs_ecc.h"
43
44/* Forward declarations */
45
46static int yaffs_wr_data_obj(struct yaffs_obj *in, int inode_chunk,
47 const u8 * buffer, int n_bytes, int use_reserve);
48
49
50
51/* Function to calculate chunk and offset */
52
53static void yaffs_addr_to_chunk(struct yaffs_dev *dev, loff_t addr,
54 int *chunk_out, u32 * offset_out)
55{
56 int chunk;
57 u32 offset;
58
59 chunk = (u32) (addr >> dev->chunk_shift);
60
61 if (dev->chunk_div == 1) {
62 /* easy power of 2 case */
63 offset = (u32) (addr & dev->chunk_mask);
64 } else {
65 /* Non power-of-2 case */
66
67 loff_t chunk_base;
68
69 chunk /= dev->chunk_div;
70
71 chunk_base = ((loff_t) chunk) * dev->data_bytes_per_chunk;
72 offset = (u32) (addr - chunk_base);
73 }
74
75 *chunk_out = chunk;
76 *offset_out = offset;
77}
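
/*
 * Worked example with hypothetical geometry: for 2048-byte chunks,
 * chunk_shift == 11, chunk_div == 1 and chunk_mask == 0x7FF, so
 * addr 0x1234 lands in chunk 2 (0x1234 >> 11) at offset 0x234
 * (0x1234 & 0x7FF). For a non-power-of-2 chunk size the same shift is
 * only a first step; the division by chunk_div and the chunk_base
 * subtraction produce the exact chunk and offset.
 */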
78
79/* Function to return the number of shifts for a power of 2 greater than or
80 * equal to the given number
81 * Note we don't try to cater for all possible numbers and this does not have to
82 * be hellishly efficient.
83 */
84
85static u32 calc_shifts_ceiling(u32 x)
86{
87 int extra_bits;
88 int shifts;
89
90 shifts = extra_bits = 0;
91
92 while (x > 1) {
93 if (x & 1)
94 extra_bits++;
95 x >>= 1;
96 shifts++;
97 }
98
99 if (extra_bits)
100 shifts++;
101
102 return shifts;
103}
104
105/* Function to return the number of shifts to get a 1 in bit 0
106 */
107
108static u32 calc_shifts(u32 x)
109{
110 u32 shifts;
111
112 shifts = 0;
113
114 if (!x)
115 return 0;
116
117 while (!(x & 1)) {
118 x >>= 1;
119 shifts++;
120 }
121
122 return shifts;
123}
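
/*
 * Worked examples (values chosen for illustration):
 * calc_shifts_ceiling(1000) == 10, since 2^10 == 1024 is the first
 * power of two >= 1000, while calc_shifts_ceiling(1024) == 10 exactly;
 * calc_shifts(0x50) == 4, the position of the lowest set bit.
 */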
124
125/*
126 * Temporary buffer manipulations.
127 */
128
129static int yaffs_init_tmp_buffers(struct yaffs_dev *dev)
130{
131 int i;
 132	u8 *buf = (u8 *) 1;	/* non-NULL sentinel so the loop below starts */
133
134 memset(dev->temp_buffer, 0, sizeof(dev->temp_buffer));
135
136 for (i = 0; buf && i < YAFFS_N_TEMP_BUFFERS; i++) {
137 dev->temp_buffer[i].line = 0; /* not in use */
138 dev->temp_buffer[i].buffer = buf =
139 kmalloc(dev->param.total_bytes_per_chunk, GFP_NOFS);
140 }
141
142 return buf ? YAFFS_OK : YAFFS_FAIL;
143}
144
145u8 *yaffs_get_temp_buffer(struct yaffs_dev * dev, int line_no)
146{
147 int i, j;
148
149 dev->temp_in_use++;
150 if (dev->temp_in_use > dev->max_temp)
151 dev->max_temp = dev->temp_in_use;
152
153 for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) {
154 if (dev->temp_buffer[i].line == 0) {
155 dev->temp_buffer[i].line = line_no;
156 if ((i + 1) > dev->max_temp) {
157 dev->max_temp = i + 1;
158 for (j = 0; j <= i; j++)
159 dev->temp_buffer[j].max_line =
160 dev->temp_buffer[j].line;
161 }
162
163 return dev->temp_buffer[i].buffer;
164 }
165 }
166
167 yaffs_trace(YAFFS_TRACE_BUFFERS,
168 "Out of temp buffers at line %d, other held by lines:",
169 line_no);
170 for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++)
 171		yaffs_trace(YAFFS_TRACE_BUFFERS, " %d", dev->temp_buffer[i].line);
172
173 /*
174 * If we got here then we have to allocate an unmanaged one
175 * This is not good.
176 */
177
178 dev->unmanaged_buffer_allocs++;
179 return kmalloc(dev->data_bytes_per_chunk, GFP_NOFS);
180
181}
182
183void yaffs_release_temp_buffer(struct yaffs_dev *dev, u8 * buffer, int line_no)
184{
185 int i;
186
187 dev->temp_in_use--;
188
189 for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) {
190 if (dev->temp_buffer[i].buffer == buffer) {
191 dev->temp_buffer[i].line = 0;
192 return;
193 }
194 }
195
196 if (buffer) {
197 /* assume it is an unmanaged one. */
198 yaffs_trace(YAFFS_TRACE_BUFFERS,
199 "Releasing unmanaged temp buffer in line %d",
200 line_no);
201 kfree(buffer);
202 dev->unmanaged_buffer_deallocs++;
203 }
204
205}
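
/*
 * Illustrative sketch, not part of the original patch: the intended
 * get/release pairing. Passing __LINE__ lets the pool report which
 * call sites are holding buffers when it runs out.
 */
static void yaffs_temp_buffer_sketch(struct yaffs_dev *dev)
{
	u8 *buf = yaffs_get_temp_buffer(dev, __LINE__);

	/* Use buf as chunk-sized scratch space... */
	memset(buf, 0xff, dev->data_bytes_per_chunk);

	yaffs_release_temp_buffer(dev, buf, __LINE__);
}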
206
207/*
208 * Determine if we have a managed buffer.
209 */
210int yaffs_is_managed_tmp_buffer(struct yaffs_dev *dev, const u8 * buffer)
211{
212 int i;
213
214 for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) {
215 if (dev->temp_buffer[i].buffer == buffer)
216 return 1;
217 }
218
219 for (i = 0; i < dev->param.n_caches; i++) {
220 if (dev->cache[i].data == buffer)
221 return 1;
222 }
223
224 if (buffer == dev->checkpt_buffer)
225 return 1;
226
227 yaffs_trace(YAFFS_TRACE_ALWAYS,
228 "yaffs: unmaged buffer detected.");
229 return 0;
230}
231
232/*
233 * Functions for robustisizing TODO
234 *
235 */
236
237static void yaffs_handle_chunk_wr_ok(struct yaffs_dev *dev, int nand_chunk,
238 const u8 * data,
239 const struct yaffs_ext_tags *tags)
240{
 241	dev = dev;	/* suppress unused-parameter warnings */
242 nand_chunk = nand_chunk;
243 data = data;
244 tags = tags;
245}
246
247static void yaffs_handle_chunk_update(struct yaffs_dev *dev, int nand_chunk,
248 const struct yaffs_ext_tags *tags)
249{
250 dev = dev;
251 nand_chunk = nand_chunk;
252 tags = tags;
253}
254
255void yaffs_handle_chunk_error(struct yaffs_dev *dev,
256 struct yaffs_block_info *bi)
257{
258 if (!bi->gc_prioritise) {
259 bi->gc_prioritise = 1;
260 dev->has_pending_prioritised_gc = 1;
261 bi->chunk_error_strikes++;
262
263 if (bi->chunk_error_strikes > 3) {
 264			bi->needs_retiring = 1;	/* Too many strikes, so retire this block */
265 yaffs_trace(YAFFS_TRACE_ALWAYS, "yaffs: Block struck out");
266
267 }
268 }
269}
270
271static void yaffs_handle_chunk_wr_error(struct yaffs_dev *dev, int nand_chunk,
272 int erased_ok)
273{
274 int flash_block = nand_chunk / dev->param.chunks_per_block;
275 struct yaffs_block_info *bi = yaffs_get_block_info(dev, flash_block);
276
277 yaffs_handle_chunk_error(dev, bi);
278
279 if (erased_ok) {
280 /* Was an actual write failure, so mark the block for retirement */
281 bi->needs_retiring = 1;
282 yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
283 "**>> Block %d needs retiring", flash_block);
284 }
285
286 /* Delete the chunk */
287 yaffs_chunk_del(dev, nand_chunk, 1, __LINE__);
288 yaffs_skip_rest_of_block(dev);
289}
290
291/*
292 * Verification code
293 */
294
295/*
296 * Simple hash function. Needs to have a reasonable spread
297 */
298
299static inline int yaffs_hash_fn(int n)
300{
301 n = abs(n);
302 return n % YAFFS_NOBJECT_BUCKETS;
303}
304
305/*
306 * Access functions to useful fake objects.
307 * Note that root might have a presence in NAND if permissions are set.
308 */
309
310struct yaffs_obj *yaffs_root(struct yaffs_dev *dev)
311{
312 return dev->root_dir;
313}
314
315struct yaffs_obj *yaffs_lost_n_found(struct yaffs_dev *dev)
316{
317 return dev->lost_n_found;
318}
319
320/*
321 * Erased NAND checking functions
322 */
323
324int yaffs_check_ff(u8 * buffer, int n_bytes)
325{
326 /* Horrible, slow implementation */
327 while (n_bytes--) {
328 if (*buffer != 0xFF)
329 return 0;
330 buffer++;
331 }
332 return 1;
333}
334
335static int yaffs_check_chunk_erased(struct yaffs_dev *dev, int nand_chunk)
336{
337 int retval = YAFFS_OK;
338 u8 *data = yaffs_get_temp_buffer(dev, __LINE__);
339 struct yaffs_ext_tags tags;
340 int result;
341
342 result = yaffs_rd_chunk_tags_nand(dev, nand_chunk, data, &tags);
343
344 if (tags.ecc_result > YAFFS_ECC_RESULT_NO_ERROR)
345 retval = YAFFS_FAIL;
346
347 if (!yaffs_check_ff(data, dev->data_bytes_per_chunk) ||
348 tags.chunk_used) {
349 yaffs_trace(YAFFS_TRACE_NANDACCESS, "Chunk %d not erased", nand_chunk);
350 retval = YAFFS_FAIL;
351 }
352
353 yaffs_release_temp_buffer(dev, data, __LINE__);
354
355 return retval;
356
357}
358
359static int yaffs_verify_chunk_written(struct yaffs_dev *dev,
360 int nand_chunk,
361 const u8 * data,
362 struct yaffs_ext_tags *tags)
363{
364 int retval = YAFFS_OK;
365 struct yaffs_ext_tags temp_tags;
366 u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
367 int result;
368
369 result = yaffs_rd_chunk_tags_nand(dev, nand_chunk, buffer, &temp_tags);
370 if (memcmp(buffer, data, dev->data_bytes_per_chunk) ||
371 temp_tags.obj_id != tags->obj_id ||
372 temp_tags.chunk_id != tags->chunk_id ||
373 temp_tags.n_bytes != tags->n_bytes)
374 retval = YAFFS_FAIL;
375
376 yaffs_release_temp_buffer(dev, buffer, __LINE__);
377
378 return retval;
379}
380
381
382int yaffs_check_alloc_available(struct yaffs_dev *dev, int n_chunks)
383{
384 int reserved_chunks;
385 int reserved_blocks = dev->param.n_reserved_blocks;
386 int checkpt_blocks;
387
388 checkpt_blocks = yaffs_calc_checkpt_blocks_required(dev);
389
390 reserved_chunks =
391 ((reserved_blocks + checkpt_blocks) * dev->param.chunks_per_block);
392
393 return (dev->n_free_chunks > (reserved_chunks + n_chunks));
394}
395
396static int yaffs_find_alloc_block(struct yaffs_dev *dev)
397{
398 int i;
399
400 struct yaffs_block_info *bi;
401
402 if (dev->n_erased_blocks < 1) {
403 /* Hoosterman we've got a problem.
404 * Can't get space to gc
405 */
406 yaffs_trace(YAFFS_TRACE_ERROR,
407 "yaffs tragedy: no more erased blocks" );
408
409 return -1;
410 }
411
412 /* Find an empty block. */
413
414 for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
415 dev->alloc_block_finder++;
416 if (dev->alloc_block_finder < dev->internal_start_block
417 || dev->alloc_block_finder > dev->internal_end_block) {
418 dev->alloc_block_finder = dev->internal_start_block;
419 }
420
421 bi = yaffs_get_block_info(dev, dev->alloc_block_finder);
422
423 if (bi->block_state == YAFFS_BLOCK_STATE_EMPTY) {
424 bi->block_state = YAFFS_BLOCK_STATE_ALLOCATING;
425 dev->seq_number++;
426 bi->seq_number = dev->seq_number;
427 dev->n_erased_blocks--;
428 yaffs_trace(YAFFS_TRACE_ALLOCATE,
429 "Allocated block %d, seq %d, %d left" ,
430 dev->alloc_block_finder, dev->seq_number,
431 dev->n_erased_blocks);
432 return dev->alloc_block_finder;
433 }
434 }
435
436 yaffs_trace(YAFFS_TRACE_ALWAYS,
437 "yaffs tragedy: no more erased blocks, but there should have been %d",
438 dev->n_erased_blocks);
439
440 return -1;
441}
442
443static int yaffs_alloc_chunk(struct yaffs_dev *dev, int use_reserver,
444 struct yaffs_block_info **block_ptr)
445{
446 int ret_val;
447 struct yaffs_block_info *bi;
448
449 if (dev->alloc_block < 0) {
450 /* Get next block to allocate off */
451 dev->alloc_block = yaffs_find_alloc_block(dev);
452 dev->alloc_page = 0;
453 }
454
455 if (!use_reserver && !yaffs_check_alloc_available(dev, 1)) {
456 /* Not enough space to allocate unless we're allowed to use the reserve. */
457 return -1;
458 }
459
460 if (dev->n_erased_blocks < dev->param.n_reserved_blocks
461 && dev->alloc_page == 0)
462 yaffs_trace(YAFFS_TRACE_ALLOCATE, "Allocating reserve");
463
464 /* Next page please.... */
465 if (dev->alloc_block >= 0) {
466 bi = yaffs_get_block_info(dev, dev->alloc_block);
467
468 ret_val = (dev->alloc_block * dev->param.chunks_per_block) +
469 dev->alloc_page;
470 bi->pages_in_use++;
471 yaffs_set_chunk_bit(dev, dev->alloc_block, dev->alloc_page);
472
473 dev->alloc_page++;
474
475 dev->n_free_chunks--;
476
477 /* If the block is full set the state to full */
478 if (dev->alloc_page >= dev->param.chunks_per_block) {
479 bi->block_state = YAFFS_BLOCK_STATE_FULL;
480 dev->alloc_block = -1;
481 }
482
483 if (block_ptr)
484 *block_ptr = bi;
485
486 return ret_val;
487 }
488
489 yaffs_trace(YAFFS_TRACE_ERROR, "!!!!!!!!! Allocator out !!!!!!!!!!!!!!!!!" );
490
491 return -1;
492}
493
494static int yaffs_get_erased_chunks(struct yaffs_dev *dev)
495{
496 int n;
497
498 n = dev->n_erased_blocks * dev->param.chunks_per_block;
499
500 if (dev->alloc_block > 0)
501 n += (dev->param.chunks_per_block - dev->alloc_page);
502
503 return n;
504
505}
506
507/*
508 * yaffs_skip_rest_of_block() skips over the rest of the allocation block
509 * if we don't want to write to it.
510 */
511void yaffs_skip_rest_of_block(struct yaffs_dev *dev)
512{
513 if (dev->alloc_block > 0) {
514 struct yaffs_block_info *bi =
515 yaffs_get_block_info(dev, dev->alloc_block);
516 if (bi->block_state == YAFFS_BLOCK_STATE_ALLOCATING) {
517 bi->block_state = YAFFS_BLOCK_STATE_FULL;
518 dev->alloc_block = -1;
519 }
520 }
521}
522
523static int yaffs_write_new_chunk(struct yaffs_dev *dev,
524 const u8 * data,
525 struct yaffs_ext_tags *tags, int use_reserver)
526{
527 int attempts = 0;
528 int write_ok = 0;
529 int chunk;
530
531 yaffs2_checkpt_invalidate(dev);
532
533 do {
534 struct yaffs_block_info *bi = 0;
535 int erased_ok = 0;
536
537 chunk = yaffs_alloc_chunk(dev, use_reserver, &bi);
538 if (chunk < 0) {
539 /* no space */
540 break;
541 }
542
543 /* First check this chunk is erased, if it needs
544 * checking. The checking policy (unless forced
545 * always on) is as follows:
546 *
547 * Check the first page we try to write in a block.
548 * If the check passes then we don't need to check any
549 * more. If the check fails, we check again...
550 * If the block has been erased, we don't need to check.
551 *
552 * However, if the block has been prioritised for gc,
553 * then we think there might be something odd about
554 * this block and stop using it.
555 *
556 * Rationale: We should only ever see chunks that have
557 * not been erased if there was a partially written
558 * chunk due to power loss. This checking policy should
559 * catch that case with very few checks and thus save a
560 * lot of checks that are most likely not needed.
561 *
562 * Mods to the above
563 * If an erase check fails or the write fails we skip the
564 * rest of the block.
565 */
566
567 /* let's give it a try */
568 attempts++;
569
570 if (dev->param.always_check_erased)
571 bi->skip_erased_check = 0;
572
573 if (!bi->skip_erased_check) {
574 erased_ok = yaffs_check_chunk_erased(dev, chunk);
575 if (erased_ok != YAFFS_OK) {
576 yaffs_trace(YAFFS_TRACE_ERROR,
577 "**>> yaffs chunk %d was not erased",
578 chunk);
579
580 /* If not erased, delete this one,
581 * skip rest of block and
582 * try another chunk */
583 yaffs_chunk_del(dev, chunk, 1, __LINE__);
584 yaffs_skip_rest_of_block(dev);
585 continue;
586 }
587 }
588
589 write_ok = yaffs_wr_chunk_tags_nand(dev, chunk, data, tags);
590
591 if (!bi->skip_erased_check)
592 write_ok =
593 yaffs_verify_chunk_written(dev, chunk, data, tags);
594
595 if (write_ok != YAFFS_OK) {
596 /* Clean up aborted write, skip to next block and
597 * try another chunk */
598 yaffs_handle_chunk_wr_error(dev, chunk, erased_ok);
599 continue;
600 }
601
602 bi->skip_erased_check = 1;
603
604 /* Copy the data into the robustification buffer */
605 yaffs_handle_chunk_wr_ok(dev, chunk, data, tags);
606
607 } while (write_ok != YAFFS_OK &&
608 (yaffs_wr_attempts <= 0 || attempts <= yaffs_wr_attempts));
609
610 if (!write_ok)
611 chunk = -1;
612
613 if (attempts > 1) {
614 yaffs_trace(YAFFS_TRACE_ERROR,
615 "**>> yaffs write required %d attempts",
616 attempts);
617 dev->n_retired_writes += (attempts - 1);
618 }
619
620 return chunk;
621}
622
623/*
624 * Block retiring for handling a broken block.
625 */
626
627static void yaffs_retire_block(struct yaffs_dev *dev, int flash_block)
628{
629 struct yaffs_block_info *bi = yaffs_get_block_info(dev, flash_block);
630
631 yaffs2_checkpt_invalidate(dev);
632
633 yaffs2_clear_oldest_dirty_seq(dev, bi);
634
635 if (yaffs_mark_bad(dev, flash_block) != YAFFS_OK) {
636 if (yaffs_erase_block(dev, flash_block) != YAFFS_OK) {
637 yaffs_trace(YAFFS_TRACE_ALWAYS,
638 "yaffs: Failed to mark bad and erase block %d",
639 flash_block);
640 } else {
641 struct yaffs_ext_tags tags;
642 int chunk_id =
643 flash_block * dev->param.chunks_per_block;
644
645 u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
646
647 memset(buffer, 0xff, dev->data_bytes_per_chunk);
648 yaffs_init_tags(&tags);
649 tags.seq_number = YAFFS_SEQUENCE_BAD_BLOCK;
650 if (dev->param.write_chunk_tags_fn(dev, chunk_id -
651 dev->chunk_offset,
652 buffer,
653 &tags) != YAFFS_OK)
654 yaffs_trace(YAFFS_TRACE_ALWAYS,
655 "yaffs: Failed to write bad block marker to block %d",
656 flash_block);
657
658 yaffs_release_temp_buffer(dev, buffer, __LINE__);
659 }
660 }
661
662 bi->block_state = YAFFS_BLOCK_STATE_DEAD;
663 bi->gc_prioritise = 0;
664 bi->needs_retiring = 0;
665
666 dev->n_retired_blocks++;
667}
668
669/*---------------- Name handling functions ------------*/
670
671static u16 yaffs_calc_name_sum(const YCHAR * name)
672{
673 u16 sum = 0;
674 u16 i = 1;
675
676 const YUCHAR *bname = (const YUCHAR *)name;
677 if (bname) {
678 while ((*bname) && (i < (YAFFS_MAX_NAME_LENGTH / 2))) {
679
680 /* 0x1f mask is case insensitive */
681 sum += ((*bname) & 0x1f) * i;
682 i++;
683 bname++;
684 }
685 }
686 return sum;
687}
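
/*
 * Worked example with a hypothetical name: for "Ab", 'A' & 0x1f == 1
 * and 'b' & 0x1f == 2, so the sum is 1*1 + 2*2 == 5. "aB" produces
 * the same value, which is what the 0x1f mask means by
 * case-insensitive.
 */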
688
689void yaffs_set_obj_name(struct yaffs_obj *obj, const YCHAR * name)
690{
691#ifndef CONFIG_YAFFS_NO_SHORT_NAMES
692 memset(obj->short_name, 0, sizeof(obj->short_name));
693 if (name &&
694 strnlen(name, YAFFS_SHORT_NAME_LENGTH + 1) <=
695 YAFFS_SHORT_NAME_LENGTH)
696 strcpy(obj->short_name, name);
697 else
698 obj->short_name[0] = _Y('\0');
699#endif
700 obj->sum = yaffs_calc_name_sum(name);
701}
702
703void yaffs_set_obj_name_from_oh(struct yaffs_obj *obj,
704 const struct yaffs_obj_hdr *oh)
705{
706#ifdef CONFIG_YAFFS_AUTO_UNICODE
707 YCHAR tmp_name[YAFFS_MAX_NAME_LENGTH + 1];
708 memset(tmp_name, 0, sizeof(tmp_name));
709 yaffs_load_name_from_oh(obj->my_dev, tmp_name, oh->name,
710 YAFFS_MAX_NAME_LENGTH + 1);
711 yaffs_set_obj_name(obj, tmp_name);
712#else
713 yaffs_set_obj_name(obj, oh->name);
714#endif
715}
716
717/*-------------------- TNODES -------------------
718
719 * List of spare tnodes
720 * The list is hooked together using the first pointer
721 * in the tnode.
722 */
723
724struct yaffs_tnode *yaffs_get_tnode(struct yaffs_dev *dev)
725{
726 struct yaffs_tnode *tn = yaffs_alloc_raw_tnode(dev);
727 if (tn) {
728 memset(tn, 0, dev->tnode_size);
729 dev->n_tnodes++;
730 }
731
732 dev->checkpoint_blocks_required = 0; /* force recalculation */
733
734 return tn;
735}
736
737/* FreeTnode frees up a tnode and puts it back on the free list */
738static void yaffs_free_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn)
739{
740 yaffs_free_raw_tnode(dev, tn);
741 dev->n_tnodes--;
742 dev->checkpoint_blocks_required = 0; /* force recalculation */
743}
744
745static void yaffs_deinit_tnodes_and_objs(struct yaffs_dev *dev)
746{
747 yaffs_deinit_raw_tnodes_and_objs(dev);
748 dev->n_obj = 0;
749 dev->n_tnodes = 0;
750}
751
752void yaffs_load_tnode_0(struct yaffs_dev *dev, struct yaffs_tnode *tn,
753 unsigned pos, unsigned val)
754{
755 u32 *map = (u32 *) tn;
756 u32 bit_in_map;
757 u32 bit_in_word;
758 u32 word_in_map;
759 u32 mask;
760
761 pos &= YAFFS_TNODES_LEVEL0_MASK;
762 val >>= dev->chunk_grp_bits;
763
764 bit_in_map = pos * dev->tnode_width;
765 word_in_map = bit_in_map / 32;
766 bit_in_word = bit_in_map & (32 - 1);
767
768 mask = dev->tnode_mask << bit_in_word;
769
770 map[word_in_map] &= ~mask;
771 map[word_in_map] |= (mask & (val << bit_in_word));
772
773 if (dev->tnode_width > (32 - bit_in_word)) {
774 bit_in_word = (32 - bit_in_word);
 775		word_in_map++;
776 mask =
777 dev->tnode_mask >> ( /*dev->tnode_width - */ bit_in_word);
778 map[word_in_map] &= ~mask;
779 map[word_in_map] |= (mask & (val >> bit_in_word));
780 }
781}
782
783u32 yaffs_get_group_base(struct yaffs_dev *dev, struct yaffs_tnode *tn,
784 unsigned pos)
785{
786 u32 *map = (u32 *) tn;
787 u32 bit_in_map;
788 u32 bit_in_word;
789 u32 word_in_map;
790 u32 val;
791
792 pos &= YAFFS_TNODES_LEVEL0_MASK;
793
794 bit_in_map = pos * dev->tnode_width;
795 word_in_map = bit_in_map / 32;
796 bit_in_word = bit_in_map & (32 - 1);
797
798 val = map[word_in_map] >> bit_in_word;
799
800 if (dev->tnode_width > (32 - bit_in_word)) {
801 bit_in_word = (32 - bit_in_word);
 802		word_in_map++;
803 val |= (map[word_in_map] << bit_in_word);
804 }
805
806 val &= dev->tnode_mask;
807 val <<= dev->chunk_grp_bits;
808
809 return val;
810}
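
/*
 * Illustrative sketch, not part of the original patch: the load/read
 * pair above round-trips a value, modulo the low chunk_grp_bits that
 * are deliberately dropped. "dev" and "tn" are assumed valid; the
 * slot and chunk id are hypothetical.
 */
static void yaffs_tnode_pack_sketch(struct yaffs_dev *dev,
				    struct yaffs_tnode *tn)
{
	unsigned pos = 5;	/* level-0 slot */
	unsigned val = 0x1234;	/* NAND chunk id */

	yaffs_load_tnode_0(dev, tn, pos, val);

	/* With chunk_grp_bits == 0 this returns exactly 0x1234; with
	 * grouping it returns the base chunk of the group instead. */
	(void)yaffs_get_group_base(dev, tn, pos);
}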
811
812/* ------------------- End of individual tnode manipulation -----------------*/
813
814/* ---------Functions to manipulate the look-up tree (made up of tnodes) ------
815 * The look up tree is represented by the top tnode and the number of top_level
816 * in the tree. 0 means only the level 0 tnode is in the tree.
817 */
818
819/* FindLevel0Tnode finds the level 0 tnode, if one exists. */
820struct yaffs_tnode *yaffs_find_tnode_0(struct yaffs_dev *dev,
821 struct yaffs_file_var *file_struct,
822 u32 chunk_id)
823{
824 struct yaffs_tnode *tn = file_struct->top;
825 u32 i;
826 int required_depth;
827 int level = file_struct->top_level;
828
 829	dev = dev;	/* suppress unused-parameter warning */
830
831 /* Check sane level and chunk Id */
832 if (level < 0 || level > YAFFS_TNODES_MAX_LEVEL)
833 return NULL;
834
835 if (chunk_id > YAFFS_MAX_CHUNK_ID)
836 return NULL;
837
 838	/* First check we're tall enough (i.e. enough top_level) */
839
840 i = chunk_id >> YAFFS_TNODES_LEVEL0_BITS;
841 required_depth = 0;
842 while (i) {
843 i >>= YAFFS_TNODES_INTERNAL_BITS;
844 required_depth++;
845 }
846
847 if (required_depth > file_struct->top_level)
848 return NULL; /* Not tall enough, so we can't find it */
849
850 /* Traverse down to level 0 */
851 while (level > 0 && tn) {
852 tn = tn->internal[(chunk_id >>
853 (YAFFS_TNODES_LEVEL0_BITS +
854 (level - 1) *
855 YAFFS_TNODES_INTERNAL_BITS)) &
856 YAFFS_TNODES_INTERNAL_MASK];
857 level--;
858 }
859
860 return tn;
861}
862
863/* AddOrFindLevel0Tnode finds the level 0 tnode if it exists, otherwise first expands the tree.
864 * This happens in two steps:
865 * 1. If the tree isn't tall enough, then make it taller.
866 * 2. Scan down the tree towards the level 0 tnode adding tnodes if required.
867 *
868 * Used when modifying the tree.
869 *
 870 * If the tn argument is NULL, then a fresh tnode will be added; otherwise the specified tn will
 871 * be plugged into the tree.
872 */
873
874struct yaffs_tnode *yaffs_add_find_tnode_0(struct yaffs_dev *dev,
875 struct yaffs_file_var *file_struct,
876 u32 chunk_id,
877 struct yaffs_tnode *passed_tn)
878{
879 int required_depth;
880 int i;
881 int l;
882 struct yaffs_tnode *tn;
883
884 u32 x;
885
886 /* Check sane level and page Id */
887 if (file_struct->top_level < 0
888 || file_struct->top_level > YAFFS_TNODES_MAX_LEVEL)
889 return NULL;
890
891 if (chunk_id > YAFFS_MAX_CHUNK_ID)
892 return NULL;
893
 894	/* First check we're tall enough (i.e. enough top_level) */
895
896 x = chunk_id >> YAFFS_TNODES_LEVEL0_BITS;
897 required_depth = 0;
898 while (x) {
899 x >>= YAFFS_TNODES_INTERNAL_BITS;
900 required_depth++;
901 }
902
903 if (required_depth > file_struct->top_level) {
904 /* Not tall enough, gotta make the tree taller */
905 for (i = file_struct->top_level; i < required_depth; i++) {
906
907 tn = yaffs_get_tnode(dev);
908
909 if (tn) {
910 tn->internal[0] = file_struct->top;
911 file_struct->top = tn;
912 file_struct->top_level++;
913 } else {
914 yaffs_trace(YAFFS_TRACE_ERROR, "yaffs: no more tnodes");
915 return NULL;
916 }
917 }
918 }
919
920 /* Traverse down to level 0, adding anything we need */
921
922 l = file_struct->top_level;
923 tn = file_struct->top;
924
925 if (l > 0) {
926 while (l > 0 && tn) {
927 x = (chunk_id >>
928 (YAFFS_TNODES_LEVEL0_BITS +
929 (l - 1) * YAFFS_TNODES_INTERNAL_BITS)) &
930 YAFFS_TNODES_INTERNAL_MASK;
931
932 if ((l > 1) && !tn->internal[x]) {
933 /* Add missing non-level-zero tnode */
934 tn->internal[x] = yaffs_get_tnode(dev);
935 if (!tn->internal[x])
936 return NULL;
937 } else if (l == 1) {
938 /* Looking from level 1 at level 0 */
939 if (passed_tn) {
940 /* If we already have one, then release it. */
941 if (tn->internal[x])
942 yaffs_free_tnode(dev,
943 tn->
944 internal[x]);
945 tn->internal[x] = passed_tn;
946
947 } else if (!tn->internal[x]) {
948 /* Don't have one, none passed in */
949 tn->internal[x] = yaffs_get_tnode(dev);
950 if (!tn->internal[x])
951 return NULL;
952 }
953 }
954
955 tn = tn->internal[x];
956 l--;
957 }
958 } else {
959 /* top is level 0 */
960 if (passed_tn) {
961 memcpy(tn, passed_tn,
962 (dev->tnode_width * YAFFS_NTNODES_LEVEL0) / 8);
963 yaffs_free_tnode(dev, passed_tn);
964 }
965 }
966
967 return tn;
968}
969
970static int yaffs_tags_match(const struct yaffs_ext_tags *tags, int obj_id,
971 int chunk_obj)
972{
973 return (tags->chunk_id == chunk_obj &&
974 tags->obj_id == obj_id && !tags->is_deleted) ? 1 : 0;
975
976}
977
978static int yaffs_find_chunk_in_group(struct yaffs_dev *dev, int the_chunk,
979 struct yaffs_ext_tags *tags, int obj_id,
980 int inode_chunk)
981{
982 int j;
983
984 for (j = 0; the_chunk && j < dev->chunk_grp_size; j++) {
985 if (yaffs_check_chunk_bit
986 (dev, the_chunk / dev->param.chunks_per_block,
987 the_chunk % dev->param.chunks_per_block)) {
988
989 if (dev->chunk_grp_size == 1)
990 return the_chunk;
991 else {
992 yaffs_rd_chunk_tags_nand(dev, the_chunk, NULL,
993 tags);
994 if (yaffs_tags_match(tags, obj_id, inode_chunk)) {
995 /* found it; */
996 return the_chunk;
997 }
998 }
999 }
1000 the_chunk++;
1001 }
1002 return -1;
1003}
1004
1005static int yaffs_find_chunk_in_file(struct yaffs_obj *in, int inode_chunk,
1006 struct yaffs_ext_tags *tags)
1007{
 1008	/* Get the Tnode, then get the level 0 chunk offset */
1009 struct yaffs_tnode *tn;
1010 int the_chunk = -1;
1011 struct yaffs_ext_tags local_tags;
1012 int ret_val = -1;
1013
1014 struct yaffs_dev *dev = in->my_dev;
1015
1016 if (!tags) {
1017 /* Passed a NULL, so use our own tags space */
1018 tags = &local_tags;
1019 }
1020
1021 tn = yaffs_find_tnode_0(dev, &in->variant.file_variant, inode_chunk);
1022
1023 if (tn) {
1024 the_chunk = yaffs_get_group_base(dev, tn, inode_chunk);
1025
1026 ret_val =
1027 yaffs_find_chunk_in_group(dev, the_chunk, tags, in->obj_id,
1028 inode_chunk);
1029 }
1030 return ret_val;
1031}
1032
1033static int yaffs_find_del_file_chunk(struct yaffs_obj *in, int inode_chunk,
1034 struct yaffs_ext_tags *tags)
1035{
 1036	/* Get the Tnode, then get the level 0 chunk offset */
1037 struct yaffs_tnode *tn;
1038 int the_chunk = -1;
1039 struct yaffs_ext_tags local_tags;
1040
1041 struct yaffs_dev *dev = in->my_dev;
1042 int ret_val = -1;
1043
1044 if (!tags) {
1045 /* Passed a NULL, so use our own tags space */
1046 tags = &local_tags;
1047 }
1048
1049 tn = yaffs_find_tnode_0(dev, &in->variant.file_variant, inode_chunk);
1050
1051 if (tn) {
1052
1053 the_chunk = yaffs_get_group_base(dev, tn, inode_chunk);
1054
1055 ret_val =
1056 yaffs_find_chunk_in_group(dev, the_chunk, tags, in->obj_id,
1057 inode_chunk);
1058
1059 /* Delete the entry in the filestructure (if found) */
1060 if (ret_val != -1)
1061 yaffs_load_tnode_0(dev, tn, inode_chunk, 0);
1062 }
1063
1064 return ret_val;
1065}
1066
1067int yaffs_put_chunk_in_file(struct yaffs_obj *in, int inode_chunk,
1068 int nand_chunk, int in_scan)
1069{
1070 /* NB in_scan is zero unless scanning.
1071 * For forward scanning, in_scan is > 0;
1072 * for backward scanning in_scan is < 0
1073 *
1074 * nand_chunk = 0 is a dummy insert to make sure the tnodes are there.
1075 */
1076
1077 struct yaffs_tnode *tn;
1078 struct yaffs_dev *dev = in->my_dev;
 1079	int existing_chunk;
1080 struct yaffs_ext_tags existing_tags;
1081 struct yaffs_ext_tags new_tags;
1082 unsigned existing_serial, new_serial;
1083
1084 if (in->variant_type != YAFFS_OBJECT_TYPE_FILE) {
 1085		/* Just ignore an attempt at putting a chunk into a non-file during scanning.
 1086		 * If it is not during scanning then something went wrong!
1087 */
1088 if (!in_scan) {
1089 yaffs_trace(YAFFS_TRACE_ERROR,
1090 "yaffs tragedy:attempt to put data chunk into a non-file"
1091 );
1092 YBUG();
1093 }
1094
1095 yaffs_chunk_del(dev, nand_chunk, 1, __LINE__);
1096 return YAFFS_OK;
1097 }
1098
1099 tn = yaffs_add_find_tnode_0(dev,
1100 &in->variant.file_variant,
1101 inode_chunk, NULL);
1102 if (!tn)
1103 return YAFFS_FAIL;
1104
1105 if (!nand_chunk)
1106 /* Dummy insert, bail now */
1107 return YAFFS_OK;
1108
 1109	existing_chunk = yaffs_get_group_base(dev, tn, inode_chunk);
1110
1111 if (in_scan != 0) {
1112 /* If we're scanning then we need to test for duplicates
1113 * NB This does not need to be efficient since it should only ever
 1114		 * happen when the power fails during a write; then only one
1115 * chunk should ever be affected.
1116 *
1117 * Correction for YAFFS2: This could happen quite a lot and we need to think about efficiency! TODO
1118 * Update: For backward scanning we don't need to re-read tags so this is quite cheap.
1119 */
1120
 1121		if (existing_chunk > 0) {
 1122			/* NB Right now the existing chunk will not be the real chunk_id if the chunk group size > 1
1123 * thus we have to do a FindChunkInFile to get the real chunk id.
1124 *
1125 * We have a duplicate now we need to decide which one to use:
1126 *
1127 * Backwards scanning YAFFS2: The old one is what we use, dump the new one.
1128 * Forward scanning YAFFS2: The new one is what we use, dump the old one.
1129 * YAFFS1: Get both sets of tags and compare serial numbers.
1130 */
1131
1132 if (in_scan > 0) {
1133 /* Only do this for forward scanning */
1134 yaffs_rd_chunk_tags_nand(dev,
1135 nand_chunk,
1136 NULL, &new_tags);
1137
1138 /* Do a proper find */
 1139				existing_chunk =
1140 yaffs_find_chunk_in_file(in, inode_chunk,
1141 &existing_tags);
1142 }
1143
 1144			if (existing_chunk <= 0) {
 1145				/* Hoosterman - how did this happen? */
1146
1147 yaffs_trace(YAFFS_TRACE_ERROR,
1148 "yaffs tragedy: existing chunk < 0 in scan"
1149 );
1150
1151 }
1152
1153 /* NB The deleted flags should be false, otherwise the chunks will
1154 * not be loaded during a scan
1155 */
1156
1157 if (in_scan > 0) {
1158 new_serial = new_tags.serial_number;
1159 existing_serial = existing_tags.serial_number;
1160 }
1161
1162 if ((in_scan > 0) &&
 1163			    (existing_chunk <= 0 ||
1164 ((existing_serial + 1) & 3) == new_serial)) {
1165 /* Forward scanning.
1166 * Use new
1167 * Delete the old one and drop through to update the tnode
1168 */
 1169				yaffs_chunk_del(dev, existing_chunk, 1,
1170 __LINE__);
1171 } else {
1172 /* Backward scanning or we want to use the existing one
1173 * Use existing.
1174 * Delete the new one and return early so that the tnode isn't changed
1175 */
1176 yaffs_chunk_del(dev, nand_chunk, 1, __LINE__);
1177 return YAFFS_OK;
1178 }
1179 }
1180
1181 }
1182
 1183	if (existing_chunk == 0)
1184 in->n_data_chunks++;
1185
1186 yaffs_load_tnode_0(dev, tn, inode_chunk, nand_chunk);
1187
1188 return YAFFS_OK;
1189}
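
/*
 * Worked example for the serial-number comparison above (hypothetical
 * values): the & 3 makes the test modulo 4, so with an existing
 * serial of 3, ((3 + 1) & 3) == 0 means a new chunk carrying serial 0
 * is the fresher copy and the old chunk is the one to delete.
 */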
1190
1191static void yaffs_soft_del_chunk(struct yaffs_dev *dev, int chunk)
1192{
1193 struct yaffs_block_info *the_block;
1194 unsigned block_no;
1195
1196 yaffs_trace(YAFFS_TRACE_DELETION, "soft delete chunk %d", chunk);
1197
1198 block_no = chunk / dev->param.chunks_per_block;
1199 the_block = yaffs_get_block_info(dev, block_no);
1200 if (the_block) {
1201 the_block->soft_del_pages++;
1202 dev->n_free_chunks++;
1203 yaffs2_update_oldest_dirty_seq(dev, block_no, the_block);
1204 }
1205}
1206
1207/* SoftDeleteWorker scans backwards through the tnode tree and soft deletes all the chunks in the file.
1208 * All soft deleting does is increment the block's softdelete count and pulls the chunk out
1209 * of the tnode.
1210 * Thus, essentially this is the same as DeleteWorker except that the chunks are soft deleted.
1211 */
1212
1213static int yaffs_soft_del_worker(struct yaffs_obj *in, struct yaffs_tnode *tn,
1214 u32 level, int chunk_offset)
1215{
1216 int i;
1217 int the_chunk;
1218 int all_done = 1;
1219 struct yaffs_dev *dev = in->my_dev;
1220
1221 if (tn) {
1222 if (level > 0) {
1223
1224 for (i = YAFFS_NTNODES_INTERNAL - 1; all_done && i >= 0;
1225 i--) {
1226 if (tn->internal[i]) {
1227 all_done =
1228 yaffs_soft_del_worker(in,
1229 tn->internal
1230 [i],
1231 level - 1,
1232 (chunk_offset
1233 <<
1234 YAFFS_TNODES_INTERNAL_BITS)
1235 + i);
1236 if (all_done) {
1237 yaffs_free_tnode(dev,
1238 tn->internal
1239 [i]);
1240 tn->internal[i] = NULL;
1241 } else {
1242 /* Hoosterman... how could this happen? */
1243 }
1244 }
1245 }
1246 return (all_done) ? 1 : 0;
1247 } else if (level == 0) {
1248
1249 for (i = YAFFS_NTNODES_LEVEL0 - 1; i >= 0; i--) {
1250 the_chunk = yaffs_get_group_base(dev, tn, i);
1251 if (the_chunk) {
1252 /* Note this does not find the real chunk, only the chunk group.
1253 * We make an assumption that a chunk group is not larger than
1254 * a block.
1255 */
1256 yaffs_soft_del_chunk(dev, the_chunk);
1257 yaffs_load_tnode_0(dev, tn, i, 0);
1258 }
1259
1260 }
1261 return 1;
1262
1263 }
1264
1265 }
1266
1267 return 1;
1268
1269}
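
/*
 * Illustrative sketch, not part of the driver: the net accounting effect of
 * soft deletion. Each soft-deleted chunk bumps the owning block's
 * soft_del_pages (see yaffs_soft_del_chunk() above), so the number of pages
 * a gc pass would actually have to copy off a block is:
 */
static inline int example_live_pages(struct yaffs_block_info *bi)
{
	/* pages still holding data that must be copied before erasure */
	return bi->pages_in_use - bi->soft_del_pages;
}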
1270
1271static void yaffs_remove_obj_from_dir(struct yaffs_obj *obj)
1272{
1273 struct yaffs_dev *dev = obj->my_dev;
1274 struct yaffs_obj *parent;
1275
1276 yaffs_verify_obj_in_dir(obj);
1277 parent = obj->parent;
1278
1279 yaffs_verify_dir(parent);
1280
1281 if (dev && dev->param.remove_obj_fn)
1282 dev->param.remove_obj_fn(obj);
1283
1284 list_del_init(&obj->siblings);
1285 obj->parent = NULL;
1286
1287 yaffs_verify_dir(parent);
1288}
1289
1290void yaffs_add_obj_to_dir(struct yaffs_obj *directory, struct yaffs_obj *obj)
1291{
1292 if (!directory) {
1293 yaffs_trace(YAFFS_TRACE_ALWAYS,
1294 "tragedy: Trying to add an object to a null pointer directory"
1295 );
1296 YBUG();
1297 return;
1298 }
1299 if (directory->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
1300 yaffs_trace(YAFFS_TRACE_ALWAYS,
1301 "tragedy: Trying to add an object to a non-directory"
1302 );
1303 YBUG();
1304 }
1305
1306 if (obj->siblings.prev == NULL) {
1307 /* Not initialised */
1308 YBUG();
1309 }
1310
1311 yaffs_verify_dir(directory);
1312
1313 yaffs_remove_obj_from_dir(obj);
1314
1315 /* Now add it */
1316 list_add(&obj->siblings, &directory->variant.dir_variant.children);
1317 obj->parent = directory;
1318
1319 if (directory == obj->my_dev->unlinked_dir
1320 || directory == obj->my_dev->del_dir) {
1321 obj->unlinked = 1;
1322 obj->my_dev->n_unlinked_files++;
1323 obj->rename_allowed = 0;
1324 }
1325
1326 yaffs_verify_dir(directory);
1327 yaffs_verify_obj_in_dir(obj);
1328}
1329
1330static int yaffs_change_obj_name(struct yaffs_obj *obj,
1331 struct yaffs_obj *new_dir,
1332 const YCHAR * new_name, int force, int shadows)
1333{
1334 int unlink_op;
1335 int del_op;
1336
1337 struct yaffs_obj *existing_target;
1338
1339 if (new_dir == NULL)
1340 new_dir = obj->parent; /* use the old directory */
1341
1342 if (new_dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
1343 yaffs_trace(YAFFS_TRACE_ALWAYS,
1344 "tragedy: yaffs_change_obj_name: new_dir is not a directory"
1345 );
1346 YBUG();
1347 }
1348
1349 /* TODO: Do we need this different handling for YAFFS2 and YAFFS1?? */
1350 if (obj->my_dev->param.is_yaffs2)
1351 unlink_op = (new_dir == obj->my_dev->unlinked_dir);
1352 else
1353 unlink_op = (new_dir == obj->my_dev->unlinked_dir
1354 && obj->variant_type == YAFFS_OBJECT_TYPE_FILE);
1355
1356 del_op = (new_dir == obj->my_dev->del_dir);
1357
1358 existing_target = yaffs_find_by_name(new_dir, new_name);
1359
1360 /* If the object is a file going into the unlinked directory,
1361 * then it is OK to just stuff it in since duplicate names are allowed.
1362 * else only proceed if the new name does not exist and if we're putting
1363 * it into a directory.
1364 */
1365 if ((unlink_op ||
1366 del_op ||
1367 force ||
1368 (shadows > 0) ||
1369 !existing_target) &&
1370 new_dir->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY) {
1371 yaffs_set_obj_name(obj, new_name);
1372 obj->dirty = 1;
1373
1374 yaffs_add_obj_to_dir(new_dir, obj);
1375
1376 if (unlink_op)
1377 obj->unlinked = 1;
1378
1379 /* If it is a deletion then we mark it as a shrink for gc purposes. */
1380 if (yaffs_update_oh(obj, new_name, 0, del_op, shadows, NULL) >=
1381 0)
1382 return YAFFS_OK;
1383 }
1384
1385 return YAFFS_FAIL;
1386}
1387
1388/*------------------------ Short Operations Cache ----------------------------------------
1389 * In many situations where there is no high level buffering a lot of
1390 * reads might be short sequential reads, and a lot of writes may be short
1391 * sequential writes. eg. scanning/writing a jpeg file.
 1392  * In these cases, a short read/write cache can provide a huge performance
1393 * benefit with dumb-as-a-rock code.
1394 * In Linux, the page cache provides read buffering and the short op cache
1395 * provides write buffering.
1396 *
1397 * There are a limited number (~10) of cache chunks per device so that we don't
1398 * need a very intelligent search.
1399 */
1400
1401static int yaffs_obj_cache_dirty(struct yaffs_obj *obj)
1402{
1403 struct yaffs_dev *dev = obj->my_dev;
1404 int i;
1405 struct yaffs_cache *cache;
1406 int n_caches = obj->my_dev->param.n_caches;
1407
1408 for (i = 0; i < n_caches; i++) {
1409 cache = &dev->cache[i];
1410 if (cache->object == obj && cache->dirty)
1411 return 1;
1412 }
1413
1414 return 0;
1415}
1416
1417static void yaffs_flush_file_cache(struct yaffs_obj *obj)
1418{
1419 struct yaffs_dev *dev = obj->my_dev;
1420 int lowest = -99; /* Stop compiler whining. */
1421 int i;
1422 struct yaffs_cache *cache;
1423 int chunk_written = 0;
1424 int n_caches = obj->my_dev->param.n_caches;
1425
1426 if (n_caches > 0) {
1427 do {
1428 cache = NULL;
1429
1430 /* Find the dirty cache for this object with the lowest chunk id. */
1431 for (i = 0; i < n_caches; i++) {
1432 if (dev->cache[i].object == obj &&
1433 dev->cache[i].dirty) {
1434 if (!cache
1435 || dev->cache[i].chunk_id <
1436 lowest) {
1437 cache = &dev->cache[i];
1438 lowest = cache->chunk_id;
1439 }
1440 }
1441 }
1442
1443 if (cache && !cache->locked) {
1444 /* Write it out and free it up */
1445
1446 chunk_written =
1447 yaffs_wr_data_obj(cache->object,
1448 cache->chunk_id,
1449 cache->data,
1450 cache->n_bytes, 1);
1451 cache->dirty = 0;
1452 cache->object = NULL;
1453 }
1454
1455 } while (cache && chunk_written > 0);
1456
1457 if (cache)
1458 /* Hoosterman, disk full while writing cache out. */
1459 yaffs_trace(YAFFS_TRACE_ERROR,
1460 "yaffs tragedy: no space during cache write");
1461
1462 }
1463
1464}
1465
 1466 /* yaffs_flush_whole_cache(dev)
 1467  *
 1468  * Flush the dirty cache entries of every object on the device.
 1469  */
1470
1471void yaffs_flush_whole_cache(struct yaffs_dev *dev)
1472{
1473 struct yaffs_obj *obj;
1474 int n_caches = dev->param.n_caches;
1475 int i;
1476
1477 /* Find a dirty object in the cache and flush it...
1478 * until there are no further dirty objects.
1479 */
1480 do {
1481 obj = NULL;
1482 for (i = 0; i < n_caches && !obj; i++) {
1483 if (dev->cache[i].object && dev->cache[i].dirty)
1484 obj = dev->cache[i].object;
1485
1486 }
1487 if (obj)
1488 yaffs_flush_file_cache(obj);
1489
1490 } while (obj);
1491
1492}
1493
1494/* Grab us a cache chunk for use.
1495 * First look for an empty one.
1496 * Then look for the least recently used non-dirty one.
 1497  * Then look for the least recently used dirty one; flush it and look again.
1498 */
1499static struct yaffs_cache *yaffs_grab_chunk_worker(struct yaffs_dev *dev)
1500{
1501 int i;
1502
1503 if (dev->param.n_caches > 0) {
1504 for (i = 0; i < dev->param.n_caches; i++) {
1505 if (!dev->cache[i].object)
1506 return &dev->cache[i];
1507 }
1508 }
1509
1510 return NULL;
1511}
1512
1513static struct yaffs_cache *yaffs_grab_chunk_cache(struct yaffs_dev *dev)
1514{
1515 struct yaffs_cache *cache;
1516 struct yaffs_obj *the_obj;
1517 int usage;
1518 int i;
1519 int pushout;
1520
1521 if (dev->param.n_caches > 0) {
 1522 		/* Try to find a non-dirty one... */
1523
1524 cache = yaffs_grab_chunk_worker(dev);
1525
1526 if (!cache) {
 1527 			/* They were all dirty; find the least recently used object and flush
 1528 			 * its cache, then look again.
 1529 			 * NB What's here is not very accurate: we actually flush the object
 1530 			 * that owns the least recently used page.
 1531 			 */
1531 */
1532
1533 /* With locking we can't assume we can use entry zero */
1534
1535 the_obj = NULL;
1536 usage = -1;
1537 cache = NULL;
1538 pushout = -1;
1539
1540 for (i = 0; i < dev->param.n_caches; i++) {
1541 if (dev->cache[i].object &&
1542 !dev->cache[i].locked &&
1543 (dev->cache[i].last_use < usage
1544 || !cache)) {
1545 usage = dev->cache[i].last_use;
1546 the_obj = dev->cache[i].object;
1547 cache = &dev->cache[i];
1548 pushout = i;
1549 }
1550 }
1551
1552 if (!cache || cache->dirty) {
1553 /* Flush and try again */
1554 yaffs_flush_file_cache(the_obj);
1555 cache = yaffs_grab_chunk_worker(dev);
1556 }
1557
1558 }
1559 return cache;
1560 } else {
1561 return NULL;
1562 }
1563}
1564
1565/* Find a cached chunk */
1566static struct yaffs_cache *yaffs_find_chunk_cache(const struct yaffs_obj *obj,
1567 int chunk_id)
1568{
1569 struct yaffs_dev *dev = obj->my_dev;
1570 int i;
1571 if (dev->param.n_caches > 0) {
1572 for (i = 0; i < dev->param.n_caches; i++) {
1573 if (dev->cache[i].object == obj &&
1574 dev->cache[i].chunk_id == chunk_id) {
1575 dev->cache_hits++;
1576
1577 return &dev->cache[i];
1578 }
1579 }
1580 }
1581 return NULL;
1582}
1583
 1584 /* Mark the chunk for the least recently used algorithm */
1585static void yaffs_use_cache(struct yaffs_dev *dev, struct yaffs_cache *cache,
1586 int is_write)
1587{
1588
1589 if (dev->param.n_caches > 0) {
1590 if (dev->cache_last_use < 0 || dev->cache_last_use > 100000000) {
1591 /* Reset the cache usages */
1592 int i;
1593 for (i = 1; i < dev->param.n_caches; i++)
1594 dev->cache[i].last_use = 0;
1595
1596 dev->cache_last_use = 0;
1597 }
1598
1599 dev->cache_last_use++;
1600
1601 cache->last_use = dev->cache_last_use;
1602
1603 if (is_write)
1604 cache->dirty = 1;
1605 }
1606}
1607
1608/* Invalidate a single cache page.
1609 * Do this when a whole page gets written,
1610 * ie the short cache for this page is no longer valid.
1611 */
1612static void yaffs_invalidate_chunk_cache(struct yaffs_obj *object, int chunk_id)
1613{
1614 if (object->my_dev->param.n_caches > 0) {
1615 struct yaffs_cache *cache =
1616 yaffs_find_chunk_cache(object, chunk_id);
1617
1618 if (cache)
1619 cache->object = NULL;
1620 }
1621}
1622
1623/* Invalidate all the cache pages associated with this object
 1624  * Do this whenever the file is deleted or resized.
1625 */
1626static void yaffs_invalidate_whole_cache(struct yaffs_obj *in)
1627{
1628 int i;
1629 struct yaffs_dev *dev = in->my_dev;
1630
1631 if (dev->param.n_caches > 0) {
1632 /* Invalidate it. */
1633 for (i = 0; i < dev->param.n_caches; i++) {
1634 if (dev->cache[i].object == in)
1635 dev->cache[i].object = NULL;
1636 }
1637 }
1638}
1639
1640static void yaffs_unhash_obj(struct yaffs_obj *obj)
1641{
1642 int bucket;
1643 struct yaffs_dev *dev = obj->my_dev;
1644
1645 /* If it is still linked into the bucket list, free from the list */
1646 if (!list_empty(&obj->hash_link)) {
1647 list_del_init(&obj->hash_link);
1648 bucket = yaffs_hash_fn(obj->obj_id);
1649 dev->obj_bucket[bucket].count--;
1650 }
1651}
1652
 1653 /* FreeObject frees up an Object and puts it back on the free list */
1654static void yaffs_free_obj(struct yaffs_obj *obj)
1655{
1656 struct yaffs_dev *dev = obj->my_dev;
1657
 1658 	if (!obj) {
 1659 		YBUG();
 1660 		return;
 1661 	}
 1662 	yaffs_trace(YAFFS_TRACE_OS, "FreeObject %p inode %p",
 1663 		obj, obj->my_inode);
 1664 
 1665 	if (obj->parent || !list_empty(&obj->siblings))
 1666 		YBUG();
1667
1668 if (obj->my_inode) {
1669 /* We're still hooked up to a cached inode.
1670 * Don't delete now, but mark for later deletion
1671 */
1672 obj->defered_free = 1;
1673 return;
1674 }
1675
1676 yaffs_unhash_obj(obj);
1677
1678 yaffs_free_raw_obj(dev, obj);
1679 dev->n_obj--;
1680 dev->checkpoint_blocks_required = 0; /* force recalculation */
1681}
1682
1683void yaffs_handle_defered_free(struct yaffs_obj *obj)
1684{
1685 if (obj->defered_free)
1686 yaffs_free_obj(obj);
1687}
1688
1689static int yaffs_generic_obj_del(struct yaffs_obj *in)
1690{
1691
1692 /* First off, invalidate the file's data in the cache, without flushing. */
1693 yaffs_invalidate_whole_cache(in);
1694
1695 if (in->my_dev->param.is_yaffs2 && (in->parent != in->my_dev->del_dir)) {
 1696 		/* Move to the deleted directory so we have a record that it was deleted. */
1697 yaffs_change_obj_name(in, in->my_dev->del_dir, _Y("deleted"), 0,
1698 0);
1699
1700 }
1701
1702 yaffs_remove_obj_from_dir(in);
1703 yaffs_chunk_del(in->my_dev, in->hdr_chunk, 1, __LINE__);
1704 in->hdr_chunk = 0;
1705
1706 yaffs_free_obj(in);
1707 return YAFFS_OK;
1708
1709}
1710
1711static void yaffs_soft_del_file(struct yaffs_obj *obj)
1712{
1713 if (obj->deleted &&
1714 obj->variant_type == YAFFS_OBJECT_TYPE_FILE && !obj->soft_del) {
1715 if (obj->n_data_chunks <= 0) {
1716 /* Empty file with no duplicate object headers,
1717 * just delete it immediately */
1718 yaffs_free_tnode(obj->my_dev,
1719 obj->variant.file_variant.top);
1720 obj->variant.file_variant.top = NULL;
1721 yaffs_trace(YAFFS_TRACE_TRACING,
1722 "yaffs: Deleting empty file %d",
1723 obj->obj_id);
1724 yaffs_generic_obj_del(obj);
1725 } else {
1726 yaffs_soft_del_worker(obj,
1727 obj->variant.file_variant.top,
1728 obj->variant.
1729 file_variant.top_level, 0);
1730 obj->soft_del = 1;
1731 }
1732 }
1733}
1734
1735/* Pruning removes any part of the file structure tree that is beyond the
1736 * bounds of the file (ie that does not point to chunks).
1737 *
1738 * A file should only get pruned when its size is reduced.
1739 *
1740 * Before pruning, the chunks must be pulled from the tree and the
1741 * level 0 tnode entries must be zeroed out.
1742 * Could also use this for file deletion, but that's probably better handled
1743 * by a special case.
1744 *
1745 * This function is recursive. For levels > 0 the function is called again on
1746 * any sub-tree. For level == 0 we just check if the sub-tree has data.
1747 * If there is no data in a subtree then it is pruned.
1748 */
1749
1750static struct yaffs_tnode *yaffs_prune_worker(struct yaffs_dev *dev,
1751 struct yaffs_tnode *tn, u32 level,
1752 int del0)
1753{
1754 int i;
1755 int has_data;
1756
1757 if (tn) {
1758 has_data = 0;
1759
1760 if (level > 0) {
1761 for (i = 0; i < YAFFS_NTNODES_INTERNAL; i++) {
1762 if (tn->internal[i]) {
1763 tn->internal[i] =
1764 yaffs_prune_worker(dev,
1765 tn->internal[i],
1766 level - 1,
1767 (i ==
1768 0) ? del0 : 1);
1769 }
1770
1771 if (tn->internal[i])
1772 has_data++;
1773 }
1774 } else {
1775 int tnode_size_u32 = dev->tnode_size / sizeof(u32);
1776 u32 *map = (u32 *) tn;
1777
1778 for (i = 0; !has_data && i < tnode_size_u32; i++) {
1779 if (map[i])
1780 has_data++;
1781 }
1782 }
1783
1784 if (has_data == 0 && del0) {
1785 /* Free and return NULL */
1786
1787 yaffs_free_tnode(dev, tn);
1788 tn = NULL;
1789 }
1790
1791 }
1792
1793 return tn;
1794
1795}
1796
1797static int yaffs_prune_tree(struct yaffs_dev *dev,
1798 struct yaffs_file_var *file_struct)
1799{
1800 int i;
1801 int has_data;
1802 int done = 0;
1803 struct yaffs_tnode *tn;
1804
1805 if (file_struct->top_level > 0) {
1806 file_struct->top =
1807 yaffs_prune_worker(dev, file_struct->top,
1808 file_struct->top_level, 0);
1809
 1810 		/* Now we have a tree with all the empty branches NULLed out, but the
 1811 		 * height is the same as it was.
 1812 		 * Let's see if we can trim internal tnodes to shorten the tree.
 1813 		 * We can do this if only the 0th element in the tnode is in use
 1814 		 * (ie all the non-zero elements are NULL).
1815 */
1816
1817 while (file_struct->top_level && !done) {
1818 tn = file_struct->top;
1819
1820 has_data = 0;
1821 for (i = 1; i < YAFFS_NTNODES_INTERNAL; i++) {
1822 if (tn->internal[i])
1823 has_data++;
1824 }
1825
1826 if (!has_data) {
1827 file_struct->top = tn->internal[0];
1828 file_struct->top_level--;
1829 yaffs_free_tnode(dev, tn);
1830 } else {
1831 done = 1;
1832 }
1833 }
1834 }
1835
1836 return YAFFS_OK;
1837}
1838
1839/*-------------------- End of File Structure functions.-------------------*/
1840
 1841 /* AllocateEmptyObject gets us a clean Object. Tries to allocate more if we run out. */
1842static struct yaffs_obj *yaffs_alloc_empty_obj(struct yaffs_dev *dev)
1843{
1844 struct yaffs_obj *obj = yaffs_alloc_raw_obj(dev);
1845
1846 if (obj) {
1847 dev->n_obj++;
1848
1849 /* Now sweeten it up... */
1850
1851 memset(obj, 0, sizeof(struct yaffs_obj));
1852 obj->being_created = 1;
1853
1854 obj->my_dev = dev;
1855 obj->hdr_chunk = 0;
1856 obj->variant_type = YAFFS_OBJECT_TYPE_UNKNOWN;
1857 INIT_LIST_HEAD(&(obj->hard_links));
1858 INIT_LIST_HEAD(&(obj->hash_link));
1859 INIT_LIST_HEAD(&obj->siblings);
1860
1861 /* Now make the directory sane */
1862 if (dev->root_dir) {
1863 obj->parent = dev->root_dir;
1864 list_add(&(obj->siblings),
1865 &dev->root_dir->variant.dir_variant.children);
1866 }
1867
1868 /* Add it to the lost and found directory.
1869 * NB Can't put root or lost-n-found in lost-n-found so
1870 * check if lost-n-found exists first
1871 */
1872 if (dev->lost_n_found)
1873 yaffs_add_obj_to_dir(dev->lost_n_found, obj);
1874
1875 obj->being_created = 0;
1876 }
1877
1878 dev->checkpoint_blocks_required = 0; /* force recalculation */
1879
1880 return obj;
1881}
1882
1883static int yaffs_find_nice_bucket(struct yaffs_dev *dev)
1884{
1885 int i;
1886 int l = 999;
1887 int lowest = 999999;
1888
1889 /* Search for the shortest list or one that
1890 * isn't too long.
1891 */
1892
1893 for (i = 0; i < 10 && lowest > 4; i++) {
1894 dev->bucket_finder++;
1895 dev->bucket_finder %= YAFFS_NOBJECT_BUCKETS;
1896 if (dev->obj_bucket[dev->bucket_finder].count < lowest) {
1897 lowest = dev->obj_bucket[dev->bucket_finder].count;
1898 l = dev->bucket_finder;
1899 }
1900
1901 }
1902
1903 return l;
1904}
1905
1906static int yaffs_new_obj_id(struct yaffs_dev *dev)
1907{
1908 int bucket = yaffs_find_nice_bucket(dev);
1909
1910 /* Now find an object value that has not already been taken
1911 * by scanning the list.
1912 */
1913
1914 int found = 0;
1915 struct list_head *i;
1916
1917 u32 n = (u32) bucket;
1918
1919 /* yaffs_check_obj_hash_sane(); */
1920
1921 while (!found) {
1922 found = 1;
1923 n += YAFFS_NOBJECT_BUCKETS;
1924 if (1 || dev->obj_bucket[bucket].count > 0) {
1925 list_for_each(i, &dev->obj_bucket[bucket].list) {
1926 /* If there is already one in the list */
1927 if (i && list_entry(i, struct yaffs_obj,
1928 hash_link)->obj_id == n) {
1929 found = 0;
1930 }
1931 }
1932 }
1933 }
1934
1935 return n;
1936}
1937
1938static void yaffs_hash_obj(struct yaffs_obj *in)
1939{
1940 int bucket = yaffs_hash_fn(in->obj_id);
1941 struct yaffs_dev *dev = in->my_dev;
1942
1943 list_add(&in->hash_link, &dev->obj_bucket[bucket].list);
1944 dev->obj_bucket[bucket].count++;
1945}
1946
1947struct yaffs_obj *yaffs_find_by_number(struct yaffs_dev *dev, u32 number)
1948{
1949 int bucket = yaffs_hash_fn(number);
1950 struct list_head *i;
1951 struct yaffs_obj *in;
1952
1953 list_for_each(i, &dev->obj_bucket[bucket].list) {
1954 /* Look if it is in the list */
1955 if (i) {
1956 in = list_entry(i, struct yaffs_obj, hash_link);
1957 if (in->obj_id == number) {
1958
 1959 			/* Don't tell the VFS about this one if it is marked for deferred free */
1960 if (in->defered_free)
1961 return NULL;
1962
1963 return in;
1964 }
1965 }
1966 }
1967
1968 return NULL;
1969}
1970
1971struct yaffs_obj *yaffs_new_obj(struct yaffs_dev *dev, int number,
1972 enum yaffs_obj_type type)
1973{
1974 struct yaffs_obj *the_obj = NULL;
1975 struct yaffs_tnode *tn = NULL;
1976
1977 if (number < 0)
1978 number = yaffs_new_obj_id(dev);
1979
1980 if (type == YAFFS_OBJECT_TYPE_FILE) {
1981 tn = yaffs_get_tnode(dev);
1982 if (!tn)
1983 return NULL;
1984 }
1985
1986 the_obj = yaffs_alloc_empty_obj(dev);
1987 if (!the_obj) {
1988 if (tn)
1989 yaffs_free_tnode(dev, tn);
1990 return NULL;
1991 }
1992
1993 if (the_obj) {
1994 the_obj->fake = 0;
1995 the_obj->rename_allowed = 1;
1996 the_obj->unlink_allowed = 1;
1997 the_obj->obj_id = number;
1998 yaffs_hash_obj(the_obj);
1999 the_obj->variant_type = type;
2000 yaffs_load_current_time(the_obj, 1, 1);
2001
2002 switch (type) {
2003 case YAFFS_OBJECT_TYPE_FILE:
2004 the_obj->variant.file_variant.file_size = 0;
2005 the_obj->variant.file_variant.scanned_size = 0;
2006 the_obj->variant.file_variant.shrink_size = ~0; /* max */
2007 the_obj->variant.file_variant.top_level = 0;
2008 the_obj->variant.file_variant.top = tn;
2009 break;
2010 case YAFFS_OBJECT_TYPE_DIRECTORY:
2011 INIT_LIST_HEAD(&the_obj->variant.dir_variant.children);
2012 INIT_LIST_HEAD(&the_obj->variant.dir_variant.dirty);
2013 break;
2014 case YAFFS_OBJECT_TYPE_SYMLINK:
2015 case YAFFS_OBJECT_TYPE_HARDLINK:
2016 case YAFFS_OBJECT_TYPE_SPECIAL:
2017 /* No action required */
2018 break;
2019 case YAFFS_OBJECT_TYPE_UNKNOWN:
2020 /* todo this should not happen */
2021 break;
2022 }
2023 }
2024
2025 return the_obj;
2026}
2027
2028static struct yaffs_obj *yaffs_create_fake_dir(struct yaffs_dev *dev,
2029 int number, u32 mode)
2030{
2031
2032 struct yaffs_obj *obj =
2033 yaffs_new_obj(dev, number, YAFFS_OBJECT_TYPE_DIRECTORY);
2034 if (obj) {
2035 obj->fake = 1; /* it is fake so it might have no NAND presence... */
2036 obj->rename_allowed = 0; /* ... and we're not allowed to rename it... */
2037 obj->unlink_allowed = 0; /* ... or unlink it */
2038 obj->deleted = 0;
2039 obj->unlinked = 0;
2040 obj->yst_mode = mode;
2041 obj->my_dev = dev;
2042 obj->hdr_chunk = 0; /* Not a valid chunk. */
2043 }
2044
2045 return obj;
2046
2047}
2048
2049
2050static void yaffs_init_tnodes_and_objs(struct yaffs_dev *dev)
2051{
2052 int i;
2053
2054 dev->n_obj = 0;
2055 dev->n_tnodes = 0;
2056
2057 yaffs_init_raw_tnodes_and_objs(dev);
2058
2059 for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) {
2060 INIT_LIST_HEAD(&dev->obj_bucket[i].list);
2061 dev->obj_bucket[i].count = 0;
2062 }
2063}
2064
2065struct yaffs_obj *yaffs_find_or_create_by_number(struct yaffs_dev *dev,
2066 int number,
2067 enum yaffs_obj_type type)
2068{
2069 struct yaffs_obj *the_obj = NULL;
2070
2071 if (number > 0)
2072 the_obj = yaffs_find_by_number(dev, number);
2073
2074 if (!the_obj)
2075 the_obj = yaffs_new_obj(dev, number, type);
2076
2077 return the_obj;
2078
2079}
2080
2081YCHAR *yaffs_clone_str(const YCHAR * str)
2082{
2083 YCHAR *new_str = NULL;
2084 int len;
2085
2086 if (!str)
2087 str = _Y("");
2088
2089 len = strnlen(str, YAFFS_MAX_ALIAS_LENGTH);
2090 new_str = kmalloc((len + 1) * sizeof(YCHAR), GFP_NOFS);
2091 if (new_str) {
2092 strncpy(new_str, str, len);
2093 new_str[len] = 0;
2094 }
2095 return new_str;
2096
2097}
2098/*
 2099  * yaffs_update_parent() handles fixing a directory's mtime and ctime when a new
 2100  * link (ie. name) is created or deleted in the directory.
 2101  *
 2102  * ie.
 2103  * create dir/a : update dir's mtime/ctime
 2104  * rm dir/a: update dir's mtime/ctime
 2105  * modify dir/a: don't update dir's mtime/ctime
 2106  *
 2107  * This can be handled immediately or deferred. Deferring helps reduce the number
 2108  * of updates when many files in a directory are changed within a brief period.
 2109  *
 2110  * If the directory updating is deferred then yaffs_update_dirty_dirs must be
2111 * called periodically.
2112 */
2113
2114static void yaffs_update_parent(struct yaffs_obj *obj)
2115{
2116 struct yaffs_dev *dev;
2117 if (!obj)
2118 return;
2119 dev = obj->my_dev;
2120 obj->dirty = 1;
2121 yaffs_load_current_time(obj, 0, 1);
2122 if (dev->param.defered_dir_update) {
2123 struct list_head *link = &obj->variant.dir_variant.dirty;
2124
2125 if (list_empty(link)) {
2126 list_add(link, &dev->dirty_dirs);
2127 yaffs_trace(YAFFS_TRACE_BACKGROUND,
2128 "Added object %d to dirty directories",
2129 obj->obj_id);
2130 }
2131
2132 } else {
2133 yaffs_update_oh(obj, NULL, 0, 0, 0, NULL);
2134 }
2135}
2136
2137void yaffs_update_dirty_dirs(struct yaffs_dev *dev)
2138{
2139 struct list_head *link;
2140 struct yaffs_obj *obj;
2141 struct yaffs_dir_var *d_s;
2142 union yaffs_obj_var *o_v;
2143
2144 yaffs_trace(YAFFS_TRACE_BACKGROUND, "Update dirty directories");
2145
2146 while (!list_empty(&dev->dirty_dirs)) {
2147 link = dev->dirty_dirs.next;
2148 list_del_init(link);
2149
2150 d_s = list_entry(link, struct yaffs_dir_var, dirty);
2151 o_v = list_entry(d_s, union yaffs_obj_var, dir_variant);
2152 obj = list_entry(o_v, struct yaffs_obj, variant);
2153
2154 yaffs_trace(YAFFS_TRACE_BACKGROUND, "Update directory %d",
2155 obj->obj_id);
2156
2157 if (obj->dirty)
2158 yaffs_update_oh(obj, NULL, 0, 0, 0, NULL);
2159 }
2160}
2161
2162/*
2163 * Mknod (create) a new object.
2164 * equiv_obj only has meaning for a hard link;
2165 * alias_str only has meaning for a symlink.
2166 * rdev only has meaning for devices (a subset of special objects)
2167 */
2168
2169static struct yaffs_obj *yaffs_create_obj(enum yaffs_obj_type type,
2170 struct yaffs_obj *parent,
2171 const YCHAR * name,
2172 u32 mode,
2173 u32 uid,
2174 u32 gid,
2175 struct yaffs_obj *equiv_obj,
2176 const YCHAR * alias_str, u32 rdev)
2177{
2178 struct yaffs_obj *in;
2179 YCHAR *str = NULL;
2180
2181 struct yaffs_dev *dev = parent->my_dev;
2182
2183 /* Check if the entry exists. If it does then fail the call since we don't want a dup. */
2184 if (yaffs_find_by_name(parent, name))
2185 return NULL;
2186
2187 if (type == YAFFS_OBJECT_TYPE_SYMLINK) {
2188 str = yaffs_clone_str(alias_str);
2189 if (!str)
2190 return NULL;
2191 }
2192
2193 in = yaffs_new_obj(dev, -1, type);
2194
2195 if (!in) {
2196 if (str)
2197 kfree(str);
2198 return NULL;
2199 }
2200
2201 if (in) {
2202 in->hdr_chunk = 0;
2203 in->valid = 1;
2204 in->variant_type = type;
2205
2206 in->yst_mode = mode;
2207
2208 yaffs_attribs_init(in, gid, uid, rdev);
2209
2210 in->n_data_chunks = 0;
2211
2212 yaffs_set_obj_name(in, name);
2213 in->dirty = 1;
2214
2215 yaffs_add_obj_to_dir(parent, in);
2216
2217 in->my_dev = parent->my_dev;
2218
2219 switch (type) {
2220 case YAFFS_OBJECT_TYPE_SYMLINK:
2221 in->variant.symlink_variant.alias = str;
2222 break;
2223 case YAFFS_OBJECT_TYPE_HARDLINK:
2224 in->variant.hardlink_variant.equiv_obj = equiv_obj;
2225 in->variant.hardlink_variant.equiv_id =
2226 equiv_obj->obj_id;
2227 list_add(&in->hard_links, &equiv_obj->hard_links);
2228 break;
2229 case YAFFS_OBJECT_TYPE_FILE:
2230 case YAFFS_OBJECT_TYPE_DIRECTORY:
2231 case YAFFS_OBJECT_TYPE_SPECIAL:
2232 case YAFFS_OBJECT_TYPE_UNKNOWN:
2233 /* do nothing */
2234 break;
2235 }
2236
2237 if (yaffs_update_oh(in, name, 0, 0, 0, NULL) < 0) {
2238 /* Could not create the object header, fail the creation */
2239 yaffs_del_obj(in);
2240 in = NULL;
2241 }
2242
2243 yaffs_update_parent(parent);
2244 }
2245
2246 return in;
2247}
2248
2249struct yaffs_obj *yaffs_create_file(struct yaffs_obj *parent,
2250 const YCHAR * name, u32 mode, u32 uid,
2251 u32 gid)
2252{
2253 return yaffs_create_obj(YAFFS_OBJECT_TYPE_FILE, parent, name, mode,
2254 uid, gid, NULL, NULL, 0);
2255}
2256
2257struct yaffs_obj *yaffs_create_dir(struct yaffs_obj *parent, const YCHAR * name,
2258 u32 mode, u32 uid, u32 gid)
2259{
2260 return yaffs_create_obj(YAFFS_OBJECT_TYPE_DIRECTORY, parent, name,
2261 mode, uid, gid, NULL, NULL, 0);
2262}
2263
2264struct yaffs_obj *yaffs_create_special(struct yaffs_obj *parent,
2265 const YCHAR * name, u32 mode, u32 uid,
2266 u32 gid, u32 rdev)
2267{
2268 return yaffs_create_obj(YAFFS_OBJECT_TYPE_SPECIAL, parent, name, mode,
2269 uid, gid, NULL, NULL, rdev);
2270}
2271
2272struct yaffs_obj *yaffs_create_symlink(struct yaffs_obj *parent,
2273 const YCHAR * name, u32 mode, u32 uid,
2274 u32 gid, const YCHAR * alias)
2275{
2276 return yaffs_create_obj(YAFFS_OBJECT_TYPE_SYMLINK, parent, name, mode,
2277 uid, gid, NULL, alias, 0);
2278}
2279
2280/* yaffs_link_obj returns the object id of the equivalent object.*/
2281struct yaffs_obj *yaffs_link_obj(struct yaffs_obj *parent, const YCHAR * name,
2282 struct yaffs_obj *equiv_obj)
2283{
2284 /* Get the real object in case we were fed a hard link as an equivalent object */
2285 equiv_obj = yaffs_get_equivalent_obj(equiv_obj);
2286
2287 if (yaffs_create_obj
2288 (YAFFS_OBJECT_TYPE_HARDLINK, parent, name, 0, 0, 0,
2289 equiv_obj, NULL, 0)) {
2290 return equiv_obj;
2291 } else {
2292 return NULL;
2293 }
2294
2295}
2296
2297
2298
2299/*------------------------- Block Management and Page Allocation ----------------*/
2300
2301static int yaffs_init_blocks(struct yaffs_dev *dev)
2302{
2303 int n_blocks = dev->internal_end_block - dev->internal_start_block + 1;
2304
2305 dev->block_info = NULL;
2306 dev->chunk_bits = NULL;
2307
2308 dev->alloc_block = -1; /* force it to get a new one */
2309
 2310 	/* If the first allocation strategy fails, try the alternative one */
2311 dev->block_info =
2312 kmalloc(n_blocks * sizeof(struct yaffs_block_info), GFP_NOFS);
2313 if (!dev->block_info) {
2314 dev->block_info =
2315 vmalloc(n_blocks * sizeof(struct yaffs_block_info));
2316 dev->block_info_alt = 1;
2317 } else {
2318 dev->block_info_alt = 0;
2319 }
2320
2321 if (dev->block_info) {
2322 /* Set up dynamic blockinfo stuff. Round up bytes. */
2323 dev->chunk_bit_stride = (dev->param.chunks_per_block + 7) / 8;
2324 dev->chunk_bits =
2325 kmalloc(dev->chunk_bit_stride * n_blocks, GFP_NOFS);
2326 if (!dev->chunk_bits) {
2327 dev->chunk_bits =
2328 vmalloc(dev->chunk_bit_stride * n_blocks);
2329 dev->chunk_bits_alt = 1;
2330 } else {
2331 dev->chunk_bits_alt = 0;
2332 }
2333 }
2334
2335 if (dev->block_info && dev->chunk_bits) {
2336 memset(dev->block_info, 0,
2337 n_blocks * sizeof(struct yaffs_block_info));
2338 memset(dev->chunk_bits, 0, dev->chunk_bit_stride * n_blocks);
2339 return YAFFS_OK;
2340 }
2341
2342 return YAFFS_FAIL;
2343}
2344
2345static void yaffs_deinit_blocks(struct yaffs_dev *dev)
2346{
2347 if (dev->block_info_alt && dev->block_info)
2348 vfree(dev->block_info);
2349 else if (dev->block_info)
2350 kfree(dev->block_info);
2351
2352 dev->block_info_alt = 0;
2353
2354 dev->block_info = NULL;
2355
2356 if (dev->chunk_bits_alt && dev->chunk_bits)
2357 vfree(dev->chunk_bits);
2358 else if (dev->chunk_bits)
2359 kfree(dev->chunk_bits);
2360 dev->chunk_bits_alt = 0;
2361 dev->chunk_bits = NULL;
2362}
2363
2364void yaffs_block_became_dirty(struct yaffs_dev *dev, int block_no)
2365{
2366 struct yaffs_block_info *bi = yaffs_get_block_info(dev, block_no);
2367
2368 int erased_ok = 0;
2369
2370 /* If the block is still healthy erase it and mark as clean.
2371 * If the block has had a data failure, then retire it.
2372 */
2373
2374 yaffs_trace(YAFFS_TRACE_GC | YAFFS_TRACE_ERASE,
2375 "yaffs_block_became_dirty block %d state %d %s",
2376 block_no, bi->block_state,
2377 (bi->needs_retiring) ? "needs retiring" : "");
2378
2379 yaffs2_clear_oldest_dirty_seq(dev, bi);
2380
2381 bi->block_state = YAFFS_BLOCK_STATE_DIRTY;
2382
2383 /* If this is the block being garbage collected then stop gc'ing this block */
2384 if (block_no == dev->gc_block)
2385 dev->gc_block = 0;
2386
2387 /* If this block is currently the best candidate for gc then drop as a candidate */
2388 if (block_no == dev->gc_dirtiest) {
2389 dev->gc_dirtiest = 0;
2390 dev->gc_pages_in_use = 0;
2391 }
2392
2393 if (!bi->needs_retiring) {
2394 yaffs2_checkpt_invalidate(dev);
2395 erased_ok = yaffs_erase_block(dev, block_no);
2396 if (!erased_ok) {
2397 dev->n_erase_failures++;
2398 yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
2399 "**>> Erasure failed %d", block_no);
2400 }
2401 }
2402
2403 if (erased_ok &&
2404 ((yaffs_trace_mask & YAFFS_TRACE_ERASE)
2405 || !yaffs_skip_verification(dev))) {
2406 int i;
2407 for (i = 0; i < dev->param.chunks_per_block; i++) {
2408 if (!yaffs_check_chunk_erased
2409 (dev, block_no * dev->param.chunks_per_block + i)) {
2410 yaffs_trace(YAFFS_TRACE_ERROR,
2411 ">>Block %d erasure supposedly OK, but chunk %d not erased",
2412 block_no, i);
2413 }
2414 }
2415 }
2416
2417 if (erased_ok) {
2418 /* Clean it up... */
2419 bi->block_state = YAFFS_BLOCK_STATE_EMPTY;
2420 bi->seq_number = 0;
2421 dev->n_erased_blocks++;
2422 bi->pages_in_use = 0;
2423 bi->soft_del_pages = 0;
2424 bi->has_shrink_hdr = 0;
2425 bi->skip_erased_check = 1; /* Clean, so no need to check */
2426 bi->gc_prioritise = 0;
2427 yaffs_clear_chunk_bits(dev, block_no);
2428
2429 yaffs_trace(YAFFS_TRACE_ERASE,
2430 "Erased block %d", block_no);
2431 } else {
2432 /* We lost a block of free space */
2433 dev->n_free_chunks -= dev->param.chunks_per_block;
2434 yaffs_retire_block(dev, block_no);
2435 yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
2436 "**>> Block %d retired", block_no);
2437 }
2438}
2439
2440
2441
2442static int yaffs_gc_block(struct yaffs_dev *dev, int block, int whole_block)
2443{
2444 int old_chunk;
2445 int new_chunk;
2446 int mark_flash;
2447 int ret_val = YAFFS_OK;
2448 int i;
2449 int is_checkpt_block;
2450 int matching_chunk;
2451 int max_copies;
2452
2453 int chunks_before = yaffs_get_erased_chunks(dev);
2454 int chunks_after;
2455
2456 struct yaffs_ext_tags tags;
2457
2458 struct yaffs_block_info *bi = yaffs_get_block_info(dev, block);
2459
2460 struct yaffs_obj *object;
2461
2462 is_checkpt_block = (bi->block_state == YAFFS_BLOCK_STATE_CHECKPOINT);
2463
2464 yaffs_trace(YAFFS_TRACE_TRACING,
2465 "Collecting block %d, in use %d, shrink %d, whole_block %d",
2466 block, bi->pages_in_use, bi->has_shrink_hdr,
2467 whole_block);
2468
2469 /*yaffs_verify_free_chunks(dev); */
2470
2471 if (bi->block_state == YAFFS_BLOCK_STATE_FULL)
2472 bi->block_state = YAFFS_BLOCK_STATE_COLLECTING;
2473
2474 bi->has_shrink_hdr = 0; /* clear the flag so that the block can erase */
2475
2476 dev->gc_disable = 1;
2477
2478 if (is_checkpt_block || !yaffs_still_some_chunks(dev, block)) {
2479 yaffs_trace(YAFFS_TRACE_TRACING,
2480 "Collecting block %d that has no chunks in use",
2481 block);
2482 yaffs_block_became_dirty(dev, block);
2483 } else {
2484
2485 u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
2486
2487 yaffs_verify_blk(dev, bi, block);
2488
2489 max_copies = (whole_block) ? dev->param.chunks_per_block : 5;
2490 old_chunk = block * dev->param.chunks_per_block + dev->gc_chunk;
2491
2492 for ( /* init already done */ ;
2493 ret_val == YAFFS_OK &&
2494 dev->gc_chunk < dev->param.chunks_per_block &&
2495 (bi->block_state == YAFFS_BLOCK_STATE_COLLECTING) &&
2496 max_copies > 0; dev->gc_chunk++, old_chunk++) {
2497 if (yaffs_check_chunk_bit(dev, block, dev->gc_chunk)) {
2498
2499 /* This page is in use and might need to be copied off */
2500
2501 max_copies--;
2502
2503 mark_flash = 1;
2504
2505 yaffs_init_tags(&tags);
2506
2507 yaffs_rd_chunk_tags_nand(dev, old_chunk,
2508 buffer, &tags);
2509
2510 object = yaffs_find_by_number(dev, tags.obj_id);
2511
2512 yaffs_trace(YAFFS_TRACE_GC_DETAIL,
2513 "Collecting chunk in block %d, %d %d %d ",
2514 dev->gc_chunk, tags.obj_id,
2515 tags.chunk_id, tags.n_bytes);
2516
2517 if (object && !yaffs_skip_verification(dev)) {
2518 if (tags.chunk_id == 0)
2519 matching_chunk =
2520 object->hdr_chunk;
2521 else if (object->soft_del)
2522 matching_chunk = old_chunk; /* Defeat the test */
2523 else
2524 matching_chunk =
2525 yaffs_find_chunk_in_file
2526 (object, tags.chunk_id,
2527 NULL);
2528
2529 if (old_chunk != matching_chunk)
2530 yaffs_trace(YAFFS_TRACE_ERROR,
2531 "gc: page in gc mismatch: %d %d %d %d",
2532 old_chunk,
2533 matching_chunk,
2534 tags.obj_id,
2535 tags.chunk_id);
2536
2537 }
2538
2539 if (!object) {
2540 yaffs_trace(YAFFS_TRACE_ERROR,
2541 "page %d in gc has no object: %d %d %d ",
2542 old_chunk,
2543 tags.obj_id, tags.chunk_id,
2544 tags.n_bytes);
2545 }
2546
2547 if (object &&
2548 object->deleted &&
2549 object->soft_del && tags.chunk_id != 0) {
 2550 					/* Data chunk in a soft-deleted file: throw it away.
 2551 					 * It's a soft-deleted data chunk, so there is
 2552 					 * no need to copy this; just forget about it and
 2553 					 * fix up the object.
 2554 					 */
 2555 
 2556 					/* The free-chunk count already includes soft-deleted chunks.
 2557 					 * However, this chunk is soon going to be really deleted,
 2558 					 * which will increment the free-chunk count.
 2559 					 * We have to decrement free chunks here so this works out properly.
 2560 					 */
2561 dev->n_free_chunks--;
2562 bi->soft_del_pages--;
2563
2564 object->n_data_chunks--;
2565
2566 if (object->n_data_chunks <= 0) {
 2567 						/* remember to clean up the object */
2568 dev->gc_cleanup_list[dev->
2569 n_clean_ups]
2570 = tags.obj_id;
2571 dev->n_clean_ups++;
2572 }
2573 mark_flash = 0;
2574 } else if (0) {
2575 /* Todo object && object->deleted && object->n_data_chunks == 0 */
2576 /* Deleted object header with no data chunks.
2577 * Can be discarded and the file deleted.
2578 */
2579 object->hdr_chunk = 0;
2580 yaffs_free_tnode(object->my_dev,
2581 object->
2582 variant.file_variant.
2583 top);
2584 object->variant.file_variant.top = NULL;
2585 yaffs_generic_obj_del(object);
2586
2587 } else if (object) {
2588 /* It's either a data chunk in a live file or
2589 * an ObjectHeader, so we're interested in it.
2590 * NB Need to keep the ObjectHeaders of deleted files
2591 * until the whole file has been deleted off
2592 */
2593 tags.serial_number++;
2594
2595 dev->n_gc_copies++;
2596
2597 if (tags.chunk_id == 0) {
 2598 						/* It is an object header.
 2599 						 * We need to nuke the shrink-header flags first.
 2600 						 * We also need to clean up shadowing.
 2601 						 * We no longer want the shrink_header flag since its work is done,
 2602 						 * and if it is left in place it will mess up scanning.
 2603 						 */
2604
2605 struct yaffs_obj_hdr *oh;
2606 oh = (struct yaffs_obj_hdr *)
2607 buffer;
2608
2609 oh->is_shrink = 0;
2610 tags.extra_is_shrink = 0;
2611
2612 oh->shadows_obj = 0;
2613 oh->inband_shadowed_obj_id = 0;
2614 tags.extra_shadows = 0;
2615
2616 /* Update file size */
2617 if (object->variant_type ==
2618 YAFFS_OBJECT_TYPE_FILE) {
2619 oh->file_size =
2620 object->variant.
2621 file_variant.
2622 file_size;
2623 tags.extra_length =
2624 oh->file_size;
2625 }
2626
2627 yaffs_verify_oh(object, oh,
2628 &tags, 1);
2629 new_chunk =
2630 yaffs_write_new_chunk(dev,
2631 (u8 *)
2632 oh,
2633 &tags,
2634 1);
2635 } else {
2636 new_chunk =
2637 yaffs_write_new_chunk(dev,
2638 buffer,
2639 &tags,
2640 1);
2641 }
2642
2643 if (new_chunk < 0) {
2644 ret_val = YAFFS_FAIL;
2645 } else {
2646
2647 /* Ok, now fix up the Tnodes etc. */
2648
2649 if (tags.chunk_id == 0) {
2650 /* It's a header */
2651 object->hdr_chunk =
2652 new_chunk;
2653 object->serial =
2654 tags.serial_number;
2655 } else {
2656 /* It's a data chunk */
2657 int ok;
2658 ok = yaffs_put_chunk_in_file(object, tags.chunk_id, new_chunk, 0);
2659 }
2660 }
2661 }
2662
2663 if (ret_val == YAFFS_OK)
2664 yaffs_chunk_del(dev, old_chunk,
2665 mark_flash, __LINE__);
2666
2667 }
2668 }
2669
2670 yaffs_release_temp_buffer(dev, buffer, __LINE__);
2671
2672 }
2673
2674 yaffs_verify_collected_blk(dev, bi, block);
2675
2676 if (bi->block_state == YAFFS_BLOCK_STATE_COLLECTING) {
2677 /*
2678 * The gc did not complete. Set block state back to FULL
2679 * because checkpointing does not restore gc.
2680 */
2681 bi->block_state = YAFFS_BLOCK_STATE_FULL;
2682 } else {
2683 /* The gc completed. */
2684 /* Do any required cleanups */
2685 for (i = 0; i < dev->n_clean_ups; i++) {
2686 /* Time to delete the file too */
2687 object =
2688 yaffs_find_by_number(dev, dev->gc_cleanup_list[i]);
2689 if (object) {
2690 yaffs_free_tnode(dev,
2691 object->variant.
2692 file_variant.top);
2693 object->variant.file_variant.top = NULL;
2694 yaffs_trace(YAFFS_TRACE_GC,
2695 "yaffs: About to finally delete object %d",
2696 object->obj_id);
2697 yaffs_generic_obj_del(object);
2698 object->my_dev->n_deleted_files--;
2699 }
2700
2701 }
2702
2703 chunks_after = yaffs_get_erased_chunks(dev);
2704 if (chunks_before >= chunks_after)
2705 yaffs_trace(YAFFS_TRACE_GC,
2706 "gc did not increase free chunks before %d after %d",
2707 chunks_before, chunks_after);
2708 dev->gc_block = 0;
2709 dev->gc_chunk = 0;
2710 dev->n_clean_ups = 0;
2711 }
2712
2713 dev->gc_disable = 0;
2714
2715 return ret_val;
2716}
2717
2718/*
 2719  * FindBlockForGarbageCollection is used to select the dirtiest block (or close enough)
2720 * for garbage collection.
2721 */
2722
2723static unsigned yaffs_find_gc_block(struct yaffs_dev *dev,
2724 int aggressive, int background)
2725{
2726 int i;
2727 int iterations;
2728 unsigned selected = 0;
2729 int prioritised = 0;
2730 int prioritised_exist = 0;
2731 struct yaffs_block_info *bi;
2732 int threshold;
2733
2734 /* First let's see if we need to grab a prioritised block */
2735 if (dev->has_pending_prioritised_gc && !aggressive) {
2736 dev->gc_dirtiest = 0;
2737 bi = dev->block_info;
2738 for (i = dev->internal_start_block;
2739 i <= dev->internal_end_block && !selected; i++) {
2740
2741 if (bi->gc_prioritise) {
2742 prioritised_exist = 1;
2743 if (bi->block_state == YAFFS_BLOCK_STATE_FULL &&
2744 yaffs_block_ok_for_gc(dev, bi)) {
2745 selected = i;
2746 prioritised = 1;
2747 }
2748 }
2749 bi++;
2750 }
2751
2752 /*
2753 * If there is a prioritised block and none was selected then
2754 * this happened because there is at least one old dirty block gumming
2755 * up the works. Let's gc the oldest dirty block.
2756 */
2757
2758 if (prioritised_exist &&
2759 !selected && dev->oldest_dirty_block > 0)
2760 selected = dev->oldest_dirty_block;
2761
2762 if (!prioritised_exist) /* None found, so we can clear this */
2763 dev->has_pending_prioritised_gc = 0;
2764 }
2765
2766 /* If we're doing aggressive GC then we are happy to take a less-dirty block, and
2767 * search harder.
 2768  * Otherwise (we're doing a leisurely gc), we only bother to do this if the
2769 * block has only a few pages in use.
2770 */
2771
2772 if (!selected) {
2773 int pages_used;
2774 int n_blocks =
2775 dev->internal_end_block - dev->internal_start_block + 1;
2776 if (aggressive) {
2777 threshold = dev->param.chunks_per_block;
2778 iterations = n_blocks;
2779 } else {
2780 int max_threshold;
2781
2782 if (background)
2783 max_threshold = dev->param.chunks_per_block / 2;
2784 else
2785 max_threshold = dev->param.chunks_per_block / 8;
2786
2787 if (max_threshold < YAFFS_GC_PASSIVE_THRESHOLD)
2788 max_threshold = YAFFS_GC_PASSIVE_THRESHOLD;
2789
2790 threshold = background ? (dev->gc_not_done + 2) * 2 : 0;
2791 if (threshold < YAFFS_GC_PASSIVE_THRESHOLD)
2792 threshold = YAFFS_GC_PASSIVE_THRESHOLD;
2793 if (threshold > max_threshold)
2794 threshold = max_threshold;
2795
2796 iterations = n_blocks / 16 + 1;
2797 if (iterations > 100)
2798 iterations = 100;
2799 }
2800
2801 for (i = 0;
2802 i < iterations &&
2803 (dev->gc_dirtiest < 1 ||
2804 dev->gc_pages_in_use > YAFFS_GC_GOOD_ENOUGH); i++) {
2805 dev->gc_block_finder++;
2806 if (dev->gc_block_finder < dev->internal_start_block ||
2807 dev->gc_block_finder > dev->internal_end_block)
2808 dev->gc_block_finder =
2809 dev->internal_start_block;
2810
2811 bi = yaffs_get_block_info(dev, dev->gc_block_finder);
2812
2813 pages_used = bi->pages_in_use - bi->soft_del_pages;
2814
2815 if (bi->block_state == YAFFS_BLOCK_STATE_FULL &&
2816 pages_used < dev->param.chunks_per_block &&
2817 (dev->gc_dirtiest < 1
2818 || pages_used < dev->gc_pages_in_use)
2819 && yaffs_block_ok_for_gc(dev, bi)) {
2820 dev->gc_dirtiest = dev->gc_block_finder;
2821 dev->gc_pages_in_use = pages_used;
2822 }
2823 }
2824
2825 if (dev->gc_dirtiest > 0 && dev->gc_pages_in_use <= threshold)
2826 selected = dev->gc_dirtiest;
2827 }
2828
2829 /*
2830 * If nothing has been selected for a while, try selecting the oldest dirty
2831 * because that's gumming up the works.
2832 */
2833
2834 if (!selected && dev->param.is_yaffs2 &&
2835 dev->gc_not_done >= (background ? 10 : 20)) {
2836 yaffs2_find_oldest_dirty_seq(dev);
2837 if (dev->oldest_dirty_block > 0) {
2838 selected = dev->oldest_dirty_block;
2839 dev->gc_dirtiest = selected;
2840 dev->oldest_dirty_gc_count++;
2841 bi = yaffs_get_block_info(dev, selected);
2842 dev->gc_pages_in_use =
2843 bi->pages_in_use - bi->soft_del_pages;
2844 } else {
2845 dev->gc_not_done = 0;
2846 }
2847 }
2848
2849 if (selected) {
2850 yaffs_trace(YAFFS_TRACE_GC,
2851 "GC Selected block %d with %d free, prioritised:%d",
2852 selected,
2853 dev->param.chunks_per_block - dev->gc_pages_in_use,
2854 prioritised);
2855
2856 dev->n_gc_blocks++;
2857 if (background)
2858 dev->bg_gcs++;
2859
2860 dev->gc_dirtiest = 0;
2861 dev->gc_pages_in_use = 0;
2862 dev->gc_not_done = 0;
2863 if (dev->refresh_skip > 0)
2864 dev->refresh_skip--;
2865 } else {
2866 dev->gc_not_done++;
2867 yaffs_trace(YAFFS_TRACE_GC,
2868 "GC none: finder %d skip %d threshold %d dirtiest %d using %d oldest %d%s",
2869 dev->gc_block_finder, dev->gc_not_done, threshold,
2870 dev->gc_dirtiest, dev->gc_pages_in_use,
2871 dev->oldest_dirty_block, background ? " bg" : "");
2872 }
2873
2874 return selected;
2875}
2876
2877/* New garbage collector
2878 * If we're very low on erased blocks then we do aggressive garbage collection
 2879  * otherwise we do "leisurely" garbage collection.
2880 * Aggressive gc looks further (whole array) and will accept less dirty blocks.
2881 * Passive gc only inspects smaller areas and will only accept more dirty blocks.
2882 *
2883 * The idea is to help clear out space in a more spread-out manner.
2884 * Dunno if it really does anything useful.
2885 */
2886static int yaffs_check_gc(struct yaffs_dev *dev, int background)
2887{
2888 int aggressive = 0;
2889 int gc_ok = YAFFS_OK;
2890 int max_tries = 0;
2891 int min_erased;
2892 int erased_chunks;
2893 int checkpt_block_adjust;
2894
2895 if (dev->param.gc_control && (dev->param.gc_control(dev) & 1) == 0)
2896 return YAFFS_OK;
2897
2898 if (dev->gc_disable) {
2899 /* Bail out so we don't get recursive gc */
2900 return YAFFS_OK;
2901 }
2902
2903 /* This loop should pass the first time.
2904 * We'll only see looping here if the collection does not increase space.
2905 */
2906
2907 do {
2908 max_tries++;
2909
2910 checkpt_block_adjust = yaffs_calc_checkpt_blocks_required(dev);
2911
2912 min_erased =
2913 dev->param.n_reserved_blocks + checkpt_block_adjust + 1;
2914 erased_chunks =
2915 dev->n_erased_blocks * dev->param.chunks_per_block;
2916
2917 /* If we need a block soon then do aggressive gc. */
2918 if (dev->n_erased_blocks < min_erased)
2919 aggressive = 1;
2920 else {
2921 if (!background
2922 && erased_chunks > (dev->n_free_chunks / 4))
2923 break;
2924
2925 if (dev->gc_skip > 20)
2926 dev->gc_skip = 20;
2927 if (erased_chunks < dev->n_free_chunks / 2 ||
2928 dev->gc_skip < 1 || background)
2929 aggressive = 0;
2930 else {
2931 dev->gc_skip--;
2932 break;
2933 }
2934 }
2935
2936 dev->gc_skip = 5;
2937
2938 /* If we don't already have a block being gc'd then see if we should start another */
2939
2940 if (dev->gc_block < 1 && !aggressive) {
2941 dev->gc_block = yaffs2_find_refresh_block(dev);
2942 dev->gc_chunk = 0;
2943 dev->n_clean_ups = 0;
2944 }
2945 if (dev->gc_block < 1) {
2946 dev->gc_block =
2947 yaffs_find_gc_block(dev, aggressive, background);
2948 dev->gc_chunk = 0;
2949 dev->n_clean_ups = 0;
2950 }
2951
2952 if (dev->gc_block > 0) {
2953 dev->all_gcs++;
2954 if (!aggressive)
2955 dev->passive_gc_count++;
2956
2957 yaffs_trace(YAFFS_TRACE_GC,
2958 "yaffs: GC n_erased_blocks %d aggressive %d",
2959 dev->n_erased_blocks, aggressive);
2960
2961 gc_ok = yaffs_gc_block(dev, dev->gc_block, aggressive);
2962 }
2963
2964 if (dev->n_erased_blocks < (dev->param.n_reserved_blocks)
2965 && dev->gc_block > 0) {
2966 yaffs_trace(YAFFS_TRACE_GC,
2967 "yaffs: GC !!!no reclaim!!! n_erased_blocks %d after try %d block %d",
2968 dev->n_erased_blocks, max_tries,
2969 dev->gc_block);
2970 }
2971 } while ((dev->n_erased_blocks < dev->param.n_reserved_blocks) &&
2972 (dev->gc_block > 0) && (max_tries < 2));
2973
2974 return aggressive ? gc_ok : YAFFS_OK;
2975}
2976
2977/*
2978 * yaffs_bg_gc()
2979 * Garbage collects. Intended to be called from a background thread.
2980 * Returns non-zero if at least half the free chunks are erased.
2981 */
2982int yaffs_bg_gc(struct yaffs_dev *dev, unsigned urgency)
2983{
2984 int erased_chunks = dev->n_erased_blocks * dev->param.chunks_per_block;
2985
2986 yaffs_trace(YAFFS_TRACE_BACKGROUND, "Background gc %u", urgency);
2987
2988 yaffs_check_gc(dev, 1);
2989 return erased_chunks > dev->n_free_chunks / 2;
2990}
2991
2992/*-------------------- Data file manipulation -----------------*/
2993
2994static int yaffs_rd_data_obj(struct yaffs_obj *in, int inode_chunk, u8 * buffer)
2995{
2996 int nand_chunk = yaffs_find_chunk_in_file(in, inode_chunk, NULL);
2997
2998 if (nand_chunk >= 0)
2999 return yaffs_rd_chunk_tags_nand(in->my_dev, nand_chunk,
3000 buffer, NULL);
3001 else {
3002 yaffs_trace(YAFFS_TRACE_NANDACCESS,
3003 "Chunk %d not found zero instead",
3004 nand_chunk);
3005 /* get sane (zero) data if you read a hole */
3006 memset(buffer, 0, in->my_dev->data_bytes_per_chunk);
3007 return 0;
3008 }
3009
3010}
3011
3012void yaffs_chunk_del(struct yaffs_dev *dev, int chunk_id, int mark_flash,
3013 int lyn)
3014{
3015 int block;
3016 int page;
3017 struct yaffs_ext_tags tags;
3018 struct yaffs_block_info *bi;
3019
3020 if (chunk_id <= 0)
3021 return;
3022
3023 dev->n_deletions++;
3024 block = chunk_id / dev->param.chunks_per_block;
3025 page = chunk_id % dev->param.chunks_per_block;
3026
3027 if (!yaffs_check_chunk_bit(dev, block, page))
3028 yaffs_trace(YAFFS_TRACE_VERIFY,
3029 "Deleting invalid chunk %d", chunk_id);
3030
3031 bi = yaffs_get_block_info(dev, block);
3032
3033 yaffs2_update_oldest_dirty_seq(dev, block, bi);
3034
3035 yaffs_trace(YAFFS_TRACE_DELETION,
3036 "line %d delete of chunk %d",
3037 lyn, chunk_id);
3038
3039 if (!dev->param.is_yaffs2 && mark_flash &&
3040 bi->block_state != YAFFS_BLOCK_STATE_COLLECTING) {
3041
3042 yaffs_init_tags(&tags);
3043
3044 tags.is_deleted = 1;
3045
3046 yaffs_wr_chunk_tags_nand(dev, chunk_id, NULL, &tags);
3047 yaffs_handle_chunk_update(dev, chunk_id, &tags);
3048 } else {
3049 dev->n_unmarked_deletions++;
3050 }
3051
3052 /* Pull out of the management area.
3053 * If the whole block became dirty, this will kick off an erasure.
3054 */
3055 if (bi->block_state == YAFFS_BLOCK_STATE_ALLOCATING ||
3056 bi->block_state == YAFFS_BLOCK_STATE_FULL ||
3057 bi->block_state == YAFFS_BLOCK_STATE_NEEDS_SCANNING ||
3058 bi->block_state == YAFFS_BLOCK_STATE_COLLECTING) {
3059 dev->n_free_chunks++;
3060
3061 yaffs_clear_chunk_bit(dev, block, page);
3062
3063 bi->pages_in_use--;
3064
3065 if (bi->pages_in_use == 0 &&
3066 !bi->has_shrink_hdr &&
3067 bi->block_state != YAFFS_BLOCK_STATE_ALLOCATING &&
3068 bi->block_state != YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
3069 yaffs_block_became_dirty(dev, block);
3070 }
3071
3072 }
3073
3074}
3075
3076static int yaffs_wr_data_obj(struct yaffs_obj *in, int inode_chunk,
3077 const u8 * buffer, int n_bytes, int use_reserve)
3078{
 3079 	/* Find the old chunk. We need to do this to get the serial number.
 3080 	 * Write the new one and patch it into the tree.
 3081 	 * Invalidate the old tags.
3082 */
3083
3084 int prev_chunk_id;
3085 struct yaffs_ext_tags prev_tags;
3086
3087 int new_chunk_id;
3088 struct yaffs_ext_tags new_tags;
3089
3090 struct yaffs_dev *dev = in->my_dev;
3091
3092 yaffs_check_gc(dev, 0);
3093
3094 /* Get the previous chunk at this location in the file if it exists.
3095 * If it does not exist then put a zero into the tree. This creates
3096 * the tnode now, rather than later when it is harder to clean up.
3097 */
3098 prev_chunk_id = yaffs_find_chunk_in_file(in, inode_chunk, &prev_tags);
3099 if (prev_chunk_id < 1 &&
3100 !yaffs_put_chunk_in_file(in, inode_chunk, 0, 0))
3101 return 0;
3102
3103 /* Set up new tags */
3104 yaffs_init_tags(&new_tags);
3105
3106 new_tags.chunk_id = inode_chunk;
3107 new_tags.obj_id = in->obj_id;
3108 new_tags.serial_number =
3109 (prev_chunk_id > 0) ? prev_tags.serial_number + 1 : 1;
3110 new_tags.n_bytes = n_bytes;
3111
3112 if (n_bytes < 1 || n_bytes > dev->param.total_bytes_per_chunk) {
3113 yaffs_trace(YAFFS_TRACE_ERROR,
3114 "Writing %d bytes to chunk!!!!!!!!!",
3115 n_bytes);
3116 YBUG();
3117 }
3118
3119 new_chunk_id =
3120 yaffs_write_new_chunk(dev, buffer, &new_tags, use_reserve);
3121
3122 if (new_chunk_id > 0) {
3123 yaffs_put_chunk_in_file(in, inode_chunk, new_chunk_id, 0);
3124
3125 if (prev_chunk_id > 0)
3126 yaffs_chunk_del(dev, prev_chunk_id, 1, __LINE__);
3127
3128 yaffs_verify_file_sane(in);
3129 }
3130 return new_chunk_id;
3131
3132}
3133
3134
3135
3136static int yaffs_do_xattrib_mod(struct yaffs_obj *obj, int set,
3137 const YCHAR * name, const void *value, int size,
3138 int flags)
3139{
3140 struct yaffs_xattr_mod xmod;
3141
3142 int result;
3143
3144 xmod.set = set;
3145 xmod.name = name;
3146 xmod.data = value;
3147 xmod.size = size;
3148 xmod.flags = flags;
3149 xmod.result = -ENOSPC;
3150
3151 result = yaffs_update_oh(obj, NULL, 0, 0, 0, &xmod);
3152
3153 if (result > 0)
3154 return xmod.result;
3155 else
3156 return -ENOSPC;
3157}
3158
3159static int yaffs_apply_xattrib_mod(struct yaffs_obj *obj, char *buffer,
3160 struct yaffs_xattr_mod *xmod)
3161{
3162 int retval = 0;
3163 int x_offs = sizeof(struct yaffs_obj_hdr);
3164 struct yaffs_dev *dev = obj->my_dev;
3165 int x_size = dev->data_bytes_per_chunk - sizeof(struct yaffs_obj_hdr);
3166
3167 char *x_buffer = buffer + x_offs;
3168
3169 if (xmod->set)
3170 retval =
3171 nval_set(x_buffer, x_size, xmod->name, xmod->data,
3172 xmod->size, xmod->flags);
3173 else
3174 retval = nval_del(x_buffer, x_size, xmod->name);
3175
3176 obj->has_xattr = nval_hasvalues(x_buffer, x_size);
3177 obj->xattr_known = 1;
3178
3179 xmod->result = retval;
3180
3181 return retval;
3182}
3183
3184static int yaffs_do_xattrib_fetch(struct yaffs_obj *obj, const YCHAR * name,
3185 void *value, int size)
3186{
3187 char *buffer = NULL;
3188 int result;
3189 struct yaffs_ext_tags tags;
3190 struct yaffs_dev *dev = obj->my_dev;
3191 int x_offs = sizeof(struct yaffs_obj_hdr);
3192 int x_size = dev->data_bytes_per_chunk - sizeof(struct yaffs_obj_hdr);
3193
3194 char *x_buffer;
3195
3196 int retval = 0;
3197
3198 if (obj->hdr_chunk < 1)
3199 return -ENODATA;
3200
3201 /* If we know that the object has no xattribs then don't do all the
3202 * reading and parsing.
3203 */
3204 if (obj->xattr_known && !obj->has_xattr) {
3205 if (name)
3206 return -ENODATA;
3207 else
3208 return 0;
3209 }
3210
3211 buffer = (char *)yaffs_get_temp_buffer(dev, __LINE__);
3212 if (!buffer)
3213 return -ENOMEM;
3214
3215 result =
3216 yaffs_rd_chunk_tags_nand(dev, obj->hdr_chunk, (u8 *) buffer, &tags);
3217
3218 if (result != YAFFS_OK)
3219 retval = -ENOENT;
3220 else {
3221 x_buffer = buffer + x_offs;
3222
3223 if (!obj->xattr_known) {
3224 obj->has_xattr = nval_hasvalues(x_buffer, x_size);
3225 obj->xattr_known = 1;
3226 }
3227
3228 if (name)
3229 retval = nval_get(x_buffer, x_size, name, value, size);
3230 else
3231 retval = nval_list(x_buffer, x_size, value, size);
3232 }
3233 yaffs_release_temp_buffer(dev, (u8 *) buffer, __LINE__);
3234 return retval;
3235}
3236
3237int yaffs_set_xattrib(struct yaffs_obj *obj, const YCHAR * name,
3238 const void *value, int size, int flags)
3239{
3240 return yaffs_do_xattrib_mod(obj, 1, name, value, size, flags);
3241}
3242
3243int yaffs_remove_xattrib(struct yaffs_obj *obj, const YCHAR * name)
3244{
3245 return yaffs_do_xattrib_mod(obj, 0, name, NULL, 0, 0);
3246}
3247
3248int yaffs_get_xattrib(struct yaffs_obj *obj, const YCHAR * name, void *value,
3249 int size)
3250{
3251 return yaffs_do_xattrib_fetch(obj, name, value, size);
3252}
3253
3254int yaffs_list_xattrib(struct yaffs_obj *obj, char *buffer, int size)
3255{
3256 return yaffs_do_xattrib_fetch(obj, NULL, buffer, size);
3257}
3258
3259static void yaffs_check_obj_details_loaded(struct yaffs_obj *in)
3260{
3261 u8 *chunk_data;
3262 struct yaffs_obj_hdr *oh;
3263 struct yaffs_dev *dev;
3264 struct yaffs_ext_tags tags;
3265 int result;
3266 int alloc_failed = 0;
3267
3268 if (!in)
3269 return;
3270
3271 dev = in->my_dev;
3272
3273 if (in->lazy_loaded && in->hdr_chunk > 0) {
3274 in->lazy_loaded = 0;
3275 chunk_data = yaffs_get_temp_buffer(dev, __LINE__);
3276
3277 result =
3278 yaffs_rd_chunk_tags_nand(dev, in->hdr_chunk, chunk_data,
3279 &tags);
3280 oh = (struct yaffs_obj_hdr *)chunk_data;
3281
3282 in->yst_mode = oh->yst_mode;
3283 yaffs_load_attribs(in, oh);
3284 yaffs_set_obj_name_from_oh(in, oh);
3285
3286 if (in->variant_type == YAFFS_OBJECT_TYPE_SYMLINK) {
3287 in->variant.symlink_variant.alias =
3288 yaffs_clone_str(oh->alias);
3289 if (!in->variant.symlink_variant.alias)
3290 alloc_failed = 1; /* Not returned to caller */
3291 }
3292
3293 yaffs_release_temp_buffer(dev, chunk_data, __LINE__);
3294 }
3295}
3296
3297static void yaffs_load_name_from_oh(struct yaffs_dev *dev, YCHAR * name,
3298 const YCHAR * oh_name, int buff_size)
3299{
3300#ifdef CONFIG_YAFFS_AUTO_UNICODE
3301 if (dev->param.auto_unicode) {
3302 if (*oh_name) {
3303 /* It is an ASCII name, do an ASCII to
3304 * unicode conversion */
3305 const char *ascii_oh_name = (const char *)oh_name;
3306 int n = buff_size - 1;
3307 while (n > 0 && *ascii_oh_name) {
3308 *name = *ascii_oh_name;
3309 name++;
3310 ascii_oh_name++;
3311 n--;
3312 }
3313 } else {
3314 strncpy(name, oh_name + 1, buff_size - 1);
3315 }
3316 } else {
3317#else
3318 {
3319#endif
3320 strncpy(name, oh_name, buff_size - 1);
3321 }
3322}
3323
3324static void yaffs_load_oh_from_name(struct yaffs_dev *dev, YCHAR * oh_name,
3325 const YCHAR * name)
3326{
3327#ifdef CONFIG_YAFFS_AUTO_UNICODE
3328
3329 int is_ascii;
3330 YCHAR *w;
3331
3332 if (dev->param.auto_unicode) {
3333
3334 is_ascii = 1;
3335 w = name;
3336
3337 /* Figure out if the name will fit in ascii character set */
3338 while (is_ascii && *w) {
3339 if ((*w) & 0xff00)
3340 is_ascii = 0;
3341 w++;
3342 }
3343
3344 if (is_ascii) {
3345 /* It is an ASCII name, so do a unicode to ascii conversion */
3346 char *ascii_oh_name = (char *)oh_name;
3347 int n = YAFFS_MAX_NAME_LENGTH - 1;
3348 while (n > 0 && *name) {
3349 *ascii_oh_name = *name;
3350 name++;
3351 ascii_oh_name++;
3352 n--;
3353 }
3354 } else {
3355 /* It is a unicode name, so save starting at the second YCHAR */
3356 *oh_name = 0;
3357 strncpy(oh_name + 1, name,
3358 YAFFS_MAX_NAME_LENGTH - 2);
3359 }
3360 } else {
3361#else
3362 {
3363#endif
3364 strncpy(oh_name, name, YAFFS_MAX_NAME_LENGTH - 1);
3365 }
3366
3367}
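/* Encoding example for the auto-unicode scheme above (illustrative):
 * a name such as "cafe" fits entirely below U+0100 and is stored as
 * plain ASCII bytes from oh_name[0]. A name containing a character
 * above U+00FF (one where (*w) & 0xff00 is non-zero) is stored as
 * Unicode with oh_name[0] set to 0 and the YCHAR payload starting at
 * oh_name[1]; that leading 0 is what yaffs_load_name_from_oh tests to
 * choose the decoding path.
 */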
3368
3369/* UpdateObjectHeader updates the header on NAND for an object.
3370 * If name is not NULL, then that new name is used.
3371 */
3372int yaffs_update_oh(struct yaffs_obj *in, const YCHAR * name, int force,
3373 int is_shrink, int shadows, struct yaffs_xattr_mod *xmod)
3374{
3375
3376 struct yaffs_block_info *bi;
3377
3378 struct yaffs_dev *dev = in->my_dev;
3379
3380 int prev_chunk_id;
3381 int ret_val = 0;
3382 int result = 0;
3383
3384 int new_chunk_id;
3385 struct yaffs_ext_tags new_tags;
3386 struct yaffs_ext_tags old_tags;
3387 const YCHAR *alias = NULL;
3388
3389 u8 *buffer = NULL;
3390 YCHAR old_name[YAFFS_MAX_NAME_LENGTH + 1];
3391
3392 struct yaffs_obj_hdr *oh = NULL;
3393
3394 strcpy(old_name, _Y("silly old name"));
3395
3396 if (!in->fake || in == dev->root_dir ||
3397 force || xmod) {
3398
3399 yaffs_check_gc(dev, 0);
3400 yaffs_check_obj_details_loaded(in);
3401
3402 buffer = yaffs_get_temp_buffer(in->my_dev, __LINE__);
3403 oh = (struct yaffs_obj_hdr *)buffer;
3404
3405 prev_chunk_id = in->hdr_chunk;
3406
3407 if (prev_chunk_id > 0) {
3408 result = yaffs_rd_chunk_tags_nand(dev, prev_chunk_id,
3409 buffer, &old_tags);
3410
3411 yaffs_verify_oh(in, oh, &old_tags, 0);
3412
3413 memcpy(old_name, oh->name, sizeof(oh->name));
3414 memset(buffer, 0xFF, sizeof(struct yaffs_obj_hdr));
3415 } else {
3416 memset(buffer, 0xFF, dev->data_bytes_per_chunk);
3417 }
3418
3419 oh->type = in->variant_type;
3420 oh->yst_mode = in->yst_mode;
3421 oh->shadows_obj = oh->inband_shadowed_obj_id = shadows;
3422
3423 yaffs_load_attribs_oh(oh, in);
3424
3425 if (in->parent)
3426 oh->parent_obj_id = in->parent->obj_id;
3427 else
3428 oh->parent_obj_id = 0;
3429
3430 if (name && *name) {
3431 memset(oh->name, 0, sizeof(oh->name));
3432 yaffs_load_oh_from_name(dev, oh->name, name);
3433 } else if (prev_chunk_id > 0) {
3434 memcpy(oh->name, old_name, sizeof(oh->name));
3435 } else {
3436 memset(oh->name, 0, sizeof(oh->name));
3437 }
3438
3439 oh->is_shrink = is_shrink;
3440
3441 switch (in->variant_type) {
3442 case YAFFS_OBJECT_TYPE_UNKNOWN:
3443 /* Should not happen */
3444 break;
3445 case YAFFS_OBJECT_TYPE_FILE:
3446			oh->file_size =
3447			    (oh->parent_obj_id == YAFFS_OBJECTID_DELETED ||
3448			     oh->parent_obj_id == YAFFS_OBJECTID_UNLINKED) ?
3449			    0 :
3450			    in->variant.file_variant.file_size;
3451 break;
3452 case YAFFS_OBJECT_TYPE_HARDLINK:
3453 oh->equiv_id = in->variant.hardlink_variant.equiv_id;
3454 break;
3455 case YAFFS_OBJECT_TYPE_SPECIAL:
3456 /* Do nothing */
3457 break;
3458 case YAFFS_OBJECT_TYPE_DIRECTORY:
3459 /* Do nothing */
3460 break;
3461 case YAFFS_OBJECT_TYPE_SYMLINK:
3462 alias = in->variant.symlink_variant.alias;
3463 if (!alias)
3464 alias = _Y("no alias");
3465 strncpy(oh->alias, alias, YAFFS_MAX_ALIAS_LENGTH);
3466 oh->alias[YAFFS_MAX_ALIAS_LENGTH] = 0;
3467 break;
3468 }
3469
3470 /* process any xattrib modifications */
3471 if (xmod)
3472 yaffs_apply_xattrib_mod(in, (char *)buffer, xmod);
3473
3474 /* Tags */
3475 yaffs_init_tags(&new_tags);
3476 in->serial++;
3477 new_tags.chunk_id = 0;
3478 new_tags.obj_id = in->obj_id;
3479 new_tags.serial_number = in->serial;
3480
3481 /* Add extra info for file header */
3482
3483 new_tags.extra_available = 1;
3484 new_tags.extra_parent_id = oh->parent_obj_id;
3485 new_tags.extra_length = oh->file_size;
3486 new_tags.extra_is_shrink = oh->is_shrink;
3487 new_tags.extra_equiv_id = oh->equiv_id;
3488 new_tags.extra_shadows = (oh->shadows_obj > 0) ? 1 : 0;
3489 new_tags.extra_obj_type = in->variant_type;
3490
3491 yaffs_verify_oh(in, oh, &new_tags, 1);
3492
3493 /* Create new chunk in NAND */
3494 new_chunk_id =
3495 yaffs_write_new_chunk(dev, buffer, &new_tags,
3496 (prev_chunk_id > 0) ? 1 : 0);
3497
3498 if (new_chunk_id >= 0) {
3499
3500 in->hdr_chunk = new_chunk_id;
3501
3502 if (prev_chunk_id > 0) {
3503 yaffs_chunk_del(dev, prev_chunk_id, 1,
3504 __LINE__);
3505 }
3506
3507 if (!yaffs_obj_cache_dirty(in))
3508 in->dirty = 0;
3509
3510 /* If this was a shrink, then mark the block that the chunk lives on */
3511 if (is_shrink) {
3512 bi = yaffs_get_block_info(in->my_dev,
3513 new_chunk_id /
3514 in->my_dev->param.
3515 chunks_per_block);
3516 bi->has_shrink_hdr = 1;
3517 }
3518
3519 }
3520
3521 ret_val = new_chunk_id;
3522
3523 }
3524
3525 if (buffer)
3526 yaffs_release_temp_buffer(dev, buffer, __LINE__);
3527
3528 return ret_val;
3529}
3530
3531/*--------------------- File read/write ------------------------
3532 * Read and write have very similar structures.
3533 * In general the read/write has three parts to it
3534 * An incomplete chunk to start with (if the read/write is not chunk-aligned)
3535 * Some complete chunks
3536 * An incomplete chunk to end off with
3537 *
3538 * Curve-balls: the first chunk might also be the last chunk.
3539 */
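/* Worked example (illustrative numbers): with 2048-byte data chunks,
 * a read of 5000 bytes at offset 1000 splits into
 *	bytes 1000..2047  - partial head chunk (1048 bytes)
 *	bytes 2048..4095  - one whole chunk    (2048 bytes)
 *	bytes 4096..5999  - partial tail chunk (1904 bytes)
 * If the request fits inside a single chunk, the head chunk is also
 * the tail chunk (the curve-ball noted above).
 */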
3540
3541int yaffs_file_rd(struct yaffs_obj *in, u8 * buffer, loff_t offset, int n_bytes)
3542{
3543
3544 int chunk;
3545 u32 start;
3546 int n_copy;
3547 int n = n_bytes;
3548 int n_done = 0;
3549 struct yaffs_cache *cache;
3550
3551 struct yaffs_dev *dev;
3552
3553 dev = in->my_dev;
3554
3555 while (n > 0) {
3556 /* chunk = offset / dev->data_bytes_per_chunk + 1; */
3557 /* start = offset % dev->data_bytes_per_chunk; */
3558 yaffs_addr_to_chunk(dev, offset, &chunk, &start);
3559 chunk++;
3560
3561 /* OK now check for the curveball where the start and end are in
3562 * the same chunk.
3563 */
3564 if ((start + n) < dev->data_bytes_per_chunk)
3565 n_copy = n;
3566 else
3567 n_copy = dev->data_bytes_per_chunk - start;
3568
3569 cache = yaffs_find_chunk_cache(in, chunk);
3570
3571		/* If the chunk is already in the cache, or it is less than a whole
3572		 * chunk, or we're using inband tags, then use the cache (if there is
3573		 * caching), else bypass the cache.
3574 */
3575 if (cache || n_copy != dev->data_bytes_per_chunk
3576 || dev->param.inband_tags) {
3577 if (dev->param.n_caches > 0) {
3578
3579 /* If we can't find the data in the cache, then load it up. */
3580
3581 if (!cache) {
3582 cache =
3583 yaffs_grab_chunk_cache(in->my_dev);
3584 cache->object = in;
3585 cache->chunk_id = chunk;
3586 cache->dirty = 0;
3587 cache->locked = 0;
3588 yaffs_rd_data_obj(in, chunk,
3589 cache->data);
3590 cache->n_bytes = 0;
3591 }
3592
3593 yaffs_use_cache(dev, cache, 0);
3594
3595 cache->locked = 1;
3596
3597 memcpy(buffer, &cache->data[start], n_copy);
3598
3599 cache->locked = 0;
3600 } else {
3601 /* Read into the local buffer then copy.. */
3602
3603 u8 *local_buffer =
3604 yaffs_get_temp_buffer(dev, __LINE__);
3605 yaffs_rd_data_obj(in, chunk, local_buffer);
3606
3607 memcpy(buffer, &local_buffer[start], n_copy);
3608
3609 yaffs_release_temp_buffer(dev, local_buffer,
3610 __LINE__);
3611 }
3612
3613 } else {
3614
3615 /* A full chunk. Read directly into the supplied buffer. */
3616 yaffs_rd_data_obj(in, chunk, buffer);
3617
3618 }
3619
3620 n -= n_copy;
3621 offset += n_copy;
3622 buffer += n_copy;
3623 n_done += n_copy;
3624
3625 }
3626
3627 return n_done;
3628}
3629
3630int yaffs_do_file_wr(struct yaffs_obj *in, const u8 * buffer, loff_t offset,
3631		     int n_bytes, int write_through)
3632{
3633
3634 int chunk;
3635 u32 start;
3636 int n_copy;
3637 int n = n_bytes;
3638 int n_done = 0;
3639 int n_writeback;
3640 int start_write = offset;
3641 int chunk_written = 0;
3642 u32 n_bytes_read;
3643 u32 chunk_start;
3644
3645 struct yaffs_dev *dev;
3646
3647 dev = in->my_dev;
3648
3649 while (n > 0 && chunk_written >= 0) {
3650 yaffs_addr_to_chunk(dev, offset, &chunk, &start);
3651
3652 if (chunk * dev->data_bytes_per_chunk + start != offset ||
3653 start >= dev->data_bytes_per_chunk) {
3654 yaffs_trace(YAFFS_TRACE_ERROR,
3655 "AddrToChunk of offset %d gives chunk %d start %d",
3656 (int)offset, chunk, start);
3657 }
3658 chunk++; /* File pos to chunk in file offset */
3659
3660 /* OK now check for the curveball where the start and end are in
3661 * the same chunk.
3662 */
3663
3664 if ((start + n) < dev->data_bytes_per_chunk) {
3665 n_copy = n;
3666
3667 /* Now folks, to calculate how many bytes to write back....
3668			 * If we're overwriting and not writing to the end of the file then
3669 * we need to write back as much as was there before.
3670 */
3671
3672 chunk_start = ((chunk - 1) * dev->data_bytes_per_chunk);
3673
3674 if (chunk_start > in->variant.file_variant.file_size)
3675 n_bytes_read = 0; /* Past end of file */
3676 else
3677 n_bytes_read =
3678 in->variant.file_variant.file_size -
3679 chunk_start;
3680
3681 if (n_bytes_read > dev->data_bytes_per_chunk)
3682 n_bytes_read = dev->data_bytes_per_chunk;
3683
3684 n_writeback =
3685 (n_bytes_read >
3686 (start + n)) ? n_bytes_read : (start + n);
3687
3688 if (n_writeback < 0
3689 || n_writeback > dev->data_bytes_per_chunk)
3690 YBUG();
3691
3692 } else {
3693 n_copy = dev->data_bytes_per_chunk - start;
3694 n_writeback = dev->data_bytes_per_chunk;
3695 }
3696
3697 if (n_copy != dev->data_bytes_per_chunk
3698 || dev->param.inband_tags) {
3699			/* An incomplete start or end chunk (or maybe both start and end chunks),
3700 * or we're using inband tags, so we want to use the cache buffers.
3701 */
3702 if (dev->param.n_caches > 0) {
3703 struct yaffs_cache *cache;
3704 /* If we can't find the data in the cache, then load the cache */
3705 cache = yaffs_find_chunk_cache(in, chunk);
3706
3707 if (!cache
3708 && yaffs_check_alloc_available(dev, 1)) {
3709 cache = yaffs_grab_chunk_cache(dev);
3710 cache->object = in;
3711 cache->chunk_id = chunk;
3712 cache->dirty = 0;
3713 cache->locked = 0;
3714 yaffs_rd_data_obj(in, chunk,
3715 cache->data);
3716 } else if (cache &&
3717 !cache->dirty &&
3718 !yaffs_check_alloc_available(dev,
3719 1)) {
3720 /* Drop the cache if it was a read cache item and
3721 * no space check has been made for it.
3722 */
3723 cache = NULL;
3724 }
3725
3726 if (cache) {
3727 yaffs_use_cache(dev, cache, 1);
3728 cache->locked = 1;
3729
3730 memcpy(&cache->data[start], buffer,
3731 n_copy);
3732
3733 cache->locked = 0;
3734 cache->n_bytes = n_writeback;
3735
3736					if (write_through) {
3737 chunk_written =
3738 yaffs_wr_data_obj
3739 (cache->object,
3740 cache->chunk_id,
3741 cache->data,
3742 cache->n_bytes, 1);
3743 cache->dirty = 0;
3744 }
3745
3746 } else {
3747 chunk_written = -1; /* fail the write */
3748 }
3749 } else {
3750			/* An incomplete start or end chunk (or maybe both start and end chunks).
3751			 * Read into the local buffer, copy in the new data, then write back.
3752 */
3753
3754 u8 *local_buffer =
3755 yaffs_get_temp_buffer(dev, __LINE__);
3756
3757 yaffs_rd_data_obj(in, chunk, local_buffer);
3758
3759 memcpy(&local_buffer[start], buffer, n_copy);
3760
3761 chunk_written =
3762 yaffs_wr_data_obj(in, chunk,
3763 local_buffer,
3764 n_writeback, 0);
3765
3766 yaffs_release_temp_buffer(dev, local_buffer,
3767 __LINE__);
3768
3769 }
3770
3771 } else {
3772 /* A full chunk. Write directly from the supplied buffer. */
3773
3774 chunk_written =
3775 yaffs_wr_data_obj(in, chunk, buffer,
3776 dev->data_bytes_per_chunk, 0);
3777
3778 /* Since we've overwritten the cached data, we better invalidate it. */
3779 yaffs_invalidate_chunk_cache(in, chunk);
3780 }
3781
3782 if (chunk_written >= 0) {
3783 n -= n_copy;
3784 offset += n_copy;
3785 buffer += n_copy;
3786 n_done += n_copy;
3787 }
3788
3789 }
3790
3791 /* Update file object */
3792
3793 if ((start_write + n_done) > in->variant.file_variant.file_size)
3794 in->variant.file_variant.file_size = (start_write + n_done);
3795
3796 in->dirty = 1;
3797
3798 return n_done;
3799}
3800
3801int yaffs_wr_file(struct yaffs_obj *in, const u8 * buffer, loff_t offset,
3802		  int n_bytes, int write_through)
3803{
3804 yaffs2_handle_hole(in, offset);
3805	return yaffs_do_file_wr(in, buffer, offset, n_bytes, write_through);
3806}
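/* Write-through sketch: when write_through is set, a cached
 * partial-chunk write is pushed to NAND immediately and the cache
 * entry is left clean (cache->dirty == 0); when it is clear, the data
 * stays in the chunk cache until yaffs_flush_file_cache() or cache
 * pressure writes it back.
 */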
3807
3808/* ---------------------- File resizing stuff ------------------ */
3809
3810static void yaffs_prune_chunks(struct yaffs_obj *in, int new_size)
3811{
3812
3813 struct yaffs_dev *dev = in->my_dev;
3814 int old_size = in->variant.file_variant.file_size;
3815
3816 int last_del = 1 + (old_size - 1) / dev->data_bytes_per_chunk;
3817
3818 int start_del = 1 + (new_size + dev->data_bytes_per_chunk - 1) /
3819 dev->data_bytes_per_chunk;
3820 int i;
3821 int chunk_id;
3822
3823 /* Delete backwards so that we don't end up with holes if
3824 * power is lost part-way through the operation.
3825 */
3826 for (i = last_del; i >= start_del; i--) {
3827 /* NB this could be optimised somewhat,
3828 * eg. could retrieve the tags and write them without
3829 * using yaffs_chunk_del
3830 */
3831
3832 chunk_id = yaffs_find_del_file_chunk(in, i, NULL);
3833 if (chunk_id > 0) {
3834 if (chunk_id <
3835 (dev->internal_start_block *
3836 dev->param.chunks_per_block)
3837 || chunk_id >=
3838 ((dev->internal_end_block +
3839 1) * dev->param.chunks_per_block)) {
3840 yaffs_trace(YAFFS_TRACE_ALWAYS,
3841 "Found daft chunk_id %d for %d",
3842 chunk_id, i);
3843 } else {
3844 in->n_data_chunks--;
3845 yaffs_chunk_del(dev, chunk_id, 1, __LINE__);
3846 }
3847 }
3848 }
3849
3850}
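/* Worked example (illustrative numbers): with 2048-byte chunks,
 * shrinking a 10000-byte file (data chunks 1..5) to 4500 bytes gives
 *	last_del  = 1 + (10000 - 1) / 2048        = 5
 *	start_del = 1 + (4500 + 2048 - 1) / 2048  = 4
 * so chunks 5 and 4 are deleted (backwards), leaving chunks 1..3;
 * chunk 3 is then rewritten zero-padded by yaffs_resize_file_down.
 */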
3851
3852void yaffs_resize_file_down(struct yaffs_obj *obj, loff_t new_size)
3853{
3854 int new_full;
3855 u32 new_partial;
3856 struct yaffs_dev *dev = obj->my_dev;
3857
3858 yaffs_addr_to_chunk(dev, new_size, &new_full, &new_partial);
3859
3860 yaffs_prune_chunks(obj, new_size);
3861
3862 if (new_partial != 0) {
3863 int last_chunk = 1 + new_full;
3864 u8 *local_buffer = yaffs_get_temp_buffer(dev, __LINE__);
3865
3866 /* Rewrite the last chunk with its new size and zero pad */
3867 yaffs_rd_data_obj(obj, last_chunk, local_buffer);
3868 memset(local_buffer + new_partial, 0,
3869 dev->data_bytes_per_chunk - new_partial);
3870
3871 yaffs_wr_data_obj(obj, last_chunk, local_buffer,
3872 new_partial, 1);
3873
3874 yaffs_release_temp_buffer(dev, local_buffer, __LINE__);
3875 }
3876
3877 obj->variant.file_variant.file_size = new_size;
3878
3879 yaffs_prune_tree(dev, &obj->variant.file_variant);
3880}
3881
3882int yaffs_resize_file(struct yaffs_obj *in, loff_t new_size)
3883{
3884 struct yaffs_dev *dev = in->my_dev;
3885 int old_size = in->variant.file_variant.file_size;
3886
3887 yaffs_flush_file_cache(in);
3888 yaffs_invalidate_whole_cache(in);
3889
3890 yaffs_check_gc(dev, 0);
3891
3892 if (in->variant_type != YAFFS_OBJECT_TYPE_FILE)
3893 return YAFFS_FAIL;
3894
3895 if (new_size == old_size)
3896 return YAFFS_OK;
3897
3898 if (new_size > old_size) {
3899 yaffs2_handle_hole(in, new_size);
3900 in->variant.file_variant.file_size = new_size;
3901 } else {
3902 /* new_size < old_size */
3903 yaffs_resize_file_down(in, new_size);
3904 }
3905
3906 /* Write a new object header to reflect the resize.
3907 * show we've shrunk the file, if need be
3908 * Do this only if the file is not in the deleted directories
3909 * and is not shadowed.
3910 */
3911 if (in->parent &&
3912 !in->is_shadowed &&
3913 in->parent->obj_id != YAFFS_OBJECTID_UNLINKED &&
3914 in->parent->obj_id != YAFFS_OBJECTID_DELETED)
3915 yaffs_update_oh(in, NULL, 0, 0, 0, NULL);
3916
3917 return YAFFS_OK;
3918}
3919
3920int yaffs_flush_file(struct yaffs_obj *in, int update_time, int data_sync)
3921{
3922 int ret_val;
3923 if (in->dirty) {
3924 yaffs_flush_file_cache(in);
3925 if (data_sync) /* Only sync data */
3926 ret_val = YAFFS_OK;
3927 else {
3928 if (update_time)
3929 yaffs_load_current_time(in, 0, 0);
3930
3931 ret_val = (yaffs_update_oh(in, NULL, 0, 0, 0, NULL) >=
3932 0) ? YAFFS_OK : YAFFS_FAIL;
3933 }
3934 } else {
3935 ret_val = YAFFS_OK;
3936 }
3937
3938 return ret_val;
3939
3940}
3941
3942
3943/* yaffs_del_file deletes the whole file data
3944 * and the inode associated with the file.
3945 * It does not delete the links associated with the file.
3946 */
3947static int yaffs_unlink_file_if_needed(struct yaffs_obj *in)
3948{
3949
3950 int ret_val;
3951 int del_now = 0;
3952 struct yaffs_dev *dev = in->my_dev;
3953
3954 if (!in->my_inode)
3955 del_now = 1;
3956
3957 if (del_now) {
3958 ret_val =
3959 yaffs_change_obj_name(in, in->my_dev->del_dir,
3960 _Y("deleted"), 0, 0);
3961 yaffs_trace(YAFFS_TRACE_TRACING,
3962 "yaffs: immediate deletion of file %d",
3963 in->obj_id);
3964 in->deleted = 1;
3965 in->my_dev->n_deleted_files++;
3966 if (dev->param.disable_soft_del || dev->param.is_yaffs2)
3967 yaffs_resize_file(in, 0);
3968 yaffs_soft_del_file(in);
3969 } else {
3970 ret_val =
3971 yaffs_change_obj_name(in, in->my_dev->unlinked_dir,
3972 _Y("unlinked"), 0, 0);
3973 }
3974
3975 return ret_val;
3976}
3977
3978int yaffs_del_file(struct yaffs_obj *in)
3979{
3980 int ret_val = YAFFS_OK;
3981 int deleted; /* Need to cache value on stack if in is freed */
3982 struct yaffs_dev *dev = in->my_dev;
3983
3984 if (dev->param.disable_soft_del || dev->param.is_yaffs2)
3985 yaffs_resize_file(in, 0);
3986
3987 if (in->n_data_chunks > 0) {
3988 /* Use soft deletion if there is data in the file.
3989 * That won't be the case if it has been resized to zero.
3990 */
3991 if (!in->unlinked)
3992 ret_val = yaffs_unlink_file_if_needed(in);
3993
3994 deleted = in->deleted;
3995
3996 if (ret_val == YAFFS_OK && in->unlinked && !in->deleted) {
3997 in->deleted = 1;
3998 deleted = 1;
3999 in->my_dev->n_deleted_files++;
4000 yaffs_soft_del_file(in);
4001 }
4002 return deleted ? YAFFS_OK : YAFFS_FAIL;
4003 } else {
4004 /* The file has no data chunks so we toss it immediately */
4005 yaffs_free_tnode(in->my_dev, in->variant.file_variant.top);
4006 in->variant.file_variant.top = NULL;
4007 yaffs_generic_obj_del(in);
4008
4009 return YAFFS_OK;
4010 }
4011}
4012
4013int yaffs_is_non_empty_dir(struct yaffs_obj *obj)
4014{
4015 return (obj &&
4016 obj->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY) &&
4017 !(list_empty(&obj->variant.dir_variant.children));
4018}
4019
4020static int yaffs_del_dir(struct yaffs_obj *obj)
4021{
4022 /* First check that the directory is empty. */
4023 if (yaffs_is_non_empty_dir(obj))
4024 return YAFFS_FAIL;
4025
4026 return yaffs_generic_obj_del(obj);
4027}
4028
4029static int yaffs_del_symlink(struct yaffs_obj *in)
4030{
4031 if (in->variant.symlink_variant.alias)
4032 kfree(in->variant.symlink_variant.alias);
4033 in->variant.symlink_variant.alias = NULL;
4034
4035 return yaffs_generic_obj_del(in);
4036}
4037
4038static int yaffs_del_link(struct yaffs_obj *in)
4039{
4040	/* Remove this hardlink from the list associated with the equivalent
4041 * object
4042 */
4043 list_del_init(&in->hard_links);
4044 return yaffs_generic_obj_del(in);
4045}
4046
4047int yaffs_del_obj(struct yaffs_obj *obj)
4048{
4049 int ret_val = -1;
4050 switch (obj->variant_type) {
4051 case YAFFS_OBJECT_TYPE_FILE:
4052 ret_val = yaffs_del_file(obj);
4053 break;
4054 case YAFFS_OBJECT_TYPE_DIRECTORY:
4055 if (!list_empty(&obj->variant.dir_variant.dirty)) {
4056 yaffs_trace(YAFFS_TRACE_BACKGROUND,
4057 "Remove object %d from dirty directories",
4058 obj->obj_id);
4059 list_del_init(&obj->variant.dir_variant.dirty);
4060 }
4061 return yaffs_del_dir(obj);
4062 break;
4063 case YAFFS_OBJECT_TYPE_SYMLINK:
4064 ret_val = yaffs_del_symlink(obj);
4065 break;
4066 case YAFFS_OBJECT_TYPE_HARDLINK:
4067 ret_val = yaffs_del_link(obj);
4068 break;
4069 case YAFFS_OBJECT_TYPE_SPECIAL:
4070 ret_val = yaffs_generic_obj_del(obj);
4071 break;
4072 case YAFFS_OBJECT_TYPE_UNKNOWN:
4073 ret_val = 0;
4074 break; /* should not happen. */
4075 }
4076
4077 return ret_val;
4078}
4079
4080static int yaffs_unlink_worker(struct yaffs_obj *obj)
4081{
4082
4083 int del_now = 0;
4084
4085 if (!obj->my_inode)
4086 del_now = 1;
4087
4088	/* obj is non-NULL here: the caller checked it and it was dereferenced above */
4089	yaffs_update_parent(obj->parent);
4090
4091 if (obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK) {
4092 return yaffs_del_link(obj);
4093 } else if (!list_empty(&obj->hard_links)) {
4094 /* Curve ball: We're unlinking an object that has a hardlink.
4095 *
4096 * This problem arises because we are not strictly following
4097 * The Linux link/inode model.
4098 *
4099 * We can't really delete the object.
4100 * Instead, we do the following:
4101 * - Select a hardlink.
4102 * - Unhook it from the hard links
4103 * - Move it from its parent directory (so that the rename can work)
4104 * - Rename the object to the hardlink's name.
4105 * - Delete the hardlink
4106 */
4107
4108 struct yaffs_obj *hl;
4109 struct yaffs_obj *parent;
4110 int ret_val;
4111 YCHAR name[YAFFS_MAX_NAME_LENGTH + 1];
4112
4113 hl = list_entry(obj->hard_links.next, struct yaffs_obj,
4114 hard_links);
4115
4116 yaffs_get_obj_name(hl, name, YAFFS_MAX_NAME_LENGTH + 1);
4117 parent = hl->parent;
4118
4119 list_del_init(&hl->hard_links);
4120
4121 yaffs_add_obj_to_dir(obj->my_dev->unlinked_dir, hl);
4122
4123 ret_val = yaffs_change_obj_name(obj, parent, name, 0, 0);
4124
4125 if (ret_val == YAFFS_OK)
4126 ret_val = yaffs_generic_obj_del(hl);
4127
4128 return ret_val;
4129
4130 } else if (del_now) {
4131 switch (obj->variant_type) {
4132 case YAFFS_OBJECT_TYPE_FILE:
4133 return yaffs_del_file(obj);
4134 break;
4135 case YAFFS_OBJECT_TYPE_DIRECTORY:
4136 list_del_init(&obj->variant.dir_variant.dirty);
4137 return yaffs_del_dir(obj);
4138 break;
4139 case YAFFS_OBJECT_TYPE_SYMLINK:
4140 return yaffs_del_symlink(obj);
4141 break;
4142 case YAFFS_OBJECT_TYPE_SPECIAL:
4143 return yaffs_generic_obj_del(obj);
4144 break;
4145 case YAFFS_OBJECT_TYPE_HARDLINK:
4146 case YAFFS_OBJECT_TYPE_UNKNOWN:
4147 default:
4148 return YAFFS_FAIL;
4149 }
4150 } else if (yaffs_is_non_empty_dir(obj)) {
4151 return YAFFS_FAIL;
4152 } else {
4153 return yaffs_change_obj_name(obj, obj->my_dev->unlinked_dir,
4154 _Y("unlinked"), 0, 0);
4155 }
4156}
4157
4158static int yaffs_unlink_obj(struct yaffs_obj *obj)
4159{
4160
4161 if (obj && obj->unlink_allowed)
4162 return yaffs_unlink_worker(obj);
4163
4164 return YAFFS_FAIL;
4165
4166}
4167
4168int yaffs_unlinker(struct yaffs_obj *dir, const YCHAR * name)
4169{
4170 struct yaffs_obj *obj;
4171
4172 obj = yaffs_find_by_name(dir, name);
4173 return yaffs_unlink_obj(obj);
4174}
4175
4176/* Note:
4177 * If old_name is NULL then we take old_dir as the object to be renamed.
4178 */
4179int yaffs_rename_obj(struct yaffs_obj *old_dir, const YCHAR * old_name,
4180 struct yaffs_obj *new_dir, const YCHAR * new_name)
4181{
4182 struct yaffs_obj *obj = NULL;
4183 struct yaffs_obj *existing_target = NULL;
4184 int force = 0;
4185 int result;
4186 struct yaffs_dev *dev;
4187
4188 if (!old_dir || old_dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY)
4189 YBUG();
4190 if (!new_dir || new_dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY)
4191 YBUG();
4192
4193 dev = old_dir->my_dev;
4194
4195#ifdef CONFIG_YAFFS_CASE_INSENSITIVE
4196	/* Special case for case insensitive systems.
4197	 * While look-up is case insensitive, the name isn't.
4198	 * Therefore we might want to change x.txt to X.txt.
4199 */
4200 if (old_dir == new_dir &&
4201 old_name && new_name &&
4202 strcmp(old_name, new_name) == 0)
4203 force = 1;
4204#endif
4205
4206 if (strnlen(new_name, YAFFS_MAX_NAME_LENGTH + 1) >
4207 YAFFS_MAX_NAME_LENGTH)
4208 /* ENAMETOOLONG */
4209 return YAFFS_FAIL;
4210
4211	if (old_name)
4212		obj = yaffs_find_by_name(old_dir, old_name);
4213	else {
4214		obj = old_dir;
4215		old_dir = obj->parent;
4216	}
4217
4218
4219 if (obj && obj->rename_allowed) {
4220
4221 /* Now do the handling for an existing target, if there is one */
4222
4223 existing_target = yaffs_find_by_name(new_dir, new_name);
4224		if (yaffs_is_non_empty_dir(existing_target)) {
4225 return YAFFS_FAIL; /* ENOTEMPTY */
4226 } else if (existing_target && existing_target != obj) {
4227 /* Nuke the target first, using shadowing,
4228 * but only if it isn't the same object.
4229 *
4230 * Note we must disable gc otherwise it can mess up the shadowing.
4231 *
4232 */
4233 dev->gc_disable = 1;
4234 yaffs_change_obj_name(obj, new_dir, new_name, force,
4235 existing_target->obj_id);
4236 existing_target->is_shadowed = 1;
4237 yaffs_unlink_obj(existing_target);
4238 dev->gc_disable = 0;
4239 }
4240
4241 result = yaffs_change_obj_name(obj, new_dir, new_name, 1, 0);
4242
4243 yaffs_update_parent(old_dir);
4244 if (new_dir != old_dir)
4245 yaffs_update_parent(new_dir);
4246
4247 return result;
4248 }
4249 return YAFFS_FAIL;
4250}
4251
4252/*----------------------- Initialisation Scanning ---------------------- */
4253
4254void yaffs_handle_shadowed_obj(struct yaffs_dev *dev, int obj_id,
4255 int backward_scanning)
4256{
4257 struct yaffs_obj *obj;
4258
4259 if (!backward_scanning) {
4260 /* Handle YAFFS1 forward scanning case
4261 * For YAFFS1 we always do the deletion
4262 */
4263
4264 } else {
4265 /* Handle YAFFS2 case (backward scanning)
4266 * If the shadowed object exists then ignore.
4267 */
4268 obj = yaffs_find_by_number(dev, obj_id);
4269 if (obj)
4270 return;
4271 }
4272
4273	/* Let's create it (if it does not exist), assuming it is a file so that it can do shrinking etc.
4274	 * We put it in the unlinked dir to be cleaned up after the scanning.
4275 */
4276 obj =
4277 yaffs_find_or_create_by_number(dev, obj_id, YAFFS_OBJECT_TYPE_FILE);
4278 if (!obj)
4279 return;
4280 obj->is_shadowed = 1;
4281 yaffs_add_obj_to_dir(dev->unlinked_dir, obj);
4282 obj->variant.file_variant.shrink_size = 0;
4283 obj->valid = 1; /* So that we don't read any other info for this file */
4284
4285}
4286
4287void yaffs_link_fixup(struct yaffs_dev *dev, struct yaffs_obj *hard_list)
4288{
4289 struct yaffs_obj *hl;
4290 struct yaffs_obj *in;
4291
4292 while (hard_list) {
4293 hl = hard_list;
4294 hard_list = (struct yaffs_obj *)(hard_list->hard_links.next);
4295
4296 in = yaffs_find_by_number(dev,
4297 hl->variant.
4298 hardlink_variant.equiv_id);
4299
4300 if (in) {
4301 /* Add the hardlink pointers */
4302 hl->variant.hardlink_variant.equiv_obj = in;
4303 list_add(&hl->hard_links, &in->hard_links);
4304 } else {
4305			/* TODO: need to report/handle this better.
4306			 * Got a problem... hardlink to a non-existent object.
4307 */
4308 hl->variant.hardlink_variant.equiv_obj = NULL;
4309 INIT_LIST_HEAD(&hl->hard_links);
4310
4311 }
4312 }
4313}
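/* Note: during scanning, hard_links.next is used as a plain
 * singly-linked "pending" pointer rather than a proper list_head,
 * which is why the cast above is valid only until this fix-up has
 * spliced each hardlink onto its equivalent object's list.
 */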
4314
4315static void yaffs_strip_deleted_objs(struct yaffs_dev *dev)
4316{
4317 /*
4318 * Sort out state of unlinked and deleted objects after scanning.
4319 */
4320 struct list_head *i;
4321 struct list_head *n;
4322 struct yaffs_obj *l;
4323
4324 if (dev->read_only)
4325 return;
4326
4327 /* Soft delete all the unlinked files */
4328 list_for_each_safe(i, n,
4329 &dev->unlinked_dir->variant.dir_variant.children) {
4330 if (i) {
4331 l = list_entry(i, struct yaffs_obj, siblings);
4332 yaffs_del_obj(l);
4333 }
4334 }
4335
4336 list_for_each_safe(i, n, &dev->del_dir->variant.dir_variant.children) {
4337 if (i) {
4338 l = list_entry(i, struct yaffs_obj, siblings);
4339 yaffs_del_obj(l);
4340 }
4341 }
4342
4343}
4344
4345/*
4346 * This code iterates through all the objects making sure that they are rooted.
4347 * Any unrooted objects are re-rooted in lost+found.
4348 * An object needs to be in one of:
4349 * - Directly under deleted, unlinked
4350 * - Directly or indirectly under root.
4351 *
4352 * Note:
4353 * This code assumes that we don't ever change the current relationships between
4354 * directories:
4355 * root_dir->parent == unlinked_dir->parent == del_dir->parent == NULL
4356 * lost-n-found->parent == root_dir
4357 *
4358 * This fixes the problem where directories might have inadvertently been deleted
4359 * leaving the object "hanging" without being rooted in the directory tree.
4360 */
4361
4362static int yaffs_has_null_parent(struct yaffs_dev *dev, struct yaffs_obj *obj)
4363{
4364 return (obj == dev->del_dir ||
4365 obj == dev->unlinked_dir || obj == dev->root_dir);
4366}
4367
4368static void yaffs_fix_hanging_objs(struct yaffs_dev *dev)
4369{
4370 struct yaffs_obj *obj;
4371 struct yaffs_obj *parent;
4372 int i;
4373 struct list_head *lh;
4374 struct list_head *n;
4375 int depth_limit;
4376 int hanging;
4377
4378 if (dev->read_only)
4379 return;
4380
4381 /* Iterate through the objects in each hash entry,
4382 * looking at each object.
4383 * Make sure it is rooted.
4384 */
4385
4386 for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) {
4387 list_for_each_safe(lh, n, &dev->obj_bucket[i].list) {
4388 if (lh) {
4389 obj =
4390 list_entry(lh, struct yaffs_obj, hash_link);
4391 parent = obj->parent;
4392
4393 if (yaffs_has_null_parent(dev, obj)) {
4394 /* These directories are not hanging */
4395 hanging = 0;
4396 } else if (!parent
4397 || parent->variant_type !=
4398 YAFFS_OBJECT_TYPE_DIRECTORY) {
4399 hanging = 1;
4400 } else if (yaffs_has_null_parent(dev, parent)) {
4401 hanging = 0;
4402 } else {
4403 /*
4404 * Need to follow the parent chain to see if it is hanging.
4405 */
4406 hanging = 0;
4407 depth_limit = 100;
4408
4409 while (parent != dev->root_dir &&
4410 parent->parent &&
4411 parent->parent->variant_type ==
4412 YAFFS_OBJECT_TYPE_DIRECTORY
4413 && depth_limit > 0) {
4414 parent = parent->parent;
4415 depth_limit--;
4416 }
4417 if (parent != dev->root_dir)
4418 hanging = 1;
4419 }
4420 if (hanging) {
4421 yaffs_trace(YAFFS_TRACE_SCAN,
4422 "Hanging object %d moved to lost and found",
4423 obj->obj_id);
4424 yaffs_add_obj_to_dir(dev->lost_n_found,
4425 obj);
4426 }
4427 }
4428 }
4429 }
4430}
4431
4432/*
4433 * Delete directory contents for cleaning up lost and found.
4434 */
4435static void yaffs_del_dir_contents(struct yaffs_obj *dir)
4436{
4437 struct yaffs_obj *obj;
4438 struct list_head *lh;
4439 struct list_head *n;
4440
4441 if (dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY)
4442 YBUG();
4443
4444 list_for_each_safe(lh, n, &dir->variant.dir_variant.children) {
4445 if (lh) {
4446 obj = list_entry(lh, struct yaffs_obj, siblings);
4447 if (obj->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY)
4448 yaffs_del_dir_contents(obj);
4449
4450 yaffs_trace(YAFFS_TRACE_SCAN,
4451 "Deleting lost_found object %d",
4452 obj->obj_id);
4453
4454 /* Need to use UnlinkObject since Delete would not handle
4455 * hardlinked objects correctly.
4456 */
4457 yaffs_unlink_obj(obj);
4458 }
4459 }
4460
4461}
4462
4463static void yaffs_empty_l_n_f(struct yaffs_dev *dev)
4464{
4465 yaffs_del_dir_contents(dev->lost_n_found);
4466}
4467
4468
4469struct yaffs_obj *yaffs_find_by_name(struct yaffs_obj *directory,
4470 const YCHAR * name)
4471{
4472 int sum;
4473
4474 struct list_head *i;
4475 YCHAR buffer[YAFFS_MAX_NAME_LENGTH + 1];
4476
4477 struct yaffs_obj *l;
4478
4479 if (!name)
4480 return NULL;
4481
4482 if (!directory) {
4483 yaffs_trace(YAFFS_TRACE_ALWAYS,
4484 "tragedy: yaffs_find_by_name: null pointer directory"
4485 );
4486 YBUG();
4487 return NULL;
4488 }
4489 if (directory->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
4490 yaffs_trace(YAFFS_TRACE_ALWAYS,
4491 "tragedy: yaffs_find_by_name: non-directory"
4492 );
4493 YBUG();
4494 }
4495
4496 sum = yaffs_calc_name_sum(name);
4497
4498 list_for_each(i, &directory->variant.dir_variant.children) {
4499 if (i) {
4500 l = list_entry(i, struct yaffs_obj, siblings);
4501
4502 if (l->parent != directory)
4503 YBUG();
4504
4505 yaffs_check_obj_details_loaded(l);
4506
4507 /* Special case for lost-n-found */
4508 if (l->obj_id == YAFFS_OBJECTID_LOSTNFOUND) {
4509 if (!strcmp(name, YAFFS_LOSTNFOUND_NAME))
4510 return l;
4511 } else if (l->sum == sum
4512 || l->hdr_chunk <= 0) {
4513 /* LostnFound chunk called Objxxx
4514 * Do a real check
4515 */
4516 yaffs_get_obj_name(l, buffer,
4517 YAFFS_MAX_NAME_LENGTH + 1);
4518 if (strncmp
4519 (name, buffer, YAFFS_MAX_NAME_LENGTH) == 0)
4520 return l;
4521 }
4522 }
4523 }
4524
4525 return NULL;
4526}
4527
4528/* GetEquivalentObject dereferences any hard links to get to the
4529 * actual object.
4530 */
4531
4532struct yaffs_obj *yaffs_get_equivalent_obj(struct yaffs_obj *obj)
4533{
4534 if (obj && obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK) {
4535 /* We want the object id of the equivalent object, not this one */
4536 obj = obj->variant.hardlink_variant.equiv_obj;
4537 yaffs_check_obj_details_loaded(obj);
4538 }
4539 return obj;
4540}
4541
4542/*
4543 * A note or two on object names.
4544 * * If the object name is missing, we then make one up in the form objnnn
4545 *
4546 * * ASCII names are stored in the object header's name field from byte zero
4547 * * Unicode names are historically stored starting from byte zero.
4548 *
4549 * Then there are automatic Unicode names...
4550 * The purpose of these is to save names in a way that can be read as
4551 * ASCII or Unicode names as appropriate, thus allowing a Unicode and ASCII
4552 * system to share files.
4553 *
4554 * These automatic Unicode names are stored slightly differently...
4555 * - If the name can fit in the ASCII character space then they are saved as
4556 *    ASCII names as per above.
4557 * - If the name needs Unicode then the name is saved in Unicode
4558 *    starting at oh->name[1].
4559 *
4560 */
4561static void yaffs_fix_null_name(struct yaffs_obj *obj, YCHAR * name,
4562 int buffer_size)
4563{
4564 /* Create an object name if we could not find one. */
4565 if (strnlen(name, YAFFS_MAX_NAME_LENGTH) == 0) {
4566 YCHAR local_name[20];
4567 YCHAR num_string[20];
4568 YCHAR *x = &num_string[19];
4569 unsigned v = obj->obj_id;
4570 num_string[19] = 0;
4571 while (v > 0) {
4572 x--;
4573 *x = '0' + (v % 10);
4574 v /= 10;
4575 }
4576 /* make up a name */
4577 strcpy(local_name, YAFFS_LOSTNFOUND_PREFIX);
4578 strcat(local_name, x);
4579 strncpy(name, local_name, buffer_size - 1);
4580 }
4581}
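/* Example: an object with obj_id 42 and an empty name comes out as
 * YAFFS_LOSTNFOUND_PREFIX followed by "42" - the "objnnn" form noted
 * in the naming comment above (so typically "obj42").
 */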
4582
4583int yaffs_get_obj_name(struct yaffs_obj *obj, YCHAR * name, int buffer_size)
4584{
4585 memset(name, 0, buffer_size * sizeof(YCHAR));
4586
4587 yaffs_check_obj_details_loaded(obj);
4588
4589 if (obj->obj_id == YAFFS_OBJECTID_LOSTNFOUND) {
4590 strncpy(name, YAFFS_LOSTNFOUND_NAME, buffer_size - 1);
4591 }
4592#ifndef CONFIG_YAFFS_NO_SHORT_NAMES
4593 else if (obj->short_name[0]) {
4594 strcpy(name, obj->short_name);
4595 }
4596#endif
4597 else if (obj->hdr_chunk > 0) {
4598 int result;
4599 u8 *buffer = yaffs_get_temp_buffer(obj->my_dev, __LINE__);
4600
4601 struct yaffs_obj_hdr *oh = (struct yaffs_obj_hdr *)buffer;
4602
4603 memset(buffer, 0, obj->my_dev->data_bytes_per_chunk);
4604
4605 if (obj->hdr_chunk > 0) {
4606 result = yaffs_rd_chunk_tags_nand(obj->my_dev,
4607 obj->hdr_chunk,
4608 buffer, NULL);
4609 }
4610 yaffs_load_name_from_oh(obj->my_dev, name, oh->name,
4611 buffer_size);
4612
4613 yaffs_release_temp_buffer(obj->my_dev, buffer, __LINE__);
4614 }
4615
4616 yaffs_fix_null_name(obj, name, buffer_size);
4617
4618 return strnlen(name, YAFFS_MAX_NAME_LENGTH);
4619}
4620
4621int yaffs_get_obj_length(struct yaffs_obj *obj)
4622{
4623 /* Dereference any hard linking */
4624 obj = yaffs_get_equivalent_obj(obj);
4625
4626 if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE)
4627 return obj->variant.file_variant.file_size;
4628 if (obj->variant_type == YAFFS_OBJECT_TYPE_SYMLINK) {
4629 if (!obj->variant.symlink_variant.alias)
4630 return 0;
4631 return strnlen(obj->variant.symlink_variant.alias,
4632 YAFFS_MAX_ALIAS_LENGTH);
4633 } else {
4634 /* Only a directory should drop through to here */
4635 return obj->my_dev->data_bytes_per_chunk;
4636 }
4637}
4638
4639int yaffs_get_obj_link_count(struct yaffs_obj *obj)
4640{
4641 int count = 0;
4642 struct list_head *i;
4643
4644 if (!obj->unlinked)
4645 count++; /* the object itself */
4646
4647 list_for_each(i, &obj->hard_links)
4648 count++; /* add the hard links; */
4649
4650 return count;
4651}
4652
4653int yaffs_get_obj_inode(struct yaffs_obj *obj)
4654{
4655 obj = yaffs_get_equivalent_obj(obj);
4656
4657 return obj->obj_id;
4658}
4659
4660unsigned yaffs_get_obj_type(struct yaffs_obj *obj)
4661{
4662 obj = yaffs_get_equivalent_obj(obj);
4663
4664 switch (obj->variant_type) {
4665 case YAFFS_OBJECT_TYPE_FILE:
4666 return DT_REG;
4667 break;
4668 case YAFFS_OBJECT_TYPE_DIRECTORY:
4669 return DT_DIR;
4670 break;
4671 case YAFFS_OBJECT_TYPE_SYMLINK:
4672 return DT_LNK;
4673 break;
4674 case YAFFS_OBJECT_TYPE_HARDLINK:
4675 return DT_REG;
4676 break;
4677 case YAFFS_OBJECT_TYPE_SPECIAL:
4678 if (S_ISFIFO(obj->yst_mode))
4679 return DT_FIFO;
4680 if (S_ISCHR(obj->yst_mode))
4681 return DT_CHR;
4682 if (S_ISBLK(obj->yst_mode))
4683 return DT_BLK;
4684 if (S_ISSOCK(obj->yst_mode))
4685 return DT_SOCK;
4686 default:
4687 return DT_REG;
4688 break;
4689 }
4690}
4691
4692YCHAR *yaffs_get_symlink_alias(struct yaffs_obj *obj)
4693{
4694 obj = yaffs_get_equivalent_obj(obj);
4695 if (obj->variant_type == YAFFS_OBJECT_TYPE_SYMLINK)
4696 return yaffs_clone_str(obj->variant.symlink_variant.alias);
4697 else
4698 return yaffs_clone_str(_Y(""));
4699}
4700
4701/*--------------------------- Initialisation code -------------------------- */
4702
4703static int yaffs_check_dev_fns(const struct yaffs_dev *dev)
4704{
4705
4706 /* Common functions, gotta have */
4707 if (!dev->param.erase_fn || !dev->param.initialise_flash_fn)
4708 return 0;
4709
4710#ifdef CONFIG_YAFFS_YAFFS2
4711
4712 /* Can use the "with tags" style interface for yaffs1 or yaffs2 */
4713 if (dev->param.write_chunk_tags_fn &&
4714 dev->param.read_chunk_tags_fn &&
4715 !dev->param.write_chunk_fn &&
4716 !dev->param.read_chunk_fn &&
4717 dev->param.bad_block_fn && dev->param.query_block_fn)
4718 return 1;
4719#endif
4720
4721 /* Can use the "spare" style interface for yaffs1 */
4722 if (!dev->param.is_yaffs2 &&
4723 !dev->param.write_chunk_tags_fn &&
4724 !dev->param.read_chunk_tags_fn &&
4725 dev->param.write_chunk_fn &&
4726 dev->param.read_chunk_fn &&
4727 !dev->param.bad_block_fn && !dev->param.query_block_fn)
4728 return 1;
4729
4730 return 0; /* bad */
4731}
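/* Driver hookup sketch (field names as used in the checks above):
 * a yaffs2 "with tags" driver fills in
 *	param.write_chunk_tags_fn / param.read_chunk_tags_fn
 *	param.bad_block_fn        / param.query_block_fn
 * and leaves write_chunk_fn/read_chunk_fn NULL, while a yaffs1
 * "spare" style driver does exactly the opposite. Mixing the two
 * styles fails this check.
 */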
4732
4733static int yaffs_create_initial_dir(struct yaffs_dev *dev)
4734{
4735 /* Initialise the unlinked, deleted, root and lost and found directories */
4736
4737 dev->lost_n_found = dev->root_dir = NULL;
4738 dev->unlinked_dir = dev->del_dir = NULL;
4739
4740 dev->unlinked_dir =
4741 yaffs_create_fake_dir(dev, YAFFS_OBJECTID_UNLINKED, S_IFDIR);
4742
4743 dev->del_dir =
4744 yaffs_create_fake_dir(dev, YAFFS_OBJECTID_DELETED, S_IFDIR);
4745
4746 dev->root_dir =
4747 yaffs_create_fake_dir(dev, YAFFS_OBJECTID_ROOT,
4748 YAFFS_ROOT_MODE | S_IFDIR);
4749 dev->lost_n_found =
4750 yaffs_create_fake_dir(dev, YAFFS_OBJECTID_LOSTNFOUND,
4751 YAFFS_LOSTNFOUND_MODE | S_IFDIR);
4752
4753 if (dev->lost_n_found && dev->root_dir && dev->unlinked_dir
4754 && dev->del_dir) {
4755 yaffs_add_obj_to_dir(dev->root_dir, dev->lost_n_found);
4756 return YAFFS_OK;
4757 }
4758
4759 return YAFFS_FAIL;
4760}
4761
4762int yaffs_guts_initialise(struct yaffs_dev *dev)
4763{
4764 int init_failed = 0;
4765 unsigned x;
4766 int bits;
4767
4768 yaffs_trace(YAFFS_TRACE_TRACING, "yaffs: yaffs_guts_initialise()" );
4769
4770 /* Check stuff that must be set */
4771
4772 if (!dev) {
4773 yaffs_trace(YAFFS_TRACE_ALWAYS,
4774 "yaffs: Need a device"
4775 );
4776 return YAFFS_FAIL;
4777 }
4778
4779 dev->internal_start_block = dev->param.start_block;
4780 dev->internal_end_block = dev->param.end_block;
4781 dev->block_offset = 0;
4782 dev->chunk_offset = 0;
4783 dev->n_free_chunks = 0;
4784
4785 dev->gc_block = 0;
4786
4787 if (dev->param.start_block == 0) {
4788 dev->internal_start_block = dev->param.start_block + 1;
4789 dev->internal_end_block = dev->param.end_block + 1;
4790 dev->block_offset = 1;
4791 dev->chunk_offset = dev->param.chunks_per_block;
4792 }
4793
4794 /* Check geometry parameters. */
4795
4796 if ((!dev->param.inband_tags && dev->param.is_yaffs2 &&
4797 dev->param.total_bytes_per_chunk < 1024) ||
4798 (!dev->param.is_yaffs2 &&
4799 dev->param.total_bytes_per_chunk < 512) ||
4800 (dev->param.inband_tags && !dev->param.is_yaffs2) ||
4801 dev->param.chunks_per_block < 2 ||
4802 dev->param.n_reserved_blocks < 2 ||
4803 dev->internal_start_block <= 0 ||
4804 dev->internal_end_block <= 0 ||
4805 dev->internal_end_block <=
4806 (dev->internal_start_block + dev->param.n_reserved_blocks + 2)
4807 ) {
4808 /* otherwise it is too small */
4809 yaffs_trace(YAFFS_TRACE_ALWAYS,
4810 "NAND geometry problems: chunk size %d, type is yaffs%s, inband_tags %d ",
4811 dev->param.total_bytes_per_chunk,
4812 dev->param.is_yaffs2 ? "2" : "",
4813 dev->param.inband_tags);
4814 return YAFFS_FAIL;
4815 }
4816
4817 if (yaffs_init_nand(dev) != YAFFS_OK) {
4818 yaffs_trace(YAFFS_TRACE_ALWAYS, "InitialiseNAND failed");
4819 return YAFFS_FAIL;
4820 }
4821
4822 /* Sort out space for inband tags, if required */
4823 if (dev->param.inband_tags)
4824 dev->data_bytes_per_chunk =
4825 dev->param.total_bytes_per_chunk -
4826 sizeof(struct yaffs_packed_tags2_tags_only);
4827 else
4828 dev->data_bytes_per_chunk = dev->param.total_bytes_per_chunk;
4829
4830 /* Got the right mix of functions? */
4831 if (!yaffs_check_dev_fns(dev)) {
4832 /* Function missing */
4833 yaffs_trace(YAFFS_TRACE_ALWAYS,
4834 "device function(s) missing or wrong");
4835
4836 return YAFFS_FAIL;
4837 }
4838
4839 if (dev->is_mounted) {
4840 yaffs_trace(YAFFS_TRACE_ALWAYS, "device already mounted");
4841 return YAFFS_FAIL;
4842 }
4843
4844 /* Finished with most checks. One or two more checks happen later on too. */
4845
4846 dev->is_mounted = 1;
4847
4848 /* OK now calculate a few things for the device */
4849
4850 /*
4851 * Calculate all the chunk size manipulation numbers:
4852 */
4853 x = dev->data_bytes_per_chunk;
4854 /* We always use dev->chunk_shift and dev->chunk_div */
4855 dev->chunk_shift = calc_shifts(x);
4856 x >>= dev->chunk_shift;
4857 dev->chunk_div = x;
4858 /* We only use chunk mask if chunk_div is 1 */
4859 dev->chunk_mask = (1 << dev->chunk_shift) - 1;
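	/* Example (assuming calc_shifts() extracts the power-of-two
	 * factor): data_bytes_per_chunk == 2048 gives chunk_shift 11,
	 * chunk_div 1 and chunk_mask 0x7ff, so address-to-chunk is a
	 * pure shift and mask; 2000 (e.g. with inband tags) gives
	 * chunk_shift 4 and chunk_div 125, forcing the divide path.
	 */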
4860
4861 /*
4862 * Calculate chunk_grp_bits.
4863	 * We need to find the next power of 2 > the total number of chunks
4864 */
4865
4866 x = dev->param.chunks_per_block * (dev->internal_end_block + 1);
4867
4868 bits = calc_shifts_ceiling(x);
4869
4870 /* Set up tnode width if wide tnodes are enabled. */
4871 if (!dev->param.wide_tnodes_disabled) {
4872 /* bits must be even so that we end up with 32-bit words */
4873 if (bits & 1)
4874 bits++;
4875 if (bits < 16)
4876 dev->tnode_width = 16;
4877 else
4878 dev->tnode_width = bits;
4879 } else {
4880 dev->tnode_width = 16;
4881 }
4882
4883 dev->tnode_mask = (1 << dev->tnode_width) - 1;
4884
4885	/* Level0 tnodes are 16 bits or wider (if wide tnodes are enabled),
4886	 * so if the bit width of the chunk range we're using is greater
4887	 * than the tnode width we need to figure out chunk_grp_bits and
4888	 * chunk_grp_size
4889 */
4890
4891 if (bits <= dev->tnode_width)
4892 dev->chunk_grp_bits = 0;
4893 else
4894 dev->chunk_grp_bits = bits - dev->tnode_width;
4895
4896 dev->tnode_size = (dev->tnode_width * YAFFS_NTNODES_LEVEL0) / 8;
4897 if (dev->tnode_size < sizeof(struct yaffs_tnode))
4898 dev->tnode_size = sizeof(struct yaffs_tnode);
4899
4900 dev->chunk_grp_size = 1 << dev->chunk_grp_bits;
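	/* Example: with wide tnodes disabled (tnode_width == 16) and a
	 * chunk range needing 18 bits, chunk_grp_bits == 2 and
	 * chunk_grp_size == 4, so each tnode entry names a group of 4
	 * candidate chunks that must be told apart by their tags.
	 */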
4901
4902 if (dev->param.chunks_per_block < dev->chunk_grp_size) {
4903 /* We have a problem because the soft delete won't work if
4904 * the chunk group size > chunks per block.
4905 * This can be remedied by using larger "virtual blocks".
4906 */
4907 yaffs_trace(YAFFS_TRACE_ALWAYS, "chunk group too large");
4908
4909 return YAFFS_FAIL;
4910 }
4911
4912	/* OK, we've finished verifying the device, let's continue with initialisation */
4913
4914 /* More device initialisation */
4915 dev->all_gcs = 0;
4916 dev->passive_gc_count = 0;
4917 dev->oldest_dirty_gc_count = 0;
4918 dev->bg_gcs = 0;
4919 dev->gc_block_finder = 0;
4920 dev->buffered_block = -1;
4921 dev->doing_buffered_block_rewrite = 0;
4922 dev->n_deleted_files = 0;
4923 dev->n_bg_deletions = 0;
4924 dev->n_unlinked_files = 0;
4925 dev->n_ecc_fixed = 0;
4926 dev->n_ecc_unfixed = 0;
4927 dev->n_tags_ecc_fixed = 0;
4928 dev->n_tags_ecc_unfixed = 0;
4929 dev->n_erase_failures = 0;
4930 dev->n_erased_blocks = 0;
4931 dev->gc_disable = 0;
4932 dev->has_pending_prioritised_gc = 1; /* Assume the worst for now, will get fixed on first GC */
4933 INIT_LIST_HEAD(&dev->dirty_dirs);
4934 dev->oldest_dirty_seq = 0;
4935 dev->oldest_dirty_block = 0;
4936
4937 /* Initialise temporary buffers and caches. */
4938 if (!yaffs_init_tmp_buffers(dev))
4939 init_failed = 1;
4940
4941 dev->cache = NULL;
4942 dev->gc_cleanup_list = NULL;
4943
4944 if (!init_failed && dev->param.n_caches > 0) {
4945 int i;
4946 void *buf;
4947 int cache_bytes =
4948 dev->param.n_caches * sizeof(struct yaffs_cache);
4949
4950 if (dev->param.n_caches > YAFFS_MAX_SHORT_OP_CACHES)
4951 dev->param.n_caches = YAFFS_MAX_SHORT_OP_CACHES;
4952
4953 dev->cache = kmalloc(cache_bytes, GFP_NOFS);
4954
4955 buf = (u8 *) dev->cache;
4956
4957 if (dev->cache)
4958 memset(dev->cache, 0, cache_bytes);
4959
4960 for (i = 0; i < dev->param.n_caches && buf; i++) {
4961 dev->cache[i].object = NULL;
4962 dev->cache[i].last_use = 0;
4963 dev->cache[i].dirty = 0;
4964 dev->cache[i].data = buf =
4965 kmalloc(dev->param.total_bytes_per_chunk, GFP_NOFS);
4966 }
4967 if (!buf)
4968 init_failed = 1;
4969
4970 dev->cache_last_use = 0;
4971 }
4972
4973 dev->cache_hits = 0;
4974
4975 if (!init_failed) {
4976 dev->gc_cleanup_list =
4977 kmalloc(dev->param.chunks_per_block * sizeof(u32),
4978 GFP_NOFS);
4979 if (!dev->gc_cleanup_list)
4980 init_failed = 1;
4981 }
4982
4983 if (dev->param.is_yaffs2)
4984 dev->param.use_header_file_size = 1;
4985
4986 if (!init_failed && !yaffs_init_blocks(dev))
4987 init_failed = 1;
4988
4989 yaffs_init_tnodes_and_objs(dev);
4990
4991 if (!init_failed && !yaffs_create_initial_dir(dev))
4992 init_failed = 1;
4993
4994 if (!init_failed) {
4995 /* Now scan the flash. */
4996 if (dev->param.is_yaffs2) {
4997 if (yaffs2_checkpt_restore(dev)) {
4998 yaffs_check_obj_details_loaded(dev->root_dir);
4999 yaffs_trace(YAFFS_TRACE_CHECKPOINT | YAFFS_TRACE_MOUNT,
5000 "yaffs: restored from checkpoint"
5001 );
5002 } else {
5003
5004 /* Clean up the mess caused by an aborted checkpoint load
5005 * and scan backwards.
5006 */
5007 yaffs_deinit_blocks(dev);
5008
5009 yaffs_deinit_tnodes_and_objs(dev);
5010
5011 dev->n_erased_blocks = 0;
5012 dev->n_free_chunks = 0;
5013 dev->alloc_block = -1;
5014 dev->alloc_page = -1;
5015 dev->n_deleted_files = 0;
5016 dev->n_unlinked_files = 0;
5017 dev->n_bg_deletions = 0;
5018
5019 if (!init_failed && !yaffs_init_blocks(dev))
5020 init_failed = 1;
5021
5022 yaffs_init_tnodes_and_objs(dev);
5023
5024 if (!init_failed
5025 && !yaffs_create_initial_dir(dev))
5026 init_failed = 1;
5027
5028 if (!init_failed && !yaffs2_scan_backwards(dev))
5029 init_failed = 1;
5030 }
5031 } else if (!yaffs1_scan(dev)) {
5032 init_failed = 1;
5033 }
5034
5035 yaffs_strip_deleted_objs(dev);
5036 yaffs_fix_hanging_objs(dev);
5037 if (dev->param.empty_lost_n_found)
5038 yaffs_empty_l_n_f(dev);
5039 }
5040
5041 if (init_failed) {
5042 /* Clean up the mess */
5043 yaffs_trace(YAFFS_TRACE_TRACING,
5044 "yaffs: yaffs_guts_initialise() aborted.");
5045
5046 yaffs_deinitialise(dev);
5047 return YAFFS_FAIL;
5048 }
5049
5050 /* Zero out stats */
5051 dev->n_page_reads = 0;
5052 dev->n_page_writes = 0;
5053 dev->n_erasures = 0;
5054 dev->n_gc_copies = 0;
5055 dev->n_retired_writes = 0;
5056
5057 dev->n_retired_blocks = 0;
5058
5059 yaffs_verify_free_chunks(dev);
5060 yaffs_verify_blocks(dev);
5061
5062 /* Clean up any aborted checkpoint data */
5063 if (!dev->is_checkpointed && dev->blocks_in_checkpt > 0)
5064 yaffs2_checkpt_invalidate(dev);
5065
5066 yaffs_trace(YAFFS_TRACE_TRACING,
5067 "yaffs: yaffs_guts_initialise() done.");
5068 return YAFFS_OK;
5069
5070}
5071
5072void yaffs_deinitialise(struct yaffs_dev *dev)
5073{
5074 if (dev->is_mounted) {
5075 int i;
5076
5077 yaffs_deinit_blocks(dev);
5078 yaffs_deinit_tnodes_and_objs(dev);
5079 if (dev->param.n_caches > 0 && dev->cache) {
5080
5081 for (i = 0; i < dev->param.n_caches; i++) {
5082 if (dev->cache[i].data)
5083 kfree(dev->cache[i].data);
5084 dev->cache[i].data = NULL;
5085 }
5086
5087 kfree(dev->cache);
5088 dev->cache = NULL;
5089 }
5090
5091 kfree(dev->gc_cleanup_list);
5092
5093 for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++)
5094 kfree(dev->temp_buffer[i].buffer);
5095
5096 dev->is_mounted = 0;
5097
5098 if (dev->param.deinitialise_flash_fn)
5099 dev->param.deinitialise_flash_fn(dev);
5100 }
5101}
5102
5103int yaffs_count_free_chunks(struct yaffs_dev *dev)
5104{
5105 int n_free = 0;
5106 int b;
5107
5108 struct yaffs_block_info *blk;
5109
5110 blk = dev->block_info;
5111 for (b = dev->internal_start_block; b <= dev->internal_end_block; b++) {
5112 switch (blk->block_state) {
5113 case YAFFS_BLOCK_STATE_EMPTY:
5114 case YAFFS_BLOCK_STATE_ALLOCATING:
5115 case YAFFS_BLOCK_STATE_COLLECTING:
5116 case YAFFS_BLOCK_STATE_FULL:
5117 n_free +=
5118 (dev->param.chunks_per_block - blk->pages_in_use +
5119 blk->soft_del_pages);
5120 break;
5121 default:
5122 break;
5123 }
5124 blk++;
5125 }
5126
5127 return n_free;
5128}
5129
5130int yaffs_get_n_free_chunks(struct yaffs_dev *dev)
5131{
5132 /* This is what we report to the outside world */
5133
5134 int n_free;
5135 int n_dirty_caches;
5136 int blocks_for_checkpt;
5137 int i;
5138
5139 n_free = dev->n_free_chunks;
5140 n_free += dev->n_deleted_files;
5141
5142 /* Now count the number of dirty chunks in the cache and subtract those */
5143
5144 for (n_dirty_caches = 0, i = 0; i < dev->param.n_caches; i++) {
5145 if (dev->cache[i].dirty)
5146 n_dirty_caches++;
5147 }
5148
5149 n_free -= n_dirty_caches;
5150
5151 n_free -=
5152 ((dev->param.n_reserved_blocks + 1) * dev->param.chunks_per_block);
5153
5154 /* Now we figure out how much to reserve for the checkpoint and report that... */
5155 blocks_for_checkpt = yaffs_calc_checkpt_blocks_required(dev);
5156
5157 n_free -= (blocks_for_checkpt * dev->param.chunks_per_block);
5158
5159 if (n_free < 0)
5160 n_free = 0;
5161
5162 return n_free;
5163
5164}
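/* Worked example (illustrative numbers): with 64 chunks per block,
 * n_reserved_blocks == 5, 2 checkpoint blocks and 3 dirty cache
 * entries, the reported figure is
 *	n_free - 3 - (5 + 1) * 64 - 2 * 64
 * i.e. 3 + 384 + 128 == 515 chunks held back, clamped at zero.
 */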
diff --git a/fs/yaffs2/yaffs_guts.h b/fs/yaffs2/yaffs_guts.h
new file mode 100644
index 00000000000..307eba28676
--- /dev/null
+++ b/fs/yaffs2/yaffs_guts.h
@@ -0,0 +1,915 @@
1/*
2 * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_GUTS_H__
17#define __YAFFS_GUTS_H__
18
19#include "yportenv.h"
20
21#define YAFFS_OK 1
22#define YAFFS_FAIL 0
23
24/* Give us a Y=0x59,
25 * Give us an A=0x41,
26 * Give us an FF=0xFF
27 * Give us an S=0x53
28 * And what have we got...
29 */
30#define YAFFS_MAGIC 0x5941FF53
31
32#define YAFFS_NTNODES_LEVEL0 16
33#define YAFFS_TNODES_LEVEL0_BITS 4
34#define YAFFS_TNODES_LEVEL0_MASK 0xf
35
36#define YAFFS_NTNODES_INTERNAL (YAFFS_NTNODES_LEVEL0 / 2)
37#define YAFFS_TNODES_INTERNAL_BITS (YAFFS_TNODES_LEVEL0_BITS - 1)
38#define YAFFS_TNODES_INTERNAL_MASK 0x7
39#define YAFFS_TNODES_MAX_LEVEL 6
40
41#ifndef CONFIG_YAFFS_NO_YAFFS1
42#define YAFFS_BYTES_PER_SPARE 16
43#define YAFFS_BYTES_PER_CHUNK 512
44#define YAFFS_CHUNK_SIZE_SHIFT 9
45#define YAFFS_CHUNKS_PER_BLOCK 32
46#define YAFFS_BYTES_PER_BLOCK (YAFFS_CHUNKS_PER_BLOCK*YAFFS_BYTES_PER_CHUNK)
47#endif
48
49#define YAFFS_MIN_YAFFS2_CHUNK_SIZE 1024
50#define YAFFS_MIN_YAFFS2_SPARE_SIZE 32
51
52#define YAFFS_MAX_CHUNK_ID 0x000FFFFF
53
54#define YAFFS_ALLOCATION_NOBJECTS 100
55#define YAFFS_ALLOCATION_NTNODES 100
56#define YAFFS_ALLOCATION_NLINKS 100
57
58#define YAFFS_NOBJECT_BUCKETS 256
59
60#define YAFFS_OBJECT_SPACE 0x40000
61#define YAFFS_MAX_OBJECT_ID (YAFFS_OBJECT_SPACE -1)
62
63#define YAFFS_CHECKPOINT_VERSION 4
64
65#ifdef CONFIG_YAFFS_UNICODE
66#define YAFFS_MAX_NAME_LENGTH 127
67#define YAFFS_MAX_ALIAS_LENGTH 79
68#else
69#define YAFFS_MAX_NAME_LENGTH 255
70#define YAFFS_MAX_ALIAS_LENGTH 159
71#endif
72
73#define YAFFS_SHORT_NAME_LENGTH 15
74
75/* Some special object ids for pseudo objects */
76#define YAFFS_OBJECTID_ROOT 1
77#define YAFFS_OBJECTID_LOSTNFOUND 2
78#define YAFFS_OBJECTID_UNLINKED 3
79#define YAFFS_OBJECTID_DELETED 4
80
81/* Pseudo object ids for checkpointing */
82#define YAFFS_OBJECTID_SB_HEADER 0x10
83#define YAFFS_OBJECTID_CHECKPOINT_DATA 0x20
84#define YAFFS_SEQUENCE_CHECKPOINT_DATA 0x21
85
86#define YAFFS_MAX_SHORT_OP_CACHES 20
87
88#define YAFFS_N_TEMP_BUFFERS 6
89
90/* We limit the number of attempts at successfully saving a chunk of data.
91 * Small-page devices have 32 pages per block; large-page devices have 64.
92 * Default to something in the order of 5 to 10 blocks worth of chunks.
93 */
94#define YAFFS_WR_ATTEMPTS (5*64)
95
96/* Sequence numbers are used in YAFFS2 to determine block allocation order.
97 * The range is limited slightly to help distinguish bad numbers from good.
98 * This also allows us to perhaps in the future use special numbers for
99 * special purposes.
100 * EFFFFF00 allows the allocation of 8 blocks per second (~1 Mbyte/s) for 15 years,
101 * which comfortably exceeds the expected lifetime of a 2GB device.
102 */
103#define YAFFS_LOWEST_SEQUENCE_NUMBER 0x00001000
104#define YAFFS_HIGHEST_SEQUENCE_NUMBER 0xEFFFFF00
105
106/* Special sequence number for bad block that failed to be marked bad */
107#define YAFFS_SEQUENCE_BAD_BLOCK 0xFFFF0000
108
109/* ChunkCache is used for short read/write operations.*/
110struct yaffs_cache {
111 struct yaffs_obj *object;
112 int chunk_id;
113 int last_use;
114 int dirty;
115 int n_bytes; /* Only valid if the cache is dirty */
116 int locked; /* Can't push out or flush while locked. */
117 u8 *data;
118};
119
120/* Tags structures in RAM
121 * NB This uses bitfields. Bitfields should not straddle a u32 boundary, otherwise
122 * the structure size will get blown out.
123 */
124
125#ifndef CONFIG_YAFFS_NO_YAFFS1
126struct yaffs_tags {
127 unsigned chunk_id:20;
128 unsigned serial_number:2;
129 unsigned n_bytes_lsb:10;
130 unsigned obj_id:18;
131 unsigned ecc:12;
132 unsigned n_bytes_msb:2;
133};
134
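/* The bitfields above pack into exactly two 32-bit words
 * (20 + 2 + 10 and 18 + 12 + 2 bits), which is what makes the
 * 8-byte as_bytes view in the union below line up.
 */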
135union yaffs_tags_union {
136 struct yaffs_tags as_tags;
137 u8 as_bytes[8];
138};
139
140#endif
141
142/* Stuff used for extended tags in YAFFS2 */
143
144enum yaffs_ecc_result {
145 YAFFS_ECC_RESULT_UNKNOWN,
146 YAFFS_ECC_RESULT_NO_ERROR,
147 YAFFS_ECC_RESULT_FIXED,
148 YAFFS_ECC_RESULT_UNFIXED
149};
150
151enum yaffs_obj_type {
152 YAFFS_OBJECT_TYPE_UNKNOWN,
153 YAFFS_OBJECT_TYPE_FILE,
154 YAFFS_OBJECT_TYPE_SYMLINK,
155 YAFFS_OBJECT_TYPE_DIRECTORY,
156 YAFFS_OBJECT_TYPE_HARDLINK,
157 YAFFS_OBJECT_TYPE_SPECIAL
158};
159
160#define YAFFS_OBJECT_TYPE_MAX YAFFS_OBJECT_TYPE_SPECIAL
161
162struct yaffs_ext_tags {
163
164 unsigned validity0;
165 unsigned chunk_used; /* Status of the chunk: used or unused */
166 unsigned obj_id; /* If 0 then this is not part of an object (unused) */
167 unsigned chunk_id; /* If 0 then this is a header, else a data chunk */
168 unsigned n_bytes; /* Only valid for data chunks */
169
170 /* The following stuff only has meaning when we read */
171 enum yaffs_ecc_result ecc_result;
172 unsigned block_bad;
173
174 /* YAFFS 1 stuff */
175 unsigned is_deleted; /* The chunk is marked deleted */
176 unsigned serial_number; /* Yaffs1 2-bit serial number */
177
178 /* YAFFS2 stuff */
179 unsigned seq_number; /* The sequence number of this block */
180
181 /* Extra info if this is an object header (YAFFS2 only) */
182
183 unsigned extra_available; /* There is extra info available if this is not zero */
184 unsigned extra_parent_id; /* The parent object */
185 unsigned extra_is_shrink; /* Is it a shrink header? */
186 unsigned extra_shadows; /* Does this shadow another object? */
187
188 enum yaffs_obj_type extra_obj_type; /* What object type? */
189
190 unsigned extra_length; /* Length if it is a file */
191 unsigned extra_equiv_id; /* Equivalent object Id if it is a hard link */
192
193 unsigned validity1;
194
195};
196
197/* Spare structure for YAFFS1 */
198struct yaffs_spare {
199 u8 tb0;
200 u8 tb1;
201 u8 tb2;
202 u8 tb3;
203 u8 page_status; /* set to 0 to delete the chunk */
204 u8 block_status;
205 u8 tb4;
206 u8 tb5;
207 u8 ecc1[3];
208 u8 tb6;
209 u8 tb7;
210 u8 ecc2[3];
211};
212
213/*Special structure for passing through to mtd */
214struct yaffs_nand_spare {
215 struct yaffs_spare spare;
216 int eccres1;
217 int eccres2;
218};
219
220/* Block data in RAM */
221
222enum yaffs_block_state {
223 YAFFS_BLOCK_STATE_UNKNOWN = 0,
224
225 YAFFS_BLOCK_STATE_SCANNING,
226 /* Being scanned */
227
228 YAFFS_BLOCK_STATE_NEEDS_SCANNING,
229 /* The block might have something on it (ie it is allocating or full, perhaps empty)
230 * but it needs to be scanned to determine its true state.
231 * This state is only valid during scanning.
232 * NB We tolerate empty because the pre-scanner might be incapable of deciding.
233 * However, if this state is returned on a YAFFS2 device, we expect a sequence number.
234 */
235
236 YAFFS_BLOCK_STATE_EMPTY,
237 /* This block is empty */
238
239 YAFFS_BLOCK_STATE_ALLOCATING,
240 /* This block is partially allocated.
241 * At least one page holds valid data.
242 * This is the one currently being used for page
243 * allocation. Should never be more than one of these.
244 * If a block is only partially allocated at mount it is treated as full.
245 */
246
247 YAFFS_BLOCK_STATE_FULL,
248 /* All the pages in this block have been allocated.
249 * If a block was only partially allocated when mounted we treat
250 * it as fully allocated.
251 */
252
253 YAFFS_BLOCK_STATE_DIRTY,
254 /* The block was full and now all chunks have been deleted.
255 * Erase me, reuse me.
256 */
257
258 YAFFS_BLOCK_STATE_CHECKPOINT,
259 /* This block is assigned to holding checkpoint data. */
260
261 YAFFS_BLOCK_STATE_COLLECTING,
262 /* This block is being garbage collected */
263
264 YAFFS_BLOCK_STATE_DEAD
265 /* This block has failed and is not in use */
266};
267
268#define YAFFS_NUMBER_OF_BLOCK_STATES (YAFFS_BLOCK_STATE_DEAD + 1)
269
270struct yaffs_block_info {
271
272 int soft_del_pages:10; /* number of soft deleted pages */
273 int pages_in_use:10; /* number of pages in use */
274 unsigned block_state:4; /* One of the above block states. NB use unsigned because enum is sometimes an int */
275 u32 needs_retiring:1; /* Data has failed on this block, need to get valid data off */
276 /* and retire the block. */
277 u32 skip_erased_check:1; /* If this is set we can skip the erased check on this block */
278 u32 gc_prioritise:1; /* An ECC check or blank check has failed on this block.
279 It should be prioritised for GC */
280 u32 chunk_error_strikes:3; /* How many times we've had ecc etc failures on this block and tried to reuse it */
281
282#ifdef CONFIG_YAFFS_YAFFS2
283 u32 has_shrink_hdr:1; /* This block has at least one shrink object header */
284 u32 seq_number; /* block sequence number for yaffs2 */
285#endif
286
287};
288
289/* -------------------------- Object structure -------------------------------*/
290/* This is the object structure as stored on NAND */
291
292struct yaffs_obj_hdr {
293 enum yaffs_obj_type type;
294
295 /* Apply to everything */
296 int parent_obj_id;
297 u16 sum_no_longer_used; /* checksum of name. No longer used */
298 YCHAR name[YAFFS_MAX_NAME_LENGTH + 1];
299
300 /* The following apply to directories, files, symlinks - not hard links */
301 u32 yst_mode; /* protection */
302
303 u32 yst_uid;
304 u32 yst_gid;
305 u32 yst_atime;
306 u32 yst_mtime;
307 u32 yst_ctime;
308
309 /* File size applies to files only */
310 int file_size;
311
312 /* Equivalent object id applies to hard links only. */
313 int equiv_id;
314
315 /* Alias is for symlinks only. */
316 YCHAR alias[YAFFS_MAX_ALIAS_LENGTH + 1];
317
318 u32 yst_rdev; /* device stuff for block and char devices (major/min) */
319
320 u32 win_ctime[2];
321 u32 win_atime[2];
322 u32 win_mtime[2];
323
324 u32 inband_shadowed_obj_id;
325 u32 inband_is_shrink;
326
327 u32 reserved[2];
328 int shadows_obj; /* This object header shadows the specified object if > 0 */
329
330 /* is_shrink applies to object headers written when we shrink the file (ie resize) */
331 u32 is_shrink;
332
333};
334
335/*--------------------------- Tnode -------------------------- */
336
337struct yaffs_tnode {
338 struct yaffs_tnode *internal[YAFFS_NTNODES_INTERNAL];
339};
340
341/*------------------------ Object -----------------------------*/
342/* An object can be one of:
343 * - a directory (no data, has child links)
344 * - a regular file (data.... not prunes :->).
345 * - a symlink [symbolic link] (the alias).
346 * - a hard link
347 */
348
349struct yaffs_file_var {
350 u32 file_size;
351 u32 scanned_size;
352 u32 shrink_size;
353 int top_level;
354 struct yaffs_tnode *top;
355};
356
357struct yaffs_dir_var {
358 struct list_head children; /* list of child links */
359 struct list_head dirty; /* Entry for list of dirty directories */
360};
361
362struct yaffs_symlink_var {
363 YCHAR *alias;
364};
365
366struct yaffs_hardlink_var {
367 struct yaffs_obj *equiv_obj;
368 u32 equiv_id;
369};
370
371union yaffs_obj_var {
372 struct yaffs_file_var file_variant;
373 struct yaffs_dir_var dir_variant;
374 struct yaffs_symlink_var symlink_variant;
375 struct yaffs_hardlink_var hardlink_variant;
376};
377
378struct yaffs_obj {
379 u8 deleted:1; /* This should only apply to unlinked files. */
380 u8 soft_del:1; /* it has also been soft deleted */
381 u8 unlinked:1; /* An unlinked file. The file should be in the unlinked directory. */
382 u8 fake:1; /* A fake object has no presence on NAND. */
383 u8 rename_allowed:1; /* Some objects are not allowed to be renamed. */
384 u8 unlink_allowed:1;
385 u8 dirty:1; /* the object needs to be written to flash */
386 u8 valid:1; /* When the file system is being loaded up, this
387 * object might be created before the data
388 * is available (ie. file data records appear before the header).
389 */
390 u8 lazy_loaded:1; /* This object has been lazy loaded and is missing some detail */
391
392 u8 defered_free:1; /* For the Linux kernel. The object has been removed from
393 * NAND but is still in the inode cache. Freeing of the
394 * object is deferred until the inode is released.
395 */
396 u8 being_created:1; /* This object is still being created so skip some checks. */
397 u8 is_shadowed:1; /* This object is shadowed on the way to being renamed. */
398
399 u8 xattr_known:1; /* We know whether or not this object has xattribs. */
400 u8 has_xattr:1; /* This object has xattribs. Valid if xattr_known. */
401
402 u8 serial; /* serial number of chunk in NAND. Cached here */
403 u16 sum; /* sum of the name to speed searching */
404
405 struct yaffs_dev *my_dev; /* The device I'm on */
406
407 struct list_head hash_link; /* list of objects in this hash bucket */
408
409 struct list_head hard_links; /* all the equivalent hard linked objects */
410
411 /* directory structure stuff */
412 /* also used for linking up the free list */
413 struct yaffs_obj *parent;
414 struct list_head siblings;
415
416 /* Where's my object header in NAND? */
417 int hdr_chunk;
418
419 int n_data_chunks; /* Number of data chunks attached to the file. */
420
421 u32 obj_id; /* the object id value */
422
423 u32 yst_mode;
424
425#ifndef CONFIG_YAFFS_NO_SHORT_NAMES
426 YCHAR short_name[YAFFS_SHORT_NAME_LENGTH + 1];
427#endif
428
429#ifdef CONFIG_YAFFS_WINCE
430 u32 win_ctime[2];
431 u32 win_mtime[2];
432 u32 win_atime[2];
433#else
434 u32 yst_uid;
435 u32 yst_gid;
436 u32 yst_atime;
437 u32 yst_mtime;
438 u32 yst_ctime;
439#endif
440
441 u32 yst_rdev;
442
443 void *my_inode;
444
445 enum yaffs_obj_type variant_type;
446
447 union yaffs_obj_var variant;
448
449};
450
451struct yaffs_obj_bucket {
452 struct list_head list;
453 int count;
454};
455
456/* yaffs_checkpt_obj holds the definition of an object as dumped
457 * by checkpointing.
458 */
459
460struct yaffs_checkpt_obj {
461 int struct_type;
462 u32 obj_id;
463 u32 parent_id;
464 int hdr_chunk;
465 enum yaffs_obj_type variant_type:3;
466 u8 deleted:1;
467 u8 soft_del:1;
468 u8 unlinked:1;
469 u8 fake:1;
470 u8 rename_allowed:1;
471 u8 unlink_allowed:1;
472 u8 serial;
473 int n_data_chunks;
474 u32 size_or_equiv_obj;
475};
476
477/*--------------------- Temporary buffers ----------------
478 *
479 * These are chunk-sized working buffers. Each device has a few.
480 */
481
482struct yaffs_buffer {
483 u8 *buffer;
484 int line; /* track from whence this buffer was allocated */
485 int max_line;
486};
487
488/*----------------- Device ---------------------------------*/
489
490struct yaffs_param {
491 const YCHAR *name;
492
493 /*
494 * Entry parameters are set up very early; Yaffs sets up the rest.
495 * The structure should be zeroed out before use so that unused
496 * and default values are zero.
497 */
498
499 int inband_tags; /* Use inband tags */
500 u32 total_bytes_per_chunk; /* Should be >= 512, does not need to be a power of 2 */
501 int chunks_per_block; /* does not need to be a power of 2 */
502 int spare_bytes_per_chunk; /* spare area size */
503 int start_block; /* Start block we're allowed to use */
504 int end_block; /* End block we're allowed to use */
505 int n_reserved_blocks; /* We want this tuneable so that we can reduce */
506 /* reserved blocks on NOR and RAM. */
507
508 int n_caches; /* If <= 0, then short op caching is disabled, else
509 * the number of short op caches (don't use too many).
510 * 10 to 20 is a good bet.
511 */
512 int use_nand_ecc; /* Flag to decide whether or not to use NANDECC on data (yaffs1) */
513 int no_tags_ecc; /* Flag to decide whether or not to do ECC on packed tags (yaffs2) */
514
515 int is_yaffs2; /* Use yaffs2 mode on this device */
516
517 int empty_lost_n_found; /* Auto-empty lost+found directory on mount */
518
519 int refresh_period; /* How often we should check to do a block refresh */
520
521 /* Checkpoint control. Can be set before or after initialisation */
522 u8 skip_checkpt_rd;
523 u8 skip_checkpt_wr;
524
525 int enable_xattr; /* Enable xattribs */
526
527 /* NAND access functions (Must be set before calling YAFFS) */
528
529 int (*write_chunk_fn) (struct yaffs_dev * dev,
530 int nand_chunk, const u8 * data,
531 const struct yaffs_spare * spare);
532 int (*read_chunk_fn) (struct yaffs_dev * dev,
533 int nand_chunk, u8 * data,
534 struct yaffs_spare * spare);
535 int (*erase_fn) (struct yaffs_dev * dev, int flash_block);
536 int (*initialise_flash_fn) (struct yaffs_dev * dev);
537 int (*deinitialise_flash_fn) (struct yaffs_dev * dev);
538
539#ifdef CONFIG_YAFFS_YAFFS2
540 int (*write_chunk_tags_fn) (struct yaffs_dev * dev,
541 int nand_chunk, const u8 * data,
542 const struct yaffs_ext_tags * tags);
543 int (*read_chunk_tags_fn) (struct yaffs_dev * dev,
544 int nand_chunk, u8 * data,
545 struct yaffs_ext_tags * tags);
546 int (*bad_block_fn) (struct yaffs_dev * dev, int block_no);
547 int (*query_block_fn) (struct yaffs_dev * dev, int block_no,
548 enum yaffs_block_state * state,
549 u32 * seq_number);
550#endif
551
552 /* The remove_obj_fn function must be supplied by OS flavours that
553 * need it.
554 * yaffs direct uses it to implement the faster readdir.
555 * Linux uses it to protect the directory during unlinking.
556 */
557 void (*remove_obj_fn) (struct yaffs_obj * obj);
558
559 /* Callback to mark the superblock dirty */
560 void (*sb_dirty_fn) (struct yaffs_dev * dev);
561
562 /* Callback to control garbage collection. */
563 unsigned (*gc_control) (struct yaffs_dev * dev);
564
565 /* Debug control flags. Don't use unless you know what you're doing */
566 int use_header_file_size; /* Flag to determine if we should use file sizes from the header */
567 int disable_lazy_load; /* Disable lazy loading on this device */
568 int wide_tnodes_disabled; /* Set to disable wide tnodes */
569 int disable_soft_del; /* yaffs 1 only: Set to disable the use of softdeletion. */
570
571 int defered_dir_update; /* Set to defer directory updates */
572
573#ifdef CONFIG_YAFFS_AUTO_UNICODE
574 int auto_unicode;
575#endif
576 int always_check_erased; /* Force chunk erased check always on */
577};
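A hedged sketch of how an MTD glue layer might populate this parameter block for a yaffs2 device; the nandmtd2_* and nandmtd_* hooks are the ones added by yaffs_mtdif2.c and yaffs_mtdif.c later in this patch, and the mtd geometry fields are assumptions about the caller's environment:

static void example_setup_param(struct yaffs_dev *dev, struct mtd_info *mtd)
{
	struct yaffs_param *p = &dev->param;

	memset(p, 0, sizeof(*p));	/* unused/default values must be zero */

	p->total_bytes_per_chunk = mtd->writesize;
	p->chunks_per_block = mtd->erasesize / mtd->writesize;
	p->is_yaffs2 = 1;
	p->n_caches = 10;	/* "10 to 20 is a good bet" per the comment above */

	p->write_chunk_tags_fn = nandmtd2_write_chunk_tags;
	p->read_chunk_tags_fn = nandmtd2_read_chunk_tags;
	p->bad_block_fn = nandmtd2_mark_block_bad;
	p->query_block_fn = nandmtd2_query_block;
	p->erase_fn = nandmtd_erase_block;
	p->initialise_flash_fn = nandmtd_initialise;
}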
578
579struct yaffs_dev {
580 struct yaffs_param param;
581
582 /* Context storage. Holds extra OS specific data for this device */
583
584 void *os_context;
585 void *driver_context;
586
587 struct list_head dev_list;
588
589 /* Runtime parameters. Set up by YAFFS. */
590 int data_bytes_per_chunk;
591
592 /* Non-wide tnode stuff */
593 u16 chunk_grp_bits; /* Number of bits that need to be resolved if
594 * the tnodes are not wide enough.
595 */
596 u16 chunk_grp_size; /* == 2^chunk_grp_bits */
597
598 /* Stuff to support wide tnodes */
599 u32 tnode_width;
600 u32 tnode_mask;
601 u32 tnode_size;
602
603 /* Stuff for figuring out file offset to chunk conversions */
604 u32 chunk_shift; /* Shift value */
605 u32 chunk_div; /* Divisor after shifting: 1 for power-of-2 sizes */
606 u32 chunk_mask; /* Mask to use for power-of-2 case */
607
608 int is_mounted;
609 int read_only;
610 int is_checkpointed;
611
612 /* Stuff to support block offsetting to support start block zero */
613 int internal_start_block;
614 int internal_end_block;
615 int block_offset;
616 int chunk_offset;
617
618 /* Runtime checkpointing stuff */
619 int checkpt_page_seq; /* running sequence number of checkpoint pages */
620 int checkpt_byte_count;
621 int checkpt_byte_offs;
622 u8 *checkpt_buffer;
623 int checkpt_open_write;
624 int blocks_in_checkpt;
625 int checkpt_cur_chunk;
626 int checkpt_cur_block;
627 int checkpt_next_block;
628 int *checkpt_block_list;
629 int checkpt_max_blocks;
630 u32 checkpt_sum;
631 u32 checkpt_xor;
632
633 int checkpoint_blocks_required; /* Number of blocks needed to store current checkpoint set */
634
635 /* Block Info */
636 struct yaffs_block_info *block_info;
637 u8 *chunk_bits; /* bitmap of chunks in use */
638 unsigned block_info_alt:1; /* was allocated using alternative strategy */
639 unsigned chunk_bits_alt:1; /* was allocated using alternative strategy */
640 int chunk_bit_stride; /* Number of bytes of chunk_bits per block.
641 * Must be consistent with chunks_per_block.
642 */
643
644 int n_erased_blocks;
645 int alloc_block; /* Current block being allocated off */
646 u32 alloc_page;
647 int alloc_block_finder; /* Used to search for next allocation block */
648
649 /* Object and Tnode memory management */
650 void *allocator;
651 int n_obj;
652 int n_tnodes;
653
654 int n_hardlinks;
655
656 struct yaffs_obj_bucket obj_bucket[YAFFS_NOBJECT_BUCKETS];
657 u32 bucket_finder;
658
659 int n_free_chunks;
660
661 /* Garbage collection control */
662 u32 *gc_cleanup_list; /* objects to delete at the end of a GC. */
663 u32 n_clean_ups;
664
665 unsigned has_pending_prioritised_gc; /* We think this device might have pending prioritised gcs */
666 unsigned gc_disable;
667 unsigned gc_block_finder;
668 unsigned gc_dirtiest;
669 unsigned gc_pages_in_use;
670 unsigned gc_not_done;
671 unsigned gc_block;
672 unsigned gc_chunk;
673 unsigned gc_skip;
674
675 /* Special directories */
676 struct yaffs_obj *root_dir;
677 struct yaffs_obj *lost_n_found;
678
679 /* Buffer areas for storing data to recover from write failures TODO
680 * u8 buffered_data[YAFFS_CHUNKS_PER_BLOCK][YAFFS_BYTES_PER_CHUNK];
681 * struct yaffs_spare buffered_spare[YAFFS_CHUNKS_PER_BLOCK];
682 */
683
684 int buffered_block; /* Which block is buffered here? */
685 int doing_buffered_block_rewrite;
686
687 struct yaffs_cache *cache;
688 int cache_last_use;
689
690 /* Stuff for background deletion and unlinked files. */
691 struct yaffs_obj *unlinked_dir; /* Directory where unlinked and deleted files live. */
692 struct yaffs_obj *del_dir; /* Directory where deleted objects are sent to disappear. */
693 struct yaffs_obj *unlinked_deletion; /* Current file being background deleted. */
694 int n_deleted_files; /* Count of files awaiting deletion; */
695 int n_unlinked_files; /* Count of unlinked files. */
696 int n_bg_deletions; /* Count of background deletions. */
697
698 /* Temporary buffer management */
699 struct yaffs_buffer temp_buffer[YAFFS_N_TEMP_BUFFERS];
700 int max_temp;
701 int temp_in_use;
702 int unmanaged_buffer_allocs;
703 int unmanaged_buffer_deallocs;
704
705 /* yaffs2 runtime stuff */
706 unsigned seq_number; /* Sequence number of currently allocating block */
707 unsigned oldest_dirty_seq;
708 unsigned oldest_dirty_block;
709
710 /* Block refreshing */
711 int refresh_skip; /* A skip down counter. Refresh happens when this gets to zero. */
712
713 /* Dirty directory handling */
714 struct list_head dirty_dirs; /* List of dirty directories */
715
716 /* Statistics */
717 u32 n_page_writes;
718 u32 n_page_reads;
719 u32 n_erasures;
720 u32 n_erase_failures;
721 u32 n_gc_copies;
722 u32 all_gcs;
723 u32 passive_gc_count;
724 u32 oldest_dirty_gc_count;
725 u32 n_gc_blocks;
726 u32 bg_gcs;
727 u32 n_retired_writes;
728 u32 n_retired_blocks;
729 u32 n_ecc_fixed;
730 u32 n_ecc_unfixed;
731 u32 n_tags_ecc_fixed;
732 u32 n_tags_ecc_unfixed;
733 u32 n_deletions;
734 u32 n_unmarked_deletions;
735 u32 refresh_count;
736 u32 cache_hits;
737
738};
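The chunk_shift/chunk_div/chunk_mask trio above supports file-offset to chunk conversion for both power-of-2 and odd chunk sizes. A sketch of the intended arithmetic (illustrative only, not the in-tree routine):

static int example_addr_to_chunk(struct yaffs_dev *dev, loff_t addr,
				 u32 *offs_out)
{
	int chunk;

	if (dev->chunk_div == 1) {
		/* Power-of-2 chunk size: pure shift and mask */
		chunk = (int)(addr >> dev->chunk_shift);
		*offs_out = (u32)(addr & dev->chunk_mask);
	} else {
		/* Odd size: shift first, then divide by the residual factor */
		loff_t chunk_bytes =
		    (loff_t)dev->chunk_div << dev->chunk_shift;

		chunk = (int)((addr >> dev->chunk_shift) / dev->chunk_div);
		*offs_out = (u32)(addr - chunk * chunk_bytes);
	}
	return chunk;
}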
739
740/* The CheckpointDevice structure holds the device information that changes at runtime and
741 * must be preserved over unmount/mount cycles.
742 */
743struct yaffs_checkpt_dev {
744 int struct_type;
745 int n_erased_blocks;
746 int alloc_block; /* Current block being allocated off */
747 u32 alloc_page;
748 int n_free_chunks;
749
750 int n_deleted_files; /* Count of files awaiting deletion; */
751 int n_unlinked_files; /* Count of unlinked files. */
752 int n_bg_deletions; /* Count of background deletions. */
753
754 /* yaffs2 runtime stuff */
755 unsigned seq_number; /* Sequence number of currently allocating block */
756
757};
758
759struct yaffs_checkpt_validity {
760 int struct_type;
761 u32 magic;
762 u32 version;
763 u32 head;
764};
765
766struct yaffs_shadow_fixer {
767 int obj_id;
768 int shadowed_id;
769 struct yaffs_shadow_fixer *next;
770};
771
772/* Structure for doing xattr modifications */
773struct yaffs_xattr_mod {
774 int set; /* If 0 then this is a deletion */
775 const YCHAR *name;
776 const void *data;
777 int size;
778 int flags;
779 int result;
780};
781
782/*----------------------- YAFFS Functions -----------------------*/
783
784int yaffs_guts_initialise(struct yaffs_dev *dev);
785void yaffs_deinitialise(struct yaffs_dev *dev);
786
787int yaffs_get_n_free_chunks(struct yaffs_dev *dev);
788
789int yaffs_rename_obj(struct yaffs_obj *old_dir, const YCHAR * old_name,
790 struct yaffs_obj *new_dir, const YCHAR * new_name);
791
792int yaffs_unlinker(struct yaffs_obj *dir, const YCHAR * name);
793int yaffs_del_obj(struct yaffs_obj *obj);
794
795int yaffs_get_obj_name(struct yaffs_obj *obj, YCHAR * name, int buffer_size);
796int yaffs_get_obj_length(struct yaffs_obj *obj);
797int yaffs_get_obj_inode(struct yaffs_obj *obj);
798unsigned yaffs_get_obj_type(struct yaffs_obj *obj);
799int yaffs_get_obj_link_count(struct yaffs_obj *obj);
800
801/* File operations */
802int yaffs_file_rd(struct yaffs_obj *obj, u8 * buffer, loff_t offset,
803 int n_bytes);
804int yaffs_wr_file(struct yaffs_obj *obj, const u8 * buffer, loff_t offset,
805 int n_bytes, int write_through);
806int yaffs_resize_file(struct yaffs_obj *obj, loff_t new_size);
807
808struct yaffs_obj *yaffs_create_file(struct yaffs_obj *parent,
809 const YCHAR * name, u32 mode, u32 uid,
810 u32 gid);
811
812int yaffs_flush_file(struct yaffs_obj *obj, int update_time, int data_sync);
813
814/* Flushing and checkpointing */
815void yaffs_flush_whole_cache(struct yaffs_dev *dev);
816
817int yaffs_checkpoint_save(struct yaffs_dev *dev);
818int yaffs_checkpoint_restore(struct yaffs_dev *dev);
819
820/* Directory operations */
821struct yaffs_obj *yaffs_create_dir(struct yaffs_obj *parent, const YCHAR * name,
822 u32 mode, u32 uid, u32 gid);
823struct yaffs_obj *yaffs_find_by_name(struct yaffs_obj *the_dir,
824 const YCHAR * name);
825struct yaffs_obj *yaffs_find_by_number(struct yaffs_dev *dev, u32 number);
826
827/* Link operations */
828struct yaffs_obj *yaffs_link_obj(struct yaffs_obj *parent, const YCHAR * name,
829 struct yaffs_obj *equiv_obj);
830
831struct yaffs_obj *yaffs_get_equivalent_obj(struct yaffs_obj *obj);
832
833/* Symlink operations */
834struct yaffs_obj *yaffs_create_symlink(struct yaffs_obj *parent,
835 const YCHAR * name, u32 mode, u32 uid,
836 u32 gid, const YCHAR * alias);
837YCHAR *yaffs_get_symlink_alias(struct yaffs_obj *obj);
838
839/* Special inodes (fifos, sockets and devices) */
840struct yaffs_obj *yaffs_create_special(struct yaffs_obj *parent,
841 const YCHAR * name, u32 mode, u32 uid,
842 u32 gid, u32 rdev);
843
844int yaffs_set_xattrib(struct yaffs_obj *obj, const YCHAR * name,
845 const void *value, int size, int flags);
846int yaffs_get_xattrib(struct yaffs_obj *obj, const YCHAR * name, void *value,
847 int size);
848int yaffs_list_xattrib(struct yaffs_obj *obj, char *buffer, int size);
849int yaffs_remove_xattrib(struct yaffs_obj *obj, const YCHAR * name);
850
851/* Special directories */
852struct yaffs_obj *yaffs_root(struct yaffs_dev *dev);
853struct yaffs_obj *yaffs_lost_n_found(struct yaffs_dev *dev);
854
855void yaffs_handle_defered_free(struct yaffs_obj *obj);
856
857void yaffs_update_dirty_dirs(struct yaffs_dev *dev);
858
859int yaffs_bg_gc(struct yaffs_dev *dev, unsigned urgency);
860
861/* Debug dump */
862int yaffs_dump_obj(struct yaffs_obj *obj);
863
864void yaffs_guts_test(struct yaffs_dev *dev);
865
866/* A few useful functions to be used within the core files*/
867void yaffs_chunk_del(struct yaffs_dev *dev, int chunk_id, int mark_flash,
868 int lyn);
869int yaffs_check_ff(u8 * buffer, int n_bytes);
870void yaffs_handle_chunk_error(struct yaffs_dev *dev,
871 struct yaffs_block_info *bi);
872
873u8 *yaffs_get_temp_buffer(struct yaffs_dev *dev, int line_no);
874void yaffs_release_temp_buffer(struct yaffs_dev *dev, u8 * buffer, int line_no);
875
876struct yaffs_obj *yaffs_find_or_create_by_number(struct yaffs_dev *dev,
877 int number,
878 enum yaffs_obj_type type);
879int yaffs_put_chunk_in_file(struct yaffs_obj *in, int inode_chunk,
880 int nand_chunk, int in_scan);
881void yaffs_set_obj_name(struct yaffs_obj *obj, const YCHAR * name);
882void yaffs_set_obj_name_from_oh(struct yaffs_obj *obj,
883 const struct yaffs_obj_hdr *oh);
884void yaffs_add_obj_to_dir(struct yaffs_obj *directory, struct yaffs_obj *obj);
885YCHAR *yaffs_clone_str(const YCHAR * str);
886void yaffs_link_fixup(struct yaffs_dev *dev, struct yaffs_obj *hard_list);
887void yaffs_block_became_dirty(struct yaffs_dev *dev, int block_no);
888int yaffs_update_oh(struct yaffs_obj *in, const YCHAR * name,
889 int force, int is_shrink, int shadows,
890 struct yaffs_xattr_mod *xop);
891void yaffs_handle_shadowed_obj(struct yaffs_dev *dev, int obj_id,
892 int backward_scanning);
893int yaffs_check_alloc_available(struct yaffs_dev *dev, int n_chunks);
894struct yaffs_tnode *yaffs_get_tnode(struct yaffs_dev *dev);
895struct yaffs_tnode *yaffs_add_find_tnode_0(struct yaffs_dev *dev,
896 struct yaffs_file_var *file_struct,
897 u32 chunk_id,
898 struct yaffs_tnode *passed_tn);
899
900int yaffs_do_file_wr(struct yaffs_obj *in, const u8 * buffer, loff_t offset,
901 int n_bytes, int write_through);
902void yaffs_resize_file_down(struct yaffs_obj *obj, loff_t new_size);
903void yaffs_skip_rest_of_block(struct yaffs_dev *dev);
904
905int yaffs_count_free_chunks(struct yaffs_dev *dev);
906
907struct yaffs_tnode *yaffs_find_tnode_0(struct yaffs_dev *dev,
908 struct yaffs_file_var *file_struct,
909 u32 chunk_id);
910
911u32 yaffs_get_group_base(struct yaffs_dev *dev, struct yaffs_tnode *tn,
912 unsigned pos);
913
914int yaffs_is_non_empty_dir(struct yaffs_obj *obj);
915#endif
diff --git a/fs/yaffs2/yaffs_linux.h b/fs/yaffs2/yaffs_linux.h
new file mode 100644
index 00000000000..3b508cbc4e8
--- /dev/null
+++ b/fs/yaffs2/yaffs_linux.h
@@ -0,0 +1,41 @@
1/*
2 * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_LINUX_H__
17#define __YAFFS_LINUX_H__
18
19#include "yportenv.h"
20
21struct yaffs_linux_context {
22 struct list_head context_list; /* List of these we have mounted */
23 struct yaffs_dev *dev;
24 struct super_block *super;
25 struct task_struct *bg_thread; /* Background thread for this device */
26 int bg_running;
27 struct mutex gross_lock; /* Gross locking mutex*/
28 u8 *spare_buffer; /* For mtdif2 use. Don't know the size of the buffer
29 * at compile time so we have to allocate it.
30 */
31 struct list_head search_contexts;
32 void (*put_super_fn) (struct super_block * sb);
33
34 struct task_struct *readdir_process;
35 unsigned mount_id;
36};
37
38#define yaffs_dev_to_lc(dev) ((struct yaffs_linux_context *)((dev)->os_context))
39#define yaffs_dev_to_mtd(dev) ((struct mtd_info *)((dev)->driver_context))
40
41#endif
diff --git a/fs/yaffs2/yaffs_mtdif.c b/fs/yaffs2/yaffs_mtdif.c
new file mode 100644
index 00000000000..7cf53b3d91b
--- /dev/null
+++ b/fs/yaffs2/yaffs_mtdif.c
@@ -0,0 +1,54 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yportenv.h"
15
16#include "yaffs_mtdif.h"
17
18#include "linux/mtd/mtd.h"
19#include "linux/types.h"
20#include "linux/time.h"
21#include "linux/mtd/nand.h"
22
23#include "yaffs_linux.h"
24
25int nandmtd_erase_block(struct yaffs_dev *dev, int block_no)
26{
27 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
28 u32 addr =
29 ((loff_t) block_no) * dev->param.total_bytes_per_chunk
30 * dev->param.chunks_per_block;
31 struct erase_info ei;
32
33 int retval = 0;
34
35 ei.mtd = mtd;
36 ei.addr = addr;
37 ei.len = dev->param.total_bytes_per_chunk * dev->param.chunks_per_block;
38 ei.time = 1000;
39 ei.retries = 2;
40 ei.callback = NULL;
41 ei.priv = (u_long) dev;
42
43 retval = mtd->erase(mtd, &ei);
44
45 if (retval == 0)
46 return YAFFS_OK;
47 else
48 return YAFFS_FAIL;
49}
50
51int nandmtd_initialise(struct yaffs_dev *dev)
52{
53 return YAFFS_OK;
54}
diff --git a/fs/yaffs2/yaffs_mtdif.h b/fs/yaffs2/yaffs_mtdif.h
new file mode 100644
index 00000000000..666507417fe
--- /dev/null
+++ b/fs/yaffs2/yaffs_mtdif.h
@@ -0,0 +1,23 @@
1/*
2 * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_MTDIF_H__
17#define __YAFFS_MTDIF_H__
18
19#include "yaffs_guts.h"
20
21int nandmtd_erase_block(struct yaffs_dev *dev, int block_no);
22int nandmtd_initialise(struct yaffs_dev *dev);
23#endif
diff --git a/fs/yaffs2/yaffs_mtdif1.c b/fs/yaffs2/yaffs_mtdif1.c
new file mode 100644
index 00000000000..51083695eb3
--- /dev/null
+++ b/fs/yaffs2/yaffs_mtdif1.c
@@ -0,0 +1,330 @@
1/*
2 * YAFFS: Yet another FFS. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14/*
15 * This module provides the interface between yaffs_nand.c and the
16 * MTD API. This version is used when the MTD interface supports the
17 * 'mtd_oob_ops' style calls to read_oob and write_oob, circa 2.6.17,
18 * and we have a small-page NAND device.
19 *
20 * These functions are invoked via function pointers in yaffs_nand.c.
21 * This replaces functionality provided by functions in yaffs_mtdif.c
22 * and the yaffs_tags compatibility functions in yaffs_tagscompat.c that are
23 * called in yaffs_mtdif.c when the function pointers are NULL.
24 * We assume the MTD layer is performing ECC (use_nand_ecc is true).
25 */
26
27#include "yportenv.h"
28#include "yaffs_trace.h"
29#include "yaffs_guts.h"
30#include "yaffs_packedtags1.h"
31#include "yaffs_tagscompat.h" /* for yaffs_calc_tags_ecc */
32#include "yaffs_linux.h"
33
34#include "linux/kernel.h"
35#include "linux/version.h"
36#include "linux/types.h"
37#include "linux/mtd/mtd.h"
38
39#ifndef CONFIG_YAFFS_9BYTE_TAGS
40# define YTAG1_SIZE 8
41#else
42# define YTAG1_SIZE 9
43#endif
44
45/* Write a chunk (page) of data to NAND.
46 *
47 * Caller always provides ExtendedTags data which are converted to a more
48 * compact (packed) form for storage in NAND. A mini-ECC runs over the
49 * contents of the tags meta-data and is used to validate the tags when read.
50 *
51 * - Pack ExtendedTags to packed_tags1 form
52 * - Compute mini-ECC for packed_tags1
53 * - Write data and packed tags to NAND.
54 *
55 * Note: Due to the use of the packed_tags1 meta-data which does not include
56 * a full sequence number (as found in the larger packed_tags2 form) it is
57 * necessary for Yaffs to re-write a chunk/page (just once) to mark it as
58 * discarded and dirty. This is not ideal: newer NAND parts are supposed
59 * to be written just once. When Yaffs performs this operation, this
60 * function is called with a NULL data pointer -- calling MTD write_oob
61 * without data is valid usage (2.6.17).
62 *
63 * Any underlying MTD error results in YAFFS_FAIL.
64 * Returns YAFFS_OK or YAFFS_FAIL.
65 */
66int nandmtd1_write_chunk_tags(struct yaffs_dev *dev,
67 int nand_chunk, const u8 * data,
68 const struct yaffs_ext_tags *etags)
69{
70 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
71 int chunk_bytes = dev->data_bytes_per_chunk;
72 loff_t addr = ((loff_t) nand_chunk) * chunk_bytes;
73 struct mtd_oob_ops ops;
74 struct yaffs_packed_tags1 pt1;
75 int retval;
76
77 /* we assume that packed_tags1 and struct yaffs_tags are compatible */
78 compile_time_assertion(sizeof(struct yaffs_packed_tags1) == 12);
79 compile_time_assertion(sizeof(struct yaffs_tags) == 8);
80
81 yaffs_pack_tags1(&pt1, etags);
82 yaffs_calc_tags_ecc((struct yaffs_tags *)&pt1);
83
84 /* When deleting a chunk, the upper layer provides only skeletal
85 * etags, with just is_deleted set. However, we need to update the
86 * tags, not erase them completely. So we use the NAND write property
87 * that only zeroed-bits stick and set tag bytes to all-ones and
88 * zero just the (not) deleted bit.
89 */
90#ifndef CONFIG_YAFFS_9BYTE_TAGS
91 if (etags->is_deleted) {
92 memset(&pt1, 0xff, 8);
93 /* clear delete status bit to indicate deleted */
94 pt1.deleted = 0;
95 }
96#else
97 ((u8 *) & pt1)[8] = 0xff;
98 if (etags->is_deleted) {
99 memset(&pt1, 0xff, 8);
100 /* zero page_status byte to indicate deleted */
101 ((u8 *) & pt1)[8] = 0;
102 }
103#endif
104
105 memset(&ops, 0, sizeof(ops));
106 ops.mode = MTD_OOB_AUTO;
107 ops.len = (data) ? chunk_bytes : 0;
108 ops.ooblen = YTAG1_SIZE;
109 ops.datbuf = (u8 *) data;
110 ops.oobbuf = (u8 *) & pt1;
111
112 retval = mtd->write_oob(mtd, addr, &ops);
113 if (retval) {
114 yaffs_trace(YAFFS_TRACE_MTD,
115 "write_oob failed, chunk %d, mtd error %d",
116 nand_chunk, retval);
117 }
118 return retval ? YAFFS_FAIL : YAFFS_OK;
119}
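Given the skeletal-tags convention described above, a caller could mark a chunk deleted with something like the following (illustrative only; the NULL data pointer relies on the write_oob-without-data behaviour noted in the header comment):

static int example_mark_chunk_deleted(struct yaffs_dev *dev, int nand_chunk)
{
	struct yaffs_ext_tags etags;

	memset(&etags, 0, sizeof(etags));
	etags.is_deleted = 1;	/* only the deleted bit gets zeroed on NAND */

	return nandmtd1_write_chunk_tags(dev, nand_chunk, NULL, &etags);
}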
120
121/* Return with empty ExtendedTags but add ecc_result.
122 */
123static int rettags(struct yaffs_ext_tags *etags, int ecc_result, int retval)
124{
125 if (etags) {
126 memset(etags, 0, sizeof(*etags));
127 etags->ecc_result = ecc_result;
128 }
129 return retval;
130}
131
132/* Read a chunk (page) from NAND.
133 *
134 * Caller expects ExtendedTags data to be usable even on error; that is,
135 * all members except ecc_result and block_bad are zeroed.
136 *
137 * - Check ECC results for data (if applicable)
138 * - Check for blank/erased block (return empty ExtendedTags if blank)
139 * - Check the packed_tags1 mini-ECC (correct if necessary/possible)
140 * - Convert packed_tags1 to ExtendedTags
141 * - Update ecc_result and block_bad members to reflect state.
142 *
143 * Returns YAFFS_OK or YAFFS_FAIL.
144 */
145int nandmtd1_read_chunk_tags(struct yaffs_dev *dev,
146 int nand_chunk, u8 * data,
147 struct yaffs_ext_tags *etags)
148{
149 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
150 int chunk_bytes = dev->data_bytes_per_chunk;
151 loff_t addr = ((loff_t) nand_chunk) * chunk_bytes;
152 int eccres = YAFFS_ECC_RESULT_NO_ERROR;
153 struct mtd_oob_ops ops;
154 struct yaffs_packed_tags1 pt1;
155 int retval;
156 int deleted;
157
158 memset(&ops, 0, sizeof(ops));
159 ops.mode = MTD_OOB_AUTO;
160 ops.len = (data) ? chunk_bytes : 0;
161 ops.ooblen = YTAG1_SIZE;
162 ops.datbuf = data;
163 ops.oobbuf = (u8 *) & pt1;
164
165 /* Read page and oob using MTD.
166 * Check status and determine ECC result.
167 */
168 retval = mtd->read_oob(mtd, addr, &ops);
169 if (retval) {
170 yaffs_trace(YAFFS_TRACE_MTD,
171 "read_oob failed, chunk %d, mtd error %d",
172 nand_chunk, retval);
173 }
174
175 switch (retval) {
176 case 0:
177 /* no error */
178 break;
179
180 case -EUCLEAN:
181 /* MTD's ECC fixed the data */
182 eccres = YAFFS_ECC_RESULT_FIXED;
183 dev->n_ecc_fixed++;
184 break;
185
186 case -EBADMSG:
187 /* MTD's ECC could not fix the data */
188 dev->n_ecc_unfixed++;
189 /* fall into... */
190 default:
191 rettags(etags, YAFFS_ECC_RESULT_UNFIXED, 0);
192 etags->block_bad = (mtd->block_isbad) (mtd, addr);
193 return YAFFS_FAIL;
194 }
195
196 /* Check for a blank/erased chunk.
197 */
198 if (yaffs_check_ff((u8 *) & pt1, 8)) {
199 /* when blank, upper layers want ecc_result to be <= NO_ERROR */
200 return rettags(etags, YAFFS_ECC_RESULT_NO_ERROR, YAFFS_OK);
201 }
202#ifndef CONFIG_YAFFS_9BYTE_TAGS
203 /* Read deleted status (bit) then return it to its non-deleted
204 * state before performing tags mini-ECC check. pt1.deleted is
205 * inverted.
206 */
207 deleted = !pt1.deleted;
208 pt1.deleted = 1;
209#else
210 deleted = (yaffs_count_bits(((u8 *) & pt1)[8]) < 7);
211#endif
212
213 /* Check the packed tags mini-ECC and correct if necessary/possible.
214 */
215 retval = yaffs_check_tags_ecc((struct yaffs_tags *)&pt1);
216 switch (retval) {
217 case 0:
218 /* no tags error, use MTD result */
219 break;
220 case 1:
221 /* recovered tags-ECC error */
222 dev->n_tags_ecc_fixed++;
223 if (eccres == YAFFS_ECC_RESULT_NO_ERROR)
224 eccres = YAFFS_ECC_RESULT_FIXED;
225 break;
226 default:
227 /* unrecovered tags-ECC error */
228 dev->n_tags_ecc_unfixed++;
229 return rettags(etags, YAFFS_ECC_RESULT_UNFIXED, YAFFS_FAIL);
230 }
231
232 /* Unpack the tags to extended form and set ECC result.
233 * [set should_be_ff just to keep yaffs_unpack_tags1 happy]
234 */
235 pt1.should_be_ff = 0xFFFFFFFF;
236 yaffs_unpack_tags1(etags, &pt1);
237 etags->ecc_result = eccres;
238
239 /* Set deleted state */
240 etags->is_deleted = deleted;
241 return YAFFS_OK;
242}
243
244/* Mark a block bad.
245 *
246 * This is a persistent state.
247 * Use of this function should be rare.
248 *
249 * Returns YAFFS_OK or YAFFS_FAIL.
250 */
251int nandmtd1_mark_block_bad(struct yaffs_dev *dev, int block_no)
252{
253 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
254 int blocksize = dev->param.chunks_per_block * dev->data_bytes_per_chunk;
255 int retval;
256
257 yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
258 "marking block %d bad", block_no);
259
260 retval = mtd->block_markbad(mtd, (loff_t) blocksize * block_no);
261 return (retval) ? YAFFS_FAIL : YAFFS_OK;
262}
263
264/* Check any MTD prerequisites.
265 *
266 * Returns YAFFS_OK or YAFFS_FAIL.
267 */
268static int nandmtd1_test_prerequists(struct mtd_info *mtd)
269{
270 /* 2.6.18 has mtd->ecclayout->oobavail */
271 /* 2.6.21 has mtd->ecclayout->oobavail and mtd->oobavail */
272 int oobavail = mtd->ecclayout->oobavail;
273
274 if (oobavail < YTAG1_SIZE) {
275 yaffs_trace(YAFFS_TRACE_ERROR,
276 "mtd device has only %d bytes for tags, need %d",
277 oobavail, YTAG1_SIZE);
278 return YAFFS_FAIL;
279 }
280 return YAFFS_OK;
281}
282
283/* Query for the current state of a specific block.
284 *
285 * Examine the tags of the first chunk of the block and return the state:
286 * - YAFFS_BLOCK_STATE_DEAD, the block is marked bad
287 * - YAFFS_BLOCK_STATE_NEEDS_SCANNING, the block is in use
288 * - YAFFS_BLOCK_STATE_EMPTY, the block is clean
289 *
290 * Returns YAFFS_OK, or YAFFS_FAIL if the MTD prerequisites check fails.
291 */
292int nandmtd1_query_block(struct yaffs_dev *dev, int block_no,
293 enum yaffs_block_state *state_ptr, u32 * seq_ptr)
294{
295 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
296 int chunk_num = block_no * dev->param.chunks_per_block;
297 loff_t addr = (loff_t) chunk_num * dev->data_bytes_per_chunk;
298 struct yaffs_ext_tags etags;
299 int state = YAFFS_BLOCK_STATE_DEAD;
300 int seqnum = 0;
301 int retval;
302
303 /* We don't yet have a good place to test for MTD config prerequisites.
304 * Do it here as we are called during the initial scan.
305 */
306 if (nandmtd1_test_prerequists(mtd) != YAFFS_OK)
307 return YAFFS_FAIL;
308
309 retval = nandmtd1_read_chunk_tags(dev, chunk_num, NULL, &etags);
310 etags.block_bad = (mtd->block_isbad) (mtd, addr);
311 if (etags.block_bad) {
312 yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
313 "block %d is marked bad", block_no);
314 state = YAFFS_BLOCK_STATE_DEAD;
315 } else if (etags.ecc_result != YAFFS_ECC_RESULT_NO_ERROR) {
316 /* bad tags, need to look more closely */
317 state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
318 } else if (etags.chunk_used) {
319 state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
320 seqnum = etags.seq_number;
321 } else {
322 state = YAFFS_BLOCK_STATE_EMPTY;
323 }
324
325 *state_ptr = state;
326 *seq_ptr = seqnum;
327
328 /* query always succeeds */
329 return YAFFS_OK;
330}
diff --git a/fs/yaffs2/yaffs_mtdif1.h b/fs/yaffs2/yaffs_mtdif1.h
new file mode 100644
index 00000000000..07ce4524f0f
--- /dev/null
+++ b/fs/yaffs2/yaffs_mtdif1.h
@@ -0,0 +1,29 @@
1/*
2 * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License version 2.1 as
9 * published by the Free Software Foundation.
10 *
11 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
12 */
13
14#ifndef __YAFFS_MTDIF1_H__
15#define __YAFFS_MTDIF1_H__
16
17int nandmtd1_write_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
18 const u8 * data,
19 const struct yaffs_ext_tags *tags);
20
21int nandmtd1_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
22 u8 * data, struct yaffs_ext_tags *tags);
23
24int nandmtd1_mark_block_bad(struct yaffs_dev *dev, int block_no);
25
26int nandmtd1_query_block(struct yaffs_dev *dev, int block_no,
27 enum yaffs_block_state *state, u32 * seq_number);
28
29#endif
diff --git a/fs/yaffs2/yaffs_mtdif2.c b/fs/yaffs2/yaffs_mtdif2.c
new file mode 100644
index 00000000000..d1643df2c38
--- /dev/null
+++ b/fs/yaffs2/yaffs_mtdif2.c
@@ -0,0 +1,225 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14/* mtd interface for YAFFS2 */
15
16#include "yportenv.h"
17#include "yaffs_trace.h"
18
19#include "yaffs_mtdif2.h"
20
21#include "linux/mtd/mtd.h"
22#include "linux/types.h"
23#include "linux/time.h"
24
25#include "yaffs_packedtags2.h"
26
27#include "yaffs_linux.h"
28
29/* NB For use with inband tags....
30 * We assume that the data buffer is of size total_bytes_per_chunk so that we can also
31 * use it to load the tags.
32 */
33int nandmtd2_write_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
34 const u8 * data,
35 const struct yaffs_ext_tags *tags)
36{
37 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
38 struct mtd_oob_ops ops;
39 int retval = 0;
40
41 loff_t addr;
42
43 struct yaffs_packed_tags2 pt;
44
45 int packed_tags_size =
46 dev->param.no_tags_ecc ? sizeof(pt.t) : sizeof(pt);
47 void *packed_tags_ptr =
48 dev->param.no_tags_ecc ? (void *)&pt.t : (void *)&pt;
49
50 yaffs_trace(YAFFS_TRACE_MTD,
51 "nandmtd2_write_chunk_tags chunk %d data %p tags %p",
52 nand_chunk, data, tags);
53
54 addr = ((loff_t) nand_chunk) * dev->param.total_bytes_per_chunk;
55
56 /* For yaffs2 writing there must be both data and tags.
57 * If we're using inband tags, then the tags are stuffed into
58 * the end of the data buffer.
59 */
60 if (!data || !tags)
61 BUG();
62 else if (dev->param.inband_tags) {
63 struct yaffs_packed_tags2_tags_only *pt2tp;
64 pt2tp =
65 (struct yaffs_packed_tags2_tags_only *)(data +
66 dev->
67 data_bytes_per_chunk);
68 yaffs_pack_tags2_tags_only(pt2tp, tags);
69 } else {
70 yaffs_pack_tags2(&pt, tags, !dev->param.no_tags_ecc);
71 }
72
73 ops.mode = MTD_OOB_AUTO;
74 ops.ooblen = (dev->param.inband_tags) ? 0 : packed_tags_size;
75 ops.len = dev->param.total_bytes_per_chunk;
76 ops.ooboffs = 0;
77 ops.datbuf = (u8 *) data;
78 ops.oobbuf = (dev->param.inband_tags) ? NULL : packed_tags_ptr;
79 retval = mtd->write_oob(mtd, addr, &ops);
80
81 if (retval == 0)
82 return YAFFS_OK;
83 else
84 return YAFFS_FAIL;
85}
86
87int nandmtd2_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
88 u8 * data, struct yaffs_ext_tags *tags)
89{
90 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
91 struct mtd_oob_ops ops;
92
93 size_t dummy;
94 int retval = 0;
95 int local_data = 0;
96
97 loff_t addr = ((loff_t) nand_chunk) * dev->param.total_bytes_per_chunk;
98
99 struct yaffs_packed_tags2 pt;
100
101 int packed_tags_size =
102 dev->param.no_tags_ecc ? sizeof(pt.t) : sizeof(pt);
103 void *packed_tags_ptr =
104 dev->param.no_tags_ecc ? (void *)&pt.t : (void *)&pt;
105
106 yaffs_trace(YAFFS_TRACE_MTD,
107 "nandmtd2_read_chunk_tags chunk %d data %p tags %p",
108 nand_chunk, data, tags);
109
110 if (dev->param.inband_tags) {
111
112 if (!data) {
113 local_data = 1;
114 data = yaffs_get_temp_buffer(dev, __LINE__);
115 }
116
117 }
118
119 if (dev->param.inband_tags || (data && !tags))
120 retval = mtd->read(mtd, addr, dev->param.total_bytes_per_chunk,
121 &dummy, data);
122 else if (tags) {
123 ops.mode = MTD_OOB_AUTO;
124 ops.ooblen = packed_tags_size;
125 ops.len = data ? dev->data_bytes_per_chunk : packed_tags_size;
126 ops.ooboffs = 0;
127 ops.datbuf = data;
128 ops.oobbuf = yaffs_dev_to_lc(dev)->spare_buffer;
129 retval = mtd->read_oob(mtd, addr, &ops);
130 }
131
132 if (dev->param.inband_tags) {
133 if (tags) {
134 struct yaffs_packed_tags2_tags_only *pt2tp;
135 pt2tp =
136 (struct yaffs_packed_tags2_tags_only *)&data[dev->
137 data_bytes_per_chunk];
138 yaffs_unpack_tags2_tags_only(tags, pt2tp);
139 }
140 } else {
141 if (tags) {
142 memcpy(packed_tags_ptr,
143 yaffs_dev_to_lc(dev)->spare_buffer,
144 packed_tags_size);
145 yaffs_unpack_tags2(tags, &pt, !dev->param.no_tags_ecc);
146 }
147 }
148
149 if (local_data)
150 yaffs_release_temp_buffer(dev, data, __LINE__);
151
152 if (tags && retval == -EBADMSG
153 && tags->ecc_result == YAFFS_ECC_RESULT_NO_ERROR) {
154 tags->ecc_result = YAFFS_ECC_RESULT_UNFIXED;
155 dev->n_ecc_unfixed++;
156 }
157 if (tags && retval == -EUCLEAN
158 && tags->ecc_result == YAFFS_ECC_RESULT_NO_ERROR) {
159 tags->ecc_result = YAFFS_ECC_RESULT_FIXED;
160 dev->n_ecc_fixed++;
161 }
162 if (retval == 0)
163 return YAFFS_OK;
164 else
165 return YAFFS_FAIL;
166}
167
168int nandmtd2_mark_block_bad(struct yaffs_dev *dev, int block_no)
169{
170 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
171 int retval;
172 yaffs_trace(YAFFS_TRACE_MTD,
173 "nandmtd2_mark_block_bad %d", block_no);
174
175 retval =
176 mtd->block_markbad(mtd,
177 block_no * dev->param.chunks_per_block *
178 dev->param.total_bytes_per_chunk);
179
180 if (retval == 0)
181 return YAFFS_OK;
182 else
183 return YAFFS_FAIL;
184
185}
186
187int nandmtd2_query_block(struct yaffs_dev *dev, int block_no,
188 enum yaffs_block_state *state, u32 * seq_number)
189{
190 struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
191 int retval;
192
193 yaffs_trace(YAFFS_TRACE_MTD, "nandmtd2_query_block %d", block_no);
194 retval =
195 mtd->block_isbad(mtd,
196 block_no * dev->param.chunks_per_block *
197 dev->param.total_bytes_per_chunk);
198
199 if (retval) {
200 yaffs_trace(YAFFS_TRACE_MTD, "block is bad");
201
202 *state = YAFFS_BLOCK_STATE_DEAD;
203 *seq_number = 0;
204 } else {
205 struct yaffs_ext_tags t;
206 nandmtd2_read_chunk_tags(dev, block_no *
207 dev->param.chunks_per_block, NULL, &t);
208
209 if (t.chunk_used) {
210 *seq_number = t.seq_number;
211 *state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
212 } else {
213 *seq_number = 0;
214 *state = YAFFS_BLOCK_STATE_EMPTY;
215 }
216 }
217 yaffs_trace(YAFFS_TRACE_MTD,
218 "block is bad seq %d state %d", *seq_number, *state);
219
220 if (retval == 0)
221 return YAFFS_OK;
222 else
223 return YAFFS_FAIL;
224}
225
diff --git a/fs/yaffs2/yaffs_mtdif2.h b/fs/yaffs2/yaffs_mtdif2.h
new file mode 100644
index 00000000000..d82112610d0
--- /dev/null
+++ b/fs/yaffs2/yaffs_mtdif2.h
@@ -0,0 +1,29 @@
1/*
2 * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_MTDIF2_H__
17#define __YAFFS_MTDIF2_H__
18
19#include "yaffs_guts.h"
20int nandmtd2_write_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
21 const u8 * data,
22 const struct yaffs_ext_tags *tags);
23int nandmtd2_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
24 u8 * data, struct yaffs_ext_tags *tags);
25int nandmtd2_mark_block_bad(struct yaffs_dev *dev, int block_no);
26int nandmtd2_query_block(struct yaffs_dev *dev, int block_no,
27 enum yaffs_block_state *state, u32 * seq_number);
28
29#endif
diff --git a/fs/yaffs2/yaffs_nameval.c b/fs/yaffs2/yaffs_nameval.c
new file mode 100644
index 00000000000..daa36f989d3
--- /dev/null
+++ b/fs/yaffs2/yaffs_nameval.c
@@ -0,0 +1,201 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14/*
15 * This simple implementation of a name-value store assumes a small number of values and fits
16 * into a small finite buffer.
17 *
18 * Each attribute is stored as a record:
19 * sizeof(int) bytes record size.
20 * strnlen+1 bytes name null terminated.
21 * nbytes value.
22 * ----------
23 * total size stored in record size
24 *
25 * This code has not been tested with unicode yet.
26 */
27
28#include "yaffs_nameval.h"
29
30#include "yportenv.h"
31
32static int nval_find(const char *xb, int xb_size, const YCHAR * name,
33 int *exist_size)
34{
35 int pos = 0;
36 int size;
37
38 memcpy(&size, xb, sizeof(int));
39 while (size > 0 && (size < xb_size) && (pos + size < xb_size)) {
40 if (strncmp
41 ((YCHAR *) (xb + pos + sizeof(int)), name, size) == 0) {
42 if (exist_size)
43 *exist_size = size;
44 return pos;
45 }
46 pos += size;
47 if (pos < xb_size - sizeof(int))
48 memcpy(&size, xb + pos, sizeof(int));
49 else
50 size = 0;
51 }
52 if (exist_size)
53 *exist_size = 0;
54 return -1;
55}
56
57static int nval_used(const char *xb, int xb_size)
58{
59 int pos = 0;
60 int size;
61
62 memcpy(&size, xb + pos, sizeof(int));
63 while (size > 0 && (size < xb_size) && (pos + size < xb_size)) {
64 pos += size;
65 if (pos < xb_size - sizeof(int))
66 memcpy(&size, xb + pos, sizeof(int));
67 else
68 size = 0;
69 }
70 return pos;
71}
72
73int nval_del(char *xb, int xb_size, const YCHAR * name)
74{
75 int pos = nval_find(xb, xb_size, name, NULL);
76 int size;
77
78 if (pos >= 0 && pos < xb_size) {
79 /* Find size, shift rest over this record, then zero out the rest of buffer */
80 memcpy(&size, xb + pos, sizeof(int));
81 memmove(xb + pos, xb + pos + size, xb_size - (pos + size)); /* regions overlap, so memmove */
82 memset(xb + (xb_size - size), 0, size);
83 return 0;
84 } else {
85 return -ENODATA;
86 }
87}
88
89int nval_set(char *xb, int xb_size, const YCHAR * name, const char *buf,
90 int bsize, int flags)
91{
92 int pos;
93 int namelen = strnlen(name, xb_size);
94 int reclen;
95 int size_exist = 0;
96 int space;
97 int start;
98
99 pos = nval_find(xb, xb_size, name, &size_exist);
100
101 if (flags & XATTR_CREATE && pos >= 0)
102 return -EEXIST;
103 if (flags & XATTR_REPLACE && pos < 0)
104 return -ENODATA;
105
106 start = nval_used(xb, xb_size);
107 space = xb_size - start + size_exist;
108
109 reclen = (sizeof(int) + namelen + 1 + bsize);
110
111 if (reclen > space)
112 return -ENOSPC;
113
114 if (pos >= 0) {
115 nval_del(xb, xb_size, name);
116 start = nval_used(xb, xb_size);
117 }
118
119 pos = start;
120
121 memcpy(xb + pos, &reclen, sizeof(int));
122 pos += sizeof(int);
123 strncpy((YCHAR *) (xb + pos), name, namelen + 1); /* copy name + NUL only, not reclen bytes */
124 pos += (namelen + 1);
125 memcpy(xb + pos, buf, bsize);
126 return 0;
127}
128
129int nval_get(const char *xb, int xb_size, const YCHAR * name, char *buf,
130 int bsize)
131{
132 int pos = nval_find(xb, xb_size, name, NULL);
133 int size;
134
135 if (pos >= 0 && pos < xb_size) {
136
137 memcpy(&size, xb + pos, sizeof(int));
138 pos += sizeof(int); /* advance past record length */
139 size -= sizeof(int);
140
141 /* Advance over name string */
142 while (xb[pos] && size > 0 && pos < xb_size) {
143 pos++;
144 size--;
145 }
146 /* Advance over NUL */
147 pos++;
148 size--;
149
150 if (size <= bsize) {
151 memcpy(buf, xb + pos, size);
152 return size;
153 }
154
155 }
156 if (pos >= 0)
157 return -ERANGE;
158 else
159 return -ENODATA;
160}
161
162int nval_list(const char *xb, int xb_size, char *buf, int bsize)
163{
164 int pos = 0;
165 int size;
166 int name_len;
167 int ncopied = 0;
168 int filled = 0;
169
170 memcpy(&size, xb + pos, sizeof(int));
171 while (size > sizeof(int) && size <= xb_size && (pos + size) < xb_size
172 && !filled) {
173 pos += sizeof(int);
174 size -= sizeof(int);
175 name_len = strnlen((YCHAR *) (xb + pos), size);
176 if (ncopied + name_len + 1 < bsize) {
177 memcpy(buf, xb + pos, name_len * sizeof(YCHAR));
178 buf += name_len;
179 *buf = '\0';
180 buf++;
181 if (sizeof(YCHAR) > 1) {
182 *buf = '\0';
183 buf++;
184 }
185 ncopied += (name_len + 1);
186 } else {
187 filled = 1;
188 }
189 pos += size;
190 if (pos < xb_size - sizeof(int))
191 memcpy(&size, xb + pos, sizeof(int));
192 else
193 size = 0;
194 }
195 return ncopied;
196}
197
198int nval_hasvalues(const char *xb, int xb_size)
199{
200 return nval_used(xb, xb_size) > 0;
201}
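A usage sketch for the store above, assuming a non-unicode build where YCHAR is plain char; the buffer size and attribute name are made up for illustration:

static void example_nval_usage(void)
{
	char xb[256] = { 0 };	/* the store must start zeroed */
	char out[16];
	int n;

	/* Store "fast" (including its NUL) under a hypothetical name */
	if (nval_set(xb, sizeof(xb), "user.mode", "fast", 5, 0) == 0) {
		n = nval_get(xb, sizeof(xb), "user.mode", out, sizeof(out));
		/* n == 5 on success; -ENODATA if missing, -ERANGE if out is too small */
	}
}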
diff --git a/fs/yaffs2/yaffs_nameval.h b/fs/yaffs2/yaffs_nameval.h
new file mode 100644
index 00000000000..2bb02b62762
--- /dev/null
+++ b/fs/yaffs2/yaffs_nameval.h
@@ -0,0 +1,28 @@
1/*
2 * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __NAMEVAL_H__
17#define __NAMEVAL_H__
18
19#include "yportenv.h"
20
21int nval_del(char *xb, int xb_size, const YCHAR * name);
22int nval_set(char *xb, int xb_size, const YCHAR * name, const char *buf,
23 int bsize, int flags);
24int nval_get(const char *xb, int xb_size, const YCHAR * name, char *buf,
25 int bsize);
26int nval_list(const char *xb, int xb_size, char *buf, int bsize);
27int nval_hasvalues(const char *xb, int xb_size);
28#endif
diff --git a/fs/yaffs2/yaffs_nand.c b/fs/yaffs2/yaffs_nand.c
new file mode 100644
index 00000000000..e816cabf43f
--- /dev/null
+++ b/fs/yaffs2/yaffs_nand.c
@@ -0,0 +1,127 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_nand.h"
15#include "yaffs_tagscompat.h"
16#include "yaffs_tagsvalidity.h"
17
18#include "yaffs_getblockinfo.h"
19
20int yaffs_rd_chunk_tags_nand(struct yaffs_dev *dev, int nand_chunk,
21 u8 * buffer, struct yaffs_ext_tags *tags)
22{
23 int result;
24 struct yaffs_ext_tags local_tags;
25
26 int realigned_chunk = nand_chunk - dev->chunk_offset;
27
28 dev->n_page_reads++;
29
30 /* If there are no tags provided, use local tags to get prioritised gc working */
31 if (!tags)
32 tags = &local_tags;
33
34 if (dev->param.read_chunk_tags_fn)
35 result =
36 dev->param.read_chunk_tags_fn(dev, realigned_chunk, buffer,
37 tags);
38 else
39 result = yaffs_tags_compat_rd(dev,
40 realigned_chunk, buffer, tags);
41 if (tags && tags->ecc_result > YAFFS_ECC_RESULT_NO_ERROR) {
42
43 struct yaffs_block_info *bi;
44 bi = yaffs_get_block_info(dev,
45 nand_chunk /
46 dev->param.chunks_per_block);
47 yaffs_handle_chunk_error(dev, bi);
48 }
49
50 return result;
51}
52
53int yaffs_wr_chunk_tags_nand(struct yaffs_dev *dev,
54 int nand_chunk,
55 const u8 * buffer, struct yaffs_ext_tags *tags)
56{
57
58 dev->n_page_writes++;
59
60 nand_chunk -= dev->chunk_offset;
61
62 if (tags) {
63 tags->seq_number = dev->seq_number;
64 tags->chunk_used = 1;
65 if (!yaffs_validate_tags(tags)) {
66 yaffs_trace(YAFFS_TRACE_ERROR, "Writing uninitialised tags");
67 YBUG();
68 }
69 yaffs_trace(YAFFS_TRACE_WRITE,
70 "Writing chunk %d tags %d %d",
71 nand_chunk, tags->obj_id, tags->chunk_id);
72 } else {
73 yaffs_trace(YAFFS_TRACE_ERROR, "Writing with no tags");
74 YBUG();
75 }
76
77 if (dev->param.write_chunk_tags_fn)
78 return dev->param.write_chunk_tags_fn(dev, nand_chunk, buffer,
79 tags);
80 else
81 return yaffs_tags_compat_wr(dev, nand_chunk, buffer, tags);
82}
83
84int yaffs_mark_bad(struct yaffs_dev *dev, int block_no)
85{
86 block_no -= dev->block_offset;
87
88 if (dev->param.bad_block_fn)
89 return dev->param.bad_block_fn(dev, block_no);
90 else
91 return yaffs_tags_compat_mark_bad(dev, block_no);
92}
93
94int yaffs_query_init_block_state(struct yaffs_dev *dev,
95 int block_no,
96 enum yaffs_block_state *state,
97 u32 * seq_number)
98{
99 block_no -= dev->block_offset;
100
101 if (dev->param.query_block_fn)
102 return dev->param.query_block_fn(dev, block_no, state,
103 seq_number);
104 else
105 return yaffs_tags_compat_query_block(dev, block_no,
106 state, seq_number);
107}
108
109int yaffs_erase_block(struct yaffs_dev *dev, int flash_block)
110{
111 int result;
112
113 flash_block -= dev->block_offset;
114
115 dev->n_erasures++;
116
117 result = dev->param.erase_fn(dev, flash_block);
118
119 return result;
120}
121
122int yaffs_init_nand(struct yaffs_dev *dev)
123{
124 if (dev->param.initialise_flash_fn)
125 return dev->param.initialise_flash_fn(dev);
126 return YAFFS_OK;
127}
diff --git a/fs/yaffs2/yaffs_nand.h b/fs/yaffs2/yaffs_nand.h
new file mode 100644
index 00000000000..543f1987124
--- /dev/null
+++ b/fs/yaffs2/yaffs_nand.h
@@ -0,0 +1,38 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_NAND_H__
17#define __YAFFS_NAND_H__
18#include "yaffs_guts.h"
19
20int yaffs_rd_chunk_tags_nand(struct yaffs_dev *dev, int nand_chunk,
21 u8 * buffer, struct yaffs_ext_tags *tags);
22
23int yaffs_wr_chunk_tags_nand(struct yaffs_dev *dev,
24 int nand_chunk,
25 const u8 * buffer, struct yaffs_ext_tags *tags);
26
27int yaffs_mark_bad(struct yaffs_dev *dev, int block_no);
28
29int yaffs_query_init_block_state(struct yaffs_dev *dev,
30 int block_no,
31 enum yaffs_block_state *state,
32 unsigned *seq_number);
33
34int yaffs_erase_block(struct yaffs_dev *dev, int flash_block);
35
36int yaffs_init_nand(struct yaffs_dev *dev);
37
38#endif
diff --git a/fs/yaffs2/yaffs_packedtags1.c b/fs/yaffs2/yaffs_packedtags1.c
new file mode 100644
index 00000000000..a77f0954fc1
--- /dev/null
+++ b/fs/yaffs2/yaffs_packedtags1.c
@@ -0,0 +1,53 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_packedtags1.h"
15#include "yportenv.h"
16
17void yaffs_pack_tags1(struct yaffs_packed_tags1 *pt,
18 const struct yaffs_ext_tags *t)
19{
20 pt->chunk_id = t->chunk_id;
21 pt->serial_number = t->serial_number;
22 pt->n_bytes = t->n_bytes;
23 pt->obj_id = t->obj_id;
24 pt->ecc = 0;
25 pt->deleted = (t->is_deleted) ? 0 : 1;
26 pt->unused_stuff = 0;
27 pt->should_be_ff = 0xFFFFFFFF;
28
29}
30
31void yaffs_unpack_tags1(struct yaffs_ext_tags *t,
32 const struct yaffs_packed_tags1 *pt)
33{
34 static const u8 all_ff[] =
35 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
36 0xff
37 };
38
39 if (memcmp(all_ff, pt, sizeof(struct yaffs_packed_tags1))) {
40 t->block_bad = 0;
41 if (pt->should_be_ff != 0xFFFFFFFF)
42 t->block_bad = 1;
43 t->chunk_used = 1;
44 t->obj_id = pt->obj_id;
45 t->chunk_id = pt->chunk_id;
46 t->n_bytes = pt->n_bytes;
47 t->ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
48 t->is_deleted = (pt->deleted) ? 0 : 1;
49 t->serial_number = pt->serial_number;
50 } else {
51 memset(t, 0, sizeof(struct yaffs_ext_tags));
52 }
53}
diff --git a/fs/yaffs2/yaffs_packedtags1.h b/fs/yaffs2/yaffs_packedtags1.h
new file mode 100644
index 00000000000..d6861ff505e
--- /dev/null
+++ b/fs/yaffs2/yaffs_packedtags1.h
@@ -0,0 +1,39 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16/* This is used to pack YAFFS1 tags, not YAFFS2 tags. */
17
18#ifndef __YAFFS_PACKEDTAGS1_H__
19#define __YAFFS_PACKEDTAGS1_H__
20
21#include "yaffs_guts.h"
22
23struct yaffs_packed_tags1 {
24 unsigned chunk_id:20;
25 unsigned serial_number:2;
26 unsigned n_bytes:10;
27 unsigned obj_id:18;
28 unsigned ecc:12;
29 unsigned deleted:1;
30 unsigned unused_stuff:1;
31 unsigned should_be_ff;
32
33};
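/*
 * Illustrative sketch (not part of the on-flash format): packing and
 * unpacking round-trip the main tag fields, e.g.
 *
 *	struct yaffs_ext_tags t;	 /* assume t has been filled in */
 *	struct yaffs_packed_tags1 pt;
 *	yaffs_pack_tags1(&pt, &t);
 *	yaffs_unpack_tags1(&t, &pt);	 /* t regains obj_id, chunk_id, ... */
 *
 * An all-0xFF packed struct unpacks to zeroed tags (an unused chunk),
 * and should_be_ff != 0xFFFFFFFF marks the block as bad on unpack.
 */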
34
35void yaffs_pack_tags1(struct yaffs_packed_tags1 *pt,
36 const struct yaffs_ext_tags *t);
37void yaffs_unpack_tags1(struct yaffs_ext_tags *t,
38 const struct yaffs_packed_tags1 *pt);
39#endif
diff --git a/fs/yaffs2/yaffs_packedtags2.c b/fs/yaffs2/yaffs_packedtags2.c
new file mode 100644
index 00000000000..8e7fea3d286
--- /dev/null
+++ b/fs/yaffs2/yaffs_packedtags2.c
@@ -0,0 +1,196 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_packedtags2.h"
15#include "yportenv.h"
16#include "yaffs_trace.h"
17#include "yaffs_tagsvalidity.h"
18
19/* This code packs a set of extended tags into a binary structure for
20 * NAND storage
21 */
22
23/* Some of the information is "extra" stuff which can be packed in to
24 * speed up scanning.
25 * This is defined by having the EXTRA_HEADER_INFO_FLAG set.
26 */
27
28/* Extra flags applied to chunk_id */
29
30#define EXTRA_HEADER_INFO_FLAG 0x80000000
31#define EXTRA_SHRINK_FLAG 0x40000000
32#define EXTRA_SHADOWS_FLAG 0x20000000
33#define EXTRA_SPARE_FLAGS 0x10000000
34
35#define ALL_EXTRA_FLAGS 0xF0000000
36
37/* Also, the top 4 bits of the object Id are set to the object type. */
38#define EXTRA_OBJECT_TYPE_SHIFT (28)
39#define EXTRA_OBJECT_TYPE_MASK ((0x0F) << EXTRA_OBJECT_TYPE_SHIFT)
40
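/*
 * Worked example (illustrative values only): for an object header chunk
 * (chunk_id == 0) of a file whose parent has obj_id 2, the packed
 * fields become:
 *
 *	ptt->chunk_id = EXTRA_HEADER_INFO_FLAG | 2;
 *	ptt->obj_id  |= YAFFS_OBJECT_TYPE_FILE << EXTRA_OBJECT_TYPE_SHIFT;
 *	ptt->n_bytes  = file length (or the equivalent obj_id for a
 *			hardlink);
 *
 * yaffs_unpack_tags2_tags_only() below reverses this mapping.
 */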
41static void yaffs_dump_packed_tags2_tags_only(const struct
42 yaffs_packed_tags2_tags_only *ptt)
43{
44 yaffs_trace(YAFFS_TRACE_MTD,
45 "packed tags obj %d chunk %d byte %d seq %d",
46 ptt->obj_id, ptt->chunk_id, ptt->n_bytes, ptt->seq_number);
47}
48
49static void yaffs_dump_packed_tags2(const struct yaffs_packed_tags2 *pt)
50{
51 yaffs_dump_packed_tags2_tags_only(&pt->t);
52}
53
54static void yaffs_dump_tags2(const struct yaffs_ext_tags *t)
55{
56 yaffs_trace(YAFFS_TRACE_MTD,
57 "ext.tags eccres %d blkbad %d chused %d obj %d chunk%d byte %d del %d ser %d seq %d",
58 t->ecc_result, t->block_bad, t->chunk_used, t->obj_id,
59 t->chunk_id, t->n_bytes, t->is_deleted, t->serial_number,
60 t->seq_number);
61
62}
63
64void yaffs_pack_tags2_tags_only(struct yaffs_packed_tags2_tags_only *ptt,
65 const struct yaffs_ext_tags *t)
66{
67 ptt->chunk_id = t->chunk_id;
68 ptt->seq_number = t->seq_number;
69 ptt->n_bytes = t->n_bytes;
70 ptt->obj_id = t->obj_id;
71
72 if (t->chunk_id == 0 && t->extra_available) {
73 /* Store the extra header info instead */
74 /* We save the parent object in the chunk_id */
75 ptt->chunk_id = EXTRA_HEADER_INFO_FLAG | t->extra_parent_id;
76 if (t->extra_is_shrink)
77 ptt->chunk_id |= EXTRA_SHRINK_FLAG;
78 if (t->extra_shadows)
79 ptt->chunk_id |= EXTRA_SHADOWS_FLAG;
80
81 ptt->obj_id &= ~EXTRA_OBJECT_TYPE_MASK;
82 ptt->obj_id |= (t->extra_obj_type << EXTRA_OBJECT_TYPE_SHIFT);
83
84 if (t->extra_obj_type == YAFFS_OBJECT_TYPE_HARDLINK)
85 ptt->n_bytes = t->extra_equiv_id;
86 else if (t->extra_obj_type == YAFFS_OBJECT_TYPE_FILE)
87 ptt->n_bytes = t->extra_length;
88 else
89 ptt->n_bytes = 0;
90 }
91
92 yaffs_dump_packed_tags2_tags_only(ptt);
93 yaffs_dump_tags2(t);
94}
95
96void yaffs_pack_tags2(struct yaffs_packed_tags2 *pt,
97 const struct yaffs_ext_tags *t, int tags_ecc)
98{
99 yaffs_pack_tags2_tags_only(&pt->t, t);
100
101 if (tags_ecc)
102 yaffs_ecc_calc_other((unsigned char *)&pt->t,
103 sizeof(struct
104 yaffs_packed_tags2_tags_only),
105 &pt->ecc);
106}
107
108void yaffs_unpack_tags2_tags_only(struct yaffs_ext_tags *t,
109 struct yaffs_packed_tags2_tags_only *ptt)
110{
111
112 memset(t, 0, sizeof(struct yaffs_ext_tags));
113
114 yaffs_init_tags(t);
115
116 if (ptt->seq_number != 0xFFFFFFFF) {
117 t->block_bad = 0;
118 t->chunk_used = 1;
119 t->obj_id = ptt->obj_id;
120 t->chunk_id = ptt->chunk_id;
121 t->n_bytes = ptt->n_bytes;
122 t->is_deleted = 0;
123 t->serial_number = 0;
124 t->seq_number = ptt->seq_number;
125
126 /* Do extra header info stuff */
127
128 if (ptt->chunk_id & EXTRA_HEADER_INFO_FLAG) {
129 t->chunk_id = 0;
130 t->n_bytes = 0;
131
132 t->extra_available = 1;
133 t->extra_parent_id =
134 ptt->chunk_id & (~(ALL_EXTRA_FLAGS));
135 t->extra_is_shrink =
136 (ptt->chunk_id & EXTRA_SHRINK_FLAG) ? 1 : 0;
137 t->extra_shadows =
138 (ptt->chunk_id & EXTRA_SHADOWS_FLAG) ? 1 : 0;
139 t->extra_obj_type =
140 ptt->obj_id >> EXTRA_OBJECT_TYPE_SHIFT;
141 t->obj_id &= ~EXTRA_OBJECT_TYPE_MASK;
142
143 if (t->extra_obj_type == YAFFS_OBJECT_TYPE_HARDLINK)
144 t->extra_equiv_id = ptt->n_bytes;
145 else
146 t->extra_length = ptt->n_bytes;
147 }
148 }
149
150 yaffs_dump_packed_tags2_tags_only(ptt);
151 yaffs_dump_tags2(t);
152
153}
154
155void yaffs_unpack_tags2(struct yaffs_ext_tags *t, struct yaffs_packed_tags2 *pt,
156 int tags_ecc)
157{
158
159 enum yaffs_ecc_result ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
160
161 if (pt->t.seq_number != 0xFFFFFFFF && tags_ecc) {
162 /* Chunk is in use and we need to do ECC */
163
164 struct yaffs_ecc_other ecc;
165 int result;
166 yaffs_ecc_calc_other((unsigned char *)&pt->t,
167 sizeof(struct
168 yaffs_packed_tags2_tags_only),
169 &ecc);
170 result =
171 yaffs_ecc_correct_other((unsigned char *)&pt->t,
172 sizeof(struct
173 yaffs_packed_tags2_tags_only),
174 &pt->ecc, &ecc);
175 switch (result) {
176 case 0:
177 ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
178 break;
179 case 1:
180 ecc_result = YAFFS_ECC_RESULT_FIXED;
181 break;
182 case -1:
183 ecc_result = YAFFS_ECC_RESULT_UNFIXED;
184 break;
185 default:
186 ecc_result = YAFFS_ECC_RESULT_UNKNOWN;
187 }
188 }
189
190 yaffs_unpack_tags2_tags_only(t, &pt->t);
191
192 t->ecc_result = ecc_result;
193
194 yaffs_dump_packed_tags2(pt);
195 yaffs_dump_tags2(t);
196}
diff --git a/fs/yaffs2/yaffs_packedtags2.h b/fs/yaffs2/yaffs_packedtags2.h
new file mode 100644
index 00000000000..f3296697bc0
--- /dev/null
+++ b/fs/yaffs2/yaffs_packedtags2.h
@@ -0,0 +1,47 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16/* This is used to pack YAFFS2 tags, not YAFFS1 tags. */
17
18#ifndef __YAFFS_PACKEDTAGS2_H__
19#define __YAFFS_PACKEDTAGS2_H__
20
21#include "yaffs_guts.h"
22#include "yaffs_ecc.h"
23
24struct yaffs_packed_tags2_tags_only {
25 unsigned seq_number;
26 unsigned obj_id;
27 unsigned chunk_id;
28 unsigned n_bytes;
29};
30
31struct yaffs_packed_tags2 {
32 struct yaffs_packed_tags2_tags_only t;
33 struct yaffs_ecc_other ecc;
34};
35
36/* Full packed tags with ECC, used for oob tags */
37void yaffs_pack_tags2(struct yaffs_packed_tags2 *pt,
38 const struct yaffs_ext_tags *t, int tags_ecc);
39void yaffs_unpack_tags2(struct yaffs_ext_tags *t, struct yaffs_packed_tags2 *pt,
40 int tags_ecc);
41
42/* Only the tags part (no ECC) for use with inband tags */
43void yaffs_pack_tags2_tags_only(struct yaffs_packed_tags2_tags_only *pt,
44 const struct yaffs_ext_tags *t);
45void yaffs_unpack_tags2_tags_only(struct yaffs_ext_tags *t,
46 struct yaffs_packed_tags2_tags_only *pt);
47#endif
diff --git a/fs/yaffs2/yaffs_tagscompat.c b/fs/yaffs2/yaffs_tagscompat.c
new file mode 100644
index 00000000000..7578075d9ac
--- /dev/null
+++ b/fs/yaffs2/yaffs_tagscompat.c
@@ -0,0 +1,422 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_guts.h"
15#include "yaffs_tagscompat.h"
16#include "yaffs_ecc.h"
17#include "yaffs_getblockinfo.h"
18#include "yaffs_trace.h"
19
20static void yaffs_handle_rd_data_error(struct yaffs_dev *dev, int nand_chunk);
21
22
23/********** Tags ECC calculations *********/
24
25void yaffs_calc_ecc(const u8 * data, struct yaffs_spare *spare)
26{
27 yaffs_ecc_cacl(data, spare->ecc1);
28 yaffs_ecc_cacl(&data[256], spare->ecc2);
29}
30
31void yaffs_calc_tags_ecc(struct yaffs_tags *tags)
32{
33 /* Calculate an ecc */
34
35 unsigned char *b = ((union yaffs_tags_union *)tags)->as_bytes;
36 unsigned i, j;
37 unsigned ecc = 0;
38 unsigned bit = 0;
39
40 tags->ecc = 0;
41
42 for (i = 0; i < 8; i++) {
43 for (j = 1; j & 0xff; j <<= 1) {
44 bit++;
45 if (b[i] & j)
46 ecc ^= bit;
47 }
48 }
49
50 tags->ecc = ecc;
51
52}
53
54int yaffs_check_tags_ecc(struct yaffs_tags *tags)
55{
56 unsigned ecc = tags->ecc;
57
58 yaffs_calc_tags_ecc(tags);
59
60 ecc ^= tags->ecc;
61
62 if (ecc && ecc <= 64) {
63 /* TODO: Handle the failure better. Retire? */
64 unsigned char *b = ((union yaffs_tags_union *)tags)->as_bytes;
65
66 ecc--;
67
68 b[ecc / 8] ^= (1 << (ecc & 7));
69
70		/* Now recalculate the ecc */
71 yaffs_calc_tags_ecc(tags);
72
73 return 1; /* recovered error */
74 } else if (ecc) {
75		/* Weird ecc failure value */
76		/* TODO: Need to do something here */
77 return -1; /* unrecovered error */
78 }
79
80 return 0;
81}
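/*
 * Sketch of the scheme above: each set bit in the 8 tag bytes XORs its
 * 1-based position (1..64) into the ecc word, so a single bit flip
 * changes the recomputed ecc by exactly that position. XORing the old
 * and new ecc values therefore locates the bad bit, which
 * yaffs_check_tags_ecc() flips back. E.g. if bit 2 of byte 0 flips,
 * old ^ new == 3, so the repair is b[0] ^= (1 << 2).
 */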
82
83/********** Tags **********/
84
85static void yaffs_load_tags_to_spare(struct yaffs_spare *spare_ptr,
86 struct yaffs_tags *tags_ptr)
87{
88 union yaffs_tags_union *tu = (union yaffs_tags_union *)tags_ptr;
89
90 yaffs_calc_tags_ecc(tags_ptr);
91
92 spare_ptr->tb0 = tu->as_bytes[0];
93 spare_ptr->tb1 = tu->as_bytes[1];
94 spare_ptr->tb2 = tu->as_bytes[2];
95 spare_ptr->tb3 = tu->as_bytes[3];
96 spare_ptr->tb4 = tu->as_bytes[4];
97 spare_ptr->tb5 = tu->as_bytes[5];
98 spare_ptr->tb6 = tu->as_bytes[6];
99 spare_ptr->tb7 = tu->as_bytes[7];
100}
101
102static void yaffs_get_tags_from_spare(struct yaffs_dev *dev,
103 struct yaffs_spare *spare_ptr,
104 struct yaffs_tags *tags_ptr)
105{
106 union yaffs_tags_union *tu = (union yaffs_tags_union *)tags_ptr;
107 int result;
108
109 tu->as_bytes[0] = spare_ptr->tb0;
110 tu->as_bytes[1] = spare_ptr->tb1;
111 tu->as_bytes[2] = spare_ptr->tb2;
112 tu->as_bytes[3] = spare_ptr->tb3;
113 tu->as_bytes[4] = spare_ptr->tb4;
114 tu->as_bytes[5] = spare_ptr->tb5;
115 tu->as_bytes[6] = spare_ptr->tb6;
116 tu->as_bytes[7] = spare_ptr->tb7;
117
118 result = yaffs_check_tags_ecc(tags_ptr);
119 if (result > 0)
120 dev->n_tags_ecc_fixed++;
121 else if (result < 0)
122 dev->n_tags_ecc_unfixed++;
123}
124
125static void yaffs_spare_init(struct yaffs_spare *spare)
126{
127 memset(spare, 0xFF, sizeof(struct yaffs_spare));
128}
129
130static int yaffs_wr_nand(struct yaffs_dev *dev,
131 int nand_chunk, const u8 * data,
132 struct yaffs_spare *spare)
133{
134 if (nand_chunk < dev->param.start_block * dev->param.chunks_per_block) {
135 yaffs_trace(YAFFS_TRACE_ERROR,
136 "**>> yaffs chunk %d is not valid",
137 nand_chunk);
138 return YAFFS_FAIL;
139 }
140
141 return dev->param.write_chunk_fn(dev, nand_chunk, data, spare);
142}
143
144static int yaffs_rd_chunk_nand(struct yaffs_dev *dev,
145 int nand_chunk,
146 u8 * data,
147 struct yaffs_spare *spare,
148 enum yaffs_ecc_result *ecc_result,
149 int correct_errors)
150{
151 int ret_val;
152 struct yaffs_spare local_spare;
153
154 if (!spare && data) {
155 /* If we don't have a real spare, then we use a local one. */
156 /* Need this for the calculation of the ecc */
157 spare = &local_spare;
158 }
159
160 if (!dev->param.use_nand_ecc) {
161 ret_val =
162 dev->param.read_chunk_fn(dev, nand_chunk, data, spare);
163 if (data && correct_errors) {
164 /* Do ECC correction */
165			/* TODO: handle any errors */
166 int ecc_result1, ecc_result2;
167 u8 calc_ecc[3];
168
169 yaffs_ecc_cacl(data, calc_ecc);
170 ecc_result1 =
171 yaffs_ecc_correct(data, spare->ecc1, calc_ecc);
172 yaffs_ecc_cacl(&data[256], calc_ecc);
173 ecc_result2 =
174 yaffs_ecc_correct(&data[256], spare->ecc2,
175 calc_ecc);
176
177 if (ecc_result1 > 0) {
178 yaffs_trace(YAFFS_TRACE_ERROR,
179 "**>>yaffs ecc error fix performed on chunk %d:0",
180 nand_chunk);
181 dev->n_ecc_fixed++;
182 } else if (ecc_result1 < 0) {
183 yaffs_trace(YAFFS_TRACE_ERROR,
184 "**>>yaffs ecc error unfixed on chunk %d:0",
185 nand_chunk);
186 dev->n_ecc_unfixed++;
187 }
188
189 if (ecc_result2 > 0) {
190 yaffs_trace(YAFFS_TRACE_ERROR,
191 "**>>yaffs ecc error fix performed on chunk %d:1",
192 nand_chunk);
193 dev->n_ecc_fixed++;
194 } else if (ecc_result2 < 0) {
195 yaffs_trace(YAFFS_TRACE_ERROR,
196 "**>>yaffs ecc error unfixed on chunk %d:1",
197 nand_chunk);
198 dev->n_ecc_unfixed++;
199 }
200
201 if (ecc_result1 || ecc_result2) {
202 /* We had a data problem on this page */
203 yaffs_handle_rd_data_error(dev, nand_chunk);
204 }
205
206 if (ecc_result1 < 0 || ecc_result2 < 0)
207 *ecc_result = YAFFS_ECC_RESULT_UNFIXED;
208 else if (ecc_result1 > 0 || ecc_result2 > 0)
209 *ecc_result = YAFFS_ECC_RESULT_FIXED;
210 else
211 *ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
212 }
213 } else {
214 /* Must allocate enough memory for spare+2*sizeof(int) */
215 /* for ecc results from device. */
216 struct yaffs_nand_spare nspare;
217
218 memset(&nspare, 0, sizeof(nspare));
219
220 ret_val = dev->param.read_chunk_fn(dev, nand_chunk, data,
221 (struct yaffs_spare *)
222 &nspare);
223 memcpy(spare, &nspare, sizeof(struct yaffs_spare));
224 if (data && correct_errors) {
225 if (nspare.eccres1 > 0) {
226 yaffs_trace(YAFFS_TRACE_ERROR,
227 "**>>mtd ecc error fix performed on chunk %d:0",
228 nand_chunk);
229 } else if (nspare.eccres1 < 0) {
230 yaffs_trace(YAFFS_TRACE_ERROR,
231 "**>>mtd ecc error unfixed on chunk %d:0",
232 nand_chunk);
233 }
234
235 if (nspare.eccres2 > 0) {
236 yaffs_trace(YAFFS_TRACE_ERROR,
237 "**>>mtd ecc error fix performed on chunk %d:1",
238 nand_chunk);
239 } else if (nspare.eccres2 < 0) {
240 yaffs_trace(YAFFS_TRACE_ERROR,
241 "**>>mtd ecc error unfixed on chunk %d:1",
242 nand_chunk);
243 }
244
245 if (nspare.eccres1 || nspare.eccres2) {
246 /* We had a data problem on this page */
247 yaffs_handle_rd_data_error(dev, nand_chunk);
248 }
249
250 if (nspare.eccres1 < 0 || nspare.eccres2 < 0)
251 *ecc_result = YAFFS_ECC_RESULT_UNFIXED;
252 else if (nspare.eccres1 > 0 || nspare.eccres2 > 0)
253 *ecc_result = YAFFS_ECC_RESULT_FIXED;
254 else
255 *ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
256
257 }
258 }
259 return ret_val;
260}
261
262/*
263 * Functions for improving robustness
264 */
265
266static void yaffs_handle_rd_data_error(struct yaffs_dev *dev, int nand_chunk)
267{
268 int flash_block = nand_chunk / dev->param.chunks_per_block;
269
270 /* Mark the block for retirement */
271 yaffs_get_block_info(dev,
272 flash_block + dev->block_offset)->needs_retiring =
273 1;
274 yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
275 "**>>Block %d marked for retirement",
276 flash_block);
277
278 /* TODO:
279 * Just do a garbage collection on the affected block
280 * then retire the block
281 * NB recursion
282 */
283}
284
285int yaffs_tags_compat_wr(struct yaffs_dev *dev,
286 int nand_chunk,
287 const u8 * data, const struct yaffs_ext_tags *ext_tags)
288{
289 struct yaffs_spare spare;
290 struct yaffs_tags tags;
291
292 yaffs_spare_init(&spare);
293
294 if (ext_tags->is_deleted)
295 spare.page_status = 0;
296 else {
297 tags.obj_id = ext_tags->obj_id;
298 tags.chunk_id = ext_tags->chunk_id;
299
300 tags.n_bytes_lsb = ext_tags->n_bytes & 0x3ff;
301
302 if (dev->data_bytes_per_chunk >= 1024)
303 tags.n_bytes_msb = (ext_tags->n_bytes >> 10) & 3;
304 else
305 tags.n_bytes_msb = 3;
306
307 tags.serial_number = ext_tags->serial_number;
308
309 if (!dev->param.use_nand_ecc && data)
310 yaffs_calc_ecc(data, &spare);
311
312 yaffs_load_tags_to_spare(&spare, &tags);
313
314 }
315
316 return yaffs_wr_nand(dev, nand_chunk, data, &spare);
317}
318
319int yaffs_tags_compat_rd(struct yaffs_dev *dev,
320 int nand_chunk,
321 u8 * data, struct yaffs_ext_tags *ext_tags)
322{
323
324 struct yaffs_spare spare;
325 struct yaffs_tags tags;
326 enum yaffs_ecc_result ecc_result = YAFFS_ECC_RESULT_UNKNOWN;
327
328 static struct yaffs_spare spare_ff;
329 static int init;
330
331 if (!init) {
332 memset(&spare_ff, 0xFF, sizeof(spare_ff));
333 init = 1;
334 }
335
336 if (yaffs_rd_chunk_nand(dev, nand_chunk, data, &spare, &ecc_result, 1)) {
337 /* ext_tags may be NULL */
338 if (ext_tags) {
339
340 int deleted =
341 (hweight8(spare.page_status) < 7) ? 1 : 0;
342
343 ext_tags->is_deleted = deleted;
344 ext_tags->ecc_result = ecc_result;
345 ext_tags->block_bad = 0; /* We're reading it */
346 /* therefore it is not a bad block */
347 ext_tags->chunk_used =
348 (memcmp(&spare_ff, &spare, sizeof(spare_ff)) !=
349 0) ? 1 : 0;
350
351 if (ext_tags->chunk_used) {
352 yaffs_get_tags_from_spare(dev, &spare, &tags);
353
354 ext_tags->obj_id = tags.obj_id;
355 ext_tags->chunk_id = tags.chunk_id;
356 ext_tags->n_bytes = tags.n_bytes_lsb;
357
358 if (dev->data_bytes_per_chunk >= 1024)
359 ext_tags->n_bytes |=
360 (((unsigned)tags.
361 n_bytes_msb) << 10);
362
363 ext_tags->serial_number = tags.serial_number;
364 }
365 }
366
367 return YAFFS_OK;
368 } else {
369 return YAFFS_FAIL;
370 }
371}
372
373int yaffs_tags_compat_mark_bad(struct yaffs_dev *dev, int flash_block)
374{
375
376 struct yaffs_spare spare;
377
378 memset(&spare, 0xff, sizeof(struct yaffs_spare));
379
380 spare.block_status = 'Y';
381
382 yaffs_wr_nand(dev, flash_block * dev->param.chunks_per_block, NULL,
383 &spare);
384 yaffs_wr_nand(dev, flash_block * dev->param.chunks_per_block + 1,
385 NULL, &spare);
386
387 return YAFFS_OK;
388
389}
390
391int yaffs_tags_compat_query_block(struct yaffs_dev *dev,
392 int block_no,
393 enum yaffs_block_state *state,
394 u32 * seq_number)
395{
396
397 struct yaffs_spare spare0, spare1;
398 static struct yaffs_spare spare_ff;
399 static int init;
400 enum yaffs_ecc_result dummy;
401
402 if (!init) {
403 memset(&spare_ff, 0xFF, sizeof(spare_ff));
404 init = 1;
405 }
406
407 *seq_number = 0;
408
409 yaffs_rd_chunk_nand(dev, block_no * dev->param.chunks_per_block, NULL,
410 &spare0, &dummy, 1);
411 yaffs_rd_chunk_nand(dev, block_no * dev->param.chunks_per_block + 1,
412 NULL, &spare1, &dummy, 1);
413
414 if (hweight8(spare0.block_status & spare1.block_status) < 7)
415 *state = YAFFS_BLOCK_STATE_DEAD;
416 else if (memcmp(&spare_ff, &spare0, sizeof(spare_ff)) == 0)
417 *state = YAFFS_BLOCK_STATE_EMPTY;
418 else
419 *state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
420
421 return YAFFS_OK;
422}
diff --git a/fs/yaffs2/yaffs_tagscompat.h b/fs/yaffs2/yaffs_tagscompat.h
new file mode 100644
index 00000000000..8cd35dcd3ca
--- /dev/null
+++ b/fs/yaffs2/yaffs_tagscompat.h
@@ -0,0 +1,36 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_TAGSCOMPAT_H__
17#define __YAFFS_TAGSCOMPAT_H__
18
19#include "yaffs_guts.h"
20int yaffs_tags_compat_wr(struct yaffs_dev *dev,
21 int nand_chunk,
22 const u8 * data, const struct yaffs_ext_tags *tags);
23int yaffs_tags_compat_rd(struct yaffs_dev *dev,
24 int nand_chunk,
25 u8 * data, struct yaffs_ext_tags *tags);
26int yaffs_tags_compat_mark_bad(struct yaffs_dev *dev, int block_no);
27int yaffs_tags_compat_query_block(struct yaffs_dev *dev,
28 int block_no,
29 enum yaffs_block_state *state,
30 u32 * seq_number);
31
32void yaffs_calc_tags_ecc(struct yaffs_tags *tags);
33int yaffs_check_tags_ecc(struct yaffs_tags *tags);
34int yaffs_count_bits(u8 byte);
35
36#endif
diff --git a/fs/yaffs2/yaffs_tagsvalidity.c b/fs/yaffs2/yaffs_tagsvalidity.c
new file mode 100644
index 00000000000..4358d79d4be
--- /dev/null
+++ b/fs/yaffs2/yaffs_tagsvalidity.c
@@ -0,0 +1,27 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_tagsvalidity.h"
15
16void yaffs_init_tags(struct yaffs_ext_tags *tags)
17{
18 memset(tags, 0, sizeof(struct yaffs_ext_tags));
19 tags->validity0 = 0xAAAAAAAA;
20 tags->validity1 = 0x55555555;
21}
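/*
 * The two complementary alternating bit patterns are unlikely to appear
 * in uninitialised or trashed memory, so they act as a cheap sanity
 * stamp. Usage sketch:
 *
 *	struct yaffs_ext_tags t;
 *	yaffs_init_tags(&t);
 *	/* yaffs_validate_tags(&t) now returns non-zero */
 */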
22
23int yaffs_validate_tags(struct yaffs_ext_tags *tags)
24{
25 return (tags->validity0 == 0xAAAAAAAA && tags->validity1 == 0x55555555);
26
27}
diff --git a/fs/yaffs2/yaffs_tagsvalidity.h b/fs/yaffs2/yaffs_tagsvalidity.h
new file mode 100644
index 00000000000..36a021fc8fa
--- /dev/null
+++ b/fs/yaffs2/yaffs_tagsvalidity.h
@@ -0,0 +1,23 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_TAGS_VALIDITY_H__
17#define __YAFFS_TAGS_VALIDITY_H__
18
19#include "yaffs_guts.h"
20
21void yaffs_init_tags(struct yaffs_ext_tags *tags);
22int yaffs_validate_tags(struct yaffs_ext_tags *tags);
23#endif
diff --git a/fs/yaffs2/yaffs_trace.h b/fs/yaffs2/yaffs_trace.h
new file mode 100644
index 00000000000..6273dbf9f63
--- /dev/null
+++ b/fs/yaffs2/yaffs_trace.h
@@ -0,0 +1,57 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YTRACE_H__
17#define __YTRACE_H__
18
19extern unsigned int yaffs_trace_mask;
20extern unsigned int yaffs_wr_attempts;
21
22/*
23 * Tracing flags.
24 * The flags masked in YAFFS_TRACE_ALWAYS are always traced.
25 */
26
27#define YAFFS_TRACE_OS 0x00000002
28#define YAFFS_TRACE_ALLOCATE 0x00000004
29#define YAFFS_TRACE_SCAN 0x00000008
30#define YAFFS_TRACE_BAD_BLOCKS 0x00000010
31#define YAFFS_TRACE_ERASE 0x00000020
32#define YAFFS_TRACE_GC 0x00000040
33#define YAFFS_TRACE_WRITE 0x00000080
34#define YAFFS_TRACE_TRACING 0x00000100
35#define YAFFS_TRACE_DELETION 0x00000200
36#define YAFFS_TRACE_BUFFERS 0x00000400
37#define YAFFS_TRACE_NANDACCESS 0x00000800
38#define YAFFS_TRACE_GC_DETAIL 0x00001000
39#define YAFFS_TRACE_SCAN_DEBUG 0x00002000
40#define YAFFS_TRACE_MTD 0x00004000
41#define YAFFS_TRACE_CHECKPOINT 0x00008000
42
43#define YAFFS_TRACE_VERIFY 0x00010000
44#define YAFFS_TRACE_VERIFY_NAND 0x00020000
45#define YAFFS_TRACE_VERIFY_FULL 0x00040000
46#define YAFFS_TRACE_VERIFY_ALL 0x000F0000
47
48#define YAFFS_TRACE_SYNC 0x00100000
49#define YAFFS_TRACE_BACKGROUND 0x00200000
50#define YAFFS_TRACE_LOCK 0x00400000
51#define YAFFS_TRACE_MOUNT 0x00800000
52
53#define YAFFS_TRACE_ERROR 0x40000000
54#define YAFFS_TRACE_BUG 0x80000000
55#define YAFFS_TRACE_ALWAYS 0xF0000000
56
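/*
 * Usage sketch: the mask is normally set at module load time via the
 * yaffs_trace_mask module parameter (see yaffs_vfs.c). For example, to
 * also trace garbage collection and bad block handling:
 *
 *	yaffs_trace_mask = YAFFS_TRACE_GC | YAFFS_TRACE_BAD_BLOCKS |
 *			   YAFFS_TRACE_ALWAYS;
 */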
57#endif
diff --git a/fs/yaffs2/yaffs_verify.c b/fs/yaffs2/yaffs_verify.c
new file mode 100644
index 00000000000..738c7f69a5e
--- /dev/null
+++ b/fs/yaffs2/yaffs_verify.c
@@ -0,0 +1,535 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_verify.h"
15#include "yaffs_trace.h"
16#include "yaffs_bitmap.h"
17#include "yaffs_getblockinfo.h"
18#include "yaffs_nand.h"
19
20int yaffs_skip_verification(struct yaffs_dev *dev)
21{
22 dev = dev;
23 return !(yaffs_trace_mask &
24 (YAFFS_TRACE_VERIFY | YAFFS_TRACE_VERIFY_FULL));
25}
26
27static int yaffs_skip_full_verification(struct yaffs_dev *dev)
28{
29 dev = dev;
30 return !(yaffs_trace_mask & (YAFFS_TRACE_VERIFY_FULL));
31}
32
33static int yaffs_skip_nand_verification(struct yaffs_dev *dev)
34{
35 dev = dev;
36 return !(yaffs_trace_mask & (YAFFS_TRACE_VERIFY_NAND));
37}
38
39static const char *block_state_name[] = {
40 "Unknown",
41 "Needs scanning",
42 "Scanning",
43 "Empty",
44 "Allocating",
45 "Full",
46 "Dirty",
47 "Checkpoint",
48 "Collecting",
49 "Dead"
50};
51
52void yaffs_verify_blk(struct yaffs_dev *dev, struct yaffs_block_info *bi, int n)
53{
54 int actually_used;
55 int in_use;
56
57 if (yaffs_skip_verification(dev))
58 return;
59
60 /* Report illegal runtime states */
61 if (bi->block_state >= YAFFS_NUMBER_OF_BLOCK_STATES)
62 yaffs_trace(YAFFS_TRACE_VERIFY,
63 "Block %d has undefined state %d",
64 n, bi->block_state);
65
66 switch (bi->block_state) {
67 case YAFFS_BLOCK_STATE_UNKNOWN:
68 case YAFFS_BLOCK_STATE_SCANNING:
69 case YAFFS_BLOCK_STATE_NEEDS_SCANNING:
70 yaffs_trace(YAFFS_TRACE_VERIFY,
71 "Block %d has bad run-state %s",
72 n, block_state_name[bi->block_state]);
73 }
74
75 /* Check pages in use and soft deletions are legal */
76
77 actually_used = bi->pages_in_use - bi->soft_del_pages;
78
79 if (bi->pages_in_use < 0
80 || bi->pages_in_use > dev->param.chunks_per_block
81 || bi->soft_del_pages < 0
82 || bi->soft_del_pages > dev->param.chunks_per_block
83 || actually_used < 0 || actually_used > dev->param.chunks_per_block)
84 yaffs_trace(YAFFS_TRACE_VERIFY,
85		"Block %d has illegal values pages_in_use %d soft_del_pages %d",
86 n, bi->pages_in_use, bi->soft_del_pages);
87
88 /* Check chunk bitmap legal */
89 in_use = yaffs_count_chunk_bits(dev, n);
90 if (in_use != bi->pages_in_use)
91 yaffs_trace(YAFFS_TRACE_VERIFY,
92 "Block %d has inconsistent values pages_in_use %d counted chunk bits %d",
93 n, bi->pages_in_use, in_use);
94
95}
96
97void yaffs_verify_collected_blk(struct yaffs_dev *dev,
98 struct yaffs_block_info *bi, int n)
99{
100 yaffs_verify_blk(dev, bi, n);
101
102 /* After collection the block should be in the erased state */
103
104 if (bi->block_state != YAFFS_BLOCK_STATE_COLLECTING &&
105 bi->block_state != YAFFS_BLOCK_STATE_EMPTY) {
106 yaffs_trace(YAFFS_TRACE_ERROR,
107 "Block %d is in state %d after gc, should be erased",
108 n, bi->block_state);
109 }
110}
111
112void yaffs_verify_blocks(struct yaffs_dev *dev)
113{
114 int i;
115 int state_count[YAFFS_NUMBER_OF_BLOCK_STATES];
116 int illegal_states = 0;
117
118 if (yaffs_skip_verification(dev))
119 return;
120
121 memset(state_count, 0, sizeof(state_count));
122
123 for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
124 struct yaffs_block_info *bi = yaffs_get_block_info(dev, i);
125 yaffs_verify_blk(dev, bi, i);
126
127 if (bi->block_state < YAFFS_NUMBER_OF_BLOCK_STATES)
128 state_count[bi->block_state]++;
129 else
130 illegal_states++;
131 }
132
133 yaffs_trace(YAFFS_TRACE_VERIFY, "Block summary");
134
135 yaffs_trace(YAFFS_TRACE_VERIFY,
136 "%d blocks have illegal states",
137 illegal_states);
138 if (state_count[YAFFS_BLOCK_STATE_ALLOCATING] > 1)
139 yaffs_trace(YAFFS_TRACE_VERIFY,
140 "Too many allocating blocks");
141
142 for (i = 0; i < YAFFS_NUMBER_OF_BLOCK_STATES; i++)
143 yaffs_trace(YAFFS_TRACE_VERIFY,
144 "%s %d blocks",
145 block_state_name[i], state_count[i]);
146
147 if (dev->blocks_in_checkpt != state_count[YAFFS_BLOCK_STATE_CHECKPOINT])
148 yaffs_trace(YAFFS_TRACE_VERIFY,
149 "Checkpoint block count wrong dev %d count %d",
150 dev->blocks_in_checkpt,
151 state_count[YAFFS_BLOCK_STATE_CHECKPOINT]);
152
153 if (dev->n_erased_blocks != state_count[YAFFS_BLOCK_STATE_EMPTY])
154 yaffs_trace(YAFFS_TRACE_VERIFY,
155 "Erased block count wrong dev %d count %d",
156 dev->n_erased_blocks,
157 state_count[YAFFS_BLOCK_STATE_EMPTY]);
158
159 if (state_count[YAFFS_BLOCK_STATE_COLLECTING] > 1)
160 yaffs_trace(YAFFS_TRACE_VERIFY,
161 "Too many collecting blocks %d (max is 1)",
162 state_count[YAFFS_BLOCK_STATE_COLLECTING]);
163}
164
165/*
166 * Verify the object header. oh must be valid, but obj and tags may be NULL in which
167 * case those tests will not be performed.
168 */
169void yaffs_verify_oh(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh,
170 struct yaffs_ext_tags *tags, int parent_check)
171{
172 if (obj && yaffs_skip_verification(obj->my_dev))
173 return;
174
175 if (!(tags && obj && oh)) {
176 yaffs_trace(YAFFS_TRACE_VERIFY,
177 "Verifying object header tags %p obj %p oh %p",
178 tags, obj, oh);
179 return;
180 }
181
182 if (oh->type <= YAFFS_OBJECT_TYPE_UNKNOWN ||
183 oh->type > YAFFS_OBJECT_TYPE_MAX)
184 yaffs_trace(YAFFS_TRACE_VERIFY,
185 "Obj %d header type is illegal value 0x%x",
186 tags->obj_id, oh->type);
187
188 if (tags->obj_id != obj->obj_id)
189 yaffs_trace(YAFFS_TRACE_VERIFY,
190 "Obj %d header mismatch obj_id %d",
191 tags->obj_id, obj->obj_id);
192
193 /*
194 * Check that the object's parent ids match if parent_check requested.
195 *
196 * Tests do not apply to the root object.
197 */
198
199 if (parent_check && tags->obj_id > 1 && !obj->parent)
200 yaffs_trace(YAFFS_TRACE_VERIFY,
201 "Obj %d header mismatch parent_id %d obj->parent is NULL",
202 tags->obj_id, oh->parent_obj_id);
203
204 if (parent_check && obj->parent &&
205 oh->parent_obj_id != obj->parent->obj_id &&
206 (oh->parent_obj_id != YAFFS_OBJECTID_UNLINKED ||
207 obj->parent->obj_id != YAFFS_OBJECTID_DELETED))
208 yaffs_trace(YAFFS_TRACE_VERIFY,
209 "Obj %d header mismatch parent_id %d parent_obj_id %d",
210 tags->obj_id, oh->parent_obj_id,
211 obj->parent->obj_id);
212
213 if (tags->obj_id > 1 && oh->name[0] == 0) /* Null name */
214 yaffs_trace(YAFFS_TRACE_VERIFY,
215 "Obj %d header name is NULL",
216 obj->obj_id);
217
218 if (tags->obj_id > 1 && ((u8) (oh->name[0])) == 0xff) /* Trashed name */
219 yaffs_trace(YAFFS_TRACE_VERIFY,
220 "Obj %d header name is 0xFF",
221 obj->obj_id);
222}
223
224void yaffs_verify_file(struct yaffs_obj *obj)
225{
226 int required_depth;
227 int actual_depth;
228 u32 last_chunk;
229 u32 x;
230 u32 i;
231 struct yaffs_dev *dev;
232 struct yaffs_ext_tags tags;
233 struct yaffs_tnode *tn;
234 u32 obj_id;
235
236 if (!obj)
237 return;
238
239 if (yaffs_skip_verification(obj->my_dev))
240 return;
241
242 dev = obj->my_dev;
243 obj_id = obj->obj_id;
244
245 /* Check file size is consistent with tnode depth */
246 last_chunk =
247 obj->variant.file_variant.file_size / dev->data_bytes_per_chunk + 1;
248 x = last_chunk >> YAFFS_TNODES_LEVEL0_BITS;
249 required_depth = 0;
250 while (x > 0) {
251 x >>= YAFFS_TNODES_INTERNAL_BITS;
252 required_depth++;
253 }
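	/* Worked example (assuming YAFFS_TNODES_LEVEL0_BITS == 4 and
	 * YAFFS_TNODES_INTERNAL_BITS == 3): a 100-chunk file gives
	 * x = 100 >> 4 = 6, then 6 >> 3 == 0, so required_depth == 1.
	 */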
254
255 actual_depth = obj->variant.file_variant.top_level;
256
257 /* Check that the chunks in the tnode tree are all correct.
258 * We do this by scanning through the tnode tree and
259 * checking the tags for every chunk match.
260 */
261
262 if (yaffs_skip_nand_verification(dev))
263 return;
264
265 for (i = 1; i <= last_chunk; i++) {
266 tn = yaffs_find_tnode_0(dev, &obj->variant.file_variant, i);
267
268 if (tn) {
269 u32 the_chunk = yaffs_get_group_base(dev, tn, i);
270 if (the_chunk > 0) {
271 yaffs_rd_chunk_tags_nand(dev, the_chunk, NULL,
272 &tags);
273 if (tags.obj_id != obj_id || tags.chunk_id != i)
274 yaffs_trace(YAFFS_TRACE_VERIFY,
275 "Object %d chunk_id %d NAND mismatch chunk %d tags (%d:%d)",
276 obj_id, i, the_chunk,
277 tags.obj_id, tags.chunk_id);
278 }
279 }
280 }
281}
282
283void yaffs_verify_link(struct yaffs_obj *obj)
284{
285 if (obj && yaffs_skip_verification(obj->my_dev))
286 return;
287
288 /* Verify sane equivalent object */
289}
290
291void yaffs_verify_symlink(struct yaffs_obj *obj)
292{
293 if (obj && yaffs_skip_verification(obj->my_dev))
294 return;
295
296 /* Verify symlink string */
297}
298
299void yaffs_verify_special(struct yaffs_obj *obj)
300{
301 if (obj && yaffs_skip_verification(obj->my_dev))
302 return;
303}
304
305void yaffs_verify_obj(struct yaffs_obj *obj)
306{
307 struct yaffs_dev *dev;
308
309 u32 chunk_min;
310 u32 chunk_max;
311
312 u32 chunk_id_ok;
313 u32 chunk_in_range;
314 u32 chunk_wrongly_deleted;
315 u32 chunk_valid;
316
317 if (!obj)
318 return;
319
320 if (obj->being_created)
321 return;
322
323 dev = obj->my_dev;
324
325 if (yaffs_skip_verification(dev))
326 return;
327
328 /* Check sane object header chunk */
329
330 chunk_min = dev->internal_start_block * dev->param.chunks_per_block;
331 chunk_max =
332 (dev->internal_end_block + 1) * dev->param.chunks_per_block - 1;
333
334 chunk_in_range = (((unsigned)(obj->hdr_chunk)) >= chunk_min &&
335 ((unsigned)(obj->hdr_chunk)) <= chunk_max);
336 chunk_id_ok = chunk_in_range || (obj->hdr_chunk == 0);
337 chunk_valid = chunk_in_range &&
338 yaffs_check_chunk_bit(dev,
339 obj->hdr_chunk / dev->param.chunks_per_block,
340 obj->hdr_chunk % dev->param.chunks_per_block);
341 chunk_wrongly_deleted = chunk_in_range && !chunk_valid;
342
343 if (!obj->fake && (!chunk_id_ok || chunk_wrongly_deleted))
344 yaffs_trace(YAFFS_TRACE_VERIFY,
345 "Obj %d has chunk_id %d %s %s",
346 obj->obj_id, obj->hdr_chunk,
347 chunk_id_ok ? "" : ",out of range",
348 chunk_wrongly_deleted ? ",marked as deleted" : "");
349
350 if (chunk_valid && !yaffs_skip_nand_verification(dev)) {
351 struct yaffs_ext_tags tags;
352 struct yaffs_obj_hdr *oh;
353 u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
354
355 oh = (struct yaffs_obj_hdr *)buffer;
356
357 yaffs_rd_chunk_tags_nand(dev, obj->hdr_chunk, buffer, &tags);
358
359 yaffs_verify_oh(obj, oh, &tags, 1);
360
361 yaffs_release_temp_buffer(dev, buffer, __LINE__);
362 }
363
364 /* Verify it has a parent */
365 if (obj && !obj->fake && (!obj->parent || obj->parent->my_dev != dev)) {
366 yaffs_trace(YAFFS_TRACE_VERIFY,
367 "Obj %d has parent pointer %p which does not look like an object",
368 obj->obj_id, obj->parent);
369 }
370
371 /* Verify parent is a directory */
372 if (obj->parent
373 && obj->parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
374 yaffs_trace(YAFFS_TRACE_VERIFY,
375 "Obj %d's parent is not a directory (type %d)",
376 obj->obj_id, obj->parent->variant_type);
377 }
378
379 switch (obj->variant_type) {
380 case YAFFS_OBJECT_TYPE_FILE:
381 yaffs_verify_file(obj);
382 break;
383 case YAFFS_OBJECT_TYPE_SYMLINK:
384 yaffs_verify_symlink(obj);
385 break;
386 case YAFFS_OBJECT_TYPE_DIRECTORY:
387 yaffs_verify_dir(obj);
388 break;
389 case YAFFS_OBJECT_TYPE_HARDLINK:
390 yaffs_verify_link(obj);
391 break;
392 case YAFFS_OBJECT_TYPE_SPECIAL:
393 yaffs_verify_special(obj);
394 break;
395 case YAFFS_OBJECT_TYPE_UNKNOWN:
396 default:
397 yaffs_trace(YAFFS_TRACE_VERIFY,
398			"Obj %d has illegal type %d",
399 obj->obj_id, obj->variant_type);
400 break;
401 }
402}
403
404void yaffs_verify_objects(struct yaffs_dev *dev)
405{
406 struct yaffs_obj *obj;
407 int i;
408 struct list_head *lh;
409
410 if (yaffs_skip_verification(dev))
411 return;
412
413 /* Iterate through the objects in each hash entry */
414
415 for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) {
416 list_for_each(lh, &dev->obj_bucket[i].list) {
417 if (lh) {
418 obj =
419 list_entry(lh, struct yaffs_obj, hash_link);
420 yaffs_verify_obj(obj);
421 }
422 }
423 }
424}
425
426void yaffs_verify_obj_in_dir(struct yaffs_obj *obj)
427{
428 struct list_head *lh;
429 struct yaffs_obj *list_obj;
430
431 int count = 0;
432
433 if (!obj) {
434 yaffs_trace(YAFFS_TRACE_ALWAYS, "No object to verify");
435 YBUG();
436 return;
437 }
438
439 if (yaffs_skip_verification(obj->my_dev))
440 return;
441
442 if (!obj->parent) {
443		yaffs_trace(YAFFS_TRACE_ALWAYS, "Object does not have parent");
444 YBUG();
445 return;
446 }
447
448 if (obj->parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
449 yaffs_trace(YAFFS_TRACE_ALWAYS, "Parent is not directory");
450 YBUG();
451 }
452
453	/* Iterate through the siblings in the parent's children list */
454
455 list_for_each(lh, &obj->parent->variant.dir_variant.children) {
456 if (lh) {
457 list_obj = list_entry(lh, struct yaffs_obj, siblings);
458 yaffs_verify_obj(list_obj);
459 if (obj == list_obj)
460 count++;
461 }
462 }
463
464 if (count != 1) {
465 yaffs_trace(YAFFS_TRACE_ALWAYS,
466 "Object in directory %d times",
467 count);
468 YBUG();
469 }
470}
471
472void yaffs_verify_dir(struct yaffs_obj *directory)
473{
474 struct list_head *lh;
475 struct yaffs_obj *list_obj;
476
477 if (!directory) {
478 YBUG();
479 return;
480 }
481
482 if (yaffs_skip_full_verification(directory->my_dev))
483 return;
484
485 if (directory->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
486 yaffs_trace(YAFFS_TRACE_ALWAYS,
487 "Directory has wrong type: %d",
488 directory->variant_type);
489 YBUG();
490 }
491
492	/* Iterate through the objects in the directory */
493
494 list_for_each(lh, &directory->variant.dir_variant.children) {
495 if (lh) {
496 list_obj = list_entry(lh, struct yaffs_obj, siblings);
497 if (list_obj->parent != directory) {
498 yaffs_trace(YAFFS_TRACE_ALWAYS,
499 "Object in directory list has wrong parent %p",
500 list_obj->parent);
501 YBUG();
502 }
503 yaffs_verify_obj_in_dir(list_obj);
504 }
505 }
506}
507
508static int yaffs_free_verification_failures;
509
510void yaffs_verify_free_chunks(struct yaffs_dev *dev)
511{
512 int counted;
513 int difference;
514
515 if (yaffs_skip_verification(dev))
516 return;
517
518 counted = yaffs_count_free_chunks(dev);
519
520 difference = dev->n_free_chunks - counted;
521
522 if (difference) {
523 yaffs_trace(YAFFS_TRACE_ALWAYS,
524 "Freechunks verification failure %d %d %d",
525 dev->n_free_chunks, counted, difference);
526 yaffs_free_verification_failures++;
527 }
528}
529
530int yaffs_verify_file_sane(struct yaffs_obj *in)
531{
532 in = in;
533 return YAFFS_OK;
534}
535
diff --git a/fs/yaffs2/yaffs_verify.h b/fs/yaffs2/yaffs_verify.h
new file mode 100644
index 00000000000..cc6f8899930
--- /dev/null
+++ b/fs/yaffs2/yaffs_verify.h
@@ -0,0 +1,43 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_VERIFY_H__
17#define __YAFFS_VERIFY_H__
18
19#include "yaffs_guts.h"
20
21void yaffs_verify_blk(struct yaffs_dev *dev, struct yaffs_block_info *bi,
22 int n);
23void yaffs_verify_collected_blk(struct yaffs_dev *dev,
24 struct yaffs_block_info *bi, int n);
25void yaffs_verify_blocks(struct yaffs_dev *dev);
26
27void yaffs_verify_oh(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh,
28 struct yaffs_ext_tags *tags, int parent_check);
29void yaffs_verify_file(struct yaffs_obj *obj);
30void yaffs_verify_link(struct yaffs_obj *obj);
31void yaffs_verify_symlink(struct yaffs_obj *obj);
32void yaffs_verify_special(struct yaffs_obj *obj);
33void yaffs_verify_obj(struct yaffs_obj *obj);
34void yaffs_verify_objects(struct yaffs_dev *dev);
35void yaffs_verify_obj_in_dir(struct yaffs_obj *obj);
36void yaffs_verify_dir(struct yaffs_obj *directory);
37void yaffs_verify_free_chunks(struct yaffs_dev *dev);
38
39int yaffs_verify_file_sane(struct yaffs_obj *obj);
40
41int yaffs_skip_verification(struct yaffs_dev *dev);
42
43#endif
diff --git a/fs/yaffs2/yaffs_vfs.c b/fs/yaffs2/yaffs_vfs.c
new file mode 100644
index 00000000000..d5b87531400
--- /dev/null
+++ b/fs/yaffs2/yaffs_vfs.c
@@ -0,0 +1,2792 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 * Acknowledgements:
9 * Luc van OostenRyck for numerous patches.
10 * Nick Bane for numerous patches.
11 * Nick Bane for 2.5/2.6 integration.
12 * Andras Toth for mknod rdev issue.
13 * Michael Fischer for finding the problem with inode inconsistency.
14 * Some code bodily lifted from JFFS
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License version 2 as
18 * published by the Free Software Foundation.
19 */
20
21/*
22 *
23 * This is the file system front-end to YAFFS that hooks it up to
24 * the VFS.
25 *
26 * Special notes:
27 * >> 2.4: sb->u.generic_sbp points to the struct yaffs_dev associated with
28 * this superblock
29 * >> 2.6: sb->s_fs_info points to the struct yaffs_dev associated with this
30 * superblock
31 * >> inode->u.generic_ip points to the associated struct yaffs_obj.
32 */
33
34/*
35 * NB There are two variants of Linux VFS glue code. This variant supports
36 * a single version and should not include any multi-version code.
37 */
38#include <linux/version.h>
39
40#include <linux/kernel.h>
41#include <linux/module.h>
42#include <linux/slab.h>
43#include <linux/init.h>
44#include <linux/fs.h>
45#include <linux/proc_fs.h>
46#include <linux/smp_lock.h>
47#include <linux/pagemap.h>
48#include <linux/mtd/mtd.h>
49#include <linux/interrupt.h>
50#include <linux/string.h>
51#include <linux/ctype.h>
52#include <linux/namei.h>
53#include <linux/exportfs.h>
54#include <linux/kthread.h>
55#include <linux/delay.h>
56#include <linux/freezer.h>
57
58#include <asm/div64.h>
59
60#include <linux/statfs.h>
61
62#define UnlockPage(p) unlock_page(p)
63#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags)
64
65#define yaffs_devname(sb, buf) bdevname(sb->s_bdev, buf)
66
67#define YPROC_ROOT NULL
68
69#define Y_INIT_TIMER(a) init_timer_on_stack(a)
70
71#define WRITE_SIZE_STR "writesize"
72#define WRITE_SIZE(mtd) ((mtd)->writesize)
73
74static uint32_t YCALCBLOCKS(uint64_t partition_size, uint32_t block_size)
75{
76 uint64_t result = partition_size;
77 do_div(result, block_size);
78 return (uint32_t) result;
79}
80
81#include <linux/uaccess.h>
82#include <linux/mtd/mtd.h>
83
84#include "yportenv.h"
85#include "yaffs_trace.h"
86#include "yaffs_guts.h"
87#include "yaffs_attribs.h"
88
89#include "yaffs_linux.h"
90
91#include "yaffs_mtdif.h"
92#include "yaffs_mtdif1.h"
93#include "yaffs_mtdif2.h"
94
95unsigned int yaffs_trace_mask = YAFFS_TRACE_BAD_BLOCKS | YAFFS_TRACE_ALWAYS;
96unsigned int yaffs_wr_attempts = YAFFS_WR_ATTEMPTS;
97unsigned int yaffs_auto_checkpoint = 1;
98unsigned int yaffs_gc_control = 1;
99unsigned int yaffs_bg_enable = 1;
100
101/* Module Parameters */
102module_param(yaffs_trace_mask, uint, 0644);
103module_param(yaffs_wr_attempts, uint, 0644);
104module_param(yaffs_auto_checkpoint, uint, 0644);
105module_param(yaffs_gc_control, uint, 0644);
106module_param(yaffs_bg_enable, uint, 0644);
107
108
109#define yaffs_inode_to_obj_lv(iptr) ((iptr)->i_private)
110#define yaffs_inode_to_obj(iptr) ((struct yaffs_obj *)(yaffs_inode_to_obj_lv(iptr)))
111#define yaffs_dentry_to_obj(dptr) yaffs_inode_to_obj((dptr)->d_inode)
112#define yaffs_super_to_dev(sb) ((struct yaffs_dev *)sb->s_fs_info)
113
114#define update_dir_time(dir) do {\
115 (dir)->i_ctime = (dir)->i_mtime = CURRENT_TIME; \
116 } while(0)
117
118
119static unsigned yaffs_gc_control_callback(struct yaffs_dev *dev)
120{
121 return yaffs_gc_control;
122}
123
124static void yaffs_gross_lock(struct yaffs_dev *dev)
125{
126 yaffs_trace(YAFFS_TRACE_LOCK, "yaffs locking %p", current);
127 mutex_lock(&(yaffs_dev_to_lc(dev)->gross_lock));
128 yaffs_trace(YAFFS_TRACE_LOCK, "yaffs locked %p", current);
129}
130
131static void yaffs_gross_unlock(struct yaffs_dev *dev)
132{
133 yaffs_trace(YAFFS_TRACE_LOCK, "yaffs unlocking %p", current);
134 mutex_unlock(&(yaffs_dev_to_lc(dev)->gross_lock));
135}
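/*
 * Locking sketch: the gross lock serialises all access to the yaffs
 * core for this device. The usual pattern in the methods below is
 *
 *	yaffs_gross_lock(dev);
 *	... call into the yaffs core ...
 *	yaffs_gross_unlock(dev);
 *
 * Note that yaffs_get_inode() must be called with the lock dropped:
 * it re-enters yaffs_iget(), which takes the lock itself.
 */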
136
137static void yaffs_fill_inode_from_obj(struct inode *inode,
138 struct yaffs_obj *obj);
139
140static struct inode *yaffs_iget(struct super_block *sb, unsigned long ino)
141{
142 struct inode *inode;
143 struct yaffs_obj *obj;
144 struct yaffs_dev *dev = yaffs_super_to_dev(sb);
145
146 yaffs_trace(YAFFS_TRACE_OS, "yaffs_iget for %lu", ino);
147
148 inode = iget_locked(sb, ino);
149 if (!inode)
150 return ERR_PTR(-ENOMEM);
151 if (!(inode->i_state & I_NEW))
152 return inode;
153
154 /* NB This is called as a side effect of other functions, but
155 * we had to release the lock to prevent deadlocks, so
156 * need to lock again.
157 */
158
159 yaffs_gross_lock(dev);
160
161 obj = yaffs_find_by_number(dev, inode->i_ino);
162
163 yaffs_fill_inode_from_obj(inode, obj);
164
165 yaffs_gross_unlock(dev);
166
167 unlock_new_inode(inode);
168 return inode;
169}
170
171struct inode *yaffs_get_inode(struct super_block *sb, int mode, int dev,
172 struct yaffs_obj *obj)
173{
174 struct inode *inode;
175
176 if (!sb) {
177 yaffs_trace(YAFFS_TRACE_OS,
178 "yaffs_get_inode for NULL super_block!!");
179 return NULL;
180
181 }
182
183 if (!obj) {
184 yaffs_trace(YAFFS_TRACE_OS,
185 "yaffs_get_inode for NULL object!!");
186 return NULL;
187
188 }
189
190 yaffs_trace(YAFFS_TRACE_OS,
191 "yaffs_get_inode for object %d",
192 obj->obj_id);
193
194 inode = yaffs_iget(sb, obj->obj_id);
195 if (IS_ERR(inode))
196 return NULL;
197
198 /* NB Side effect: iget calls back to yaffs_read_inode(). */
199 /* iget also increments the inode's i_count */
200 /* NB You can't be holding gross_lock or deadlock will happen! */
201
202 return inode;
203}
204
205static int yaffs_mknod(struct inode *dir, struct dentry *dentry, int mode,
206 dev_t rdev)
207{
208 struct inode *inode;
209
210 struct yaffs_obj *obj = NULL;
211 struct yaffs_dev *dev;
212
213 struct yaffs_obj *parent = yaffs_inode_to_obj(dir);
214
215 int error = -ENOSPC;
216 uid_t uid = current->cred->fsuid;
217 gid_t gid =
218 (dir->i_mode & S_ISGID) ? dir->i_gid : current->cred->fsgid;
219
220 if ((dir->i_mode & S_ISGID) && S_ISDIR(mode))
221 mode |= S_ISGID;
222
223 if (parent) {
224 yaffs_trace(YAFFS_TRACE_OS,
225 "yaffs_mknod: parent object %d type %d",
226 parent->obj_id, parent->variant_type);
227 } else {
228 yaffs_trace(YAFFS_TRACE_OS,
229 "yaffs_mknod: could not get parent object");
230 return -EPERM;
231 }
232
233 yaffs_trace(YAFFS_TRACE_OS,
234		"yaffs_mknod: making object for %s, mode %x dev %x",
235 dentry->d_name.name, mode, rdev);
236
237 dev = parent->my_dev;
238
239 yaffs_gross_lock(dev);
240
241 switch (mode & S_IFMT) {
242 default:
243 /* Special (socket, fifo, device...) */
244 yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making special");
245 obj =
246 yaffs_create_special(parent, dentry->d_name.name, mode, uid,
247 gid, old_encode_dev(rdev));
248 break;
249 case S_IFREG: /* file */
250 yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making file");
251 obj = yaffs_create_file(parent, dentry->d_name.name, mode, uid,
252 gid);
253 break;
254 case S_IFDIR: /* directory */
255 yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making directory");
256 obj = yaffs_create_dir(parent, dentry->d_name.name, mode,
257 uid, gid);
258 break;
259 case S_IFLNK: /* symlink */
260 yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making symlink");
261 obj = NULL; /* Do we ever get here? */
262 break;
263 }
264
265 /* Can not call yaffs_get_inode() with gross lock held */
266 yaffs_gross_unlock(dev);
267
268 if (obj) {
269 inode = yaffs_get_inode(dir->i_sb, mode, rdev, obj);
270 d_instantiate(dentry, inode);
271 update_dir_time(dir);
272 yaffs_trace(YAFFS_TRACE_OS,
273 "yaffs_mknod created object %d count = %d",
274 obj->obj_id, atomic_read(&inode->i_count));
275 error = 0;
276 yaffs_fill_inode_from_obj(dir, parent);
277 } else {
278 yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod failed making object");
279 error = -ENOMEM;
280 }
281
282 return error;
283}
284
285static int yaffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
286{
287 return yaffs_mknod(dir, dentry, mode | S_IFDIR, 0);
288}
289
290static int yaffs_create(struct inode *dir, struct dentry *dentry, int mode,
291 struct nameidata *n)
292{
293 return yaffs_mknod(dir, dentry, mode | S_IFREG, 0);
294}
295
296static int yaffs_link(struct dentry *old_dentry, struct inode *dir,
297 struct dentry *dentry)
298{
299 struct inode *inode = old_dentry->d_inode;
300 struct yaffs_obj *obj = NULL;
301 struct yaffs_obj *link = NULL;
302 struct yaffs_dev *dev;
303
304 yaffs_trace(YAFFS_TRACE_OS, "yaffs_link");
305
306 obj = yaffs_inode_to_obj(inode);
307 dev = obj->my_dev;
308
309 yaffs_gross_lock(dev);
310
311 if (!S_ISDIR(inode->i_mode)) /* Don't link directories */
312 link =
313 yaffs_link_obj(yaffs_inode_to_obj(dir), dentry->d_name.name,
314 obj);
315
316 if (link) {
317 old_dentry->d_inode->i_nlink = yaffs_get_obj_link_count(obj);
318 d_instantiate(dentry, old_dentry->d_inode);
319 atomic_inc(&old_dentry->d_inode->i_count);
320 yaffs_trace(YAFFS_TRACE_OS,
321 "yaffs_link link count %d i_count %d",
322 old_dentry->d_inode->i_nlink,
323 atomic_read(&old_dentry->d_inode->i_count));
324 }
325
326 yaffs_gross_unlock(dev);
327
328 if (link) {
329 update_dir_time(dir);
330 return 0;
331 }
332
333 return -EPERM;
334}
335
336static int yaffs_symlink(struct inode *dir, struct dentry *dentry,
337 const char *symname)
338{
339 struct yaffs_obj *obj;
340 struct yaffs_dev *dev;
341 uid_t uid = current->cred->fsuid;
342 gid_t gid =
343 (dir->i_mode & S_ISGID) ? dir->i_gid : current->cred->fsgid;
344
345 yaffs_trace(YAFFS_TRACE_OS, "yaffs_symlink");
346
347 dev = yaffs_inode_to_obj(dir)->my_dev;
348 yaffs_gross_lock(dev);
349 obj = yaffs_create_symlink(yaffs_inode_to_obj(dir), dentry->d_name.name,
350 S_IFLNK | S_IRWXUGO, uid, gid, symname);
351 yaffs_gross_unlock(dev);
352
353 if (obj) {
354 struct inode *inode;
355
356 inode = yaffs_get_inode(dir->i_sb, obj->yst_mode, 0, obj);
357 d_instantiate(dentry, inode);
358 update_dir_time(dir);
359 yaffs_trace(YAFFS_TRACE_OS, "symlink created OK");
360 return 0;
361 } else {
362 yaffs_trace(YAFFS_TRACE_OS, "symlink not created");
363 }
364
365 return -ENOMEM;
366}
367
368static struct dentry *yaffs_lookup(struct inode *dir, struct dentry *dentry,
369 struct nameidata *n)
370{
371 struct yaffs_obj *obj;
372 struct inode *inode = NULL;
373
374 struct yaffs_dev *dev = yaffs_inode_to_obj(dir)->my_dev;
375
376 if (current != yaffs_dev_to_lc(dev)->readdir_process)
377 yaffs_gross_lock(dev);
378
379 yaffs_trace(YAFFS_TRACE_OS,
380 "yaffs_lookup for %d:%s",
381 yaffs_inode_to_obj(dir)->obj_id, dentry->d_name.name);
382
383 obj = yaffs_find_by_name(yaffs_inode_to_obj(dir), dentry->d_name.name);
384
385 obj = yaffs_get_equivalent_obj(obj); /* in case it was a hardlink */
386
387 /* Can't hold gross lock when calling yaffs_get_inode() */
388 if (current != yaffs_dev_to_lc(dev)->readdir_process)
389 yaffs_gross_unlock(dev);
390
391 if (obj) {
392 yaffs_trace(YAFFS_TRACE_OS,
393 "yaffs_lookup found %d", obj->obj_id);
394
395 inode = yaffs_get_inode(dir->i_sb, obj->yst_mode, 0, obj);
396
397 if (inode) {
398			yaffs_trace(YAFFS_TRACE_OS, "yaffs_lookup dentry");
399 d_add(dentry, inode);
400 /* return dentry; */
401 return NULL;
402 }
403
404 } else {
405 yaffs_trace(YAFFS_TRACE_OS, "yaffs_lookup not found");
406
407 }
408
409 d_add(dentry, inode);
410
411 return NULL;
412}
413
414static int yaffs_unlink(struct inode *dir, struct dentry *dentry)
415{
416 int ret_val;
417
418 struct yaffs_dev *dev;
419 struct yaffs_obj *obj;
420
421 yaffs_trace(YAFFS_TRACE_OS,
422 "yaffs_unlink %d:%s",
423 (int)(dir->i_ino), dentry->d_name.name);
424 obj = yaffs_inode_to_obj(dir);
425 dev = obj->my_dev;
426
427 yaffs_gross_lock(dev);
428
429 ret_val = yaffs_unlinker(obj, dentry->d_name.name);
430
431 if (ret_val == YAFFS_OK) {
432 dentry->d_inode->i_nlink--;
433 dir->i_version++;
434 yaffs_gross_unlock(dev);
435 mark_inode_dirty(dentry->d_inode);
436 update_dir_time(dir);
437 return 0;
438 }
439 yaffs_gross_unlock(dev);
440 return -ENOTEMPTY;
441}
442
443static int yaffs_sync_object(struct file *file, int datasync)
444{
445
446 struct yaffs_obj *obj;
447 struct yaffs_dev *dev;
448 struct dentry *dentry = file->f_path.dentry;
449
450 obj = yaffs_dentry_to_obj(dentry);
451
452 dev = obj->my_dev;
453
454 yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC, "yaffs_sync_object");
455 yaffs_gross_lock(dev);
456 yaffs_flush_file(obj, 1, datasync);
457 yaffs_gross_unlock(dev);
458 return 0;
459}
460/*
461 * The VFS layer already does all the dentry stuff for rename.
462 *
463 * NB: POSIX says you can rename an object over an old object of the same name
464 */
465static int yaffs_rename(struct inode *old_dir, struct dentry *old_dentry,
466 struct inode *new_dir, struct dentry *new_dentry)
467{
468 struct yaffs_dev *dev;
469 int ret_val = YAFFS_FAIL;
470 struct yaffs_obj *target;
471
472 yaffs_trace(YAFFS_TRACE_OS, "yaffs_rename");
473 dev = yaffs_inode_to_obj(old_dir)->my_dev;
474
475 yaffs_gross_lock(dev);
476
477 /* Check if the target is an existing directory that is not empty. */
478 target = yaffs_find_by_name(yaffs_inode_to_obj(new_dir),
479 new_dentry->d_name.name);
480
481 if (target && target->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY &&
482 !list_empty(&target->variant.dir_variant.children)) {
483
484 yaffs_trace(YAFFS_TRACE_OS, "target is non-empty dir");
485
486 ret_val = YAFFS_FAIL;
487 } else {
488 /* Unlinking is now done internally, using the shadowing mechanism */
489 yaffs_trace(YAFFS_TRACE_OS, "calling yaffs_rename_obj");
490
491 ret_val = yaffs_rename_obj(yaffs_inode_to_obj(old_dir),
492 old_dentry->d_name.name,
493 yaffs_inode_to_obj(new_dir),
494 new_dentry->d_name.name);
495 }
496 yaffs_gross_unlock(dev);
497
498 if (ret_val == YAFFS_OK) {
499 if (target) {
500 new_dentry->d_inode->i_nlink--;
501 mark_inode_dirty(new_dentry->d_inode);
502 }
503
504 update_dir_time(old_dir);
505 if (old_dir != new_dir)
506 update_dir_time(new_dir);
507 return 0;
508 } else {
509 return -ENOTEMPTY;
510 }
511}
512
513static int yaffs_setattr(struct dentry *dentry, struct iattr *attr)
514{
515 struct inode *inode = dentry->d_inode;
516 int error = 0;
517 struct yaffs_dev *dev;
518
519 yaffs_trace(YAFFS_TRACE_OS,
520 "yaffs_setattr of object %d",
521 yaffs_inode_to_obj(inode)->obj_id);
522
523 /* Fail if a requested resize >= 2GB */
524 if (attr->ia_valid & ATTR_SIZE && (attr->ia_size >> 31))
525 error = -EINVAL;
526
527 if (error == 0)
528 error = inode_change_ok(inode, attr);
529 if (error == 0) {
530 int result;
531
532 setattr_copy(inode, attr);
533 yaffs_trace(YAFFS_TRACE_OS, "inode_setattr called");
534 if (attr->ia_valid & ATTR_SIZE) {
535 truncate_setsize(inode, attr->ia_size);
536 inode->i_blocks = (inode->i_size + 511) >> 9;
537 }
538
539 dev = yaffs_inode_to_obj(inode)->my_dev;
540 if (attr->ia_valid & ATTR_SIZE) {
541 yaffs_trace(YAFFS_TRACE_OS, "resize to %d(%x)",
542 (int)(attr->ia_size),
543 (int)(attr->ia_size));
544 }
545 yaffs_gross_lock(dev);
546 result = yaffs_set_attribs(yaffs_inode_to_obj(inode), attr);
547 if (result == YAFFS_OK) {
548 error = 0;
549 } else {
550 error = -EPERM;
551 }
552 yaffs_gross_unlock(dev);
553
554 }
555
556 yaffs_trace(YAFFS_TRACE_OS, "yaffs_setattr done returning %d", error);
557
558 return error;
559}
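
/*
 * Worked example of the size check in yaffs_setattr() above: a
 * truncate() to 0x80000000 (2 GiB) gives ia_size >> 31 == 1 and is
 * rejected with -EINVAL, so 0x7fffffff is the largest size that
 * passes the check.
 */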
560
561#ifdef CONFIG_YAFFS_XATTR
562static int yaffs_setxattr(struct dentry *dentry, const char *name,
563 const void *value, size_t size, int flags)
564{
565 struct inode *inode = dentry->d_inode;
566 int error = 0;
567 struct yaffs_dev *dev;
568 struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
569
570 yaffs_trace(YAFFS_TRACE_OS, "yaffs_setxattr of object %d", obj->obj_id);
571
572 if (error == 0) {
573 int result;
574 dev = obj->my_dev;
575 yaffs_gross_lock(dev);
576 result = yaffs_set_xattrib(obj, name, value, size, flags);
577 if (result == YAFFS_OK)
578 error = 0;
579 else if (result < 0)
580 error = result;
581 yaffs_gross_unlock(dev);
582
583 }
584 yaffs_trace(YAFFS_TRACE_OS, "yaffs_setxattr done returning %d", error);
585
586 return error;
587}
588
589static ssize_t yaffs_getxattr(struct dentry *dentry, const char *name,
590 void *buff, size_t size)
591{
592 struct inode *inode = dentry->d_inode;
593 int error = 0;
594 struct yaffs_dev *dev;
595 struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
596
597 yaffs_trace(YAFFS_TRACE_OS,
598 "yaffs_getxattr \"%s\" from object %d",
599 name, obj->obj_id);
600
601 if (error == 0) {
602 dev = obj->my_dev;
603 yaffs_gross_lock(dev);
604 error = yaffs_get_xattrib(obj, name, buff, size);
605 yaffs_gross_unlock(dev);
606
607 }
608 yaffs_trace(YAFFS_TRACE_OS, "yaffs_getxattr done returning %d", error);
609
610 return error;
611}
612
613static int yaffs_removexattr(struct dentry *dentry, const char *name)
614{
615 struct inode *inode = dentry->d_inode;
616 int error = 0;
617 struct yaffs_dev *dev;
618 struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
619
620 yaffs_trace(YAFFS_TRACE_OS,
621 "yaffs_removexattr of object %d", obj->obj_id);
622
623 if (error == 0) {
624 int result;
625 dev = obj->my_dev;
626 yaffs_gross_lock(dev);
627 result = yaffs_remove_xattrib(obj, name);
628 if (result == YAFFS_OK)
629 error = 0;
630 else if (result < 0)
631 error = result;
632 yaffs_gross_unlock(dev);
633
634 }
635 yaffs_trace(YAFFS_TRACE_OS,
636 "yaffs_removexattr done returning %d", error);
637
638 return error;
639}
640
641static ssize_t yaffs_listxattr(struct dentry *dentry, char *buff, size_t size)
642{
643 struct inode *inode = dentry->d_inode;
644 int error = 0;
645 struct yaffs_dev *dev;
646 struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
647
648 yaffs_trace(YAFFS_TRACE_OS,
649 "yaffs_listxattr of object %d", obj->obj_id);
650
651 if (error == 0) {
652 dev = obj->my_dev;
653 yaffs_gross_lock(dev);
654 error = yaffs_list_xattrib(obj, buff, size);
655 yaffs_gross_unlock(dev);
656
657 }
658 yaffs_trace(YAFFS_TRACE_OS,
659 "yaffs_listxattr done returning %d", error);
660
661 return error;
662}
663
664#endif
665
666static const struct inode_operations yaffs_dir_inode_operations = {
667 .create = yaffs_create,
668 .lookup = yaffs_lookup,
669 .link = yaffs_link,
670 .unlink = yaffs_unlink,
671 .symlink = yaffs_symlink,
672 .mkdir = yaffs_mkdir,
673 .rmdir = yaffs_unlink,
674 .mknod = yaffs_mknod,
675 .rename = yaffs_rename,
676 .setattr = yaffs_setattr,
677#ifdef CONFIG_YAFFS_XATTR
678 .setxattr = yaffs_setxattr,
679 .getxattr = yaffs_getxattr,
680 .listxattr = yaffs_listxattr,
681 .removexattr = yaffs_removexattr,
682#endif
683};
684/*-----------------------------------------------------------------*/
685/* Directory search context allows us to unlock access to yaffs during
686 * filldir without causing problems with the directory being modified.
687 * This is similar to the tried and tested mechanism used in yaffs direct.
688 *
689 * A search context iterates along a doubly linked list of siblings in the
690 * directory. If the object being iterated is deleted then this would
691 * corrupt the list iteration, likely causing a crash. The search context
692 * avoids this by using the remove_obj_fn to move the search context to
693 * the next object before the object is deleted.
694 *
695 * Many readdirs (and thus search contexts) may be alive simultaneously, so
696 * each struct yaffs_dev has a list of these.
697 *
698 * A search context lives for the duration of a readdir; a usage sketch
699 * follows yaffs_search_advance() below.
700 * All these functions must be called while yaffs is locked.
701 */
702
703struct yaffs_search_context {
704 struct yaffs_dev *dev;
705 struct yaffs_obj *dir_obj;
706 struct yaffs_obj *next_return;
707 struct list_head others;
708};
709
710/*
711 * yaffs_new_search() creates a new search context, initialises it and
712 * adds it to the device's search context list.
713 *
714 * Called at start of readdir.
715 */
716static struct yaffs_search_context *yaffs_new_search(struct yaffs_obj *dir)
717{
718 struct yaffs_dev *dev = dir->my_dev;
719 struct yaffs_search_context *sc =
720 kmalloc(sizeof(struct yaffs_search_context), GFP_NOFS);
721 if (sc) {
722 sc->dir_obj = dir;
723 sc->dev = dev;
724 if (list_empty(&sc->dir_obj->variant.dir_variant.children))
725 sc->next_return = NULL;
726 else
727 sc->next_return =
728 list_entry(dir->variant.dir_variant.children.next,
729 struct yaffs_obj, siblings);
730 INIT_LIST_HEAD(&sc->others);
731 list_add(&sc->others, &(yaffs_dev_to_lc(dev)->search_contexts));
732 }
733 return sc;
734}
735
736/*
737 * yaffs_search_end() disposes of a search context and cleans up.
738 */
739static void yaffs_search_end(struct yaffs_search_context *sc)
740{
741 if (sc) {
742 list_del(&sc->others);
743 kfree(sc);
744 }
745}
746
747/*
748 * yaffs_search_advance() moves a search context to the next object.
749 * Called when the search iterates or when an object removal causes
750 * the search context to be moved to the next object.
751 */
752static void yaffs_search_advance(struct yaffs_search_context *sc)
753{
754 if (!sc)
755 return;
756
757 if (sc->next_return == NULL ||
758 list_empty(&sc->dir_obj->variant.dir_variant.children))
759 sc->next_return = NULL;
760 else {
761 struct list_head *next = sc->next_return->siblings.next;
762
763 if (next == &sc->dir_obj->variant.dir_variant.children)
764 sc->next_return = NULL; /* end of list */
765 else
766 sc->next_return =
767 list_entry(next, struct yaffs_obj, siblings);
768 }
769}
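
/*
 * Illustrative sketch only, not called anywhere in the driver: how the
 * three search-context helpers above are meant to be combined.  The
 * yaffs gross lock must be held throughout, as required by the comment
 * above struct yaffs_search_context.
 */
static void __maybe_unused yaffs_search_usage_sketch(struct yaffs_obj *dir)
{
	struct yaffs_search_context *sc = yaffs_new_search(dir);

	while (sc && sc->next_return) {
		/* sc->next_return is the directory entry to emit */
		yaffs_search_advance(sc);
	}
	yaffs_search_end(sc);
}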
770
771/*
772 * yaffs_remove_obj_callback() is called when an object is unlinked.
773 * We check open search contexts and advance any which are currently
774 * on the object being iterated.
775 */
776static void yaffs_remove_obj_callback(struct yaffs_obj *obj)
777{
778
779 struct list_head *i;
780 struct yaffs_search_context *sc;
781 struct list_head *search_contexts =
782 &(yaffs_dev_to_lc(obj->my_dev)->search_contexts);
783
784 /* Iterate through the directory search contexts.
785 * If any are currently on the object being removed, then advance
786 * the search context to the next object to prevent a dangling pointer.
787 */
788 list_for_each(i, search_contexts) {
789 if (i) {
790 sc = list_entry(i, struct yaffs_search_context, others);
791 if (sc->next_return == obj)
792 yaffs_search_advance(sc);
793 }
794 }
795
796}
797
798static int yaffs_readdir(struct file *f, void *dirent, filldir_t filldir)
799{
800 struct yaffs_obj *obj;
801 struct yaffs_dev *dev;
802 struct yaffs_search_context *sc;
803 struct inode *inode = f->f_dentry->d_inode;
804 unsigned long offset, curoffs;
805 struct yaffs_obj *l;
806 int ret_val = 0;
807
808 char name[YAFFS_MAX_NAME_LENGTH + 1];
809
810 obj = yaffs_dentry_to_obj(f->f_dentry);
811 dev = obj->my_dev;
812
813 yaffs_gross_lock(dev);
814
815 yaffs_dev_to_lc(dev)->readdir_process = current;
816
817 offset = f->f_pos;
818
819 sc = yaffs_new_search(obj);
820 if (!sc) {
821 ret_val = -ENOMEM;
822 goto out;
823 }
824
825 yaffs_trace(YAFFS_TRACE_OS,
826 "yaffs_readdir: starting at %d", (int)offset);
827
828 if (offset == 0) {
829 yaffs_trace(YAFFS_TRACE_OS,
830 "yaffs_readdir: entry . ino %d",
831 (int)inode->i_ino);
832 yaffs_gross_unlock(dev);
833 if (filldir(dirent, ".", 1, offset, inode->i_ino, DT_DIR) < 0) {
834 yaffs_gross_lock(dev);
835 goto out;
836 }
837 yaffs_gross_lock(dev);
838 offset++;
839 f->f_pos++;
840 }
841 if (offset == 1) {
842 yaffs_trace(YAFFS_TRACE_OS,
843 "yaffs_readdir: entry .. ino %d",
844 (int)f->f_dentry->d_parent->d_inode->i_ino);
845 yaffs_gross_unlock(dev);
846 if (filldir(dirent, "..", 2, offset,
847 f->f_dentry->d_parent->d_inode->i_ino,
848 DT_DIR) < 0) {
849 yaffs_gross_lock(dev);
850 goto out;
851 }
852 yaffs_gross_lock(dev);
853 offset++;
854 f->f_pos++;
855 }
856
857 curoffs = 1;
858
859 /* If the directory has changed since the open or last call to
860 readdir, rewind to after the 2 canned entries. */
861 if (f->f_version != inode->i_version) {
862 offset = 2;
863 f->f_pos = offset;
864 f->f_version = inode->i_version;
865 }
866
867 while (sc->next_return) {
868 curoffs++;
869 l = sc->next_return;
870 if (curoffs >= offset) {
871 int this_inode = yaffs_get_obj_inode(l);
872 int this_type = yaffs_get_obj_type(l);
873
874 yaffs_get_obj_name(l, name, YAFFS_MAX_NAME_LENGTH + 1);
875 yaffs_trace(YAFFS_TRACE_OS,
876 "yaffs_readdir: %s inode %d",
877 name, yaffs_get_obj_inode(l));
878
879 yaffs_gross_unlock(dev);
880
881 if (filldir(dirent,
882 name,
883 strlen(name),
884 offset, this_inode, this_type) < 0) {
885 yaffs_gross_lock(dev);
886 goto out;
887 }
888
889 yaffs_gross_lock(dev);
890
891 offset++;
892 f->f_pos++;
893 }
894 yaffs_search_advance(sc);
895 }
896
897out:
898 yaffs_search_end(sc);
899 yaffs_dev_to_lc(dev)->readdir_process = NULL;
900 yaffs_gross_unlock(dev);
901
902 return ret_val;
903}
904
905static const struct file_operations yaffs_dir_operations = {
906 .read = generic_read_dir,
907 .readdir = yaffs_readdir,
908 .fsync = yaffs_sync_object,
909 .llseek = generic_file_llseek,
910};
911
912
913
914static int yaffs_file_flush(struct file *file, fl_owner_t id)
915{
916 struct yaffs_obj *obj = yaffs_dentry_to_obj(file->f_dentry);
917
918 struct yaffs_dev *dev = obj->my_dev;
919
920 yaffs_trace(YAFFS_TRACE_OS,
921 "yaffs_file_flush object %d (%s)",
922 obj->obj_id, obj->dirty ? "dirty" : "clean");
923
924 yaffs_gross_lock(dev);
925
926 yaffs_flush_file(obj, 1, 0);
927
928 yaffs_gross_unlock(dev);
929
930 return 0;
931}
932
933static const struct file_operations yaffs_file_operations = {
934 .read = do_sync_read,
935 .write = do_sync_write,
936 .aio_read = generic_file_aio_read,
937 .aio_write = generic_file_aio_write,
938 .mmap = generic_file_mmap,
939 .flush = yaffs_file_flush,
940 .fsync = yaffs_sync_object,
941 .splice_read = generic_file_splice_read,
942 .splice_write = generic_file_splice_write,
943 .llseek = generic_file_llseek,
944};
945
946
947/* ExportFS support */
948static struct inode *yaffs2_nfs_get_inode(struct super_block *sb, uint64_t ino,
949 uint32_t generation)
950{
951 return yaffs_iget(sb, ino);
952}
953
954static struct dentry *yaffs2_fh_to_dentry(struct super_block *sb,
955 struct fid *fid, int fh_len,
956 int fh_type)
957{
958 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
959 yaffs2_nfs_get_inode);
960}
961
962static struct dentry *yaffs2_fh_to_parent(struct super_block *sb,
963 struct fid *fid, int fh_len,
964 int fh_type)
965{
966 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
967 yaffs2_nfs_get_inode);
968}
969
970struct dentry *yaffs2_get_parent(struct dentry *dentry)
971{
972
973 struct super_block *sb = dentry->d_inode->i_sb;
974 struct dentry *parent = ERR_PTR(-ENOENT);
975 struct inode *inode;
976 unsigned long parent_ino;
977 struct yaffs_obj *d_obj;
978 struct yaffs_obj *parent_obj;
979
980 d_obj = yaffs_inode_to_obj(dentry->d_inode);
981
982 if (d_obj) {
983 parent_obj = d_obj->parent;
984 if (parent_obj) {
985 parent_ino = yaffs_get_obj_inode(parent_obj);
986 inode = yaffs_iget(sb, parent_ino);
987
988 if (IS_ERR(inode)) {
989 parent = ERR_CAST(inode);
990 } else {
991 parent = d_obtain_alias(inode);
992 /* d_obtain_alias() returns an ERR_PTR on
993 * failure and drops the inode reference
994 * itself, so no extra error handling or
995 * iput() is needed here. */
996 }
997 }
998 }
999
1000 return parent;
1001}
1002
1003/* Members left NULL in this structure fall back to the
1004 * default exportfs implementations.
1005 */
1006
1007static struct export_operations yaffs_export_ops = {
1008 .fh_to_dentry = yaffs2_fh_to_dentry,
1009 .fh_to_parent = yaffs2_fh_to_parent,
1010 .get_parent = yaffs2_get_parent,
1011};
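
/*
 * Example flow (assumed NFS export of a yaffs mount): the server turns
 * a client file handle back into a dentry via yaffs2_fh_to_dentry(),
 * which decodes the handle using yaffs2_nfs_get_inode() above.  Since
 * .encode_fh is left NULL, handle encoding falls back to the exportfs
 * default, which packs the inode number and generation.
 */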
1012
1013
1014/*-----------------------------------------------------------------*/
1015
1016static int yaffs_readlink(struct dentry *dentry, char __user *buffer,
1017 int buflen)
1018{
1019 unsigned char *alias;
1020 int ret;
1021
1022 struct yaffs_dev *dev = yaffs_dentry_to_obj(dentry)->my_dev;
1023
1024 yaffs_gross_lock(dev);
1025
1026 alias = yaffs_get_symlink_alias(yaffs_dentry_to_obj(dentry));
1027
1028 yaffs_gross_unlock(dev);
1029
1030 if (!alias)
1031 return -ENOMEM;
1032
1033 ret = vfs_readlink(dentry, buffer, buflen, alias);
1034 kfree(alias);
1035 return ret;
1036}
1037
1038static void *yaffs_follow_link(struct dentry *dentry, struct nameidata *nd)
1039{
1040 unsigned char *alias;
1041 void *ret;
1042 struct yaffs_dev *dev = yaffs_dentry_to_obj(dentry)->my_dev;
1043
1044 yaffs_gross_lock(dev);
1045
1046 alias = yaffs_get_symlink_alias(yaffs_dentry_to_obj(dentry));
1047 yaffs_gross_unlock(dev);
1048
1049 if (!alias) {
1050 ret = ERR_PTR(-ENOMEM);
1051 goto out;
1052 }
1053
1054 nd_set_link(nd, alias);
1055 ret = (void *)alias;
1056out:
1057 return ret;
1058}
1059
1060void yaffs_put_link(struct dentry *dentry, struct nameidata *nd, void *alias)
1061{
1062 kfree(alias);
1063}
1064
1065
1066static void yaffs_unstitch_obj(struct inode *inode, struct yaffs_obj *obj)
1067{
1068 /* Clear the association between the inode and
1069 * the struct yaffs_obj.
1070 */
1071 obj->my_inode = NULL;
1072 yaffs_inode_to_obj_lv(inode) = NULL;
1073
1074 /* If the object freeing was deferred, then the real
1075 * free happens now.
1076 * This should fix the inode inconsistency problem.
1077 */
1078 yaffs_handle_defered_free(obj);
1079}
1080
1081/* yaffs_evict_inode combines into one operation what was previously done in
1082 * yaffs_clear_inode() and yaffs_delete_inode()
1083 *
1084 */
1085static void yaffs_evict_inode(struct inode *inode)
1086{
1087 struct yaffs_obj *obj;
1088 struct yaffs_dev *dev;
1089 int deleteme = 0;
1090
1091 obj = yaffs_inode_to_obj(inode);
1092
1093 yaffs_trace(YAFFS_TRACE_OS,
1094 "yaffs_evict_inode: ino %d, count %d %s",
1095 (int)inode->i_ino,
1096 atomic_read(&inode->i_count),
1097 obj ? "object exists" : "null object");
1098
1099 if (!inode->i_nlink && !is_bad_inode(inode))
1100 deleteme = 1;
1101 truncate_inode_pages(&inode->i_data, 0);
1102 end_writeback(inode);
1103
1104 if (deleteme && obj) {
1105 dev = obj->my_dev;
1106 yaffs_gross_lock(dev);
1107 yaffs_del_obj(obj);
1108 yaffs_gross_unlock(dev);
1109 }
1110 if (obj) {
1111 dev = obj->my_dev;
1112 yaffs_gross_lock(dev);
1113 yaffs_unstitch_obj(inode, obj);
1114 yaffs_gross_unlock(dev);
1115 }
1116
1117}
1118
1119static void yaffs_touch_super(struct yaffs_dev *dev)
1120{
1121 struct super_block *sb = yaffs_dev_to_lc(dev)->super;
1122
1123 yaffs_trace(YAFFS_TRACE_OS, "yaffs_touch_super() sb = %p", sb);
1124 if (sb)
1125 sb->s_dirt = 1;
1126}
1127
1128static int yaffs_readpage_nolock(struct file *f, struct page *pg)
1129{
1130 /* Lifted from jffs2 */
1131
1132 struct yaffs_obj *obj;
1133 unsigned char *pg_buf;
1134 int ret;
1135
1136 struct yaffs_dev *dev;
1137
1138 yaffs_trace(YAFFS_TRACE_OS,
1139 "yaffs_readpage_nolock at %08x, size %08x",
1140 (unsigned)(pg->index << PAGE_CACHE_SHIFT),
1141 (unsigned)PAGE_CACHE_SIZE);
1142
1143 obj = yaffs_dentry_to_obj(f->f_dentry);
1144
1145 dev = obj->my_dev;
1146
1147 BUG_ON(!PageLocked(pg));
1148
1149 pg_buf = kmap(pg);
1150 /* Note: kmap() does not fail on a valid page, though it may sleep */
1151
1152 yaffs_gross_lock(dev);
1153
1154 ret = yaffs_file_rd(obj, pg_buf,
1155 pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE);
1156
1157 yaffs_gross_unlock(dev);
1158
1159 if (ret >= 0)
1160 ret = 0;
1161
1162 if (ret) {
1163 ClearPageUptodate(pg);
1164 SetPageError(pg);
1165 } else {
1166 SetPageUptodate(pg);
1167 ClearPageError(pg);
1168 }
1169
1170 flush_dcache_page(pg);
1171 kunmap(pg);
1172
1173 yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage_nolock done");
1174 return ret;
1175}
1176
1177static int yaffs_readpage_unlock(struct file *f, struct page *pg)
1178{
1179 int ret = yaffs_readpage_nolock(f, pg);
1180 UnlockPage(pg);
1181 return ret;
1182}
1183
1184static int yaffs_readpage(struct file *f, struct page *pg)
1185{
1186 int ret;
1187
1188 yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage");
1189 ret = yaffs_readpage_unlock(f, pg);
1190 yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage done");
1191 return ret;
1192}
1193
1194/* writepage inspired by/stolen from smbfs */
1195
1196static int yaffs_writepage(struct page *page, struct writeback_control *wbc)
1197{
1198 struct yaffs_dev *dev;
1199 struct address_space *mapping = page->mapping;
1200 struct inode *inode;
1201 unsigned long end_index;
1202 char *buffer;
1203 struct yaffs_obj *obj;
1204 int n_written = 0;
1205 unsigned n_bytes;
1206 loff_t i_size;
1207
1208 if (!mapping)
1209 BUG();
1210 inode = mapping->host;
1211 if (!inode)
1212 BUG();
1213 i_size = i_size_read(inode);
1214
1215 end_index = i_size >> PAGE_CACHE_SHIFT;
1216
1217 if (page->index < end_index)
1218 n_bytes = PAGE_CACHE_SIZE;
1219 else {
1220 n_bytes = i_size & (PAGE_CACHE_SIZE - 1);
1221
1222 if (page->index > end_index || !n_bytes) {
1223 yaffs_trace(YAFFS_TRACE_OS,
1224 "yaffs_writepage at %08x, inode size = %08x!!!",
1225 (unsigned)(page->index << PAGE_CACHE_SHIFT),
1226 (unsigned)inode->i_size);
1227 yaffs_trace(YAFFS_TRACE_OS,
1228 " -> don't care!!");
1229
1230 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
1231 set_page_writeback(page);
1232 unlock_page(page);
1233 end_page_writeback(page);
1234 return 0;
1235 }
1236 }
1237
1238 if (n_bytes != PAGE_CACHE_SIZE)
1239 zero_user_segment(page, n_bytes, PAGE_CACHE_SIZE);
1240
1241 get_page(page);
1242
1243 buffer = kmap(page);
1244
1245 obj = yaffs_inode_to_obj(inode);
1246 dev = obj->my_dev;
1247 yaffs_gross_lock(dev);
1248
1249 yaffs_trace(YAFFS_TRACE_OS,
1250 "yaffs_writepage at %08x, size %08x",
1251 (unsigned)(page->index << PAGE_CACHE_SHIFT), n_bytes);
1252 yaffs_trace(YAFFS_TRACE_OS,
1253 "writepage0: obj size = %05x, inode size = %05x",
1254 (int)obj->variant.file_variant.file_size, (int)inode->i_size);
1255
1256 n_written = yaffs_wr_file(obj, buffer,
1257 page->index << PAGE_CACHE_SHIFT, n_bytes, 0);
1258
1259 yaffs_touch_super(dev);
1260
1261 yaffs_trace(YAFFS_TRACE_OS,
1262 "writepage1: obj size = %05x, inode size = %05x",
1263 (int)obj->variant.file_variant.file_size, (int)inode->i_size);
1264
1265 yaffs_gross_unlock(dev);
1266
1267 kunmap(page);
1268 set_page_writeback(page);
1269 unlock_page(page);
1270 end_page_writeback(page);
1271 put_page(page);
1272
1273 return (n_written == n_bytes) ? 0 : -ENOSPC;
1274}
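
/*
 * Worked example for the tail-page logic above, assuming a 4096-byte
 * PAGE_CACHE_SIZE: with i_size = 10000, end_index = 10000 >> 12 = 2,
 * so page index 2 is the partial tail; n_bytes = 10000 & 4095 = 1808
 * and bytes 1808..4095 of the page are zeroed before the flash write.
 */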
1275
1276/* Space holding and freeing is done to ensure we have space available
1277 * for write_begin/end.
1278 * For now we just assume few parallel writes and check against a small
1279 * fixed number.
1280 * Todo: do this with a counter to handle parallel writes better; see
1281 * the sketch after yaffs_release_space() below. */
1282
1283static ssize_t yaffs_hold_space(struct file *f)
1284{
1285 struct yaffs_obj *obj;
1286 struct yaffs_dev *dev;
1287
1288 int n_free_chunks;
1289
1290 obj = yaffs_dentry_to_obj(f->f_dentry);
1291
1292 dev = obj->my_dev;
1293
1294 yaffs_gross_lock(dev);
1295
1296 n_free_chunks = yaffs_get_n_free_chunks(dev);
1297
1298 yaffs_gross_unlock(dev);
1299
1300 return (n_free_chunks > 20) ? 1 : 0;
1301}
1302
1303static void yaffs_release_space(struct file *f)
1304{
1305 struct yaffs_obj *obj;
1306 struct yaffs_dev *dev;
1307
1308 obj = yaffs_dentry_to_obj(f->f_dentry);
1309
1310 dev = obj->my_dev;
1311
1312 yaffs_gross_lock(dev);
1313
1314 yaffs_gross_unlock(dev);
1315}
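
/*
 * Minimal sketch (not driver code) of the counter-based scheme the
 * Todo above suggests: yaffs_writers_sketch is a hypothetical module
 * counter, and the free-space threshold scales with the number of
 * writers currently holding space instead of the fixed 20 chunks in
 * yaffs_hold_space().  A matching release helper would decrement the
 * counter once the write completes.
 */
static atomic_t yaffs_writers_sketch = ATOMIC_INIT(0);

static ssize_t __maybe_unused yaffs_hold_space_counted(struct file *f)
{
	struct yaffs_dev *dev = yaffs_dentry_to_obj(f->f_dentry)->my_dev;
	int n_free_chunks;
	int needed;

	needed = 20 * atomic_inc_return(&yaffs_writers_sketch);

	yaffs_gross_lock(dev);
	n_free_chunks = yaffs_get_n_free_chunks(dev);
	yaffs_gross_unlock(dev);

	if (n_free_chunks > needed)
		return 1;

	atomic_dec(&yaffs_writers_sketch);
	return 0;
}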
1316
1317static int yaffs_write_begin(struct file *filp, struct address_space *mapping,
1318 loff_t pos, unsigned len, unsigned flags,
1319 struct page **pagep, void **fsdata)
1320{
1321 struct page *pg = NULL;
1322 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1323
1324 int ret = 0;
1325 int space_held = 0;
1326
1327 /* Get a page */
1328 pg = grab_cache_page_write_begin(mapping, index, flags);
1329
1330 *pagep = pg;
1331 if (!pg) {
1332 ret = -ENOMEM;
1333 goto out;
1334 }
1335 yaffs_trace(YAFFS_TRACE_OS,
1336 "start yaffs_write_begin index %d(%x) uptodate %d",
1337 (int)index, (int)index, Page_Uptodate(pg) ? 1 : 0);
1338
1339 /* Get fs space */
1340 space_held = yaffs_hold_space(filp);
1341
1342 if (!space_held) {
1343 ret = -ENOSPC;
1344 goto out;
1345 }
1346
1347 /* Update page if required */
1348
1349 if (!Page_Uptodate(pg))
1350 ret = yaffs_readpage_nolock(filp, pg);
1351
1352 if (ret)
1353 goto out;
1354
1355 /* Happy path return */
1356 yaffs_trace(YAFFS_TRACE_OS, "end yaffs_write_begin - ok");
1357
1358 return 0;
1359
1360out:
1361 yaffs_trace(YAFFS_TRACE_OS,
1362 "end yaffs_write_begin fail returning %d", ret);
1363 if (space_held)
1364 yaffs_release_space(filp);
1365 if (pg) {
1366 unlock_page(pg);
1367 page_cache_release(pg);
1368 }
1369 return ret;
1370}
1371
1372static ssize_t yaffs_file_write(struct file *f, const char *buf, size_t n,
1373 loff_t * pos)
1374{
1375 struct yaffs_obj *obj;
1376 int n_written, ipos;
1377 struct inode *inode;
1378 struct yaffs_dev *dev;
1379
1380 obj = yaffs_dentry_to_obj(f->f_dentry);
1381
1382 dev = obj->my_dev;
1383
1384 yaffs_gross_lock(dev);
1385
1386 inode = f->f_dentry->d_inode;
1387
1388 if (!S_ISBLK(inode->i_mode) && f->f_flags & O_APPEND)
1389 ipos = inode->i_size;
1390 else
1391 ipos = *pos;
1392
1393 if (!obj)
1394 yaffs_trace(YAFFS_TRACE_OS,
1395 "yaffs_file_write: hey obj is null!");
1396 else
1397 yaffs_trace(YAFFS_TRACE_OS,
1398 "yaffs_file_write about to write writing %u(%x) bytes to object %d at %d(%x)",
1399 (unsigned)n, (unsigned)n, obj->obj_id, ipos, ipos);
1400
1401 n_written = yaffs_wr_file(obj, buf, ipos, n, 0);
1402
1403 yaffs_touch_super(dev);
1404
1405 yaffs_trace(YAFFS_TRACE_OS,
1406 "yaffs_file_write: %d(%x) bytes written",
1407 (unsigned)n, (unsigned)n);
1408
1409 if (n_written > 0) {
1410 ipos += n_written;
1411 *pos = ipos;
1412 if (ipos > inode->i_size) {
1413 inode->i_size = ipos;
1414 inode->i_blocks = (ipos + 511) >> 9;
1415
1416 yaffs_trace(YAFFS_TRACE_OS,
1417 "yaffs_file_write size updated to %d bytes, %d blocks",
1418 ipos, (int)(inode->i_blocks));
1419 }
1420
1421 }
1422 yaffs_gross_unlock(dev);
1423 return (n_written == 0) && (n > 0) ? -ENOSPC : n_written;
1424}
1425
1426static int yaffs_write_end(struct file *filp, struct address_space *mapping,
1427 loff_t pos, unsigned len, unsigned copied,
1428 struct page *pg, void *fsdadata)
1429{
1430 int ret = 0;
1431 void *addr, *kva;
1432 uint32_t offset_into_page = pos & (PAGE_CACHE_SIZE - 1);
1433
1434 kva = kmap(pg);
1435 addr = kva + offset_into_page;
1436
1437 yaffs_trace(YAFFS_TRACE_OS,
1438 "yaffs_write_end addr %p pos %x n_bytes %d",
1439 addr, (unsigned)pos, copied);
1440
1441 ret = yaffs_file_write(filp, addr, copied, &pos);
1442
1443 if (ret != copied) {
1444 yaffs_trace(YAFFS_TRACE_OS,
1445 "yaffs_write_end not same size ret %d copied %d",
1446 ret, copied);
1447 SetPageError(pg);
1448 }
1449
1450 kunmap(pg);
1451
1452 yaffs_release_space(filp);
1453 unlock_page(pg);
1454 page_cache_release(pg);
1455 return ret;
1456}
1457
1458static int yaffs_statfs(struct dentry *dentry, struct kstatfs *buf)
1459{
1460 struct yaffs_dev *dev = yaffs_dentry_to_obj(dentry)->my_dev;
1461 struct super_block *sb = dentry->d_sb;
1462
1463 yaffs_trace(YAFFS_TRACE_OS, "yaffs_statfs");
1464
1465 yaffs_gross_lock(dev);
1466
1467 buf->f_type = YAFFS_MAGIC;
1468 buf->f_bsize = sb->s_blocksize;
1469 buf->f_namelen = 255;
1470
1471 if (dev->data_bytes_per_chunk & (dev->data_bytes_per_chunk - 1)) {
1472 /* Do this if chunk size is not a power of 2 */
1473
1474 uint64_t bytes_in_dev;
1475 uint64_t bytes_free;
1476
1477 bytes_in_dev =
1478 ((uint64_t)
1479 ((dev->param.end_block - dev->param.start_block +
1480 1))) * ((uint64_t) (dev->param.chunks_per_block *
1481 dev->data_bytes_per_chunk));
1482
1483 do_div(bytes_in_dev, sb->s_blocksize); /* bytes_in_dev becomes the number of blocks */
1484 buf->f_blocks = bytes_in_dev;
1485
1486 bytes_free = ((uint64_t) (yaffs_get_n_free_chunks(dev))) *
1487 ((uint64_t) (dev->data_bytes_per_chunk));
1488
1489 do_div(bytes_free, sb->s_blocksize);
1490
1491 buf->f_bfree = bytes_free;
1492
1493 } else if (sb->s_blocksize > dev->data_bytes_per_chunk) {
1494
1495 buf->f_blocks =
1496 (dev->param.end_block - dev->param.start_block + 1) *
1497 dev->param.chunks_per_block /
1498 (sb->s_blocksize / dev->data_bytes_per_chunk);
1499 buf->f_bfree =
1500 yaffs_get_n_free_chunks(dev) /
1501 (sb->s_blocksize / dev->data_bytes_per_chunk);
1502 } else {
1503 buf->f_blocks =
1504 (dev->param.end_block - dev->param.start_block + 1) *
1505 dev->param.chunks_per_block *
1506 (dev->data_bytes_per_chunk / sb->s_blocksize);
1507
1508 buf->f_bfree =
1509 yaffs_get_n_free_chunks(dev) *
1510 (dev->data_bytes_per_chunk / sb->s_blocksize);
1511 }
1512
1513 buf->f_files = 0;
1514 buf->f_ffree = 0;
1515 buf->f_bavail = buf->f_bfree;
1516
1517 yaffs_gross_unlock(dev);
1518 return 0;
1519}
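
/*
 * Worked example for the middle branch above (assumed geometry):
 * 2048-byte chunks, 64 chunks per block, blocks 0..1023 and a
 * 4096-byte sb->s_blocksize give
 * f_blocks = 1024 * 64 / (4096 / 2048) = 32768.
 */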
1520
1521static void yaffs_flush_inodes(struct super_block *sb)
1522{
1523 struct inode *iptr;
1524 struct yaffs_obj *obj;
1525
1526 list_for_each_entry(iptr, &sb->s_inodes, i_sb_list) {
1527 obj = yaffs_inode_to_obj(iptr);
1528 if (obj) {
1529 yaffs_trace(YAFFS_TRACE_OS,
1530 "flushing obj %d", obj->obj_id);
1531 yaffs_flush_file(obj, 1, 0);
1532 }
1533 }
1534}
1535
1536static void yaffs_flush_super(struct super_block *sb, int do_checkpoint)
1537{
1538 struct yaffs_dev *dev = yaffs_super_to_dev(sb);
1539 if (!dev)
1540 return;
1541
1542 yaffs_flush_inodes(sb);
1543 yaffs_update_dirty_dirs(dev);
1544 yaffs_flush_whole_cache(dev);
1545 if (do_checkpoint)
1546 yaffs_checkpoint_save(dev);
1547}
1548
1549static unsigned yaffs_bg_gc_urgency(struct yaffs_dev *dev)
1550{
1551 unsigned erased_chunks =
1552 dev->n_erased_blocks * dev->param.chunks_per_block;
1553 struct yaffs_linux_context *context = yaffs_dev_to_lc(dev);
1554 unsigned scattered = 0; /* Free chunks not in an erased block */
1555
1556 if (erased_chunks < dev->n_free_chunks)
1557 scattered = (dev->n_free_chunks - erased_chunks);
1558
1559 if (!context->bg_running)
1560 return 0;
1561 else if (scattered < (dev->param.chunks_per_block * 2))
1562 return 0;
1563 else if (erased_chunks > dev->n_free_chunks / 2)
1564 return 0;
1565 else if (erased_chunks > dev->n_free_chunks / 4)
1566 return 1;
1567 else
1568 return 2;
1569}
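
/*
 * Worked example (illustrative numbers): with chunks_per_block = 64,
 * n_erased_blocks = 10 and n_free_chunks = 3000, erased_chunks = 640
 * and scattered = 2360.  scattered is not below 128, and erased_chunks
 * exceeds neither half (1500) nor a quarter (750) of the free chunks,
 * so the function returns 2, the most urgent level.
 */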
1570
1571static int yaffs_do_sync_fs(struct super_block *sb, int request_checkpoint)
1572{
1573
1574 struct yaffs_dev *dev = yaffs_super_to_dev(sb);
1575 unsigned int oneshot_checkpoint = (yaffs_auto_checkpoint & 4);
1576 unsigned gc_urgent = yaffs_bg_gc_urgency(dev);
1577 int do_checkpoint;
1578
1579 yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC | YAFFS_TRACE_BACKGROUND,
1580 "yaffs_do_sync_fs: gc-urgency %d %s %s%s",
1581 gc_urgent,
1582 sb->s_dirt ? "dirty" : "clean",
1583 request_checkpoint ? "checkpoint requested" : "no checkpoint",
1584 oneshot_checkpoint ? " one-shot" : "");
1585
1586 yaffs_gross_lock(dev);
1587 do_checkpoint = ((request_checkpoint && !gc_urgent) ||
1588 oneshot_checkpoint) && !dev->is_checkpointed;
1589
1590 if (sb->s_dirt || do_checkpoint) {
1591 yaffs_flush_super(sb, !dev->is_checkpointed && do_checkpoint);
1592 sb->s_dirt = 0;
1593 if (oneshot_checkpoint)
1594 yaffs_auto_checkpoint &= ~4;
1595 }
1596 yaffs_gross_unlock(dev);
1597
1598 return 0;
1599}
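
/*
 * Example of the checkpoint decision above: with yaffs_auto_checkpoint
 * set to 5, the 4 bit makes oneshot_checkpoint true, so a checkpoint
 * is taken even while gc urgency suppresses the ordinary request
 * (provided the device is not already checkpointed); the bit is then
 * cleared, leaving yaffs_auto_checkpoint at 1.
 */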
1600
1601/*
1602 * yaffs background thread functions.
1603 * yaffs_bg_thread_fn() is the thread function.
1604 * yaffs_bg_start() launches the background thread.
1605 * yaffs_bg_stop() cleans up the background thread.
1606 *
1607 * NB:
1608 * The thread should only run after yaffs is initialised.
1609 * The thread should be stopped before yaffs is unmounted.
1610 * The thread should not do any writing while the fs is read only.
1611 */
1612
1613void yaffs_background_waker(unsigned long data)
1614{
1615 wake_up_process((struct task_struct *)data);
1616}
1617
1618static int yaffs_bg_thread_fn(void *data)
1619{
1620 struct yaffs_dev *dev = (struct yaffs_dev *)data;
1621 struct yaffs_linux_context *context = yaffs_dev_to_lc(dev);
1622 unsigned long now = jiffies;
1623 unsigned long next_dir_update = now;
1624 unsigned long next_gc = now;
1625 unsigned long expires;
1626 unsigned int urgency;
1627
1628 int gc_result;
1629 struct timer_list timer;
1630
1631 yaffs_trace(YAFFS_TRACE_BACKGROUND,
1632 "yaffs_background starting for dev %p", (void *)dev);
1633
1634 set_freezable();
1635 while (context->bg_running) {
1636 yaffs_trace(YAFFS_TRACE_BACKGROUND, "yaffs_background");
1637
1638 if (kthread_should_stop())
1639 break;
1640
1641 if (try_to_freeze())
1642 continue;
1643
1644 yaffs_gross_lock(dev);
1645
1646 now = jiffies;
1647
1648 if (time_after(now, next_dir_update) && yaffs_bg_enable) {
1649 yaffs_update_dirty_dirs(dev);
1650 next_dir_update = now + HZ;
1651 }
1652
1653 if (time_after(now, next_gc) && yaffs_bg_enable) {
1654 if (!dev->is_checkpointed) {
1655 urgency = yaffs_bg_gc_urgency(dev);
1656 gc_result = yaffs_bg_gc(dev, urgency);
1657 if (urgency > 1)
1658 next_gc = now + HZ / 20 + 1;
1659 else if (urgency > 0)
1660 next_gc = now + HZ / 10 + 1;
1661 else
1662 next_gc = now + HZ * 2;
1663 } else {
1664 /*
1665 * gc not running so set to next_dir_update
1666 * to cut down on wake ups
1667 */
1668 next_gc = next_dir_update;
1669 }
1670 }
1671 yaffs_gross_unlock(dev);
1672 expires = next_dir_update;
1673 if (time_before(next_gc, expires))
1674 expires = next_gc;
1675 if (time_before(expires, now))
1676 expires = now + HZ;
1677
1678 Y_INIT_TIMER(&timer);
1679 timer.expires = expires + 1;
1680 timer.data = (unsigned long)current;
1681 timer.function = yaffs_background_waker;
1682
1683 set_current_state(TASK_INTERRUPTIBLE);
1684 add_timer(&timer);
1685 schedule();
1686 del_timer_sync(&timer);
1687 }
1688
1689 return 0;
1690}
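
/*
 * Editorial note: the explicit timer plus schedule() above appears to
 * be equivalent to schedule_timeout_interruptible(expires + 1 - now)
 * on this kernel generation; either form sleeps until the deadline
 * while leaving the thread wakeable early via wake_up_process().
 */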
1691
1692static int yaffs_bg_start(struct yaffs_dev *dev)
1693{
1694 int retval = 0;
1695 struct yaffs_linux_context *context = yaffs_dev_to_lc(dev);
1696
1697 if (dev->read_only)
1698 return -1;
1699
1700 context->bg_running = 1;
1701
1702 context->bg_thread = kthread_run(yaffs_bg_thread_fn,
1703 (void *)dev, "yaffs-bg-%d",
1704 context->mount_id);
1705
1706 if (IS_ERR(context->bg_thread)) {
1707 retval = PTR_ERR(context->bg_thread);
1708 context->bg_thread = NULL;
1709 context->bg_running = 0;
1710 }
1711 return retval;
1712}
1713
1714static void yaffs_bg_stop(struct yaffs_dev *dev)
1715{
1716 struct yaffs_linux_context *ctxt = yaffs_dev_to_lc(dev);
1717
1718 ctxt->bg_running = 0;
1719
1720 if (ctxt->bg_thread) {
1721 kthread_stop(ctxt->bg_thread);
1722 ctxt->bg_thread = NULL;
1723 }
1724}
1725
1726static void yaffs_write_super(struct super_block *sb)
1727{
1728 unsigned request_checkpoint = (yaffs_auto_checkpoint >= 2);
1729
1730 yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC | YAFFS_TRACE_BACKGROUND,
1731 "yaffs_write_super%s",
1732 request_checkpoint ? " checkpt" : "");
1733
1734 yaffs_do_sync_fs(sb, request_checkpoint);
1735
1736}
1737
1738static int yaffs_sync_fs(struct super_block *sb, int wait)
1739{
1740 unsigned request_checkpoint = (yaffs_auto_checkpoint >= 1);
1741
1742 yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC,
1743 "yaffs_sync_fs%s", request_checkpoint ? " checkpt" : "");
1744
1745 yaffs_do_sync_fs(sb, request_checkpoint);
1746
1747 return 0;
1748}
1749
1750
1751static LIST_HEAD(yaffs_context_list);
1752struct mutex yaffs_context_lock;
1753
1754
1755
1756struct yaffs_options {
1757 int inband_tags;
1758 int skip_checkpoint_read;
1759 int skip_checkpoint_write;
1760 int no_cache;
1761 int tags_ecc_on;
1762 int tags_ecc_overridden;
1763 int lazy_loading_enabled;
1764 int lazy_loading_overridden;
1765 int empty_lost_and_found;
1766 int empty_lost_and_found_overridden;
1767};
1768
1769#define MAX_OPT_LEN 30
1770static int yaffs_parse_options(struct yaffs_options *options,
1771 const char *options_str)
1772{
1773 char cur_opt[MAX_OPT_LEN + 1];
1774 int p;
1775 int error = 0;
1776
1777 /* Parse the options, which form a comma-separated list */
1778
1779 while (options_str && *options_str && !error) {
1780 memset(cur_opt, 0, MAX_OPT_LEN + 1);
1781 p = 0;
1782
1783 while (*options_str == ',')
1784 options_str++;
1785
1786 while (*options_str && *options_str != ',') {
1787 if (p < MAX_OPT_LEN) {
1788 cur_opt[p] = *options_str;
1789 p++;
1790 }
1791 options_str++;
1792 }
1793
1794 if (!strcmp(cur_opt, "inband-tags")) {
1795 options->inband_tags = 1;
1796 } else if (!strcmp(cur_opt, "tags-ecc-off")) {
1797 options->tags_ecc_on = 0;
1798 options->tags_ecc_overridden = 1;
1799 } else if (!strcmp(cur_opt, "tags-ecc-on")) {
1800 options->tags_ecc_on = 1;
1801 options->tags_ecc_overridden = 1;
1802 } else if (!strcmp(cur_opt, "lazy-loading-off")) {
1803 options->lazy_loading_enabled = 0;
1804 options->lazy_loading_overridden = 1;
1805 } else if (!strcmp(cur_opt, "lazy-loading-on")) {
1806 options->lazy_loading_enabled = 1;
1807 options->lazy_loading_overridden = 1;
1808 } else if (!strcmp(cur_opt, "empty-lost-and-found-off")) {
1809 options->empty_lost_and_found = 0;
1810 options->empty_lost_and_found_overridden = 1;
1811 } else if (!strcmp(cur_opt, "empty-lost-and-found-on")) {
1812 options->empty_lost_and_found = 1;
1813 options->empty_lost_and_found_overridden = 1;
1814 } else if (!strcmp(cur_opt, "no-cache")) {
1815 options->no_cache = 1;
1816 } else if (!strcmp(cur_opt, "no-checkpoint-read")) {
1817 options->skip_checkpoint_read = 1;
1818 } else if (!strcmp(cur_opt, "no-checkpoint-write")) {
1819 options->skip_checkpoint_write = 1;
1820 } else if (!strcmp(cur_opt, "no-checkpoint")) {
1821 options->skip_checkpoint_read = 1;
1822 options->skip_checkpoint_write = 1;
1823 } else {
1824 printk(KERN_INFO "yaffs: Bad mount option \"%s\"\n",
1825 cur_opt);
1826 error = 1;
1827 }
1828 }
1829
1830 return error;
1831}
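
/*
 * Example (hypothetical mount): mounting with
 * "-o inband-tags,no-checkpoint" hands this parser the string
 * "inband-tags,no-checkpoint", setting options->inband_tags,
 * options->skip_checkpoint_read and options->skip_checkpoint_write;
 * any unknown token is reported and fails the parse.
 */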
1832
1833static const struct address_space_operations yaffs_file_address_operations = {
1834 .readpage = yaffs_readpage,
1835 .writepage = yaffs_writepage,
1836 .write_begin = yaffs_write_begin,
1837 .write_end = yaffs_write_end,
1838};
1839
1840
1841
1842static const struct inode_operations yaffs_file_inode_operations = {
1843 .setattr = yaffs_setattr,
1844#ifdef CONFIG_YAFFS_XATTR
1845 .setxattr = yaffs_setxattr,
1846 .getxattr = yaffs_getxattr,
1847 .listxattr = yaffs_listxattr,
1848 .removexattr = yaffs_removexattr,
1849#endif
1850};
1851
1852static const struct inode_operations yaffs_symlink_inode_operations = {
1853 .readlink = yaffs_readlink,
1854 .follow_link = yaffs_follow_link,
1855 .put_link = yaffs_put_link,
1856 .setattr = yaffs_setattr,
1857#ifdef CONFIG_YAFFS_XATTR
1858 .setxattr = yaffs_setxattr,
1859 .getxattr = yaffs_getxattr,
1860 .listxattr = yaffs_listxattr,
1861 .removexattr = yaffs_removexattr,
1862#endif
1863};
1864
1865static void yaffs_fill_inode_from_obj(struct inode *inode,
1866 struct yaffs_obj *obj)
1867{
1868 if (inode && obj) {
1869
1870 /* Check mode against the variant type and attempt to repair if broken. */
1871 u32 mode = obj->yst_mode;
1872 switch (obj->variant_type) {
1873 case YAFFS_OBJECT_TYPE_FILE:
1874 if (!S_ISREG(mode)) {
1875 obj->yst_mode &= ~S_IFMT;
1876 obj->yst_mode |= S_IFREG;
1877 }
1878
1879 break;
1880 case YAFFS_OBJECT_TYPE_SYMLINK:
1881 if (!S_ISLNK(mode)) {
1882 obj->yst_mode &= ~S_IFMT;
1883 obj->yst_mode |= S_IFLNK;
1884 }
1885
1886 break;
1887 case YAFFS_OBJECT_TYPE_DIRECTORY:
1888 if (!S_ISDIR(mode)) {
1889 obj->yst_mode &= ~S_IFMT;
1890 obj->yst_mode |= S_IFDIR;
1891 }
1892
1893 break;
1894 case YAFFS_OBJECT_TYPE_UNKNOWN:
1895 case YAFFS_OBJECT_TYPE_HARDLINK:
1896 case YAFFS_OBJECT_TYPE_SPECIAL:
1897 default:
1898 /* TODO? */
1899 break;
1900 }
1901
1902 inode->i_flags |= S_NOATIME;
1903
1904 inode->i_ino = obj->obj_id;
1905 inode->i_mode = obj->yst_mode;
1906 inode->i_uid = obj->yst_uid;
1907 inode->i_gid = obj->yst_gid;
1908
1909 inode->i_rdev = old_decode_dev(obj->yst_rdev);
1910
1911 inode->i_atime.tv_sec = (time_t) (obj->yst_atime);
1912 inode->i_atime.tv_nsec = 0;
1913 inode->i_mtime.tv_sec = (time_t) obj->yst_mtime;
1914 inode->i_mtime.tv_nsec = 0;
1915 inode->i_ctime.tv_sec = (time_t) obj->yst_ctime;
1916 inode->i_ctime.tv_nsec = 0;
1917 inode->i_size = yaffs_get_obj_length(obj);
1918 inode->i_blocks = (inode->i_size + 511) >> 9;
1919
1920 inode->i_nlink = yaffs_get_obj_link_count(obj);
1921
1922 yaffs_trace(YAFFS_TRACE_OS,
1923 "yaffs_fill_inode mode %x uid %d gid %d size %d count %d",
1924 inode->i_mode, inode->i_uid, inode->i_gid,
1925 (int)inode->i_size, atomic_read(&inode->i_count));
1926
1927 switch (obj->yst_mode & S_IFMT) {
1928 default: /* fifo, device or socket */
1929 init_special_inode(inode, obj->yst_mode,
1930 old_decode_dev(obj->yst_rdev));
1931 break;
1932 case S_IFREG: /* file */
1933 inode->i_op = &yaffs_file_inode_operations;
1934 inode->i_fop = &yaffs_file_operations;
1935 inode->i_mapping->a_ops =
1936 &yaffs_file_address_operations;
1937 break;
1938 case S_IFDIR: /* directory */
1939 inode->i_op = &yaffs_dir_inode_operations;
1940 inode->i_fop = &yaffs_dir_operations;
1941 break;
1942 case S_IFLNK: /* symlink */
1943 inode->i_op = &yaffs_symlink_inode_operations;
1944 break;
1945 }
1946
1947 yaffs_inode_to_obj_lv(inode) = obj;
1948
1949 obj->my_inode = inode;
1950
1951 } else {
1952 yaffs_trace(YAFFS_TRACE_OS,
1953 "yaffs_fill_inode invalid parameters");
1954 }
1955}
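
/*
 * Example of the mode repair above (hypothetical corruption): an
 * object whose variant_type is YAFFS_OBJECT_TYPE_FILE but whose
 * stored yst_mode claims S_IFDIR has its S_IFMT bits rewritten to
 * S_IFREG, so the inode is then initialised through the S_IFREG case
 * of the second switch.
 */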
1956
1957static void yaffs_put_super(struct super_block *sb)
1958{
1959 struct yaffs_dev *dev = yaffs_super_to_dev(sb);
1960
1961 yaffs_trace(YAFFS_TRACE_OS, "yaffs_put_super");
1962
1963 yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_BACKGROUND,
1964 "Shutting down yaffs background thread");
1965 yaffs_bg_stop(dev);
1966 yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_BACKGROUND,
1967 "yaffs background thread shut down");
1968
1969 yaffs_gross_lock(dev);
1970
1971 yaffs_flush_super(sb, 1);
1972
1973 if (yaffs_dev_to_lc(dev)->put_super_fn)
1974 yaffs_dev_to_lc(dev)->put_super_fn(sb);
1975
1976 yaffs_deinitialise(dev);
1977
1978 yaffs_gross_unlock(dev);
1979 mutex_lock(&yaffs_context_lock);
1980 list_del_init(&(yaffs_dev_to_lc(dev)->context_list));
1981 mutex_unlock(&yaffs_context_lock);
1982
1983 if (yaffs_dev_to_lc(dev)->spare_buffer) {
1984 kfree(yaffs_dev_to_lc(dev)->spare_buffer);
1985 yaffs_dev_to_lc(dev)->spare_buffer = NULL;
1986 }
1987
1988 kfree(dev);
1989}
1990
1991static void yaffs_mtd_put_super(struct super_block *sb)
1992{
1993 struct mtd_info *mtd = yaffs_dev_to_mtd(yaffs_super_to_dev(sb));
1994
1995 if (mtd->sync)
1996 mtd->sync(mtd);
1997
1998 put_mtd_device(mtd);
1999}
2000
2001static const struct super_operations yaffs_super_ops = {
2002 .statfs = yaffs_statfs,
2003 .put_super = yaffs_put_super,
2004 .evict_inode = yaffs_evict_inode,
2005 .sync_fs = yaffs_sync_fs,
2006 .write_super = yaffs_write_super,
2007};
2008
2009static struct super_block *yaffs_internal_read_super(int yaffs_version,
2010 struct super_block *sb,
2011 void *data, int silent)
2012{
2013 int n_blocks;
2014 struct inode *inode = NULL;
2015 struct dentry *root;
2016 struct yaffs_dev *dev = NULL;
2017 char devname_buf[BDEVNAME_SIZE + 1];
2018 struct mtd_info *mtd;
2019 int err;
2020 char *data_str = (char *)data;
2021 struct yaffs_linux_context *context = NULL;
2022 struct yaffs_param *param;
2023
2024 int read_only = 0;
2025
2026 struct yaffs_options options;
2027
2028 unsigned mount_id;
2029 int found;
2030 struct yaffs_linux_context *context_iterator;
2031 struct list_head *l;
2032
2033 sb->s_magic = YAFFS_MAGIC;
2034 sb->s_op = &yaffs_super_ops;
2035 sb->s_flags |= MS_NOATIME;
2036
2037 read_only = ((sb->s_flags & MS_RDONLY) != 0);
2038
2039 sb->s_export_op = &yaffs_export_ops;
2040
2041 /* sb cannot be NULL here: it was dereferenced above */
2042 if (!sb->s_dev)
2043 printk(KERN_INFO "yaffs: sb->s_dev is NULL\n");
2044 else if (!yaffs_devname(sb, devname_buf))
2045 printk(KERN_INFO "yaffs: devname is NULL\n");
2046 else
2047 printk(KERN_INFO "yaffs: dev is %d name is \"%s\" %s\n",
2048 sb->s_dev,
2049 yaffs_devname(sb, devname_buf),
2050 read_only ? "ro" : "rw");
2051
2052 if (!data_str)
2053 data_str = "";
2054
2055 printk(KERN_INFO "yaffs: passed flags \"%s\"\n", data_str);
2056
2057 memset(&options, 0, sizeof(options));
2058
2059 if (yaffs_parse_options(&options, data_str)) {
2060 /* Option parsing failed */
2061 return NULL;
2062 }
2063
2064 sb->s_blocksize = PAGE_CACHE_SIZE;
2065 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2066
2067 yaffs_trace(YAFFS_TRACE_OS,
2068 "yaffs_read_super: Using yaffs%d", yaffs_version);
2069 yaffs_trace(YAFFS_TRACE_OS,
2070 "yaffs_read_super: block size %d", (int)(sb->s_blocksize));
2071
2072 yaffs_trace(YAFFS_TRACE_ALWAYS,
2073 "Attempting MTD mount of %u.%u,\"%s\"",
2074 MAJOR(sb->s_dev), MINOR(sb->s_dev),
2075 yaffs_devname(sb, devname_buf));
2076
2077 /* Check it's an mtd device..... */
2078 if (MAJOR(sb->s_dev) != MTD_BLOCK_MAJOR)
2079 return NULL; /* This isn't an mtd device */
2080
2081 /* Get the device */
2082 mtd = get_mtd_device(NULL, MINOR(sb->s_dev));
2083 if (!mtd) {
2084 yaffs_trace(YAFFS_TRACE_ALWAYS,
2085 "MTD device #%u doesn't appear to exist",
2086 MINOR(sb->s_dev));
2087 return NULL;
2088 }
2089 /* Check it's NAND */
2090 if (mtd->type != MTD_NANDFLASH) {
2091 yaffs_trace(YAFFS_TRACE_ALWAYS,
2092 "MTD device is not NAND it's type %d",
2093 mtd->type);
2094 return NULL;
2095 }
2096
2097 yaffs_trace(YAFFS_TRACE_OS, " erase %p", mtd->erase);
2098 yaffs_trace(YAFFS_TRACE_OS, " read %p", mtd->read);
2099 yaffs_trace(YAFFS_TRACE_OS, " write %p", mtd->write);
2100 yaffs_trace(YAFFS_TRACE_OS, " readoob %p", mtd->read_oob);
2101 yaffs_trace(YAFFS_TRACE_OS, " writeoob %p", mtd->write_oob);
2102 yaffs_trace(YAFFS_TRACE_OS, " block_isbad %p", mtd->block_isbad);
2103 yaffs_trace(YAFFS_TRACE_OS, " block_markbad %p", mtd->block_markbad);
2104 yaffs_trace(YAFFS_TRACE_OS, " %s %d", WRITE_SIZE_STR, WRITE_SIZE(mtd));
2105 yaffs_trace(YAFFS_TRACE_OS, " oobsize %d", mtd->oobsize);
2106 yaffs_trace(YAFFS_TRACE_OS, " erasesize %d", mtd->erasesize);
2107 yaffs_trace(YAFFS_TRACE_OS, " size %lld", mtd->size);
2108
2109#ifdef CONFIG_YAFFS_AUTO_YAFFS2
2110
2111 if (yaffs_version == 1 && WRITE_SIZE(mtd) >= 2048) {
2112 yaffs_trace(YAFFS_TRACE_ALWAYS, "auto selecting yaffs2");
2113 yaffs_version = 2;
2114 }
2115
2116 /* Added NCB 26/5/2006 for completeness */
2117 if (yaffs_version == 2 && !options.inband_tags
2118 && WRITE_SIZE(mtd) == 512) {
2119 yaffs_trace(YAFFS_TRACE_ALWAYS, "auto selecting yaffs1");
2120 yaffs_version = 1;
2121 }
2122#endif
2123
2124 if (yaffs_version == 2) {
2125 /* Check for version 2 style functions */
2126 if (!mtd->erase ||
2127 !mtd->block_isbad ||
2128 !mtd->block_markbad ||
2129 !mtd->read ||
2130 !mtd->write || !mtd->read_oob || !mtd->write_oob) {
2131 yaffs_trace(YAFFS_TRACE_ALWAYS,
2132 "MTD device does not support required functions");
2133 return NULL;
2134 }
2135
2136 if ((WRITE_SIZE(mtd) < YAFFS_MIN_YAFFS2_CHUNK_SIZE ||
2137 mtd->oobsize < YAFFS_MIN_YAFFS2_SPARE_SIZE) &&
2138 !options.inband_tags) {
2139 yaffs_trace(YAFFS_TRACE_ALWAYS,
2140 "MTD device does not have the right page sizes");
2141 return NULL;
2142 }
2143 } else {
2144 /* Check for V1 style functions */
2145 if (!mtd->erase ||
2146 !mtd->read ||
2147 !mtd->write || !mtd->read_oob || !mtd->write_oob) {
2148 yaffs_trace(YAFFS_TRACE_ALWAYS,
2149 "MTD device does not support required functions");
2150 return NULL;
2151 }
2152
2153 if (WRITE_SIZE(mtd) < YAFFS_BYTES_PER_CHUNK ||
2154 mtd->oobsize != YAFFS_BYTES_PER_SPARE) {
2155 yaffs_trace(YAFFS_TRACE_ALWAYS,
2156 "MTD device does not support have the right page sizes");
2157 return NULL;
2158 }
2159 }
2160
2161 /* OK, so if we got here, we have an MTD that's NAND and looks
2162 * like it has the right capabilities.
2163 * Set the struct yaffs_dev up for mtd.
2164 */
2165
2166 if (!read_only && !(mtd->flags & MTD_WRITEABLE)) {
2167 read_only = 1;
2168 printk(KERN_INFO
2169 "yaffs: mtd is read only, setting superblock read only");
2170 sb->s_flags |= MS_RDONLY;
2171 }
2172
2173 dev = kmalloc(sizeof(struct yaffs_dev), GFP_KERNEL);
2174 context = kmalloc(sizeof(struct yaffs_linux_context), GFP_KERNEL);
2175
2176 if (!dev || !context) {
2177 /* kfree(NULL) is a no-op, so the
2178 * pointers need no NULL checks */
2179 kfree(dev);
2180 kfree(context);
2181 dev = NULL;
2182 context = NULL;
2183 }
2184
2185 if (!dev) {
2186 /* Fatal: could not allocate the device structure */
2187 yaffs_trace(YAFFS_TRACE_ALWAYS,
2188 "yaffs_read_super failed trying to allocate yaffs_dev");
2189 return NULL;
2190 }
2191 memset(dev, 0, sizeof(struct yaffs_dev));
2192 param = &(dev->param);
2193
2194 memset(context, 0, sizeof(struct yaffs_linux_context));
2195 dev->os_context = context;
2196 INIT_LIST_HEAD(&(context->context_list));
2197 context->dev = dev;
2198 context->super = sb;
2199
2200 dev->read_only = read_only;
2201
2202 sb->s_fs_info = dev;
2203
2204 dev->driver_context = mtd;
2205 param->name = mtd->name;
2206
2207 /* Set up the memory size parameters.... */
2208
2209 n_blocks =
2210 YCALCBLOCKS(mtd->size,
2211 (YAFFS_CHUNKS_PER_BLOCK * YAFFS_BYTES_PER_CHUNK));
2212
2213 param->start_block = 0;
2214 param->end_block = n_blocks - 1;
2215 param->chunks_per_block = YAFFS_CHUNKS_PER_BLOCK;
2216 param->total_bytes_per_chunk = YAFFS_BYTES_PER_CHUNK;
2217 param->n_reserved_blocks = 5;
2218 param->n_caches = (options.no_cache) ? 0 : 10;
2219 param->inband_tags = options.inband_tags;
2220
2221#ifdef CONFIG_YAFFS_DISABLE_LAZY_LOAD
2222 param->disable_lazy_load = 1;
2223#endif
2224#ifdef CONFIG_YAFFS_XATTR
2225 param->enable_xattr = 1;
2226#endif
2227 if (options.lazy_loading_overridden)
2228 param->disable_lazy_load = !options.lazy_loading_enabled;
2229
2230#ifdef CONFIG_YAFFS_DISABLE_TAGS_ECC
2231 param->no_tags_ecc = 1;
2232#endif
2233
2234#ifdef CONFIG_YAFFS_DISABLE_BACKGROUND
2235#else
2236 param->defered_dir_update = 1;
2237#endif
2238
2239 if (options.tags_ecc_overridden)
2240 param->no_tags_ecc = !options.tags_ecc_on;
2241
2242#ifdef CONFIG_YAFFS_EMPTY_LOST_AND_FOUND
2243 param->empty_lost_n_found = 1;
2244#endif
2245
2246#ifdef CONFIG_YAFFS_DISABLE_BLOCK_REFRESHING
2247 param->refresh_period = 0;
2248#else
2249 param->refresh_period = 500;
2250#endif
2251
2252#ifdef CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED
2253 param->always_check_erased = 1;
2254#endif
2255
2256 if (options.empty_lost_and_found_overridden)
2257 param->empty_lost_n_found = options.empty_lost_and_found;
2258
2259 /* ... and the functions. */
2260 if (yaffs_version == 2) {
2261 param->write_chunk_tags_fn = nandmtd2_write_chunk_tags;
2262 param->read_chunk_tags_fn = nandmtd2_read_chunk_tags;
2263 param->bad_block_fn = nandmtd2_mark_block_bad;
2264 param->query_block_fn = nandmtd2_query_block;
2265 yaffs_dev_to_lc(dev)->spare_buffer =
2266 kmalloc(mtd->oobsize, GFP_NOFS);
2267 param->is_yaffs2 = 1;
2268 param->total_bytes_per_chunk = mtd->writesize;
2269 param->chunks_per_block = mtd->erasesize / mtd->writesize;
2270 n_blocks = YCALCBLOCKS(mtd->size, mtd->erasesize);
2271
2272 param->start_block = 0;
2273 param->end_block = n_blocks - 1;
2274 } else {
2275 /* use the MTD interface in yaffs_mtdif1.c */
2276 param->write_chunk_tags_fn = nandmtd1_write_chunk_tags;
2277 param->read_chunk_tags_fn = nandmtd1_read_chunk_tags;
2278 param->bad_block_fn = nandmtd1_mark_block_bad;
2279 param->query_block_fn = nandmtd1_query_block;
2280 param->is_yaffs2 = 0;
2281 }
2282 /* ... and common functions */
2283 param->erase_fn = nandmtd_erase_block;
2284 param->initialise_flash_fn = nandmtd_initialise;
2285
2286 yaffs_dev_to_lc(dev)->put_super_fn = yaffs_mtd_put_super;
2287
2288 param->sb_dirty_fn = yaffs_touch_super;
2289 param->gc_control = yaffs_gc_control_callback;
2290
2291 yaffs_dev_to_lc(dev)->super = sb;
2292
2293#ifndef CONFIG_YAFFS_DOES_ECC
2294 param->use_nand_ecc = 1;
2295#endif
2296
2297 param->skip_checkpt_rd = options.skip_checkpoint_read;
2298 param->skip_checkpt_wr = options.skip_checkpoint_write;
2299
2300 mutex_lock(&yaffs_context_lock);
2301 /* Get a mount id */
2302 found = 0;
2303 for (mount_id = 0; !found; mount_id++) {
2304 found = 1;
2305 list_for_each(l, &yaffs_context_list) {
2306 context_iterator =
2307 list_entry(l, struct yaffs_linux_context,
2308 context_list);
2309 if (context_iterator->mount_id == mount_id)
2310 found = 0;
2311 }
2312 }
2313 context->mount_id = mount_id;
2314
2315 list_add_tail(&(yaffs_dev_to_lc(dev)->context_list),
2316 &yaffs_context_list);
2317 mutex_unlock(&yaffs_context_lock);
2318
2319 /* Directory search handling... */
2320 INIT_LIST_HEAD(&(yaffs_dev_to_lc(dev)->search_contexts));
2321 param->remove_obj_fn = yaffs_remove_obj_callback;
2322
2323 mutex_init(&(yaffs_dev_to_lc(dev)->gross_lock));
2324
2325 yaffs_gross_lock(dev);
2326
2327 err = yaffs_guts_initialise(dev);
2328
2329 yaffs_trace(YAFFS_TRACE_OS,
2330 "yaffs_read_super: guts initialised %s",
2331 (err == YAFFS_OK) ? "OK" : "FAILED");
2332
2333 if (err == YAFFS_OK)
2334 yaffs_bg_start(dev);
2335
2336 if (!context->bg_thread)
2337 param->defered_dir_update = 0;
2338
2339 /* Release lock before yaffs_get_inode() */
2340 yaffs_gross_unlock(dev);
2341
2342 /* Create root inode */
2343 if (err == YAFFS_OK)
2344 inode = yaffs_get_inode(sb, S_IFDIR | 0755, 0, yaffs_root(dev));
2345
2346 if (!inode)
2347 return NULL;
2348
2349 inode->i_op = &yaffs_dir_inode_operations;
2350 inode->i_fop = &yaffs_dir_operations;
2351
2352 yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: got root inode");
2353
2354 root = d_alloc_root(inode);
2355
2356 yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: d_alloc_root done");
2357
2358 if (!root) {
2359 iput(inode);
2360 return NULL;
2361 }
2362 sb->s_root = root;
2363 sb->s_dirt = !dev->is_checkpointed;
2364 yaffs_trace(YAFFS_TRACE_ALWAYS,
2365 "yaffs_read_super: is_checkpointed %d",
2366 dev->is_checkpointed);
2367
2368 yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: done");
2369 return sb;
2370}
2371
2372static int yaffs_internal_read_super_mtd(struct super_block *sb, void *data,
2373 int silent)
2374{
2375 return yaffs_internal_read_super(1, sb, data, silent) ? 0 : -EINVAL;
2376}
2377
2378static int yaffs_read_super(struct file_system_type *fs,
2379 int flags, const char *dev_name,
2380 void *data, struct vfsmount *mnt)
2381{
2382
2383 return get_sb_bdev(fs, flags, dev_name, data,
2384 yaffs_internal_read_super_mtd, mnt);
2385}
2386
2387static struct file_system_type yaffs_fs_type = {
2388 .owner = THIS_MODULE,
2389 .name = "yaffs",
2390 .get_sb = yaffs_read_super,
2391 .kill_sb = kill_block_super,
2392 .fs_flags = FS_REQUIRES_DEV,
2393};
2394
2395#ifdef CONFIG_YAFFS_YAFFS2
2396
2397static int yaffs2_internal_read_super_mtd(struct super_block *sb, void *data,
2398 int silent)
2399{
2400 return yaffs_internal_read_super(2, sb, data, silent) ? 0 : -EINVAL;
2401}
2402
2403static int yaffs2_read_super(struct file_system_type *fs,
2404 int flags, const char *dev_name, void *data,
2405 struct vfsmount *mnt)
2406{
2407 return get_sb_bdev(fs, flags, dev_name, data,
2408 yaffs2_internal_read_super_mtd, mnt);
2409}
2410
2411static struct file_system_type yaffs2_fs_type = {
2412 .owner = THIS_MODULE,
2413 .name = "yaffs2",
2414 .get_sb = yaffs2_read_super,
2415 .kill_sb = kill_block_super,
2416 .fs_flags = FS_REQUIRES_DEV,
2417};
2418#endif /* CONFIG_YAFFS_YAFFS2 */
2419
2420static struct proc_dir_entry *my_proc_entry;
2421
2422static char *yaffs_dump_dev_part0(char *buf, struct yaffs_dev *dev)
2423{
2424 struct yaffs_param *param = &dev->param;
2425 buf += sprintf(buf, "start_block........... %d\n", param->start_block);
2426 buf += sprintf(buf, "end_block............. %d\n", param->end_block);
2427 buf += sprintf(buf, "total_bytes_per_chunk. %d\n",
2428 param->total_bytes_per_chunk);
2429 buf += sprintf(buf, "use_nand_ecc.......... %d\n",
2430 param->use_nand_ecc);
2431 buf += sprintf(buf, "no_tags_ecc........... %d\n", param->no_tags_ecc);
2432 buf += sprintf(buf, "is_yaffs2............. %d\n", param->is_yaffs2);
2433 buf += sprintf(buf, "inband_tags........... %d\n", param->inband_tags);
2434 buf += sprintf(buf, "empty_lost_n_found.... %d\n",
2435 param->empty_lost_n_found);
2436 buf += sprintf(buf, "disable_lazy_load..... %d\n",
2437 param->disable_lazy_load);
2438 buf += sprintf(buf, "refresh_period........ %d\n",
2439 param->refresh_period);
2440 buf += sprintf(buf, "n_caches.............. %d\n", param->n_caches);
2441 buf += sprintf(buf, "n_reserved_blocks..... %d\n",
2442 param->n_reserved_blocks);
2443 buf += sprintf(buf, "always_check_erased... %d\n",
2444 param->always_check_erased);
2445
2446 return buf;
2447}
2448
2449static char *yaffs_dump_dev_part1(char *buf, struct yaffs_dev *dev)
2450{
2451 buf +=
2452 sprintf(buf, "data_bytes_per_chunk.. %d\n",
2453 dev->data_bytes_per_chunk);
2454 buf += sprintf(buf, "chunk_grp_bits........ %d\n", dev->chunk_grp_bits);
2455 buf += sprintf(buf, "chunk_grp_size........ %d\n", dev->chunk_grp_size);
2456 buf +=
2457 sprintf(buf, "n_erased_blocks....... %d\n", dev->n_erased_blocks);
2458 buf +=
2459 sprintf(buf, "blocks_in_checkpt..... %d\n", dev->blocks_in_checkpt);
2460 buf += sprintf(buf, "\n");
2461 buf += sprintf(buf, "n_tnodes.............. %d\n", dev->n_tnodes);
2462 buf += sprintf(buf, "n_obj................. %d\n", dev->n_obj);
2463 buf += sprintf(buf, "n_free_chunks......... %d\n", dev->n_free_chunks);
2464 buf += sprintf(buf, "\n");
2465 buf += sprintf(buf, "n_page_writes......... %u\n", dev->n_page_writes);
2466 buf += sprintf(buf, "n_page_reads.......... %u\n", dev->n_page_reads);
2467 buf += sprintf(buf, "n_erasures............ %u\n", dev->n_erasures);
2468 buf += sprintf(buf, "n_gc_copies........... %u\n", dev->n_gc_copies);
2469 buf += sprintf(buf, "all_gcs............... %u\n", dev->all_gcs);
2470 buf +=
2471 sprintf(buf, "passive_gc_count...... %u\n", dev->passive_gc_count);
2472 buf +=
2473 sprintf(buf, "oldest_dirty_gc_count. %u\n",
2474 dev->oldest_dirty_gc_count);
2475 buf += sprintf(buf, "n_gc_blocks........... %u\n", dev->n_gc_blocks);
2476 buf += sprintf(buf, "bg_gcs................ %u\n", dev->bg_gcs);
2477 buf +=
2478 sprintf(buf, "n_retired_writes...... %u\n", dev->n_retired_writes);
2479 buf +=
2480 sprintf(buf, "n_retired_blocks...... %u\n", dev->n_retired_blocks);
2481 buf += sprintf(buf, "n_ecc_fixed........... %u\n", dev->n_ecc_fixed);
2482 buf += sprintf(buf, "n_ecc_unfixed......... %u\n", dev->n_ecc_unfixed);
2483 buf +=
2484 sprintf(buf, "n_tags_ecc_fixed...... %u\n", dev->n_tags_ecc_fixed);
2485 buf +=
2486 sprintf(buf, "n_tags_ecc_unfixed.... %u\n",
2487 dev->n_tags_ecc_unfixed);
2488 buf += sprintf(buf, "cache_hits............ %u\n", dev->cache_hits);
2489 buf +=
2490 sprintf(buf, "n_deleted_files....... %u\n", dev->n_deleted_files);
2491 buf +=
2492 sprintf(buf, "n_unlinked_files...... %u\n", dev->n_unlinked_files);
2493 buf += sprintf(buf, "refresh_count......... %u\n", dev->refresh_count);
2494 buf += sprintf(buf, "n_bg_deletions........ %u\n", dev->n_bg_deletions);
2495
2496 return buf;
2497}
2498
2499static int yaffs_proc_read(char *page,
2500 char **start,
2501 off_t offset, int count, int *eof, void *data)
2502{
2503 struct list_head *item;
2504 char *buf = page;
2505 int step = offset;
2506 int n = 0;
2507
2508 /* Get proc_file_read() to step 'offset' by one on each successive call.
2509 * We use 'offset' (*ppos) to indicate where we are in dev_list.
2510 * This also assumes the user has posted a read buffer large
2511 * enough to hold the complete output; but that's life in /proc.
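 * For example: step 0 prints the build header, step 1 a blank line,
 * then steps 2 and 3 print part 0 and part 1 of the first device,
 * steps 4 and 5 the second device, and so on along the device list.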
2512 */
2513
2514 *(int *)start = 1;
2515
2516 /* Print header first */
2517 if (step == 0)
2518 buf += sprintf(buf, "YAFFS built:" __DATE__ " " __TIME__ "\n");
2519 else if (step == 1)
2520 buf += sprintf(buf, "\n");
2521 else {
2522 step -= 2;
2523
2524 mutex_lock(&yaffs_context_lock);
2525
2526 /* Locate and print the Nth entry. Order N-squared but N is small. */
2527 list_for_each(item, &yaffs_context_list) {
2528 struct yaffs_linux_context *dc =
2529 list_entry(item, struct yaffs_linux_context,
2530 context_list);
2531 struct yaffs_dev *dev = dc->dev;
2532
2533 if (n < (step & ~1)) {
2534 n += 2;
2535 continue;
2536 }
2537 if ((step & 1) == 0) {
2538 buf +=
2539 sprintf(buf, "\nDevice %d \"%s\"\n", n,
2540 dev->param.name);
2541 buf = yaffs_dump_dev_part0(buf, dev);
2542 } else {
2543 buf = yaffs_dump_dev_part1(buf, dev);
2544 }
2545
2546 break;
2547 }
2548 mutex_unlock(&yaffs_context_lock);
2549 }
2550
2551 return buf - page < count ? buf - page : count;
2552}
2553
2554
2555/**
2556 * Set the verbosity of the warnings and error messages.
2557 *
2558 * Note that the names can only be a..z or _ with the current code.
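 *
 * Usage sketch (assuming YPROC_ROOT is the top of /proc, so that the
 * entry appears as /proc/yaffs):
 *
 *   echo "+mtd -os" > /proc/yaffs   set the mtd bit, clear the os bit
 *   echo "=0x200" > /proc/yaffs     replace the whole mask numerically
 *   echo "=none" > /proc/yaffs      clear all bits (ALWAYS is re-added)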
2559 */
2560
2561static struct {
2562 char *mask_name;
2563 unsigned mask_bitfield;
2564} mask_flags[] = {
2565 {"allocate", YAFFS_TRACE_ALLOCATE},
2566 {"always", YAFFS_TRACE_ALWAYS},
2567 {"background", YAFFS_TRACE_BACKGROUND},
2568 {"bad_blocks", YAFFS_TRACE_BAD_BLOCKS},
2569 {"buffers", YAFFS_TRACE_BUFFERS},
2570 {"bug", YAFFS_TRACE_BUG},
2571 {"checkpt", YAFFS_TRACE_CHECKPOINT},
2572 {"deletion", YAFFS_TRACE_DELETION},
2573 {"erase", YAFFS_TRACE_ERASE},
2574 {"error", YAFFS_TRACE_ERROR},
2575 {"gc_detail", YAFFS_TRACE_GC_DETAIL},
2576 {"gc", YAFFS_TRACE_GC},
2577 {"lock", YAFFS_TRACE_LOCK},
2578 {"mtd", YAFFS_TRACE_MTD},
2579 {"nandaccess", YAFFS_TRACE_NANDACCESS},
2580 {"os", YAFFS_TRACE_OS},
2581 {"scan_debug", YAFFS_TRACE_SCAN_DEBUG},
2582 {"scan", YAFFS_TRACE_SCAN},
2583 {"mount", YAFFS_TRACE_MOUNT},
2584 {"tracing", YAFFS_TRACE_TRACING},
2585 {"sync", YAFFS_TRACE_SYNC},
2586 {"write", YAFFS_TRACE_WRITE},
2587 {"verify", YAFFS_TRACE_VERIFY},
2588 {"verify_nand", YAFFS_TRACE_VERIFY_NAND},
2589 {"verify_full", YAFFS_TRACE_VERIFY_FULL},
2590 {"verify_all", YAFFS_TRACE_VERIFY_ALL},
2591 {"all", 0xffffffff},
2592 {"none", 0},
2593 {NULL, 0},
2594};
2595
2596#define MAX_MASK_NAME_LENGTH 40
2597static int yaffs_proc_write_trace_options(struct file *file, const char *buf,
2598 unsigned long count, void *data)
2599{
2600 unsigned rg = 0, mask_bitfield;
2601 char *end;
2602 char *mask_name;
2603 const char *x;
2604 char substring[MAX_MASK_NAME_LENGTH + 1];
2605 int i;
2606 int done = 0;
2607 int add, len = 0;
2608 int pos = 0;
2609
2610 rg = yaffs_trace_mask;
2611
2612 while (!done && (pos < count)) {
2613 done = 1;
2614 while ((pos < count) && isspace(buf[pos]))
2615 pos++;
2616
2617 switch (buf[pos]) {
2618 case '+':
2619 case '-':
2620 case '=':
2621 add = buf[pos];
2622 pos++;
2623 break;
2624
2625 default:
2626 add = ' ';
2627 break;
2628 }
2629 mask_name = NULL;
2630
2631 mask_bitfield = simple_strtoul(buf + pos, &end, 0);
2632
2633 if (end > buf + pos) {
2634 mask_name = "numeral";
2635 len = end - (buf + pos);
2636 pos += len;
2637 done = 0;
2638 } else {
2639 for (x = buf + pos, i = 0;
2640 (*x == '_' || (*x >= 'a' && *x <= 'z')) &&
2641 i < MAX_MASK_NAME_LENGTH; x++, i++, pos++)
2642 substring[i] = *x;
2643 substring[i] = '\0';
2644
2645 for (i = 0; mask_flags[i].mask_name != NULL; i++) {
2646 if (strcmp(substring, mask_flags[i].mask_name)
2647 == 0) {
2648 mask_name = mask_flags[i].mask_name;
2649 mask_bitfield =
2650 mask_flags[i].mask_bitfield;
2651 done = 0;
2652 break;
2653 }
2654 }
2655 }
2656
2657 if (mask_name != NULL) {
2658 done = 0;
2659 switch (add) {
2660 case '-':
2661 rg &= ~mask_bitfield;
2662 break;
2663 case '+':
2664 rg |= mask_bitfield;
2665 break;
2666 case '=':
2667 rg = mask_bitfield;
2668 break;
2669 default:
2670 rg |= mask_bitfield;
2671 break;
2672 }
2673 }
2674 }
2675
2676 yaffs_trace_mask = rg | YAFFS_TRACE_ALWAYS;
2677
2678 printk(KERN_DEBUG "new trace = 0x%08X\n", yaffs_trace_mask);
2679
2680 if (rg & YAFFS_TRACE_ALWAYS) {
2681 for (i = 0; mask_flags[i].mask_name != NULL; i++) {
2682 char flag;
2683 flag = ((rg & mask_flags[i].mask_bitfield) ==
2684 mask_flags[i].mask_bitfield) ? '+' : '-';
2685 printk(KERN_DEBUG "%c%s\n", flag,
2686 mask_flags[i].mask_name);
2687 }
2688 }
2689
2690 return count;
2691}
2692
2693static int yaffs_proc_write(struct file *file, const char *buf,
2694 unsigned long count, void *data)
2695{
2696 return yaffs_proc_write_trace_options(file, buf, count, data);
2697}
2698
2699/* Stuff to handle installation of file systems */
2700struct file_system_to_install {
2701 struct file_system_type *fst;
2702 int installed;
2703};
2704
2705static struct file_system_to_install fs_to_install[] = {
2706 {&yaffs_fs_type, 0},
2707 {&yaffs2_fs_type, 0},
2708 {NULL, 0}
2709};
2710
2711static int __init init_yaffs_fs(void)
2712{
2713 int error = 0;
2714 struct file_system_to_install *fsinst;
2715
2716 yaffs_trace(YAFFS_TRACE_ALWAYS,
2717 "yaffs built " __DATE__ " " __TIME__ " Installing.");
2718
2719#ifdef CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED
2720 yaffs_trace(YAFFS_TRACE_ALWAYS,
2721 "\n\nYAFFS-WARNING CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED selected.\n\n\n");
2722#endif
2723
2724 mutex_init(&yaffs_context_lock);
2725
2726 /* Install the proc_fs entries */
2727 my_proc_entry = create_proc_entry("yaffs",
2728 S_IRUGO | S_IFREG, YPROC_ROOT);
2729
2730 if (my_proc_entry) {
2731 my_proc_entry->write_proc = yaffs_proc_write;
2732 my_proc_entry->read_proc = yaffs_proc_read;
2733 my_proc_entry->data = NULL;
2734 } else {
2735 return -ENOMEM;
2736 }
2737
2738
2739 /* Now add the file system entries */
2740
2741 fsinst = fs_to_install;
2742
2743 while (fsinst->fst && !error) {
2744 error = register_filesystem(fsinst->fst);
2745 if (!error)
2746 fsinst->installed = 1;
2747 fsinst++;
2748 }
2749
2750 /* Any errors? uninstall */
2751 if (error) {
2752 fsinst = fs_to_install;
2753
2754 while (fsinst->fst) {
2755 if (fsinst->installed) {
2756 unregister_filesystem(fsinst->fst);
2757 fsinst->installed = 0;
2758 }
2759 fsinst++;
2760 }
2761 }
2762
2763 return error;
2764}
2765
2766static void __exit exit_yaffs_fs(void)
2767{
2768
2769 struct file_system_to_install *fsinst;
2770
2771 yaffs_trace(YAFFS_TRACE_ALWAYS,
2772 "yaffs built " __DATE__ " " __TIME__ " removing.");
2773
2774 remove_proc_entry("yaffs", YPROC_ROOT);
2775
2776 fsinst = fs_to_install;
2777
2778 while (fsinst->fst) {
2779 if (fsinst->installed) {
2780 unregister_filesystem(fsinst->fst);
2781 fsinst->installed = 0;
2782 }
2783 fsinst++;
2784 }
2785}
2786
2787module_init(init_yaffs_fs)
2788module_exit(exit_yaffs_fs)
2789
2790MODULE_DESCRIPTION("YAFFS2 - a NAND specific flash file system");
2791MODULE_AUTHOR("Charles Manning, Aleph One Ltd., 2002-2010");
2792MODULE_LICENSE("GPL");
diff --git a/fs/yaffs2/yaffs_yaffs1.c b/fs/yaffs2/yaffs_yaffs1.c
new file mode 100644
index 00000000000..9eb60308254
--- /dev/null
+++ b/fs/yaffs2/yaffs_yaffs1.c
@@ -0,0 +1,433 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_yaffs1.h"
15#include "yportenv.h"
16#include "yaffs_trace.h"
17#include "yaffs_bitmap.h"
18#include "yaffs_getblockinfo.h"
19#include "yaffs_nand.h"
20#include "yaffs_attribs.h"
21
22int yaffs1_scan(struct yaffs_dev *dev)
23{
24 struct yaffs_ext_tags tags;
25 int blk;
26 int result;
27
28 int chunk;
29 int c;
30 int deleted;
31 enum yaffs_block_state state;
32 struct yaffs_obj *hard_list = NULL;
33 struct yaffs_block_info *bi;
34 u32 seq_number;
35 struct yaffs_obj_hdr *oh;
36 struct yaffs_obj *in;
37 struct yaffs_obj *parent;
38
39 int alloc_failed = 0;
40
41 struct yaffs_shadow_fixer *shadow_fixers = NULL;
42
43 u8 *chunk_data;
44
45 yaffs_trace(YAFFS_TRACE_SCAN,
46 "yaffs1_scan starts intstartblk %d intendblk %d...",
47 dev->internal_start_block, dev->internal_end_block);
48
49 chunk_data = yaffs_get_temp_buffer(dev, __LINE__);
50
51 dev->seq_number = YAFFS_LOWEST_SEQUENCE_NUMBER;
52
53 /* Scan all the blocks to determine their state */
54 bi = dev->block_info;
55 for (blk = dev->internal_start_block; blk <= dev->internal_end_block;
56 blk++) {
57 yaffs_clear_chunk_bits(dev, blk);
58 bi->pages_in_use = 0;
59 bi->soft_del_pages = 0;
60
61 yaffs_query_init_block_state(dev, blk, &state, &seq_number);
62
63 bi->block_state = state;
64 bi->seq_number = seq_number;
65
66 if (bi->seq_number == YAFFS_SEQUENCE_BAD_BLOCK)
67 bi->block_state = state = YAFFS_BLOCK_STATE_DEAD;
68
69 yaffs_trace(YAFFS_TRACE_SCAN_DEBUG,
70 "Block scanning block %d state %d seq %d",
71 blk, state, seq_number);
72
73 if (state == YAFFS_BLOCK_STATE_DEAD) {
74 yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
75 "block %d is bad", blk);
76 } else if (state == YAFFS_BLOCK_STATE_EMPTY) {
77 yaffs_trace(YAFFS_TRACE_SCAN_DEBUG, "Block empty ");
78 dev->n_erased_blocks++;
79 dev->n_free_chunks += dev->param.chunks_per_block;
80 }
81 bi++;
82 }
83
84 /* For each block.... */
85 for (blk = dev->internal_start_block;
86 !alloc_failed && blk <= dev->internal_end_block; blk++) {
87
88 cond_resched();
89
90 bi = yaffs_get_block_info(dev, blk);
91 state = bi->block_state;
92
93 deleted = 0;
94
95 /* For each chunk in each block that needs scanning.... */
96 for (c = 0; !alloc_failed && c < dev->param.chunks_per_block &&
97 state == YAFFS_BLOCK_STATE_NEEDS_SCANNING; c++) {
98 /* Read the tags and decide what to do */
99 chunk = blk * dev->param.chunks_per_block + c;
100
101 result = yaffs_rd_chunk_tags_nand(dev, chunk, NULL,
102 &tags);
103
104 /* Let's have a good look at this chunk... */
105
106 if (tags.ecc_result == YAFFS_ECC_RESULT_UNFIXED
107 || tags.is_deleted) {
108 /* YAFFS1 only...
109 * A deleted chunk
110 */
111 deleted++;
112 dev->n_free_chunks++;
113 /*T((" %d %d deleted\n",blk,c)); */
114 } else if (!tags.chunk_used) {
115 /* An unassigned chunk in the block
116 * This means that either the block is empty or
117 * this is the one being allocated from
118 */
119
120 if (c == 0) {
121 /* We're looking at the first chunk in the block so the block is unused */
122 state = YAFFS_BLOCK_STATE_EMPTY;
123 dev->n_erased_blocks++;
124 } else {
125 /* this is the block being allocated from */
126 yaffs_trace(YAFFS_TRACE_SCAN,
127 " Allocating from %d %d",
128 blk, c);
129 state = YAFFS_BLOCK_STATE_ALLOCATING;
130 dev->alloc_block = blk;
131 dev->alloc_page = c;
132 dev->alloc_block_finder = blk;
133 /* Set block finder here to encourage the allocator to go forth from here. */
134
135 }
136
137 dev->n_free_chunks +=
138 (dev->param.chunks_per_block - c);
139 } else if (tags.chunk_id > 0) {
140 /* chunk_id > 0 so it is a data chunk... */
141 unsigned int endpos;
142
143 yaffs_set_chunk_bit(dev, blk, c);
144 bi->pages_in_use++;
145
146 in = yaffs_find_or_create_by_number(dev,
147 tags.obj_id,
148 YAFFS_OBJECT_TYPE_FILE);
149 /* yaffs_put_chunk_in_file() checks for a clash (two data chunks with
150 * the same chunk_id).
151 */
152
153 if (!in)
154 alloc_failed = 1;
155
156 if (in) {
157 if (!yaffs_put_chunk_in_file
158 (in, tags.chunk_id, chunk, 1))
159 alloc_failed = 1;
160 }
161
162 endpos =
163 (tags.chunk_id - 1) * dev->data_bytes_per_chunk +
164 tags.n_bytes;
166 if (in
167 && in->variant_type ==
168 YAFFS_OBJECT_TYPE_FILE
169 && in->variant.file_variant.scanned_size <
170 endpos) {
171 in->variant.file_variant.scanned_size =
172 endpos;
173 if (!dev->param.use_header_file_size) {
174 in->variant.file_variant.file_size =
175 in->variant.file_variant.scanned_size;
178 }
179
180 }
181 /* T((" %d %d data %d %d\n",blk,c,tags.obj_id,tags.chunk_id)); */
182 } else {
183 /* chunk_id == 0, so it is an ObjectHeader.
184 * Thus, we read in the object header and make the object
185 */
186 yaffs_set_chunk_bit(dev, blk, c);
187 bi->pages_in_use++;
188
189 result = yaffs_rd_chunk_tags_nand(dev, chunk,
190 chunk_data,
191 NULL);
192
193 oh = (struct yaffs_obj_hdr *)chunk_data;
194
195 in = yaffs_find_by_number(dev, tags.obj_id);
196 if (in && in->variant_type != oh->type) {
197 /* This should not happen, but somehow
198 * we've ended up with an obj_id that has been reused but not yet
199 * deleted, and worse still it has changed type. Delete the old object.
200 */
201
202 yaffs_del_obj(in);
203
204 in = NULL;
205 }
206
207 in = yaffs_find_or_create_by_number(dev,
208 tags.obj_id,
209 oh->type);
210
211 if (!in)
212 alloc_failed = 1;
213
214 if (in && oh->shadows_obj > 0) {
215
216 struct yaffs_shadow_fixer *fixer;
217 fixer =
218 kmalloc(sizeof(struct yaffs_shadow_fixer),
219 GFP_NOFS);
221 if (fixer) {
222 fixer->next = shadow_fixers;
223 shadow_fixers = fixer;
224 fixer->obj_id = tags.obj_id;
225 fixer->shadowed_id =
226 oh->shadows_obj;
227 yaffs_trace(YAFFS_TRACE_SCAN,
228 " Shadow fixer: %d shadows %d",
229 fixer->obj_id,
230 fixer->shadowed_id);
231
232 }
233
234 }
235
236 if (in && in->valid) {
237 /* We have already filled this one. We have a duplicate and need to resolve it. */
238
239 unsigned existing_serial = in->serial;
240 unsigned new_serial =
241 tags.serial_number;
242
243 if (((existing_serial + 1) & 3) ==
244 new_serial) {
245 /* Use new one - destroy the existing one */
246 yaffs_chunk_del(dev,
247 in->hdr_chunk,
248 1, __LINE__);
249 in->valid = 0;
250 } else {
251 /* Use existing - destroy this one. */
252 yaffs_chunk_del(dev, chunk, 1,
253 __LINE__);
254 }
255 }
256
257 if (in && !in->valid &&
258 (tags.obj_id == YAFFS_OBJECTID_ROOT ||
259 tags.obj_id ==
260 YAFFS_OBJECTID_LOSTNFOUND)) {
261 /* We only load some info, don't fiddle with directory structure */
262 in->valid = 1;
263 in->variant_type = oh->type;
264
265 in->yst_mode = oh->yst_mode;
266 yaffs_load_attribs(in, oh);
267 in->hdr_chunk = chunk;
268 in->serial = tags.serial_number;
269
270 } else if (in && !in->valid) {
271 /* we need to load this info */
272
273 in->valid = 1;
274 in->variant_type = oh->type;
275
276 in->yst_mode = oh->yst_mode;
277 yaffs_load_attribs(in, oh);
278 in->hdr_chunk = chunk;
279 in->serial = tags.serial_number;
280
281 yaffs_set_obj_name_from_oh(in, oh);
282 in->dirty = 0;
283
284 /* directory stuff...
285 * hook up to parent
286 */
287
288 parent =
289 yaffs_find_or_create_by_number
290 (dev, oh->parent_obj_id,
291 YAFFS_OBJECT_TYPE_DIRECTORY);
292 if (!parent)
293 alloc_failed = 1;
294 if (parent && parent->variant_type ==
295 YAFFS_OBJECT_TYPE_UNKNOWN) {
296 /* Set up as a directory */
297 parent->variant_type =
298 YAFFS_OBJECT_TYPE_DIRECTORY;
299 INIT_LIST_HEAD(
300 &parent->variant.dir_variant.children);
301 } else if (!parent
302 || parent->variant_type !=
303 YAFFS_OBJECT_TYPE_DIRECTORY) {
304 /* Hoosterman, another problem....
305 * We're trying to use a non-directory as a directory
306 */
307
308 yaffs_trace(YAFFS_TRACE_ERROR,
309 "yaffs tragedy: attempting to use non-directory as a directory in scan. Put in lost+found."
310 );
311 parent = dev->lost_n_found;
312 }
313
314 yaffs_add_obj_to_dir(parent, in);
315
316 if (0 && (parent == dev->del_dir ||
317 parent ==
318 dev->unlinked_dir)) {
319 in->deleted = 1; /* If it is unlinked at start up then it wants deleting */
320 dev->n_deleted_files++;
321 }
322 /* Note re hardlinks.
323 * Since we might scan a hardlink before its equivalent object is scanned
324 * we put them all in a list.
325 * After scanning is complete, we should have all the objects, so we run through this
326 * list and fix up all the chains.
327 */
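 /* (The list is threaded through hard_links.next, temporarily cast
 * to point at the next yaffs_obj rather than at a real list_head,
 * until yaffs_link_fixup() rebuilds the proper chains.)
 */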
328
329 switch (in->variant_type) {
330 case YAFFS_OBJECT_TYPE_UNKNOWN:
331 /* Todo got a problem */
332 break;
333 case YAFFS_OBJECT_TYPE_FILE:
334 if (dev->param.use_header_file_size)
335 in->variant.file_variant.file_size =
336 oh->file_size;
340
341 break;
342 case YAFFS_OBJECT_TYPE_HARDLINK:
343 in->variant.hardlink_variant.equiv_id =
344 oh->equiv_id;
346 in->hard_links.next =
347 (struct list_head *)
348 hard_list;
349 hard_list = in;
350 break;
351 case YAFFS_OBJECT_TYPE_DIRECTORY:
352 /* Do nothing */
353 break;
354 case YAFFS_OBJECT_TYPE_SPECIAL:
355 /* Do nothing */
356 break;
357 case YAFFS_OBJECT_TYPE_SYMLINK:
358 in->variant.symlink_variant.alias =
359 yaffs_clone_str(oh->alias);
360 if (!in->variant.symlink_variant.alias)
363 alloc_failed = 1;
364 break;
365 }
366
367 }
368 }
369 }
370
371 if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
372 /* If we got this far while scanning, then the block is fully allocated. */
373 state = YAFFS_BLOCK_STATE_FULL;
374 }
375
376 if (state == YAFFS_BLOCK_STATE_ALLOCATING) {
377 /* If the block was partially allocated then treat it as fully allocated. */
378 state = YAFFS_BLOCK_STATE_FULL;
379 dev->alloc_block = -1;
380 }
381
382 bi->block_state = state;
383
384 /* Now let's see if it was dirty */
385 if (bi->pages_in_use == 0 &&
386 !bi->has_shrink_hdr &&
387 bi->block_state == YAFFS_BLOCK_STATE_FULL) {
388 yaffs_block_became_dirty(dev, blk);
389 }
390
391 }
392
393 /* Ok, we've done all the scanning.
394 * Fix up the hard link chains.
395 * We should now have scanned all the objects, now it's time to add these
396 * hardlinks.
397 */
398
399 yaffs_link_fixup(dev, hard_list);
400
401 /* Fix up any shadowed objects */
402 {
403 struct yaffs_shadow_fixer *fixer;
404 struct yaffs_obj *obj;
405
406 while (shadow_fixers) {
407 fixer = shadow_fixers;
408 shadow_fixers = fixer->next;
409 /* Complete the rename transaction by deleting the shadowed object
410 * then setting the object header to unshadowed.
411 */
412 obj = yaffs_find_by_number(dev, fixer->shadowed_id);
413 if (obj)
414 yaffs_del_obj(obj);
415
416 obj = yaffs_find_by_number(dev, fixer->obj_id);
417
418 if (obj)
419 yaffs_update_oh(obj, NULL, 1, 0, 0, NULL);
420
421 kfree(fixer);
422 }
423 }
424
425 yaffs_release_temp_buffer(dev, chunk_data, __LINE__);
426
427 if (alloc_failed)
428 return YAFFS_FAIL;
429
430 yaffs_trace(YAFFS_TRACE_SCAN, "yaffs1_scan ends");
431
432 return YAFFS_OK;
433}
diff --git a/fs/yaffs2/yaffs_yaffs1.h b/fs/yaffs2/yaffs_yaffs1.h
new file mode 100644
index 00000000000..db23e04973b
--- /dev/null
+++ b/fs/yaffs2/yaffs_yaffs1.h
@@ -0,0 +1,22 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_YAFFS1_H__
17#define __YAFFS_YAFFS1_H__
18
19#include "yaffs_guts.h"
20int yaffs1_scan(struct yaffs_dev *dev);
21
22#endif
diff --git a/fs/yaffs2/yaffs_yaffs2.c b/fs/yaffs2/yaffs_yaffs2.c
new file mode 100644
index 00000000000..33397af7003
--- /dev/null
+++ b/fs/yaffs2/yaffs_yaffs2.c
@@ -0,0 +1,1598 @@
1/*
2 * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include "yaffs_guts.h"
15#include "yaffs_trace.h"
16#include "yaffs_yaffs2.h"
17#include "yaffs_checkptrw.h"
18#include "yaffs_bitmap.h"
19#include "yaffs_nand.h"
20#include "yaffs_getblockinfo.h"
21#include "yaffs_verify.h"
22#include "yaffs_attribs.h"
23
24/*
25 * Checkpoints are really no benefit on very small partitions.
26 *
27 * To save space on small partitions don't bother with checkpoints unless
28 * the partition is at least this big.
29 */
30#define YAFFS_CHECKPOINT_MIN_BLOCKS 60
31
32#define YAFFS_SMALL_HOLE_THRESHOLD 4
33
34/*
35 * Oldest Dirty Sequence Number handling.
36 */
37
38/* yaffs_calc_oldest_dirty_seq()
39 * yaffs2_find_oldest_dirty_seq()
40 * Calculate the oldest dirty sequence number if we don't know it.
41 */
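/*
 * A dev->oldest_dirty_seq of zero means "not currently known": it is
 * cleared when a block is erased or retired and recomputed lazily on
 * the next query, while yaffs2_update_oldest_dirty_seq() only lowers
 * a value that is already being tracked.
 */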
42void yaffs_calc_oldest_dirty_seq(struct yaffs_dev *dev)
43{
44 int i;
45 unsigned seq;
46 unsigned block_no = 0;
47 struct yaffs_block_info *b;
48
49 if (!dev->param.is_yaffs2)
50 return;
51
52 /* Find the oldest dirty sequence number. */
53 seq = dev->seq_number + 1;
54 b = dev->block_info;
55 for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
56 if (b->block_state == YAFFS_BLOCK_STATE_FULL &&
57 (b->pages_in_use - b->soft_del_pages) <
58 dev->param.chunks_per_block && b->seq_number < seq) {
59 seq = b->seq_number;
60 block_no = i;
61 }
62 b++;
63 }
64
65 if (block_no) {
66 dev->oldest_dirty_seq = seq;
67 dev->oldest_dirty_block = block_no;
68 }
69
70}
71
72void yaffs2_find_oldest_dirty_seq(struct yaffs_dev *dev)
73{
74 if (!dev->param.is_yaffs2)
75 return;
76
77 if (!dev->oldest_dirty_seq)
78 yaffs_calc_oldest_dirty_seq(dev);
79}
80
81/*
82 * yaffs_clear_oldest_dirty_seq()
83 * Called when a block is erased or marked bad (i.e. when its seq_number
84 * becomes invalid). If the value matches the oldest then we clear
85 * dev->oldest_dirty_seq to force its recomputation.
86 */
87void yaffs2_clear_oldest_dirty_seq(struct yaffs_dev *dev,
88 struct yaffs_block_info *bi)
89{
90
91 if (!dev->param.is_yaffs2)
92 return;
93
94 if (!bi || bi->seq_number == dev->oldest_dirty_seq) {
95 dev->oldest_dirty_seq = 0;
96 dev->oldest_dirty_block = 0;
97 }
98}
99
100/*
101 * yaffs2_update_oldest_dirty_seq()
102 * Update the oldest dirty sequence number whenever we dirty a block.
103 * Only do this if the oldest_dirty_seq is actually being tracked.
104 */
105void yaffs2_update_oldest_dirty_seq(struct yaffs_dev *dev, unsigned block_no,
106 struct yaffs_block_info *bi)
107{
108 if (!dev->param.is_yaffs2)
109 return;
110
111 if (dev->oldest_dirty_seq) {
112 if (dev->oldest_dirty_seq > bi->seq_number) {
113 dev->oldest_dirty_seq = bi->seq_number;
114 dev->oldest_dirty_block = block_no;
115 }
116 }
117}
118
119int yaffs_block_ok_for_gc(struct yaffs_dev *dev, struct yaffs_block_info *bi)
120{
121
122 if (!dev->param.is_yaffs2)
123 return 1; /* disqualification only applies to yaffs2. */
124
125 if (!bi->has_shrink_hdr)
126 return 1; /* can gc */
127
128 yaffs2_find_oldest_dirty_seq(dev);
129
130 /* Can't do gc of this block if there are any blocks older than this one that have
131 * discarded pages.
132 */
133 return (bi->seq_number <= dev->oldest_dirty_seq);
134}
135
136/*
137 * yaffs2_find_refresh_block()
138 * periodically finds the oldest full block by sequence number for refreshing.
139 * Only for yaffs2.
140 */
141u32 yaffs2_find_refresh_block(struct yaffs_dev *dev)
142{
143 u32 b;
144
145 u32 oldest = 0;
146 u32 oldest_seq = 0;
147
148 struct yaffs_block_info *bi;
149
150 if (!dev->param.is_yaffs2)
151 return oldest;
152
153 /*
154 * If refresh period < 10 then refreshing is disabled.
155 */
156 if (dev->param.refresh_period < 10)
157 return oldest;
158
159 /*
160 * Fix broken values.
161 */
162 if (dev->refresh_skip > dev->param.refresh_period)
163 dev->refresh_skip = dev->param.refresh_period;
164
165 if (dev->refresh_skip > 0)
166 return oldest;
167
168 /*
169 * Refresh skip is now zero.
170 * We'll do a refresh this time around....
171 * Update the refresh skip and find the oldest block.
172 */
173 dev->refresh_skip = dev->param.refresh_period;
174 dev->refresh_count++;
175 bi = dev->block_info;
176 for (b = dev->internal_start_block; b <= dev->internal_end_block; b++) {
177
178 if (bi->block_state == YAFFS_BLOCK_STATE_FULL) {
179
180 if (oldest < 1 || bi->seq_number < oldest_seq) {
181 oldest = b;
182 oldest_seq = bi->seq_number;
183 }
184 }
185 bi++;
186 }
187
188 if (oldest > 0) {
189 yaffs_trace(YAFFS_TRACE_GC,
190 "GC refresh count %d selected block %d with seq_number %d",
191 dev->refresh_count, oldest, oldest_seq);
192 }
193
194 return oldest;
195}
196
197int yaffs2_checkpt_required(struct yaffs_dev *dev)
198{
199 int nblocks;
200
201 if (!dev->param.is_yaffs2)
202 return 0;
203
204 nblocks = dev->internal_end_block - dev->internal_start_block + 1;
205
206 return !dev->param.skip_checkpt_wr &&
207 !dev->read_only && (nblocks >= YAFFS_CHECKPOINT_MIN_BLOCKS);
208}
209
210int yaffs_calc_checkpt_blocks_required(struct yaffs_dev *dev)
211{
212 int retval;
213
214 if (!dev->param.is_yaffs2)
215 return 0;
216
217 if (!dev->checkpoint_blocks_required && yaffs2_checkpt_required(dev)) {
218 /* Not a valid value so recalculate */
219 int n_bytes = 0;
220 int n_blocks;
221 int dev_blocks =
222 (dev->param.end_block - dev->param.start_block + 1);
223
224 n_bytes += sizeof(struct yaffs_checkpt_validity);
225 n_bytes += sizeof(struct yaffs_checkpt_dev);
226 n_bytes += dev_blocks * sizeof(struct yaffs_block_info);
227 n_bytes += dev_blocks * dev->chunk_bit_stride;
228 n_bytes +=
229 (sizeof(struct yaffs_checkpt_obj) +
230 sizeof(u32)) * (dev->n_obj);
231 n_bytes += (dev->tnode_size + sizeof(u32)) * (dev->n_tnodes);
232 n_bytes += sizeof(struct yaffs_checkpt_validity);
233 n_bytes += sizeof(u32); /* checksum */
234
235 /* Round up and add 2 blocks to allow for some bad blocks, so add 3 */
236
237 n_blocks =
238 (n_bytes /
239 (dev->data_bytes_per_chunk *
240 dev->param.chunks_per_block)) + 3;
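
 /* Worked example with hypothetical geometry: 2048-byte data
 * chunks and 64 chunks per block give 128 KiB per block, so
 * roughly 300 KiB of checkpoint state needs 300/128 -> 2
 * blocks, plus the 3 added above: 5 blocks in total.
 */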
241
242 dev->checkpoint_blocks_required = n_blocks;
243 }
244
245 retval = dev->checkpoint_blocks_required - dev->blocks_in_checkpt;
246 if (retval < 0)
247 retval = 0;
248 return retval;
249}
250
251/*--------------------- Checkpointing --------------------*/
252
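/*
 * Checkpoint stream layout, as written by yaffs2_wr_checkpt_data()
 * below and read back by yaffs2_rd_checkpt_data():
 *
 *   validity marker (head = 1)
 *   device runtime state (struct yaffs_checkpt_dev) followed by the
 *     block info array and the chunk-use bitmaps
 *   one struct yaffs_checkpt_obj per live object; each file object is
 *     followed by its tnode records and a ~0 terminator
 *   an all-0xFF object record ending the object list
 *   validity marker (head = 0)
 *   a 32-bit checksum of the stream
 */
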
253static int yaffs2_wr_checkpt_validity_marker(struct yaffs_dev *dev, int head)
254{
255 struct yaffs_checkpt_validity cp;
256
257 memset(&cp, 0, sizeof(cp));
258
259 cp.struct_type = sizeof(cp);
260 cp.magic = YAFFS_MAGIC;
261 cp.version = YAFFS_CHECKPOINT_VERSION;
262 cp.head = (head) ? 1 : 0;
263
264 return (yaffs2_checkpt_wr(dev, &cp, sizeof(cp)) == sizeof(cp)) ? 1 : 0;
265}
266
267static int yaffs2_rd_checkpt_validity_marker(struct yaffs_dev *dev, int head)
268{
269 struct yaffs_checkpt_validity cp;
270 int ok;
271
272 ok = (yaffs2_checkpt_rd(dev, &cp, sizeof(cp)) == sizeof(cp));
273
274 if (ok)
275 ok = (cp.struct_type == sizeof(cp)) &&
276 (cp.magic == YAFFS_MAGIC) &&
277 (cp.version == YAFFS_CHECKPOINT_VERSION) &&
278 (cp.head == ((head) ? 1 : 0));
279 return ok ? 1 : 0;
280}
281
282static void yaffs2_dev_to_checkpt_dev(struct yaffs_checkpt_dev *cp,
283 struct yaffs_dev *dev)
284{
285 cp->n_erased_blocks = dev->n_erased_blocks;
286 cp->alloc_block = dev->alloc_block;
287 cp->alloc_page = dev->alloc_page;
288 cp->n_free_chunks = dev->n_free_chunks;
289
290 cp->n_deleted_files = dev->n_deleted_files;
291 cp->n_unlinked_files = dev->n_unlinked_files;
292 cp->n_bg_deletions = dev->n_bg_deletions;
293 cp->seq_number = dev->seq_number;
294
295}
296
297static void yaffs_checkpt_dev_to_dev(struct yaffs_dev *dev,
298 struct yaffs_checkpt_dev *cp)
299{
300 dev->n_erased_blocks = cp->n_erased_blocks;
301 dev->alloc_block = cp->alloc_block;
302 dev->alloc_page = cp->alloc_page;
303 dev->n_free_chunks = cp->n_free_chunks;
304
305 dev->n_deleted_files = cp->n_deleted_files;
306 dev->n_unlinked_files = cp->n_unlinked_files;
307 dev->n_bg_deletions = cp->n_bg_deletions;
308 dev->seq_number = cp->seq_number;
309}
310
311static int yaffs2_wr_checkpt_dev(struct yaffs_dev *dev)
312{
313 struct yaffs_checkpt_dev cp;
314 u32 n_bytes;
315 u32 n_blocks =
316 (dev->internal_end_block - dev->internal_start_block + 1);
317
318 int ok;
319
320 /* Write device runtime values */
321 yaffs2_dev_to_checkpt_dev(&cp, dev);
322 cp.struct_type = sizeof(cp);
323
324 ok = (yaffs2_checkpt_wr(dev, &cp, sizeof(cp)) == sizeof(cp));
325
326 /* Write block info */
327 if (ok) {
328 n_bytes = n_blocks * sizeof(struct yaffs_block_info);
329 ok = (yaffs2_checkpt_wr(dev, dev->block_info, n_bytes) ==
330 n_bytes);
331 }
332
333 /* Write chunk bits */
334 if (ok) {
335 n_bytes = n_blocks * dev->chunk_bit_stride;
336 ok = (yaffs2_checkpt_wr(dev, dev->chunk_bits, n_bytes) ==
337 n_bytes);
338 }
339 return ok ? 1 : 0;
340
341}
342
343static int yaffs2_rd_checkpt_dev(struct yaffs_dev *dev)
344{
345 struct yaffs_checkpt_dev cp;
346 u32 n_bytes;
347 u32 n_blocks =
348 (dev->internal_end_block - dev->internal_start_block + 1);
349
350 int ok;
351
352 ok = (yaffs2_checkpt_rd(dev, &cp, sizeof(cp)) == sizeof(cp));
353 if (!ok)
354 return 0;
355
356 if (cp.struct_type != sizeof(cp))
357 return 0;
358
359 yaffs_checkpt_dev_to_dev(dev, &cp);
360
361 n_bytes = n_blocks * sizeof(struct yaffs_block_info);
362
363 ok = (yaffs2_checkpt_rd(dev, dev->block_info, n_bytes) == n_bytes);
364
365 if (!ok)
366 return 0;
367 n_bytes = n_blocks * dev->chunk_bit_stride;
368
369 ok = (yaffs2_checkpt_rd(dev, dev->chunk_bits, n_bytes) == n_bytes);
370
371 return ok ? 1 : 0;
372}
373
374static void yaffs2_obj_checkpt_obj(struct yaffs_checkpt_obj *cp,
375 struct yaffs_obj *obj)
376{
377
378 cp->obj_id = obj->obj_id;
379 cp->parent_id = (obj->parent) ? obj->parent->obj_id : 0;
380 cp->hdr_chunk = obj->hdr_chunk;
381 cp->variant_type = obj->variant_type;
382 cp->deleted = obj->deleted;
383 cp->soft_del = obj->soft_del;
384 cp->unlinked = obj->unlinked;
385 cp->fake = obj->fake;
386 cp->rename_allowed = obj->rename_allowed;
387 cp->unlink_allowed = obj->unlink_allowed;
388 cp->serial = obj->serial;
389 cp->n_data_chunks = obj->n_data_chunks;
390
391 if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE)
392 cp->size_or_equiv_obj = obj->variant.file_variant.file_size;
393 else if (obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK)
394 cp->size_or_equiv_obj = obj->variant.hardlink_variant.equiv_id;
395}
396
397static int yaffs2_checkpt_obj_to_obj(struct yaffs_obj *obj,
398 struct yaffs_checkpt_obj *cp)
399{
400
401 struct yaffs_obj *parent;
402
403 if (obj->variant_type != cp->variant_type) {
404 yaffs_trace(YAFFS_TRACE_ERROR,
405 "Checkpoint read object %d type %d chunk %d does not match existing object type %d",
406 cp->obj_id, cp->variant_type, cp->hdr_chunk,
407 obj->variant_type);
408 return 0;
409 }
410
411 obj->obj_id = cp->obj_id;
412
413 if (cp->parent_id)
414 parent = yaffs_find_or_create_by_number(obj->my_dev,
415 cp->parent_id,
416 YAFFS_OBJECT_TYPE_DIRECTORY);
417 else
418 parent = NULL;
419
420 if (parent) {
421 if (parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
422 yaffs_trace(YAFFS_TRACE_ALWAYS,
423 "Checkpoint read object %d parent %d type %d chunk %d Parent type, %d, not directory",
424 cp->obj_id, cp->parent_id,
425 cp->variant_type, cp->hdr_chunk,
426 parent->variant_type);
427 return 0;
428 }
429 yaffs_add_obj_to_dir(parent, obj);
430 }
431
432 obj->hdr_chunk = cp->hdr_chunk;
433 obj->variant_type = cp->variant_type;
434 obj->deleted = cp->deleted;
435 obj->soft_del = cp->soft_del;
436 obj->unlinked = cp->unlinked;
437 obj->fake = cp->fake;
438 obj->rename_allowed = cp->rename_allowed;
439 obj->unlink_allowed = cp->unlink_allowed;
440 obj->serial = cp->serial;
441 obj->n_data_chunks = cp->n_data_chunks;
442
443 if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE)
444 obj->variant.file_variant.file_size = cp->size_or_equiv_obj;
445 else if (obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK)
446 obj->variant.hardlink_variant.equiv_id = cp->size_or_equiv_obj;
447
448 if (obj->hdr_chunk > 0)
449 obj->lazy_loaded = 1;
450 return 1;
451}
452
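/* Tnode records are checkpointed depth first as (level-0 chunk offset,
 * tnode) pairs, with an all-ones offset written as a terminator; see
 * yaffs2_rd_checkpt_tnodes() below for the matching reader.
 */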
453static int yaffs2_checkpt_tnode_worker(struct yaffs_obj *in,
454 struct yaffs_tnode *tn, u32 level,
455 int chunk_offset)
456{
457 int i;
458 struct yaffs_dev *dev = in->my_dev;
459 int ok = 1;
460
461 if (tn) {
462 if (level > 0) {
463
464 for (i = 0; i < YAFFS_NTNODES_INTERNAL && ok; i++) {
465 if (tn->internal[i]) {
466 ok = yaffs2_checkpt_tnode_worker(in,
467 tn->internal[i],
468 level - 1,
469 (chunk_offset << YAFFS_TNODES_INTERNAL_BITS) + i);
476 }
477 }
478 } else if (level == 0) {
479 u32 base_offset =
480 chunk_offset << YAFFS_TNODES_LEVEL0_BITS;
481 ok = (yaffs2_checkpt_wr
482 (dev, &base_offset,
483 sizeof(base_offset)) == sizeof(base_offset));
484 if (ok)
485 ok = (yaffs2_checkpt_wr
486 (dev, tn,
487 dev->tnode_size) == dev->tnode_size);
488 }
489 }
490
491 return ok;
492
493}
494
495static int yaffs2_wr_checkpt_tnodes(struct yaffs_obj *obj)
496{
497 u32 end_marker = ~0;
498 int ok = 1;
499
500 if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE) {
501 ok = yaffs2_checkpt_tnode_worker(obj,
502 obj->variant.file_variant.top,
503 obj->variant.file_variant.top_level, 0);
505 if (ok)
506 ok = (yaffs2_checkpt_wr
507 (obj->my_dev, &end_marker,
508 sizeof(end_marker)) == sizeof(end_marker));
509 }
510
511 return ok ? 1 : 0;
512}
513
514static int yaffs2_rd_checkpt_tnodes(struct yaffs_obj *obj)
515{
516 u32 base_chunk;
517 int ok = 1;
518 struct yaffs_dev *dev = obj->my_dev;
519 struct yaffs_file_var *file_struct_ptr = &obj->variant.file_variant;
520 struct yaffs_tnode *tn;
521 int nread = 0;
522
523 ok = (yaffs2_checkpt_rd(dev, &base_chunk, sizeof(base_chunk)) ==
524 sizeof(base_chunk));
525
526 while (ok && (~base_chunk)) {
527 nread++;
528 /* Read level 0 tnode */
529
530 tn = yaffs_get_tnode(dev);
531 if (tn) {
532 ok = (yaffs2_checkpt_rd(dev, tn, dev->tnode_size) ==
533 dev->tnode_size);
534 } else {
535 ok = 0;
536 }
537
538 if (tn && ok)
539 ok = yaffs_add_find_tnode_0(dev,
540 file_struct_ptr,
541 base_chunk, tn) ? 1 : 0;
542
543 if (ok)
544 ok = (yaffs2_checkpt_rd
545 (dev, &base_chunk,
546 sizeof(base_chunk)) == sizeof(base_chunk));
547
548 }
549
550 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
551 "Checkpoint read tnodes %d records, last %d. ok %d",
552 nread, base_chunk, ok);
553
554 return ok ? 1 : 0;
555}
556
557static int yaffs2_wr_checkpt_objs(struct yaffs_dev *dev)
558{
559 struct yaffs_obj *obj;
560 struct yaffs_checkpt_obj cp;
561 int i;
562 int ok = 1;
563 struct list_head *lh;
564
565 /* Iterate through the objects in each hash entry,
566 * dumping them to the checkpointing stream.
567 */
568
569 for (i = 0; ok && i < YAFFS_NOBJECT_BUCKETS; i++) {
570 list_for_each(lh, &dev->obj_bucket[i].list) {
571 if (lh) {
572 obj =
573 list_entry(lh, struct yaffs_obj, hash_link);
574 if (!obj->defered_free) {
575 yaffs2_obj_checkpt_obj(&cp, obj);
576 cp.struct_type = sizeof(cp);
577
578 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
579 "Checkpoint write object %d parent %d type %d chunk %d obj addr %p",
580 cp.obj_id, cp.parent_id,
581 cp.variant_type, cp.hdr_chunk, obj);
582
583 ok = (yaffs2_checkpt_wr
584 (dev, &cp,
585 sizeof(cp)) == sizeof(cp));
586
587 if (ok
588 && obj->variant_type ==
589 YAFFS_OBJECT_TYPE_FILE)
590 ok = yaffs2_wr_checkpt_tnodes
591 (obj);
592 }
593 }
594 }
595 }
596
597 /* Dump end of list */
598 memset(&cp, 0xFF, sizeof(struct yaffs_checkpt_obj));
599 cp.struct_type = sizeof(cp);
600
601 if (ok)
602 ok = (yaffs2_checkpt_wr(dev, &cp, sizeof(cp)) == sizeof(cp));
603
604 return ok ? 1 : 0;
605}
606
607static int yaffs2_rd_checkpt_objs(struct yaffs_dev *dev)
608{
609 struct yaffs_obj *obj;
610 struct yaffs_checkpt_obj cp;
611 int ok = 1;
612 int done = 0;
613 struct yaffs_obj *hard_list = NULL;
614
615 while (ok && !done) {
616 ok = (yaffs2_checkpt_rd(dev, &cp, sizeof(cp)) == sizeof(cp));
617 if (cp.struct_type != sizeof(cp)) {
618 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
619 "struct size %d instead of %d ok %d",
620 cp.struct_type, (int)sizeof(cp), ok);
621 ok = 0;
622 }
623
624 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
625 "Checkpoint read object %d parent %d type %d chunk %d ",
626 cp.obj_id, cp.parent_id, cp.variant_type,
627 cp.hdr_chunk);
628
629 if (ok && cp.obj_id == ~0) {
630 done = 1;
631 } else if (ok) {
632 obj =
633 yaffs_find_or_create_by_number(dev, cp.obj_id,
634 cp.variant_type);
635 if (obj) {
636 ok = yaffs2_checkpt_obj_to_obj(obj, &cp);
637 if (!ok)
638 break;
639 if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE) {
640 ok = yaffs2_rd_checkpt_tnodes(obj);
641 } else if (obj->variant_type ==
642 YAFFS_OBJECT_TYPE_HARDLINK) {
643 obj->hard_links.next =
644 (struct list_head *)hard_list;
645 hard_list = obj;
646 }
647 } else {
648 ok = 0;
649 }
650 }
651 }
652
653 if (ok)
654 yaffs_link_fixup(dev, hard_list);
655
656 return ok ? 1 : 0;
657}
658
659static int yaffs2_wr_checkpt_sum(struct yaffs_dev *dev)
660{
661 u32 checkpt_sum;
662 int ok;
663
664 yaffs2_get_checkpt_sum(dev, &checkpt_sum);
665
666 ok = (yaffs2_checkpt_wr(dev, &checkpt_sum, sizeof(checkpt_sum)) ==
667 sizeof(checkpt_sum));
668
669 if (!ok)
670 return 0;
671
672 return 1;
673}
674
675static int yaffs2_rd_checkpt_sum(struct yaffs_dev *dev)
676{
677 u32 checkpt_sum0;
678 u32 checkpt_sum1;
679 int ok;
680
681 yaffs2_get_checkpt_sum(dev, &checkpt_sum0);
682
683 ok = (yaffs2_checkpt_rd(dev, &checkpt_sum1, sizeof(checkpt_sum1)) ==
684 sizeof(checkpt_sum1));
685
686 if (!ok)
687 return 0;
688
689 if (checkpt_sum0 != checkpt_sum1)
690 return 0;
691
692 return 1;
693}
694
695static int yaffs2_wr_checkpt_data(struct yaffs_dev *dev)
696{
697 int ok = 1;
698
699 if (!yaffs2_checkpt_required(dev)) {
700 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
701 "skipping checkpoint write");
702 ok = 0;
703 }
704
705 if (ok)
706 ok = yaffs2_checkpt_open(dev, 1);
707
708 if (ok) {
709 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
710 "write checkpoint validity");
711 ok = yaffs2_wr_checkpt_validity_marker(dev, 1);
712 }
713 if (ok) {
714 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
715 "write checkpoint device");
716 ok = yaffs2_wr_checkpt_dev(dev);
717 }
718 if (ok) {
719 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
720 "write checkpoint objects");
721 ok = yaffs2_wr_checkpt_objs(dev);
722 }
723 if (ok) {
724 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
725 "write checkpoint validity");
726 ok = yaffs2_wr_checkpt_validity_marker(dev, 0);
727 }
728
729 if (ok)
730 ok = yaffs2_wr_checkpt_sum(dev);
731
732 if (!yaffs_checkpt_close(dev))
733 ok = 0;
734
735 if (ok)
736 dev->is_checkpointed = 1;
737 else
738 dev->is_checkpointed = 0;
739
740 return dev->is_checkpointed;
741}
742
743static int yaffs2_rd_checkpt_data(struct yaffs_dev *dev)
744{
745 int ok = 1;
746
747 if (!dev->param.is_yaffs2)
748 ok = 0;
749
750 if (ok && dev->param.skip_checkpt_rd) {
751 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
752 "skipping checkpoint read");
753 ok = 0;
754 }
755
756 if (ok)
757 ok = yaffs2_checkpt_open(dev, 0); /* open for read */
758
759 if (ok) {
760 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
761 "read checkpoint validity");
762 ok = yaffs2_rd_checkpt_validity_marker(dev, 1);
763 }
764 if (ok) {
765 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
766 "read checkpoint device");
767 ok = yaffs2_rd_checkpt_dev(dev);
768 }
769 if (ok) {
770 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
771 "read checkpoint objects");
772 ok = yaffs2_rd_checkpt_objs(dev);
773 }
774 if (ok) {
775 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
776 "read checkpoint validity");
777 ok = yaffs2_rd_checkpt_validity_marker(dev, 0);
778 }
779
780 if (ok) {
781 ok = yaffs2_rd_checkpt_sum(dev);
782 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
783 "read checkpoint checksum %d", ok);
784 }
785
786 if (!yaffs_checkpt_close(dev))
787 ok = 0;
788
789 if (ok)
790 dev->is_checkpointed = 1;
791 else
792 dev->is_checkpointed = 0;
793
794 return ok ? 1 : 0;
795
796}
797
798void yaffs2_checkpt_invalidate(struct yaffs_dev *dev)
799{
800 if (dev->is_checkpointed || dev->blocks_in_checkpt > 0) {
801 dev->is_checkpointed = 0;
802 yaffs2_checkpt_invalidate_stream(dev);
803 }
804 if (dev->param.sb_dirty_fn)
805 dev->param.sb_dirty_fn(dev);
806}
807
808int yaffs_checkpoint_save(struct yaffs_dev *dev)
809{
810
811 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
812 "save entry: is_checkpointed %d",
813 dev->is_checkpointed);
814
815 yaffs_verify_objects(dev);
816 yaffs_verify_blocks(dev);
817 yaffs_verify_free_chunks(dev);
818
819 if (!dev->is_checkpointed) {
820 yaffs2_checkpt_invalidate(dev);
821 yaffs2_wr_checkpt_data(dev);
822 }
823
824 yaffs_trace(YAFFS_TRACE_CHECKPOINT | YAFFS_TRACE_MOUNT,
825 "save exit: is_checkpointed %d",
826 dev->is_checkpointed);
827
828 return dev->is_checkpointed;
829}
830
831int yaffs2_checkpt_restore(struct yaffs_dev *dev)
832{
833 int retval;
834 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
835 "restore entry: is_checkpointed %d",
836 dev->is_checkpointed);
837
838 retval = yaffs2_rd_checkpt_data(dev);
839
840 if (dev->is_checkpointed) {
841 yaffs_verify_objects(dev);
842 yaffs_verify_blocks(dev);
843 yaffs_verify_free_chunks(dev);
844 }
845
846 yaffs_trace(YAFFS_TRACE_CHECKPOINT,
847 "restore exit: is_checkpointed %d",
848 dev->is_checkpointed);
849
850 return retval;
851}
852
853int yaffs2_handle_hole(struct yaffs_obj *obj, loff_t new_size)
854{
855 /* If new_size > old_file_size then we are going to be writing a hole.
856 * If the hole is small then write zeros, otherwise write a
857 * start-of-hole marker.
858 */
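 /* For example, with YAFFS_SMALL_HOLE_THRESHOLD at 4 and hypothetical
 * 2048-byte chunks, an extension of less than 8 KiB is zero-filled
 * chunk by chunk; anything larger just gets the hole-start header.
 */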
859
860 loff_t old_file_size;
861 int increase;
862 int small_hole;
863 int result = YAFFS_OK;
864 struct yaffs_dev *dev = NULL;
865
866 u8 *local_buffer = NULL;
867
868 int small_increase_ok = 0;
869
870 if (!obj)
871 return YAFFS_FAIL;
872
873 if (obj->variant_type != YAFFS_OBJECT_TYPE_FILE)
874 return YAFFS_FAIL;
875
876 dev = obj->my_dev;
877
878 /* Bail out if not yaffs2 mode */
879 if (!dev->param.is_yaffs2)
880 return YAFFS_OK;
881
882 old_file_size = obj->variant.file_variant.file_size;
883
884 if (new_size <= old_file_size)
885 return YAFFS_OK;
886
887 increase = new_size - old_file_size;
888
889 if (increase < YAFFS_SMALL_HOLE_THRESHOLD * dev->data_bytes_per_chunk &&
890 yaffs_check_alloc_available(dev, YAFFS_SMALL_HOLE_THRESHOLD + 1))
891 small_hole = 1;
892 else
893 small_hole = 0;
894
895 if (small_hole)
896 local_buffer = yaffs_get_temp_buffer(dev, __LINE__);
897
898 if (local_buffer) {
899 /* fill hole with zero bytes */
900 int pos = old_file_size;
901 int this_write;
902 int written;
903 memset(local_buffer, 0, dev->data_bytes_per_chunk);
904 small_increase_ok = 1;
905
906 while (increase > 0 && small_increase_ok) {
907 this_write = increase;
908 if (this_write > dev->data_bytes_per_chunk)
909 this_write = dev->data_bytes_per_chunk;
910 written =
911 yaffs_do_file_wr(obj, local_buffer, pos, this_write,
912 0);
913 if (written == this_write) {
914 pos += this_write;
915 increase -= this_write;
916 } else {
917 small_increase_ok = 0;
918 }
919 }
920
921 yaffs_release_temp_buffer(dev, local_buffer, __LINE__);
922
923 /* If we were out of space then reverse any chunks we've added */
924 if (!small_increase_ok)
925 yaffs_resize_file_down(obj, old_file_size);
926 }
927
928 if (!small_increase_ok &&
929 obj->parent &&
930 obj->parent->obj_id != YAFFS_OBJECTID_UNLINKED &&
931 obj->parent->obj_id != YAFFS_OBJECTID_DELETED) {
932 /* Write a hole start header with the old file size */
933 yaffs_update_oh(obj, NULL, 0, 1, 0, NULL);
934 }
935
936 return result;
937
938}
939
940struct yaffs_block_index {
941 int seq;
942 int block;
943};
944
945static int yaffs2_ybicmp(const void *a, const void *b)
946{
947 int aseq = ((struct yaffs_block_index *)a)->seq;
948 int bseq = ((struct yaffs_block_index *)b)->seq;
949 int ablock = ((struct yaffs_block_index *)a)->block;
950 int bblock = ((struct yaffs_block_index *)b)->block;
951 if (aseq == bseq)
952 return ablock - bblock;
953 else
954 return aseq - bseq;
955}
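
/* sort() with this comparator leaves block_index ordered oldest
 * sequence first; yaffs2_scan_backwards() then walks the array from
 * the end so the newest copy of any chunk is seen first and older
 * duplicates can simply be discarded.
 */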
956
957int yaffs2_scan_backwards(struct yaffs_dev *dev)
958{
959 struct yaffs_ext_tags tags;
960 int blk;
961 int block_iter;
962 int start_iter;
963 int end_iter;
964 int n_to_scan = 0;
965
966 int chunk;
967 int result;
968 int c;
969 int deleted;
970 enum yaffs_block_state state;
971 struct yaffs_obj *hard_list = NULL;
972 struct yaffs_block_info *bi;
973 u32 seq_number;
974 struct yaffs_obj_hdr *oh;
975 struct yaffs_obj *in;
976 struct yaffs_obj *parent;
977 int n_blocks = dev->internal_end_block - dev->internal_start_block + 1;
978 int is_unlinked;
979 u8 *chunk_data;
980
981 int file_size;
982 int is_shrink;
983 int found_chunks;
984 int equiv_id;
985 int alloc_failed = 0;
986
987 struct yaffs_block_index *block_index = NULL;
988 int alt_block_index = 0;
989
990 yaffs_trace(YAFFS_TRACE_SCAN,
991 "yaffs2_scan_backwards starts intstartblk %d intendblk %d...",
992 dev->internal_start_block, dev->internal_end_block);
993
994 dev->seq_number = YAFFS_LOWEST_SEQUENCE_NUMBER;
995
996 block_index = kmalloc(n_blocks * sizeof(struct yaffs_block_index),
997 GFP_NOFS);
998
999 if (!block_index) {
1000 block_index =
1001 vmalloc(n_blocks * sizeof(struct yaffs_block_index));
1002 alt_block_index = 1;
1003 }
1004
1005 if (!block_index) {
1006 yaffs_trace(YAFFS_TRACE_SCAN,
1007 "yaffs2_scan_backwards() could not allocate block index!"
1008 );
1009 return YAFFS_FAIL;
1010 }
1011
1012 dev->blocks_in_checkpt = 0;
1013
1014 chunk_data = yaffs_get_temp_buffer(dev, __LINE__);
1015
1016 /* Scan all the blocks to determine their state */
1017 bi = dev->block_info;
1018 for (blk = dev->internal_start_block; blk <= dev->internal_end_block;
1019 blk++) {
1020 yaffs_clear_chunk_bits(dev, blk);
1021 bi->pages_in_use = 0;
1022 bi->soft_del_pages = 0;
1023
1024 yaffs_query_init_block_state(dev, blk, &state, &seq_number);
1025
1026 bi->block_state = state;
1027 bi->seq_number = seq_number;
1028
1029 if (bi->seq_number == YAFFS_SEQUENCE_CHECKPOINT_DATA)
1030 bi->block_state = state = YAFFS_BLOCK_STATE_CHECKPOINT;
1031 if (bi->seq_number == YAFFS_SEQUENCE_BAD_BLOCK)
1032 bi->block_state = state = YAFFS_BLOCK_STATE_DEAD;
1033
1034 yaffs_trace(YAFFS_TRACE_SCAN_DEBUG,
1035 "Block scanning block %d state %d seq %d",
1036 blk, state, seq_number);
1037
1038 if (state == YAFFS_BLOCK_STATE_CHECKPOINT) {
1039 dev->blocks_in_checkpt++;
1040
1041 } else if (state == YAFFS_BLOCK_STATE_DEAD) {
1042 yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
1043 "block %d is bad", blk);
1044 } else if (state == YAFFS_BLOCK_STATE_EMPTY) {
1045 yaffs_trace(YAFFS_TRACE_SCAN_DEBUG, "Block empty ");
1046 dev->n_erased_blocks++;
1047 dev->n_free_chunks += dev->param.chunks_per_block;
1048 } else if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
1049
1050 /* Determine the highest sequence number */
1051 if (seq_number >= YAFFS_LOWEST_SEQUENCE_NUMBER &&
1052 seq_number < YAFFS_HIGHEST_SEQUENCE_NUMBER) {
1053
1054 block_index[n_to_scan].seq = seq_number;
1055 block_index[n_to_scan].block = blk;
1056
1057 n_to_scan++;
1058
1059 if (seq_number >= dev->seq_number)
1060 dev->seq_number = seq_number;
1061 } else {
1062 /* TODO: Nasty sequence number! */
1063 yaffs_trace(YAFFS_TRACE_SCAN,
1064 "Block scanning block %d has bad sequence number %d",
1065 blk, seq_number);
1066
1067 }
1068 }
1069 bi++;
1070 }
1071
1072 yaffs_trace(YAFFS_TRACE_SCAN, "%d blocks to be sorted...", n_to_scan);
1073
1074 cond_resched();
1075
1076 /* Sort the blocks by sequence number */
1077 sort(block_index, n_to_scan, sizeof(struct yaffs_block_index),
1078 yaffs2_ybicmp, NULL);
1079
1080 cond_resched();
1081
1082 yaffs_trace(YAFFS_TRACE_SCAN, "...done");
1083
1084 /* Now scan the blocks looking at the data. */
1085 start_iter = 0;
1086 end_iter = n_to_scan - 1;
1087 yaffs_trace(YAFFS_TRACE_SCAN_DEBUG, "%d blocks to scan", n_to_scan);
1088
1089 /* For each block.... backwards */
1090 for (block_iter = end_iter; !alloc_failed && block_iter >= start_iter;
1091 block_iter--) {
1092 /* Cooperative multitasking! This loop can run for so
1093 long that watchdog timers expire. */
1094 cond_resched();
1095
1096 /* get the block to scan in the correct order */
1097 blk = block_index[block_iter].block;
1098
1099 bi = yaffs_get_block_info(dev, blk);
1100
1101 state = bi->block_state;
1102
1103 deleted = 0;
1104
1105 /* For each chunk in each block that needs scanning.... */
1106 found_chunks = 0;
1107 for (c = dev->param.chunks_per_block - 1;
1108 !alloc_failed && c >= 0 &&
1109 (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING ||
1110 state == YAFFS_BLOCK_STATE_ALLOCATING); c--) {
1111 /* Scan backwards...
1112 * Read the tags and decide what to do
1113 */
1114
1115 chunk = blk * dev->param.chunks_per_block + c;
1116
1117 result = yaffs_rd_chunk_tags_nand(dev, chunk, NULL,
1118 &tags);
1119
1120 /* Let's have a good look at this chunk... */
1121
1122 if (!tags.chunk_used) {
1123 /* An unassigned chunk in the block.
1124 * If there are used chunks after this one, then
1125 * it is a chunk that was skipped due to failing the erased
1126 * check. Just skip it so that it can be deleted.
1127 * But, more typically, we get here when this is an unallocated
1128 * chunk and this means that either the block is empty or
1129 * this is the one being allocated from.
1130 */
1131
1132 if (found_chunks) {
1133 /* This is a chunk that was skipped due to failing the erased check */
1134 } else if (c == 0) {
1135 /* We're looking at the first chunk in the block so the block is unused */
1136 state = YAFFS_BLOCK_STATE_EMPTY;
1137 dev->n_erased_blocks++;
1138 } else {
1139 if (state ==
1140 YAFFS_BLOCK_STATE_NEEDS_SCANNING
1141 || state ==
1142 YAFFS_BLOCK_STATE_ALLOCATING) {
1143 if (dev->seq_number ==
1144 bi->seq_number) {
1145 /* this is the block being allocated from */
1146
1147 yaffs_trace(YAFFS_TRACE_SCAN,
1148 " Allocating from %d %d",
1149 blk, c);
1150
1151 state =
1152 YAFFS_BLOCK_STATE_ALLOCATING;
1153 dev->alloc_block = blk;
1154 dev->alloc_page = c;
1155 dev->alloc_block_finder = blk;
1158 } else {
1159 /* This is a partially written block that is not
1160 * the current allocation block.
1161 */
1162
1163 yaffs_trace(YAFFS_TRACE_SCAN,
1164 "Partially written block %d detected",
1165 blk);
1166 }
1167 }
1168 }
1169
1170 dev->n_free_chunks++;
1171
1172 } else if (tags.ecc_result == YAFFS_ECC_RESULT_UNFIXED) {
1173 yaffs_trace(YAFFS_TRACE_SCAN,
1174 " Unfixed ECC in chunk(%d:%d), chunk ignored",
1175 blk, c);
1176
1177 dev->n_free_chunks++;
1178
1179 } else if (tags.obj_id > YAFFS_MAX_OBJECT_ID ||
1180 tags.chunk_id > YAFFS_MAX_CHUNK_ID ||
1181 (tags.chunk_id > 0
1182 && tags.n_bytes > dev->data_bytes_per_chunk)
1183 || tags.seq_number != bi->seq_number) {
1184 yaffs_trace(YAFFS_TRACE_SCAN,
1185 "Chunk (%d:%d) with bad tags:obj = %d, chunk_id = %d, n_bytes = %d, ignored",
1186 blk, c, tags.obj_id,
1187 tags.chunk_id, tags.n_bytes);
1188
1189 dev->n_free_chunks++;
1190
1191 } else if (tags.chunk_id > 0) {
1192 /* chunk_id > 0 so it is a data chunk... */
1193 unsigned int endpos;
1194 u32 chunk_base =
1195 (tags.chunk_id - 1) * dev->data_bytes_per_chunk;
1197
1198 found_chunks = 1;
1199
1200 yaffs_set_chunk_bit(dev, blk, c);
1201 bi->pages_in_use++;
1202
1203 in = yaffs_find_or_create_by_number(dev,
1204 tags.obj_id,
1205 YAFFS_OBJECT_TYPE_FILE);
1206 if (!in) {
1207 /* Out of memory */
1208 alloc_failed = 1;
1209 }
1210
1211 if (in &&
1212 in->variant_type == YAFFS_OBJECT_TYPE_FILE
1213 && chunk_base <
1214 in->variant.file_variant.shrink_size) {
1215 /* This has not been invalidated by a resize */
1216 if (!yaffs_put_chunk_in_file
1217 (in, tags.chunk_id, chunk, -1)) {
1218 alloc_failed = 1;
1219 }
1220
1221 /* File size is calculated by looking at the data chunks if we have not
1222 * seen an object header yet. Stop this practice once we find an object header.
1223 */
1224 endpos = chunk_base + tags.n_bytes;
1225
1226 if (!in->valid && /* have not got an object header yet */
1227 in->variant.file_variant.
1228 scanned_size < endpos) {
1229 in->variant.file_variant.
1230 scanned_size = endpos;
1231 in->variant.file_variant.
1232 file_size = endpos;
1233 }
1234
1235 } else if (in) {
1236 /* This chunk has been invalidated by a resize, or a past file deletion
1237 * so delete the chunk. */
1238 yaffs_chunk_del(dev, chunk, 1,
1239 __LINE__);
1240
1241 }
1242 } else {
1243 /* chunk_id == 0, so it is an ObjectHeader.
1244 * Thus, we read in the object header and make the object
1245 */
1246 found_chunks = 1;
1247
1248 yaffs_set_chunk_bit(dev, blk, c);
1249 bi->pages_in_use++;
1250
1251 oh = NULL;
1252 in = NULL;
1253
1254 if (tags.extra_available) {
1255 in = yaffs_find_or_create_by_number(dev,
1256 tags.obj_id,
1257 tags.extra_obj_type);
1260 if (!in)
1261 alloc_failed = 1;
1262 }
1263
1264 if (!in ||
1265 (!in->valid && dev->param.disable_lazy_load) ||
1266 tags.extra_shadows ||
1267 (!in->valid &&
1268 (tags.obj_id == YAFFS_OBJECTID_ROOT ||
1269 tags.obj_id == YAFFS_OBJECTID_LOSTNFOUND))) {
1273
1274 /* If we don't have valid info then we need to read the chunk
1275 * TODO In future we can probably defer reading the chunk and
1276 * living with invalid data until needed.
1277 */
1278
1279 result = yaffs_rd_chunk_tags_nand(dev,
1280 chunk,
1281 chunk_data,
1282 NULL);
1283
1284 oh = (struct yaffs_obj_hdr *)chunk_data;
1285
1286 if (dev->param.inband_tags) {
1287 /* Fix up the header if it got corrupted by inband tags */
1288 oh->shadows_obj =
1289 oh->inband_shadowed_obj_id;
1290 oh->is_shrink =
1291 oh->inband_is_shrink;
1292 }
1293
1294 if (!in) {
1295 in = yaffs_find_or_create_by_number(dev, tags.obj_id, oh->type);
1296 if (!in)
1297 alloc_failed = 1;
1298 }
1299
1300 }
1301
1302 if (!in) {
1303 /* TODO Hoosterman we have a problem! */
1304 yaffs_trace(YAFFS_TRACE_ERROR,
1305 "yaffs tragedy: Could not make object for object %d at chunk %d during scan",
1306 tags.obj_id, chunk);
1307 continue;
1308 }
1309
1310 if (in->valid) {
1311 /* We have already filled this one.
1312 * We have a duplicate that will be discarded, but
1313 * we first have to suck out resize info if it is a file.
1314 */
1315
1316 if (in->variant_type == YAFFS_OBJECT_TYPE_FILE &&
1317 ((oh && oh->type == YAFFS_OBJECT_TYPE_FILE) ||
1318 (tags.extra_available &&
1319 tags.extra_obj_type == YAFFS_OBJECT_TYPE_FILE))) {
1320 u32 this_size =
1321 oh ? oh->file_size : tags.extra_length;
1322 u32 parent_obj_id =
1323 oh ? oh->parent_obj_id : tags.extra_parent_id;
1324
1325 is_shrink =
1326 oh ? oh->is_shrink : tags.extra_is_shrink;
1344
1345 /* If it is deleted (unlinked at start also means deleted)
1346 * we treat the file size as being zeroed at this point.
1347 */
1348 if (parent_obj_id ==
1349 YAFFS_OBJECTID_DELETED
1350 || parent_obj_id ==
1351 YAFFS_OBJECTID_UNLINKED) {
1352 this_size = 0;
1353 is_shrink = 1;
1354 }
1355
1356 if (is_shrink &&
1357 in->variant.file_variant.shrink_size > this_size)
1358 in->variant.file_variant.shrink_size =
1359 this_size;
1363
1364 if (is_shrink)
1365 bi->has_shrink_hdr = 1;
1366
1367 }
1368 /* Use existing - destroy this one. */
1369 yaffs_chunk_del(dev, chunk, 1, __LINE__);
1371
1372 }
1373
1374 if (!in->valid &&
1375     in->variant_type != (oh ? oh->type : tags.extra_obj_type))
1376 	yaffs_trace(YAFFS_TRACE_ERROR,
1377 		"yaffs tragedy: Bad object type, %d != %d, for object %d at chunk %d during scan",
1378 		oh ? oh->type : tags.extra_obj_type,
1379 		in->variant_type, tags.obj_id, chunk);
1382
1383 if (!in->valid &&
1384     (tags.obj_id == YAFFS_OBJECTID_ROOT ||
1385      tags.obj_id == YAFFS_OBJECTID_LOSTNFOUND)) {
1387 /* We only load some info, don't fiddle with directory structure */
1388 in->valid = 1;
1389
1390 if (oh) {
1392 in->yst_mode = oh->yst_mode;
1393 yaffs_load_attribs(in, oh);
1394 in->lazy_loaded = 0;
1395 } else {
1396 in->lazy_loaded = 1;
1397 }
1398 in->hdr_chunk = chunk;
1399
1400 } else if (!in->valid) {
1401 /* we need to load this info */
1402
1403 in->valid = 1;
1404 in->hdr_chunk = chunk;
1405
1406 if (oh) {
1407 in->variant_type = oh->type;
1408
1409 in->yst_mode = oh->yst_mode;
1410 yaffs_load_attribs(in, oh);
1411
1412 if (oh->shadows_obj > 0)
1413 	yaffs_handle_shadowed_obj(dev,
1414 		oh->shadows_obj, 1);
1415
1416 yaffs_set_obj_name_from_oh(in, oh);
1417 parent = yaffs_find_or_create_by_number(dev,
1418 		oh->parent_obj_id,
1419 		YAFFS_OBJECT_TYPE_DIRECTORY);
1424
1425 file_size = oh->file_size;
1426 is_shrink = oh->is_shrink;
1427 equiv_id = oh->equiv_id;
1428
1429 } else {
1430 	in->variant_type = tags.extra_obj_type;
1431 	parent = yaffs_find_or_create_by_number(dev,
1432 			tags.extra_parent_id,
1433 			YAFFS_OBJECT_TYPE_DIRECTORY);
1434 	file_size = tags.extra_length;
1435 	is_shrink = tags.extra_is_shrink;
1436 	equiv_id = tags.extra_equiv_id;
1437 	in->lazy_loaded = 1;
1438 }
1443 in->dirty = 0;
1444
1445 if (!parent)
1446 alloc_failed = 1;
1447
1448 /* directory stuff...
1449 * hook up to parent
1450 */
1451
1452 if (parent && parent->variant_type ==
1453     YAFFS_OBJECT_TYPE_UNKNOWN) {
1454 	/* Set up as a directory */
1455 	parent->variant_type = YAFFS_OBJECT_TYPE_DIRECTORY;
1456 	INIT_LIST_HEAD(&parent->variant.dir_variant.children);
1459 } else if (!parent ||
1460 	   parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
1461 	/* Hoosterman, another problem....
1462 	 * We're trying to use a non-directory as a directory.
1463 	 */
1464 	yaffs_trace(YAFFS_TRACE_ERROR,
1465 		"yaffs tragedy: attempting to use non-directory as a directory in scan. Put in lost+found."
1466 		);
1467 	parent = dev->lost_n_found;
1468 }
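/* e.g. a corrupted header that names a data file as its parent would
 * land here: the child is reparented into lost+found rather than
 * corrupting the directory tree.
 */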
1471
1472 yaffs_add_obj_to_dir(parent, in);
1473
1474 is_unlinked = (parent == dev->del_dir)
1475 || (parent == dev->unlinked_dir);
1476
1477 if (is_shrink) {
1478 /* Mark the block as having a shrink header */
1479 bi->has_shrink_hdr = 1;
1480 }
1481
1482 /* Note re hardlinks:
1483  * since we might scan a hardlink before its equivalent object is
1484  * scanned, we put them all in a list.
1485  * After scanning is complete we should have all the objects, so
1486  * we run through this list and fix up all the chains.
1487  */
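/* Sketch of the resulting chain (illustrative): after scanning
 * hardlinks A, B and C in that order, hard_list -> C -> B -> A,
 * threaded through in->hard_links.next; yaffs_link_fixup() below
 * walks this chain and attaches each link to the object named by
 * its equiv_id.
 */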
1488
1489 switch (in->variant_type) {
1490 case YAFFS_OBJECT_TYPE_UNKNOWN:
1491 /* TODO: got a problem */
1492 break;
1493 case YAFFS_OBJECT_TYPE_FILE:
1494
1495 	if (in->variant.file_variant.scanned_size < file_size) {
1496 		/* This covers the case where the file size is
1497 		 * greater than the extent of its data: it happens
1498 		 * if the file was resized to be larger than its
1499 		 * current data extents.
1500 		 */
1501 		in->variant.file_variant.file_size = file_size;
1502 		in->variant.file_variant.scanned_size = file_size;
1503 	}
1504
1505 	if (in->variant.file_variant.shrink_size > file_size)
1506 		in->variant.file_variant.shrink_size = file_size;
1507
1508 	break;
1521 case YAFFS_OBJECT_TYPE_HARDLINK:
1522 	if (!is_unlinked) {
1523 		in->variant.hardlink_variant.equiv_id = equiv_id;
1524 		in->hard_links.next =
1525 			(struct list_head *)hard_list;
1526 		hard_list = in;
1527 	}
1528 	break;
1532 case YAFFS_OBJECT_TYPE_DIRECTORY:
1533 /* Do nothing */
1534 break;
1535 case YAFFS_OBJECT_TYPE_SPECIAL:
1536 /* Do nothing */
1537 break;
1538 case YAFFS_OBJECT_TYPE_SYMLINK:
1539 	if (oh) {
1540 		in->variant.symlink_variant.alias =
1541 			yaffs_clone_str(oh->alias);
1542 		if (!in->variant.symlink_variant.alias)
1543 			alloc_failed = 1;
1544 	}
1545 	break;
1552 }
1553
1554 }
1555
1556 }
1557
1558 } /* End of scanning for each chunk */
1559
1560 if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
1561 /* If we got this far while scanning, then the block is fully allocated. */
1562 state = YAFFS_BLOCK_STATE_FULL;
1563 }
1564
1565 bi->block_state = state;
1566
1567 /* Now let's see if it was dirty */
1568 if (bi->pages_in_use == 0 &&
1569 !bi->has_shrink_hdr &&
1570 bi->block_state == YAFFS_BLOCK_STATE_FULL) {
1571 yaffs_block_became_dirty(dev, blk);
1572 }
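	/* A fully-scanned block with no pages in use and no shrink
	 * header holds only superseded data, so it is handed back
	 * for erasure straight away.
	 */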
1573
1574 }
1575
1576 yaffs_skip_rest_of_block(dev);
1577
1578 if (alt_block_index)
1579 vfree(block_index);
1580 else
1581 kfree(block_index);
1582
1583 /* OK, we've done all the scanning.
1584  * Fix up the hard link chains: now that all the objects have been
1585  * scanned, run through the hardlink list and wire each link to its
1586  * equivalent object.
1587  */
1588 yaffs_link_fixup(dev, hard_list);
1589
1590 yaffs_release_temp_buffer(dev, chunk_data, __LINE__);
1591
1592 if (alloc_failed)
1593 return YAFFS_FAIL;
1594
1595 yaffs_trace(YAFFS_TRACE_SCAN, "yaffs2_scan_backwards ends");
1596
1597 return YAFFS_OK;
1598}
diff --git a/fs/yaffs2/yaffs_yaffs2.h b/fs/yaffs2/yaffs_yaffs2.h
new file mode 100644
index 00000000000..e1a9287fc50
--- /dev/null
+++ b/fs/yaffs2/yaffs_yaffs2.h
@@ -0,0 +1,39 @@
1/*
2 * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YAFFS_YAFFS2_H__
17#define __YAFFS_YAFFS2_H__
18
19#include "yaffs_guts.h"
20
21void yaffs_calc_oldest_dirty_seq(struct yaffs_dev *dev);
22void yaffs2_find_oldest_dirty_seq(struct yaffs_dev *dev);
23void yaffs2_clear_oldest_dirty_seq(struct yaffs_dev *dev,
24 struct yaffs_block_info *bi);
25void yaffs2_update_oldest_dirty_seq(struct yaffs_dev *dev, unsigned block_no,
26 struct yaffs_block_info *bi);
27int yaffs_block_ok_for_gc(struct yaffs_dev *dev, struct yaffs_block_info *bi);
28u32 yaffs2_find_refresh_block(struct yaffs_dev *dev);
29int yaffs2_checkpt_required(struct yaffs_dev *dev);
30int yaffs_calc_checkpt_blocks_required(struct yaffs_dev *dev);
31
32void yaffs2_checkpt_invalidate(struct yaffs_dev *dev);
33int yaffs2_checkpt_save(struct yaffs_dev *dev);
34int yaffs2_checkpt_restore(struct yaffs_dev *dev);
35
36int yaffs2_handle_hole(struct yaffs_obj *obj, loff_t new_size);
37int yaffs2_scan_backwards(struct yaffs_dev *dev);
38
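/* Typical mount-time ordering (a sketch; the calling glue is assumed,
 * not part of this header): try the fast checkpoint restore first and
 * fall back to a full backwards media scan, e.g.
 *
 *	if (!yaffs2_checkpt_restore(dev)) {
 *		yaffs2_checkpt_invalidate(dev);
 *		yaffs2_scan_backwards(dev);
 *	}
 */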
39#endif
diff --git a/fs/yaffs2/yportenv.h b/fs/yaffs2/yportenv.h
new file mode 100644
index 00000000000..8183425448c
--- /dev/null
+++ b/fs/yaffs2/yportenv.h
@@ -0,0 +1,70 @@
1/*
2 * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
3 *
4 * Copyright (C) 2002-2010 Aleph One Ltd.
5 * for Toby Churchill Ltd and Brightstar Engineering
6 *
7 * Created by Charles Manning <charles@aleph1.co.uk>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License version 2.1 as
11 * published by the Free Software Foundation.
12 *
13 * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
14 */
15
16#ifndef __YPORTENV_LINUX_H__
17#define __YPORTENV_LINUX_H__
18
19#include <linux/version.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <linux/sched.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25#include <linux/vmalloc.h>
26#include <linux/xattr.h>
27#include <linux/list.h>
28#include <linux/types.h>
29#include <linux/fs.h>
30#include <linux/stat.h>
31#include <linux/sort.h>
32#include <linux/bitops.h>
33
34#define YCHAR char
35#define YUCHAR unsigned char
36#define _Y(x) x
37
38#define YAFFS_LOSTNFOUND_NAME "lost+found"
39#define YAFFS_LOSTNFOUND_PREFIX "obj"
40
41
42#define YAFFS_ROOT_MODE 0755
43#define YAFFS_LOSTNFOUND_MODE 0700
44
45#define Y_CURRENT_TIME CURRENT_TIME.tv_sec
46#define Y_TIME_CONVERT(x) (x).tv_sec
47
48#define compile_time_assertion(assertion) \
49 ({ int x = __builtin_choose_expr(assertion, 0, (void)0); (void) x; })
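/* Usage sketch (hypothetical values): the build breaks if the
 * expression is false, because the (void)0 branch cannot initialise
 * an int, e.g.
 *	compile_time_assertion(sizeof(struct yaffs_obj_hdr) <= 512);
 */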
50
51
52#ifndef Y_DUMP_STACK
53#define Y_DUMP_STACK() dump_stack()
54#endif
55
56#define yaffs_trace(msk, fmt, ...) do { \
57	if (yaffs_trace_mask & (msk)) \
58		printk(KERN_DEBUG "yaffs: " fmt "\n", ##__VA_ARGS__); \
59} while (0)
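/* Usage sketch: expands to a runtime mask test; the message reaches
 * the kernel log only when the matching bit is set in
 * yaffs_trace_mask, e.g.
 *	yaffs_trace(YAFFS_TRACE_SCAN, "scanning block %d", blk);
 */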
60
61#ifndef YBUG
62#define YBUG() do {\
63 yaffs_trace(YAFFS_TRACE_BUG,\
64 "bug " __FILE__ " %d",\
65 __LINE__);\
66 Y_DUMP_STACK();\
67} while (0)
68#endif
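/* Usage sketch: YBUG() logs the file and line plus a stack dump but
 * does not panic; it is a diagnostic aid rather than a hard assert,
 * e.g.
 *	if (!bi)
 *		YBUG();
 */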
69
70#endif