aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/configfs/dir.c32
-rw-r--r--fs/ext2/balloc.c1
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/super.c41
-rw-r--r--fs/ext3/inode.c13
-rw-r--r--fs/ext3/super.c42
-rw-r--r--fs/jffs2/jffs2_fs_i.h4
-rw-r--r--fs/jffs2/nodelist.c6
-rw-r--r--fs/jffs2/nodelist.h1
-rw-r--r--fs/jffs2/summary.c5
-rw-r--r--fs/jffs2/xattr.c1
-rw-r--r--fs/nfs/direct.c50
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/read.c30
-rw-r--r--fs/nfs/write.c41
-rw-r--r--fs/nfsd/nfs4recover.c21
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/alloc.c28
-rw-r--r--fs/ocfs2/aops.c83
-rw-r--r--fs/ocfs2/buffer_head_io.c95
-rw-r--r--fs/ocfs2/buffer_head_io.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/dir.c28
-rw-r--r--fs/ocfs2/dlm/dlmast.c10
-rw-r--r--fs/ocfs2/dlmglue.c9
-rw-r--r--fs/ocfs2/dlmglue.h5
-rw-r--r--fs/ocfs2/file.c3
-rw-r--r--fs/ocfs2/inode.c32
-rw-r--r--fs/ocfs2/inode.h3
-rw-r--r--fs/ocfs2/ioctl.c136
-rw-r--r--fs/ocfs2/ioctl.h16
-rw-r--r--fs/ocfs2/namei.c32
-rw-r--r--fs/ocfs2/ocfs2_fs.h24
-rw-r--r--fs/ocfs2/uptodate.c21
-rw-r--r--fs/ocfs2/uptodate.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c27
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_fsops.c16
-rw-r--r--fs/xfs/xfs_mount.c32
-rw-r--r--fs/xfs/xfs_vfsops.c3
43 files changed, 677 insertions, 278 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 3f00a9faabcb..530581628311 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -325,8 +325,8 @@ config FS_POSIX_ACL
325source "fs/xfs/Kconfig" 325source "fs/xfs/Kconfig"
326 326
327config OCFS2_FS 327config OCFS2_FS
328 tristate "OCFS2 file system support (EXPERIMENTAL)" 328 tristate "OCFS2 file system support"
329 depends on NET && SYSFS && EXPERIMENTAL 329 depends on NET && SYSFS
330 select CONFIGFS_FS 330 select CONFIGFS_FS
331 select JBD 331 select JBD
332 select CRC32 332 select CRC32
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df025453dd97..816e8ef64560 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
86 return sd; 86 return sd;
87} 87}
88 88
89/*
90 *
91 * Return -EEXIST if there is already a configfs element with the same
92 * name for the same parent.
93 *
94 * called with parent inode's i_mutex held
95 */
96int configfs_dirent_exists(struct configfs_dirent *parent_sd,
97 const unsigned char *new)
98{
99 struct configfs_dirent * sd;
100
101 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
102 if (sd->s_element) {
103 const unsigned char *existing = configfs_get_name(sd);
104 if (strcmp(existing, new))
105 continue;
106 else
107 return -EEXIST;
108 }
109 }
110
111 return 0;
112}
113
114
89int configfs_make_dirent(struct configfs_dirent * parent_sd, 115int configfs_make_dirent(struct configfs_dirent * parent_sd,
90 struct dentry * dentry, void * element, 116 struct dentry * dentry, void * element,
91 umode_t mode, int type) 117 umode_t mode, int type)
@@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p,
136 int error; 162 int error;
137 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 163 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
138 164
139 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 165 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
140 CONFIGFS_DIR); 166 if (!error)
167 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
168 CONFIGFS_DIR);
141 if (!error) { 169 if (!error) {
142 error = configfs_create(d, mode, init_dir); 170 error = configfs_create(d, mode, init_dir);
143 if (!error) { 171 if (!error) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index d4870432ecfc..b1981d0e95ad 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -539,7 +539,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
539 539
540#endif /* EXT2FS_DEBUG */ 540#endif /* EXT2FS_DEBUG */
541 541
542/* Superblock must be locked */
543unsigned long ext2_count_free_blocks (struct super_block * sb) 542unsigned long ext2_count_free_blocks (struct super_block * sb)
544{ 543{
545 struct ext2_group_desc * desc; 544 struct ext2_group_desc * desc;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index de85c61c58c5..695f69ccf908 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -637,7 +637,6 @@ fail:
637 return ERR_PTR(err); 637 return ERR_PTR(err);
638} 638}
639 639
640/* Superblock must be locked */
641unsigned long ext2_count_free_inodes (struct super_block * sb) 640unsigned long ext2_count_free_inodes (struct super_block * sb)
642{ 641{
643 struct ext2_group_desc *desc; 642 struct ext2_group_desc *desc;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 681dea8f9532..4286ff6330b6 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -251,6 +251,44 @@ static struct super_operations ext2_sops = {
251#endif 251#endif
252}; 252};
253 253
254static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp)
255{
256 __u32 *objp = vobjp;
257 unsigned long ino = objp[0];
258 __u32 generation = objp[1];
259 struct inode *inode;
260 struct dentry *result;
261
262 if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO)
263 return ERR_PTR(-ESTALE);
264 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
265 return ERR_PTR(-ESTALE);
266
267 /* iget isn't really right if the inode is currently unallocated!!
268 * ext2_read_inode currently does appropriate checks, but
269 * it might be "neater" to call ext2_get_inode first and check
270 * if the inode is valid.....
271 */
272 inode = iget(sb, ino);
273 if (inode == NULL)
274 return ERR_PTR(-ENOMEM);
275 if (is_bad_inode(inode) ||
276 (generation && inode->i_generation != generation)) {
277 /* we didn't find the right inode.. */
278 iput(inode);
279 return ERR_PTR(-ESTALE);
280 }
281 /* now to find a dentry.
282 * If possible, get a well-connected one
283 */
284 result = d_alloc_anon(inode);
285 if (!result) {
286 iput(inode);
287 return ERR_PTR(-ENOMEM);
288 }
289 return result;
290}
291
254/* Yes, most of these are left as NULL!! 292/* Yes, most of these are left as NULL!!
255 * A NULL value implies the default, which works with ext2-like file 293 * A NULL value implies the default, which works with ext2-like file
256 * systems, but can be improved upon. 294 * systems, but can be improved upon.
@@ -258,6 +296,7 @@ static struct super_operations ext2_sops = {
258 */ 296 */
259static struct export_operations ext2_export_ops = { 297static struct export_operations ext2_export_ops = {
260 .get_parent = ext2_get_parent, 298 .get_parent = ext2_get_parent,
299 .get_dentry = ext2_get_dentry,
261}; 300};
262 301
263static unsigned long get_sb_block(void **data) 302static unsigned long get_sb_block(void **data)
@@ -1044,7 +1083,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1044 unsigned long overhead; 1083 unsigned long overhead;
1045 int i; 1084 int i;
1046 1085
1047 lock_super(sb);
1048 if (test_opt (sb, MINIX_DF)) 1086 if (test_opt (sb, MINIX_DF))
1049 overhead = 0; 1087 overhead = 0;
1050 else { 1088 else {
@@ -1085,7 +1123,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1085 buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); 1123 buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
1086 buf->f_ffree = ext2_count_free_inodes (sb); 1124 buf->f_ffree = ext2_count_free_inodes (sb);
1087 buf->f_namelen = EXT2_NAME_LEN; 1125 buf->f_namelen = EXT2_NAME_LEN;
1088 unlock_super(sb);
1089 return 0; 1126 return 0;
1090} 1127}
1091 1128
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c5ee9f0691e3..84be02e93652 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -925,7 +925,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
925 set_buffer_new(bh_result); 925 set_buffer_new(bh_result);
926got_it: 926got_it:
927 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 927 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
928 if (blocks_to_boundary == 0) 928 if (count > blocks_to_boundary)
929 set_buffer_boundary(bh_result); 929 set_buffer_boundary(bh_result);
930 err = count; 930 err = count;
931 /* Clean up and exit */ 931 /* Clean up and exit */
@@ -1009,11 +1009,14 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
1009 buffer_trace_init(&dummy.b_history); 1009 buffer_trace_init(&dummy.b_history);
1010 err = ext3_get_blocks_handle(handle, inode, block, 1, 1010 err = ext3_get_blocks_handle(handle, inode, block, 1,
1011 &dummy, create, 1); 1011 &dummy, create, 1);
1012 if (err == 1) { 1012 /*
1013 * ext3_get_blocks_handle() returns number of blocks
1014 * mapped. 0 in case of a HOLE.
1015 */
1016 if (err > 0) {
1017 if (err > 1)
1018 WARN_ON(1);
1013 err = 0; 1019 err = 0;
1014 } else if (err >= 0) {
1015 WARN_ON(1);
1016 err = -EIO;
1017 } 1020 }
1018 *errp = err; 1021 *errp = err;
1019 if (!err && buffer_mapped(&dummy)) { 1022 if (!err && buffer_mapped(&dummy)) {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 813d589cc6c0..3559086eee5f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -554,6 +554,47 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
554 return 0; 554 return 0;
555} 555}
556 556
557
558static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
559{
560 __u32 *objp = vobjp;
561 unsigned long ino = objp[0];
562 __u32 generation = objp[1];
563 struct inode *inode;
564 struct dentry *result;
565
566 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
567 return ERR_PTR(-ESTALE);
568 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
569 return ERR_PTR(-ESTALE);
570
571 /* iget isn't really right if the inode is currently unallocated!!
572 *
573 * ext3_read_inode will return a bad_inode if the inode had been
574 * deleted, so we should be safe.
575 *
576 * Currently we don't know the generation for parent directory, so
577 * a generation of 0 means "accept any"
578 */
579 inode = iget(sb, ino);
580 if (inode == NULL)
581 return ERR_PTR(-ENOMEM);
582 if (is_bad_inode(inode) ||
583 (generation && inode->i_generation != generation)) {
584 iput(inode);
585 return ERR_PTR(-ESTALE);
586 }
587 /* now to find a dentry.
588 * If possible, get a well-connected one
589 */
590 result = d_alloc_anon(inode);
591 if (!result) {
592 iput(inode);
593 return ERR_PTR(-ENOMEM);
594 }
595 return result;
596}
597
557#ifdef CONFIG_QUOTA 598#ifdef CONFIG_QUOTA
558#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 599#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
559#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 600#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@ -622,6 +663,7 @@ static struct super_operations ext3_sops = {
622 663
623static struct export_operations ext3_export_ops = { 664static struct export_operations ext3_export_ops = {
624 .get_parent = ext3_get_parent, 665 .get_parent = ext3_get_parent,
666 .get_dentry = ext3_get_dentry,
625}; 667};
626 668
627enum { 669enum {
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e00b85..3a566077ac95 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@ struct jffs2_inode_info {
41 41
42 uint16_t flags; 42 uint16_t flags;
43 uint8_t usercompr; 43 uint8_t usercompr;
44#if !defined (__ECOS)
45#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
46 struct inode vfs_inode; 44 struct inode vfs_inode;
47#endif
48#endif
49#ifdef CONFIG_JFFS2_FS_POSIX_ACL 45#ifdef CONFIG_JFFS2_FS_POSIX_ACL
50 struct posix_acl *i_acl_access; 46 struct posix_acl *i_acl_access;
51 struct posix_acl *i_acl_default; 47 struct posix_acl *i_acl_default;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 7675b33396c7..5a6b4d64206c 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -21,6 +21,9 @@
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include "nodelist.h" 22#include "nodelist.h"
23 23
24static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
25 struct jffs2_node_frag *this);
26
24void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list) 27void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list)
25{ 28{
26 struct jffs2_full_dirent **prev = list; 29 struct jffs2_full_dirent **prev = list;
@@ -87,7 +90,8 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint
87 } 90 }
88} 91}
89 92
90void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this) 93static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
94 struct jffs2_node_frag *this)
91{ 95{
92 if (this->node) { 96 if (this->node) {
93 this->node->frags--; 97 this->node->frags--;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index cae92c14116d..0ddfd70307fb 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -334,7 +334,6 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c_delete);
334struct rb_node *rb_next(struct rb_node *); 334struct rb_node *rb_next(struct rb_node *);
335struct rb_node *rb_prev(struct rb_node *); 335struct rb_node *rb_prev(struct rb_node *);
336void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); 336void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
337void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
338int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn); 337int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
339void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size); 338void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
340int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn); 339int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c19bd476e8ec..e52cef526d90 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -252,6 +252,11 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
252 union jffs2_node_union *node; 252 union jffs2_node_union *node;
253 struct jffs2_eraseblock *jeb; 253 struct jffs2_eraseblock *jeb;
254 254
255 if (c->summary->sum_size == JFFS2_SUMMARY_NOSUM_SIZE) {
256 dbg_summary("Summary is disabled for this jeb! Skipping summary info!\n");
257 return 0;
258 }
259
255 node = invecs[0].iov_base; 260 node = invecs[0].iov_base;
256 jeb = &c->blocks[ofs / c->sector_size]; 261 jeb = &c->blocks[ofs / c->sector_size];
257 ofs -= jeb->offset; 262 ofs -= jeb->offset;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 25bc1ae08648..4da09ce1d1f5 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -1215,7 +1215,6 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt
1215 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE); 1215 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
1216 if (rc) { 1216 if (rc) {
1217 JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen); 1217 JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
1218 rc = rc ? rc : -EBADFD;
1219 goto out; 1218 goto out;
1220 } 1219 }
1221 rc = save_xattr_datum(c, xd); 1220 rc = save_xattr_datum(c, xd);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fecd3b095deb..76ca1cbc38f9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
100 return atomic_dec_and_test(&dreq->io_count); 100 return atomic_dec_and_test(&dreq->io_count);
101} 101}
102 102
103/*
104 * "size" is never larger than rsize or wsize.
105 */
106static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
107{
108 int page_count;
109
110 page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
111 page_count -= user_addr >> PAGE_SHIFT;
112 BUG_ON(page_count < 0);
113
114 return page_count;
115}
116
117static inline unsigned int nfs_max_pages(unsigned int size)
118{
119 return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
120}
121
122/** 103/**
123 * nfs_direct_IO - NFS address space operation for direct I/O 104 * nfs_direct_IO - NFS address space operation for direct I/O
124 * @rw: direction (read or write) 105 * @rw: direction (read or write)
@@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
276 struct nfs_open_context *ctx = dreq->ctx; 257 struct nfs_open_context *ctx = dreq->ctx;
277 struct inode *inode = ctx->dentry->d_inode; 258 struct inode *inode = ctx->dentry->d_inode;
278 size_t rsize = NFS_SERVER(inode)->rsize; 259 size_t rsize = NFS_SERVER(inode)->rsize;
279 unsigned int rpages = nfs_max_pages(rsize);
280 unsigned int pgbase; 260 unsigned int pgbase;
281 int result; 261 int result;
282 ssize_t started = 0; 262 ssize_t started = 0;
283 263
284 get_dreq(dreq); 264 get_dreq(dreq);
285 265
286 pgbase = user_addr & ~PAGE_MASK;
287 do { 266 do {
288 struct nfs_read_data *data; 267 struct nfs_read_data *data;
289 size_t bytes; 268 size_t bytes;
290 269
270 pgbase = user_addr & ~PAGE_MASK;
271 bytes = min(rsize,count);
272
291 result = -ENOMEM; 273 result = -ENOMEM;
292 data = nfs_readdata_alloc(rpages); 274 data = nfs_readdata_alloc(pgbase + bytes);
293 if (unlikely(!data)) 275 if (unlikely(!data))
294 break; 276 break;
295 277
296 bytes = rsize;
297 if (count < rsize)
298 bytes = count;
299
300 data->npages = nfs_direct_count_pages(user_addr, bytes);
301 down_read(&current->mm->mmap_sem); 278 down_read(&current->mm->mmap_sem);
302 result = get_user_pages(current, current->mm, user_addr, 279 result = get_user_pages(current, current->mm, user_addr,
303 data->npages, 1, 0, data->pagevec, NULL); 280 data->npages, 1, 0, data->pagevec, NULL);
@@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
344 started += bytes; 321 started += bytes;
345 user_addr += bytes; 322 user_addr += bytes;
346 pos += bytes; 323 pos += bytes;
324 /* FIXME: Remove this unnecessary math from final patch */
347 pgbase += bytes; 325 pgbase += bytes;
348 pgbase &= ~PAGE_MASK; 326 pgbase &= ~PAGE_MASK;
327 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
349 328
350 count -= bytes; 329 count -= bytes;
351 } while (count != 0); 330 } while (count != 0);
@@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
524 503
525static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 504static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
526{ 505{
527 dreq->commit_data = nfs_commit_alloc(0); 506 dreq->commit_data = nfs_commit_alloc();
528 if (dreq->commit_data != NULL) 507 if (dreq->commit_data != NULL)
529 dreq->commit_data->req = (struct nfs_page *) dreq; 508 dreq->commit_data->req = (struct nfs_page *) dreq;
530} 509}
@@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
605 struct nfs_open_context *ctx = dreq->ctx; 584 struct nfs_open_context *ctx = dreq->ctx;
606 struct inode *inode = ctx->dentry->d_inode; 585 struct inode *inode = ctx->dentry->d_inode;
607 size_t wsize = NFS_SERVER(inode)->wsize; 586 size_t wsize = NFS_SERVER(inode)->wsize;
608 unsigned int wpages = nfs_max_pages(wsize);
609 unsigned int pgbase; 587 unsigned int pgbase;
610 int result; 588 int result;
611 ssize_t started = 0; 589 ssize_t started = 0;
612 590
613 get_dreq(dreq); 591 get_dreq(dreq);
614 592
615 pgbase = user_addr & ~PAGE_MASK;
616 do { 593 do {
617 struct nfs_write_data *data; 594 struct nfs_write_data *data;
618 size_t bytes; 595 size_t bytes;
619 596
597 pgbase = user_addr & ~PAGE_MASK;
598 bytes = min(wsize,count);
599
620 result = -ENOMEM; 600 result = -ENOMEM;
621 data = nfs_writedata_alloc(wpages); 601 data = nfs_writedata_alloc(pgbase + bytes);
622 if (unlikely(!data)) 602 if (unlikely(!data))
623 break; 603 break;
624 604
625 bytes = wsize;
626 if (count < wsize)
627 bytes = count;
628
629 data->npages = nfs_direct_count_pages(user_addr, bytes);
630 down_read(&current->mm->mmap_sem); 605 down_read(&current->mm->mmap_sem);
631 result = get_user_pages(current, current->mm, user_addr, 606 result = get_user_pages(current, current->mm, user_addr,
632 data->npages, 0, 0, data->pagevec, NULL); 607 data->npages, 0, 0, data->pagevec, NULL);
@@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
676 started += bytes; 651 started += bytes;
677 user_addr += bytes; 652 user_addr += bytes;
678 pos += bytes; 653 pos += bytes;
654
655 /* FIXME: Remove this useless math from the final patch */
679 pgbase += bytes; 656 pgbase += bytes;
680 pgbase &= ~PAGE_MASK; 657 pgbase &= ~PAGE_MASK;
658 BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
681 659
682 count -= bytes; 660 count -= bytes;
683 } while (count != 0); 661 } while (count != 0);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 153898e1331f..b14145b7b87f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -970,7 +970,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
970 status = -ENOMEM; 970 status = -ENOMEM;
971 opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); 971 opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
972 if (opendata == NULL) 972 if (opendata == NULL)
973 goto err_put_state_owner; 973 goto err_release_rwsem;
974 974
975 status = _nfs4_proc_open(opendata); 975 status = _nfs4_proc_open(opendata);
976 if (status != 0) 976 if (status != 0)
@@ -989,11 +989,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
989 return 0; 989 return 0;
990err_opendata_free: 990err_opendata_free:
991 nfs4_opendata_free(opendata); 991 nfs4_opendata_free(opendata);
992err_release_rwsem:
993 up_read(&clp->cl_sem);
992err_put_state_owner: 994err_put_state_owner:
993 nfs4_put_state_owner(sp); 995 nfs4_put_state_owner(sp);
994out_err: 996out_err:
995 /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
996 up_read(&clp->cl_sem);
997 *res = NULL; 997 *res = NULL;
998 return status; 998 return status;
999} 999}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index da9cf11c326f..f0aff824a291 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool;
43 43
44#define MIN_POOL_READ (32) 44#define MIN_POOL_READ (32)
45 45
46struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 46struct nfs_read_data *nfs_readdata_alloc(size_t len)
47{ 47{
48 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
48 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); 49 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
49 50
50 if (p) { 51 if (p) {
51 memset(p, 0, sizeof(*p)); 52 memset(p, 0, sizeof(*p));
52 INIT_LIST_HEAD(&p->pages); 53 INIT_LIST_HEAD(&p->pages);
54 p->npages = pagecount;
53 if (pagecount <= ARRAY_SIZE(p->page_array)) 55 if (pagecount <= ARRAY_SIZE(p->page_array))
54 p->pagevec = p->page_array; 56 p->pagevec = p->page_array;
55 else { 57 else {
@@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
140 int result; 142 int result;
141 struct nfs_read_data *rdata; 143 struct nfs_read_data *rdata;
142 144
143 rdata = nfs_readdata_alloc(1); 145 rdata = nfs_readdata_alloc(count);
144 if (!rdata) 146 if (!rdata)
145 return -ENOMEM; 147 return -ENOMEM;
146 148
@@ -202,9 +204,11 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
202 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 204 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
203 spin_unlock(&inode->i_lock); 205 spin_unlock(&inode->i_lock);
204 206
205 nfs_readpage_truncate_uninitialised_page(rdata); 207 if (rdata->res.eof || rdata->res.count == rdata->args.count) {
206 if (rdata->res.eof || rdata->res.count == rdata->args.count)
207 SetPageUptodate(page); 208 SetPageUptodate(page);
209 if (rdata->res.eof && count != 0)
210 memclear_highpage_flush(page, rdata->args.pgbase, count);
211 }
208 result = 0; 212 result = 0;
209 213
210io_error: 214io_error:
@@ -336,25 +340,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
336 struct nfs_page *req = nfs_list_entry(head->next); 340 struct nfs_page *req = nfs_list_entry(head->next);
337 struct page *page = req->wb_page; 341 struct page *page = req->wb_page;
338 struct nfs_read_data *data; 342 struct nfs_read_data *data;
339 unsigned int rsize = NFS_SERVER(inode)->rsize; 343 size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
340 unsigned int nbytes, offset; 344 unsigned int offset;
341 int requests = 0; 345 int requests = 0;
342 LIST_HEAD(list); 346 LIST_HEAD(list);
343 347
344 nfs_list_remove_request(req); 348 nfs_list_remove_request(req);
345 349
346 nbytes = req->wb_bytes; 350 nbytes = req->wb_bytes;
347 for(;;) { 351 do {
348 data = nfs_readdata_alloc(1); 352 size_t len = min(nbytes,rsize);
353
354 data = nfs_readdata_alloc(len);
349 if (!data) 355 if (!data)
350 goto out_bad; 356 goto out_bad;
351 INIT_LIST_HEAD(&data->pages); 357 INIT_LIST_HEAD(&data->pages);
352 list_add(&data->pages, &list); 358 list_add(&data->pages, &list);
353 requests++; 359 requests++;
354 if (nbytes <= rsize) 360 nbytes -= len;
355 break; 361 } while(nbytes != 0);
356 nbytes -= rsize;
357 }
358 atomic_set(&req->wb_complete, requests); 362 atomic_set(&req->wb_complete, requests);
359 363
360 ClearPageError(page); 364 ClearPageError(page);
@@ -402,7 +406,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
402 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 406 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
403 return nfs_pagein_multi(head, inode); 407 return nfs_pagein_multi(head, inode);
404 408
405 data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); 409 data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
406 if (!data) 410 if (!data)
407 goto out_bad; 411 goto out_bad;
408 412
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 50774991f8d5..7084ac9a6455 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool;
90 90
91static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); 91static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
92 92
93struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) 93struct nfs_write_data *nfs_commit_alloc(void)
94{ 94{
95 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); 95 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
96 96
97 if (p) { 97 if (p) {
98 memset(p, 0, sizeof(*p)); 98 memset(p, 0, sizeof(*p));
99 INIT_LIST_HEAD(&p->pages); 99 INIT_LIST_HEAD(&p->pages);
100 if (pagecount <= ARRAY_SIZE(p->page_array))
101 p->pagevec = p->page_array;
102 else {
103 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
104 if (!p->pagevec) {
105 mempool_free(p, nfs_commit_mempool);
106 p = NULL;
107 }
108 }
109 } 100 }
110 return p; 101 return p;
111} 102}
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p)
117 mempool_free(p, nfs_commit_mempool); 108 mempool_free(p, nfs_commit_mempool);
118} 109}
119 110
120struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 111struct nfs_write_data *nfs_writedata_alloc(size_t len)
121{ 112{
113 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
122 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); 114 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
123 115
124 if (p) { 116 if (p) {
125 memset(p, 0, sizeof(*p)); 117 memset(p, 0, sizeof(*p));
126 INIT_LIST_HEAD(&p->pages); 118 INIT_LIST_HEAD(&p->pages);
119 p->npages = pagecount;
127 if (pagecount <= ARRAY_SIZE(p->page_array)) 120 if (pagecount <= ARRAY_SIZE(p->page_array))
128 p->pagevec = p->page_array; 121 p->pagevec = p->page_array;
129 else { 122 else {
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
208 int result, written = 0; 201 int result, written = 0;
209 struct nfs_write_data *wdata; 202 struct nfs_write_data *wdata;
210 203
211 wdata = nfs_writedata_alloc(1); 204 wdata = nfs_writedata_alloc(wsize);
212 if (!wdata) 205 if (!wdata)
213 return -ENOMEM; 206 return -ENOMEM;
214 207
@@ -597,8 +590,8 @@ static void nfs_cancel_commit_list(struct list_head *head)
597 req = nfs_list_entry(head->next); 590 req = nfs_list_entry(head->next);
598 nfs_list_remove_request(req); 591 nfs_list_remove_request(req);
599 nfs_inode_remove_request(req); 592 nfs_inode_remove_request(req);
600 nfs_clear_page_writeback(req);
601 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 593 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
594 nfs_clear_page_writeback(req);
602 } 595 }
603} 596}
604 597
@@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
999 struct nfs_page *req = nfs_list_entry(head->next); 992 struct nfs_page *req = nfs_list_entry(head->next);
1000 struct page *page = req->wb_page; 993 struct page *page = req->wb_page;
1001 struct nfs_write_data *data; 994 struct nfs_write_data *data;
1002 unsigned int wsize = NFS_SERVER(inode)->wsize; 995 size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
1003 unsigned int nbytes, offset; 996 unsigned int offset;
1004 int requests = 0; 997 int requests = 0;
1005 LIST_HEAD(list); 998 LIST_HEAD(list);
1006 999
1007 nfs_list_remove_request(req); 1000 nfs_list_remove_request(req);
1008 1001
1009 nbytes = req->wb_bytes; 1002 nbytes = req->wb_bytes;
1010 for (;;) { 1003 do {
1011 data = nfs_writedata_alloc(1); 1004 size_t len = min(nbytes, wsize);
1005
1006 data = nfs_writedata_alloc(len);
1012 if (!data) 1007 if (!data)
1013 goto out_bad; 1008 goto out_bad;
1014 list_add(&data->pages, &list); 1009 list_add(&data->pages, &list);
1015 requests++; 1010 requests++;
1016 if (nbytes <= wsize) 1011 nbytes -= len;
1017 break; 1012 } while (nbytes != 0);
1018 nbytes -= wsize;
1019 }
1020 atomic_set(&req->wb_complete, requests); 1013 atomic_set(&req->wb_complete, requests);
1021 1014
1022 ClearPageError(page); 1015 ClearPageError(page);
@@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
1070 struct nfs_write_data *data; 1063 struct nfs_write_data *data;
1071 unsigned int count; 1064 unsigned int count;
1072 1065
1073 data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); 1066 data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
1074 if (!data) 1067 if (!data)
1075 goto out_bad; 1068 goto out_bad;
1076 1069
@@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1378 struct nfs_write_data *data; 1371 struct nfs_write_data *data;
1379 struct nfs_page *req; 1372 struct nfs_page *req;
1380 1373
1381 data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); 1374 data = nfs_commit_alloc();
1382 1375
1383 if (!data) 1376 if (!data)
1384 goto out_bad; 1377 goto out_bad;
@@ -1393,8 +1386,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1393 req = nfs_list_entry(head->next); 1386 req = nfs_list_entry(head->next);
1394 nfs_list_remove_request(req); 1387 nfs_list_remove_request(req);
1395 nfs_mark_request_commit(req); 1388 nfs_mark_request_commit(req);
1396 nfs_clear_page_writeback(req);
1397 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 1389 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1390 nfs_clear_page_writeback(req);
1398 } 1391 }
1399 return -ENOMEM; 1392 return -ENOMEM;
1400} 1393}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da7506363c..e35d7e52fdeb 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
33* 33*
34*/ 34*/
35 35
36 36#include <linux/err.h>
37#include <linux/sunrpc/svc.h> 37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h> 38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h> 39#include <linux/nfs4.h>
@@ -87,34 +87,35 @@ int
87nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) 87nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
88{ 88{
89 struct xdr_netobj cksum; 89 struct xdr_netobj cksum;
90 struct crypto_tfm *tfm; 90 struct hash_desc desc;
91 struct scatterlist sg[1]; 91 struct scatterlist sg[1];
92 int status = nfserr_resource; 92 int status = nfserr_resource;
93 93
94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", 94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
95 clname->len, clname->data); 95 clname->len, clname->data);
96 tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); 96 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
97 if (tfm == NULL) 97 desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
98 goto out; 98 if (IS_ERR(desc.tfm))
99 cksum.len = crypto_tfm_alg_digestsize(tfm); 99 goto out_no_tfm;
100 cksum.len = crypto_hash_digestsize(desc.tfm);
100 cksum.data = kmalloc(cksum.len, GFP_KERNEL); 101 cksum.data = kmalloc(cksum.len, GFP_KERNEL);
101 if (cksum.data == NULL) 102 if (cksum.data == NULL)
102 goto out; 103 goto out;
103 crypto_digest_init(tfm);
104 104
105 sg[0].page = virt_to_page(clname->data); 105 sg[0].page = virt_to_page(clname->data);
106 sg[0].offset = offset_in_page(clname->data); 106 sg[0].offset = offset_in_page(clname->data);
107 sg[0].length = clname->len; 107 sg[0].length = clname->len;
108 108
109 crypto_digest_update(tfm, sg, 1); 109 if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
110 crypto_digest_final(tfm, cksum.data); 110 goto out;
111 111
112 md5_to_hex(dname, cksum.data); 112 md5_to_hex(dname, cksum.data);
113 113
114 kfree(cksum.data); 114 kfree(cksum.data);
115 status = nfs_ok; 115 status = nfs_ok;
116out: 116out:
117 crypto_free_tfm(tfm); 117 crypto_free_hash(desc.tfm);
118out_no_tfm:
118 return status; 119 return status;
119} 120}
120 121
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be845a614..9fb8132f19b0 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@ ocfs2-objs := \
16 file.o \ 16 file.o \
17 heartbeat.o \ 17 heartbeat.o \
18 inode.o \ 18 inode.o \
19 ioctl.o \
19 journal.o \ 20 journal.o \
20 localalloc.o \ 21 localalloc.o \
21 mmap.o \ 22 mmap.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05a93e0..f43bc5f18a35 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
1717 1717
1718 ocfs2_remove_from_cache(inode, eb_bh); 1718 ocfs2_remove_from_cache(inode, eb_bh);
1719 1719
1720 BUG_ON(eb->h_suballoc_slot);
1721 BUG_ON(el->l_recs[0].e_clusters); 1720 BUG_ON(el->l_recs[0].e_clusters);
1722 BUG_ON(el->l_recs[0].e_cpos); 1721 BUG_ON(el->l_recs[0].e_cpos);
1723 BUG_ON(el->l_recs[0].e_blkno); 1722 BUG_ON(el->l_recs[0].e_blkno);
1724 status = ocfs2_free_extent_block(handle, 1723 if (eb->h_suballoc_slot == 0) {
1725 tc->tc_ext_alloc_inode, 1724 /*
1726 tc->tc_ext_alloc_bh, 1725 * This code only understands how to
1727 eb); 1726 * lock the suballocator in slot 0,
1728 if (status < 0) { 1727 * which is fine because allocation is
1729 mlog_errno(status); 1728 * only ever done out of that
1730 goto bail; 1729 * suballocator too. A future version
1730 * might change that however, so avoid
1731 * a free if we don't know how to
1732 * handle it. This way an fs incompat
1733 * bit will not be necessary.
1734 */
1735 status = ocfs2_free_extent_block(handle,
1736 tc->tc_ext_alloc_inode,
1737 tc->tc_ext_alloc_bh,
1738 eb);
1739 if (status < 0) {
1740 mlog_errno(status);
1741 goto bail;
1742 }
1731 } 1743 }
1732 } 1744 }
1733 brelse(eb_bh); 1745 brelse(eb_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c342ce01..3d7c082a8f58 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@ out:
391static int ocfs2_commit_write(struct file *file, struct page *page, 391static int ocfs2_commit_write(struct file *file, struct page *page,
392 unsigned from, unsigned to) 392 unsigned from, unsigned to)
393{ 393{
394 int ret, extending = 0, locklevel = 0; 394 int ret;
395 loff_t new_i_size;
396 struct buffer_head *di_bh = NULL; 395 struct buffer_head *di_bh = NULL;
397 struct inode *inode = page->mapping->host; 396 struct inode *inode = page->mapping->host;
398 struct ocfs2_journal_handle *handle = NULL; 397 struct ocfs2_journal_handle *handle = NULL;
398 struct ocfs2_dinode *di;
399 399
400 mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); 400 mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
401 401
402 /* NOTE: ocfs2_file_aio_write has ensured that it's safe for 402 /* NOTE: ocfs2_file_aio_write has ensured that it's safe for
403 * us to sample inode->i_size here without the metadata lock: 403 * us to continue here without rechecking the I/O against
404 * changed inode values.
404 * 405 *
405 * 1) We're currently holding the inode alloc lock, so no 406 * 1) We're currently holding the inode alloc lock, so no
406 * nodes can change it underneath us. 407 * nodes can change it underneath us.
407 * 408 *
408 * 2) We've had to take the metadata lock at least once 409 * 2) We've had to take the metadata lock at least once
409 * already to check for extending writes, hence insuring 410 * already to check for extending writes, suid removal, etc.
410 * that our current copy is also up to date. 411 * The meta data update code then ensures that we don't get a
412 * stale inode allocation image (i_size, i_clusters, etc).
411 */ 413 */
412 new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
413 if (new_i_size > i_size_read(inode)) {
414 extending = 1;
415 locklevel = 1;
416 }
417 414
418 ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page); 415 ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
419 if (ret != 0) { 416 if (ret != 0) {
420 mlog_errno(ret); 417 mlog_errno(ret);
421 goto out; 418 goto out;
@@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
427 goto out_unlock_meta; 424 goto out_unlock_meta;
428 } 425 }
429 426
430 if (extending) { 427 handle = ocfs2_start_walk_page_trans(inode, page, from, to);
431 handle = ocfs2_start_walk_page_trans(inode, page, from, to); 428 if (IS_ERR(handle)) {
432 if (IS_ERR(handle)) { 429 ret = PTR_ERR(handle);
433 ret = PTR_ERR(handle); 430 goto out_unlock_data;
434 handle = NULL; 431 }
435 goto out_unlock_data;
436 }
437 432
438 /* Mark our buffer early. We'd rather catch this error up here 433 /* Mark our buffer early. We'd rather catch this error up here
439 * as opposed to after a successful commit_write which would 434 * as opposed to after a successful commit_write which would
440 * require us to set back inode->i_size. */ 435 * require us to set back inode->i_size. */
441 ret = ocfs2_journal_access(handle, inode, di_bh, 436 ret = ocfs2_journal_access(handle, inode, di_bh,
442 OCFS2_JOURNAL_ACCESS_WRITE); 437 OCFS2_JOURNAL_ACCESS_WRITE);
443 if (ret < 0) { 438 if (ret < 0) {
444 mlog_errno(ret); 439 mlog_errno(ret);
445 goto out_commit; 440 goto out_commit;
446 }
447 } 441 }
448 442
449 /* might update i_size */ 443 /* might update i_size */
@@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
453 goto out_commit; 447 goto out_commit;
454 } 448 }
455 449
456 if (extending) { 450 di = (struct ocfs2_dinode *)di_bh->b_data;
457 loff_t size = (u64) i_size_read(inode);
458 struct ocfs2_dinode *di =
459 (struct ocfs2_dinode *)di_bh->b_data;
460 451
461 /* ocfs2_mark_inode_dirty is too heavy to use here. */ 452 /* ocfs2_mark_inode_dirty() is too heavy to use here. */
462 inode->i_blocks = ocfs2_align_bytes_to_sectors(size); 453 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
463 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 454 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
455 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
464 456
465 di->i_size = cpu_to_le64(size); 457 inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
466 di->i_ctime = di->i_mtime = 458 di->i_size = cpu_to_le64((u64)i_size_read(inode));
467 cpu_to_le64(inode->i_mtime.tv_sec);
468 di->i_ctime_nsec = di->i_mtime_nsec =
469 cpu_to_le32(inode->i_mtime.tv_nsec);
470 459
471 ret = ocfs2_journal_dirty(handle, di_bh); 460 ret = ocfs2_journal_dirty(handle, di_bh);
472 if (ret < 0) { 461 if (ret < 0) {
473 mlog_errno(ret); 462 mlog_errno(ret);
474 goto out_commit; 463 goto out_commit;
475 }
476 } 464 }
477 465
478 BUG_ON(extending && (i_size_read(inode) != new_i_size));
479
480out_commit: 466out_commit:
481 if (handle) 467 ocfs2_commit_trans(handle);
482 ocfs2_commit_trans(handle);
483out_unlock_data: 468out_unlock_data:
484 ocfs2_data_unlock(inode, 1); 469 ocfs2_data_unlock(inode, 1);
485out_unlock_meta: 470out_unlock_meta:
486 ocfs2_meta_unlock(inode, locklevel); 471 ocfs2_meta_unlock(inode, 1);
487out: 472out:
488 if (di_bh) 473 if (di_bh)
489 brelse(di_bh); 474 brelse(di_bh);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf9be6e..c9037414f4f6 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", 100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
101 (unsigned long long)block, nr, flags, inode); 101 (unsigned long long)block, nr, flags, inode);
102 102
103 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
104 (!inode || !(flags & OCFS2_BH_CACHED)));
105
103 if (osb == NULL || osb->sb == NULL || bhs == NULL) { 106 if (osb == NULL || osb->sb == NULL || bhs == NULL) {
104 status = -EINVAL; 107 status = -EINVAL;
105 mlog_errno(status); 108 mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
140 bh = bhs[i]; 143 bh = bhs[i];
141 ignore_cache = 0; 144 ignore_cache = 0;
142 145
146 /* There are three read-ahead cases here which we need to
147 * be concerned with. All three assume a buffer has
148 * previously been submitted with OCFS2_BH_READAHEAD
149 * and it hasn't yet completed I/O.
150 *
151 * 1) The current request is sync to disk. This rarely
152 * happens these days, and never when performance
153 * matters - the code can just wait on the buffer
154 * lock and re-submit.
155 *
156 * 2) The current request is cached, but not
157 * readahead. ocfs2_buffer_uptodate() will return
158 * false anyway, so we'll wind up waiting on the
159 * buffer lock to do I/O. We re-check the request
160 * with after getting the lock to avoid a re-submit.
161 *
162 * 3) The current request is readahead (and so must
163 * also be a caching one). We short circuit if the
164 * buffer is locked (under I/O) and if it's in the
165 * uptodate cache. The re-check from #2 catches the
166 * case that the previous read-ahead completes just
167 * before our is-it-in-flight check.
168 */
169
143 if (flags & OCFS2_BH_CACHED && 170 if (flags & OCFS2_BH_CACHED &&
144 !ocfs2_buffer_uptodate(inode, bh)) { 171 !ocfs2_buffer_uptodate(inode, bh)) {
145 mlog(ML_UPTODATE, 172 mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
169 continue; 196 continue;
170 } 197 }
171 198
199 /* A read-ahead request was made - if the
200 * buffer is already under read-ahead from a
201 * previously submitted request than we are
202 * done here. */
203 if ((flags & OCFS2_BH_READAHEAD)
204 && ocfs2_buffer_read_ahead(inode, bh))
205 continue;
206
172 lock_buffer(bh); 207 lock_buffer(bh);
173 if (buffer_jbd(bh)) { 208 if (buffer_jbd(bh)) {
174#ifdef CATCH_BH_JBD_RACES 209#ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
181 continue; 216 continue;
182#endif 217#endif
183 } 218 }
219
220 /* Re-check ocfs2_buffer_uptodate() as a
221 * previously read-ahead buffer may have
222 * completed I/O while we were waiting for the
223 * buffer lock. */
224 if ((flags & OCFS2_BH_CACHED)
225 && !(flags & OCFS2_BH_READAHEAD)
226 && ocfs2_buffer_uptodate(inode, bh)) {
227 unlock_buffer(bh);
228 continue;
229 }
230
184 clear_buffer_uptodate(bh); 231 clear_buffer_uptodate(bh);
185 get_bh(bh); /* for end_buffer_read_sync() */ 232 get_bh(bh); /* for end_buffer_read_sync() */
186 bh->b_end_io = end_buffer_read_sync; 233 bh->b_end_io = end_buffer_read_sync;
187 if (flags & OCFS2_BH_READAHEAD) 234 submit_bh(READ, bh);
188 submit_bh(READA, bh);
189 else
190 submit_bh(READ, bh);
191 continue; 235 continue;
192 } 236 }
193 } 237 }
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
197 for (i = (nr - 1); i >= 0; i--) { 241 for (i = (nr - 1); i >= 0; i--) {
198 bh = bhs[i]; 242 bh = bhs[i];
199 243
200 /* We know this can't have changed as we hold the 244 if (!(flags & OCFS2_BH_READAHEAD)) {
201 * inode sem. Avoid doing any work on the bh if the 245 /* We know this can't have changed as we hold the
202 * journal has it. */ 246 * inode sem. Avoid doing any work on the bh if the
203 if (!buffer_jbd(bh)) 247 * journal has it. */
204 wait_on_buffer(bh); 248 if (!buffer_jbd(bh))
205 249 wait_on_buffer(bh);
206 if (!buffer_uptodate(bh)) { 250
207 /* Status won't be cleared from here on out, 251 if (!buffer_uptodate(bh)) {
208 * so we can safely record this and loop back 252 /* Status won't be cleared from here on out,
209 * to cleanup the other buffers. Don't need to 253 * so we can safely record this and loop back
210 * remove the clustered uptodate information 254 * to cleanup the other buffers. Don't need to
211 * for this bh as it's not marked locally 255 * remove the clustered uptodate information
212 * uptodate. */ 256 * for this bh as it's not marked locally
213 status = -EIO; 257 * uptodate. */
214 brelse(bh); 258 status = -EIO;
215 bhs[i] = NULL; 259 brelse(bh);
216 continue; 260 bhs[i] = NULL;
261 continue;
262 }
217 } 263 }
218 264
265 /* Always set the buffer in the cache, even if it was
266 * a forced read, or read-ahead which hasn't yet
267 * completed. */
219 if (inode) 268 if (inode)
220 ocfs2_set_buffer_uptodate(inode, bh); 269 ocfs2_set_buffer_uptodate(inode, bh);
221 } 270 }
222 if (inode) 271 if (inode)
223 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 272 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
224 273
225 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 274 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
226 (unsigned long long)block, nr, 275 (unsigned long long)block, nr,
227 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); 276 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
228 277
229bail: 278bail:
230 279
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb90937b68..6cc20930fac3 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb,
49 49
50 50
51#define OCFS2_BH_CACHED 1 51#define OCFS2_BH_CACHED 1
52#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */ 52#define OCFS2_BH_READAHEAD 8
53 53
54static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, 54static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
55 struct buffer_head **bh, int flags, 55 struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d6cf65..305cba3681fe 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev)
320 max_pages = q->max_hw_segments; 320 max_pages = q->max_hw_segments;
321 max_pages--; /* Handle I/Os that straddle a page */ 321 max_pages--; /* Handle I/Os that straddle a page */
322 322
323 max_sectors = max_pages << (PAGE_SHIFT - 9); 323 if (max_pages) {
324 324 max_sectors = max_pages << (PAGE_SHIFT - 9);
325 } else {
326 /* If BIO contains 1 or less than 1 page. */
327 max_sectors = q->max_sectors;
328 }
325 /* Why is fls() 1-based???? */ 329 /* Why is fls() 1-based???? */
326 pow_two_sectors = 1 << (fls(max_sectors) - 1); 330 pow_two_sectors = 1 << (fls(max_sectors) - 1);
327 331
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1a5f36..04e01915b86e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
74int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 74int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
75{ 75{
76 int error = 0; 76 int error = 0;
77 unsigned long offset, blk; 77 unsigned long offset, blk, last_ra_blk = 0;
78 int i, num, stored; 78 int i, stored;
79 struct buffer_head * bh, * tmp; 79 struct buffer_head * bh, * tmp;
80 struct ocfs2_dir_entry * de; 80 struct ocfs2_dir_entry * de;
81 int err; 81 int err;
82 struct inode *inode = filp->f_dentry->d_inode; 82 struct inode *inode = filp->f_dentry->d_inode;
83 struct super_block * sb = inode->i_sb; 83 struct super_block * sb = inode->i_sb;
84 int have_disk_lock = 0; 84 unsigned int ra_sectors = 16;
85 85
86 mlog_entry("dirino=%llu\n", 86 mlog_entry("dirino=%llu\n",
87 (unsigned long long)OCFS2_I(inode)->ip_blkno); 87 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
95 mlog_errno(error); 95 mlog_errno(error);
96 /* we haven't got any yet, so propagate the error. */ 96 /* we haven't got any yet, so propagate the error. */
97 stored = error; 97 stored = error;
98 goto bail; 98 goto bail_nolock;
99 } 99 }
100 have_disk_lock = 1;
101 100
102 offset = filp->f_pos & (sb->s_blocksize - 1); 101 offset = filp->f_pos & (sb->s_blocksize - 1);
103 102
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
113 continue; 112 continue;
114 } 113 }
115 114
116 /* 115 /* The idea here is to begin with 8k read-ahead and to stay
117 * Do the readahead (8k) 116 * 4k ahead of our current position.
118 */ 117 *
119 if (!offset) { 118 * TODO: Use the pagecache for this. We just need to
120 for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0; 119 * make sure it's cluster-safe... */
120 if (!last_ra_blk
121 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
122 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
121 i > 0; i--) { 123 i > 0; i--) {
122 tmp = ocfs2_bread(inode, ++blk, &err, 1); 124 tmp = ocfs2_bread(inode, ++blk, &err, 1);
123 if (tmp) 125 if (tmp)
124 brelse(tmp); 126 brelse(tmp);
125 } 127 }
128 last_ra_blk = blk;
129 ra_sectors = 8;
126 } 130 }
127 131
128revalidate: 132revalidate:
@@ -194,9 +198,9 @@ revalidate:
194 198
195 stored = 0; 199 stored = 0;
196bail: 200bail:
197 if (have_disk_lock) 201 ocfs2_meta_unlock(inode, 0);
198 ocfs2_meta_unlock(inode, 0);
199 202
203bail_nolock:
200 mlog_exit(stored); 204 mlog_exit(stored);
201 205
202 return stored; 206 return stored;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2bbe2c..f13a4bac41f0 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
367 goto do_ast; 367 goto do_ast;
368 } 368 }
369 369
370 mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, " 370 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
371 "name=%.*s, namelen=%u\n", 371 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
372 past->type == DLM_AST ? "" : "b", 372 dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
373 dlm_get_lock_cookie_node(cookie), 373 locklen, name, locklen);
374 dlm_get_lock_cookie_seq(cookie),
375 locklen, name, locklen);
376 374
377 ret = DLM_NORMAL; 375 ret = DLM_NORMAL;
378unlock_out: 376unlock_out:
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1fbb34d..151b41781eab 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1330 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1330 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
1331 lvb->lvb_imtime_packed = 1331 lvb->lvb_imtime_packed =
1332 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1332 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1333 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1333 1334
1334 mlog_meta_lvb(0, lockres); 1335 mlog_meta_lvb(0, lockres);
1335 1336
@@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1360 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1361 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
1361 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1362 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1362 1363
1364 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1365 ocfs2_set_inode_flags(inode);
1366
1363 /* fast-symlinks are a special case */ 1367 /* fast-symlinks are a special case */
1364 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1368 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
1365 inode->i_blocks = 0; 1369 inode->i_blocks = 0;
@@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
2899 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 2903 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
2900 be16_to_cpu(lvb->lvb_imode)); 2904 be16_to_cpu(lvb->lvb_imode));
2901 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 2905 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
2902 "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink), 2906 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
2903 (long long)be64_to_cpu(lvb->lvb_iatime_packed), 2907 (long long)be64_to_cpu(lvb->lvb_iatime_packed),
2904 (long long)be64_to_cpu(lvb->lvb_ictime_packed), 2908 (long long)be64_to_cpu(lvb->lvb_ictime_packed),
2905 (long long)be64_to_cpu(lvb->lvb_imtime_packed)); 2909 (long long)be64_to_cpu(lvb->lvb_imtime_packed),
2910 be32_to_cpu(lvb->lvb_iattr));
2906} 2911}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db2d9ea..243ae862ece5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,7 +27,7 @@
27#ifndef DLMGLUE_H 27#ifndef DLMGLUE_H
28#define DLMGLUE_H 28#define DLMGLUE_H
29 29
30#define OCFS2_LVB_VERSION 2 30#define OCFS2_LVB_VERSION 3
31 31
32struct ocfs2_meta_lvb { 32struct ocfs2_meta_lvb {
33 __be32 lvb_version; 33 __be32 lvb_version;
@@ -40,7 +40,8 @@ struct ocfs2_meta_lvb {
40 __be64 lvb_isize; 40 __be64 lvb_isize;
41 __be16 lvb_imode; 41 __be16 lvb_imode;
42 __be16 lvb_inlink; 42 __be16 lvb_inlink;
43 __be32 lvb_reserved[3]; 43 __be32 lvb_iattr;
44 __be32 lvb_reserved[2];
44}; 45};
45 46
46/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ 47/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c874530..2bbfa17090cf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
44#include "file.h" 44#include "file.h"
45#include "sysfile.h" 45#include "sysfile.h"
46#include "inode.h" 46#include "inode.h"
47#include "ioctl.h"
47#include "journal.h" 48#include "journal.h"
48#include "mmap.h" 49#include "mmap.h"
49#include "suballoc.h" 50#include "suballoc.h"
@@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = {
1227 .open = ocfs2_file_open, 1228 .open = ocfs2_file_open,
1228 .aio_read = ocfs2_file_aio_read, 1229 .aio_read = ocfs2_file_aio_read,
1229 .aio_write = ocfs2_file_aio_write, 1230 .aio_write = ocfs2_file_aio_write,
1231 .ioctl = ocfs2_ioctl,
1230}; 1232};
1231 1233
1232const struct file_operations ocfs2_dops = { 1234const struct file_operations ocfs2_dops = {
1233 .read = generic_read_dir, 1235 .read = generic_read_dir,
1234 .readdir = ocfs2_readdir, 1236 .readdir = ocfs2_readdir,
1235 .fsync = ocfs2_sync_file, 1237 .fsync = ocfs2_sync_file,
1238 .ioctl = ocfs2_ioctl,
1236}; 1239};
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7b86ed..7bcf69154592 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
71 struct inode *inode, 71 struct inode *inode,
72 struct buffer_head *fe_bh); 72 struct buffer_head *fe_bh);
73 73
74void ocfs2_set_inode_flags(struct inode *inode)
75{
76 unsigned int flags = OCFS2_I(inode)->ip_attr;
77
78 inode->i_flags &= ~(S_IMMUTABLE |
79 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
80
81 if (flags & OCFS2_IMMUTABLE_FL)
82 inode->i_flags |= S_IMMUTABLE;
83
84 if (flags & OCFS2_SYNC_FL)
85 inode->i_flags |= S_SYNC;
86 if (flags & OCFS2_APPEND_FL)
87 inode->i_flags |= S_APPEND;
88 if (flags & OCFS2_NOATIME_FL)
89 inode->i_flags |= S_NOATIME;
90 if (flags & OCFS2_DIRSYNC_FL)
91 inode->i_flags |= S_DIRSYNC;
92}
93
74struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 94struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
75 u64 blkno, 95 u64 blkno,
76 int delete_vote) 96 int delete_vote)
@@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
260 inode->i_blocks = 280 inode->i_blocks =
261 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); 281 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
262 inode->i_mapping->a_ops = &ocfs2_aops; 282 inode->i_mapping->a_ops = &ocfs2_aops;
263 inode->i_flags |= S_NOATIME;
264 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 283 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
265 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 284 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
266 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); 285 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
276 295
277 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 296 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
278 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; 297 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
298 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
279 299
280 if (create_ino) 300 if (create_ino)
281 inode->i_ino = ino_from_blkno(inode->i_sb, 301 inode->i_ino = ino_from_blkno(inode->i_sb,
@@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
330 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, 350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
331 OCFS2_LOCK_TYPE_DATA, inode); 351 OCFS2_LOCK_TYPE_DATA, inode);
332 352
353 ocfs2_set_inode_flags(inode);
354 inode->i_flags |= S_NOATIME;
355
333 status = 0; 356 status = 0;
334bail: 357bail:
335 mlog_exit(status); 358 mlog_exit(status);
@@ -1027,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
1027 u64 p_blkno; 1050 u64 p_blkno;
1028 int readflags = OCFS2_BH_CACHED; 1051 int readflags = OCFS2_BH_CACHED;
1029 1052
1030#if 0
1031 /* only turn this on if we know we can deal with read_block
1032 * returning nothing */
1033 if (reada) 1053 if (reada)
1034 readflags |= OCFS2_BH_READAHEAD; 1054 readflags |= OCFS2_BH_READAHEAD;
1035#endif
1036 1055
1037 if (((u64)block << inode->i_sb->s_blocksize_bits) >= 1056 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
1038 i_size_read(inode)) { 1057 i_size_read(inode)) {
@@ -1131,6 +1150,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
1131 1150
1132 spin_lock(&OCFS2_I(inode)->ip_lock); 1151 spin_lock(&OCFS2_I(inode)->ip_lock);
1133 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1152 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1153 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1134 spin_unlock(&OCFS2_I(inode)->ip_lock); 1154 spin_unlock(&OCFS2_I(inode)->ip_lock);
1135 1155
1136 fe->i_size = cpu_to_le64(i_size_read(inode)); 1156 fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1189,8 @@ void ocfs2_refresh_inode(struct inode *inode,
1169 spin_lock(&OCFS2_I(inode)->ip_lock); 1189 spin_lock(&OCFS2_I(inode)->ip_lock);
1170 1190
1171 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1191 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1192 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
1193 ocfs2_set_inode_flags(inode);
1172 i_size_write(inode, le64_to_cpu(fe->i_size)); 1194 i_size_write(inode, le64_to_cpu(fe->i_size));
1173 inode->i_nlink = le16_to_cpu(fe->i_links_count); 1195 inode->i_nlink = le16_to_cpu(fe->i_links_count);
1174 inode->i_uid = le32_to_cpu(fe->i_uid); 1196 inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6cf840..4d1e53992566 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@ struct ocfs2_inode_info
56 struct ocfs2_journal_handle *ip_handle; 56 struct ocfs2_journal_handle *ip_handle;
57 57
58 u32 ip_flags; /* see below */ 58 u32 ip_flags; /* see below */
59 u32 ip_attr; /* inode attributes */
59 60
60 /* protected by recovery_lock. */ 61 /* protected by recovery_lock. */
61 struct inode *ip_next_orphan; 62 struct inode *ip_next_orphan;
@@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
142int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); 143int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
143int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); 144int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
144 145
146void ocfs2_set_inode_flags(struct inode *inode);
147
145#endif /* OCFS2_INODE_H */ 148#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 000000000000..3663cef80689
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,136 @@
1/*
2 * linux/fs/ocfs2/ioctl.c
3 *
4 * Copyright (C) 2006 Herbert Poetzl
5 * adapted from Remy Card's ext2/ioctl.c
6 */
7
8#include <linux/fs.h>
9#include <linux/mount.h>
10
11#define MLOG_MASK_PREFIX ML_INODE
12#include <cluster/masklog.h>
13
14#include "ocfs2.h"
15#include "alloc.h"
16#include "dlmglue.h"
17#include "inode.h"
18#include "journal.h"
19
20#include "ocfs2_fs.h"
21#include "ioctl.h"
22
23#include <linux/ext2_fs.h>
24
25static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
26{
27 int status;
28
29 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
30 if (status < 0) {
31 mlog_errno(status);
32 return status;
33 }
34 *flags = OCFS2_I(inode)->ip_attr;
35 ocfs2_meta_unlock(inode, 0);
36
37 mlog_exit(status);
38 return status;
39}
40
41static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
42 unsigned mask)
43{
44 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
45 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
46 struct ocfs2_journal_handle *handle = NULL;
47 struct buffer_head *bh = NULL;
48 unsigned oldflags;
49 int status;
50
51 mutex_lock(&inode->i_mutex);
52
53 status = ocfs2_meta_lock(inode, NULL, &bh, 1);
54 if (status < 0) {
55 mlog_errno(status);
56 goto bail;
57 }
58
59 status = -EROFS;
60 if (IS_RDONLY(inode))
61 goto bail_unlock;
62
63 status = -EACCES;
64 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
65 goto bail_unlock;
66
67 if (!S_ISDIR(inode->i_mode))
68 flags &= ~OCFS2_DIRSYNC_FL;
69
70 handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
71 if (IS_ERR(handle)) {
72 status = PTR_ERR(handle);
73 mlog_errno(status);
74 goto bail_unlock;
75 }
76
77 oldflags = ocfs2_inode->ip_attr;
78 flags = flags & mask;
79 flags |= oldflags & ~mask;
80
81 /*
82 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
83 * the relevant capability.
84 */
85 status = -EPERM;
86 if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
87 (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
88 if (!capable(CAP_LINUX_IMMUTABLE))
89 goto bail_unlock;
90 }
91
92 ocfs2_inode->ip_attr = flags;
93 ocfs2_set_inode_flags(inode);
94
95 status = ocfs2_mark_inode_dirty(handle, inode, bh);
96 if (status < 0)
97 mlog_errno(status);
98
99 ocfs2_commit_trans(handle);
100bail_unlock:
101 ocfs2_meta_unlock(inode, 1);
102bail:
103 mutex_unlock(&inode->i_mutex);
104
105 if (bh)
106 brelse(bh);
107
108 mlog_exit(status);
109 return status;
110}
111
112int ocfs2_ioctl(struct inode * inode, struct file * filp,
113 unsigned int cmd, unsigned long arg)
114{
115 unsigned int flags;
116 int status;
117
118 switch (cmd) {
119 case OCFS2_IOC_GETFLAGS:
120 status = ocfs2_get_inode_attr(inode, &flags);
121 if (status < 0)
122 return status;
123
124 flags &= OCFS2_FL_VISIBLE;
125 return put_user(flags, (int __user *) arg);
126 case OCFS2_IOC_SETFLAGS:
127 if (get_user(flags, (int __user *) arg))
128 return -EFAULT;
129
130 return ocfs2_set_inode_attr(inode, flags,
131 OCFS2_FL_MODIFIABLE);
132 default:
133 return -ENOTTY;
134 }
135}
136
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 000000000000..4a7c82931dba
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
1/*
2 * ioctl.h
3 *
4 * Function prototypes
5 *
6 * Copyright (C) 2006 Herbert Poetzl
7 *
8 */
9
10#ifndef OCFS2_IOCTL_H
11#define OCFS2_IOCTL_H
12
13int ocfs2_ioctl(struct inode * inode, struct file * filp,
14 unsigned int cmd, unsigned long arg);
15
16#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862c8bdd..0d3e939b1f56 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
56#include "journal.h" 56#include "journal.h"
57#include "namei.h" 57#include "namei.h"
58#include "suballoc.h" 58#include "suballoc.h"
59#include "super.h"
59#include "symlink.h" 60#include "symlink.h"
60#include "sysfile.h" 61#include "sysfile.h"
61#include "uptodate.h" 62#include "uptodate.h"
@@ -310,13 +311,6 @@ static int ocfs2_mknod(struct inode *dir,
310 /* get our super block */ 311 /* get our super block */
311 osb = OCFS2_SB(dir->i_sb); 312 osb = OCFS2_SB(dir->i_sb);
312 313
313 if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
314 mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
315 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
316 status = -EMLINK;
317 goto leave;
318 }
319
320 handle = ocfs2_alloc_handle(osb); 314 handle = ocfs2_alloc_handle(osb);
321 if (handle == NULL) { 315 if (handle == NULL) {
322 status = -ENOMEM; 316 status = -ENOMEM;
@@ -331,6 +325,11 @@ static int ocfs2_mknod(struct inode *dir,
331 goto leave; 325 goto leave;
332 } 326 }
333 327
328 if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
329 status = -EMLINK;
330 goto leave;
331 }
332
334 dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 333 dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
335 if (!dirfe->i_links_count) { 334 if (!dirfe->i_links_count) {
336 /* can't make a file in a deleted directory. */ 335 /* can't make a file in a deleted directory. */
@@ -643,11 +642,6 @@ static int ocfs2_link(struct dentry *old_dentry,
643 goto bail; 642 goto bail;
644 } 643 }
645 644
646 if (inode->i_nlink >= OCFS2_LINK_MAX) {
647 err = -EMLINK;
648 goto bail;
649 }
650
651 handle = ocfs2_alloc_handle(osb); 645 handle = ocfs2_alloc_handle(osb);
652 if (handle == NULL) { 646 if (handle == NULL) {
653 err = -ENOMEM; 647 err = -ENOMEM;
@@ -661,6 +655,11 @@ static int ocfs2_link(struct dentry *old_dentry,
661 goto bail; 655 goto bail;
662 } 656 }
663 657
658 if (!dir->i_nlink) {
659 err = -ENOENT;
660 goto bail;
661 }
662
664 err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, 663 err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
665 dentry->d_name.len); 664 dentry->d_name.len);
666 if (err) 665 if (err)
@@ -1964,13 +1963,8 @@ restart:
1964 } 1963 }
1965 num++; 1964 num++;
1966 1965
1967 /* XXX: questionable readahead stuff here */
1968 bh = ocfs2_bread(dir, b++, &err, 1); 1966 bh = ocfs2_bread(dir, b++, &err, 1);
1969 bh_use[ra_max] = bh; 1967 bh_use[ra_max] = bh;
1970#if 0 // ???
1971 if (bh)
1972 ll_rw_block(READ, 1, &bh);
1973#endif
1974 } 1968 }
1975 } 1969 }
1976 if ((bh = bh_use[ra_ptr++]) == NULL) 1970 if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1978,6 +1972,10 @@ restart:
1978 wait_on_buffer(bh); 1972 wait_on_buffer(bh);
1979 if (!buffer_uptodate(bh)) { 1973 if (!buffer_uptodate(bh)) {
1980 /* read error, skip block & hope for the best */ 1974 /* read error, skip block & hope for the best */
1975 ocfs2_error(dir->i_sb, "reading directory %llu, "
1976 "offset %lu\n",
1977 (unsigned long long)OCFS2_I(dir)->ip_blkno,
1978 block);
1981 brelse(bh); 1979 brelse(bh);
1982 goto next; 1980 goto next;
1983 } 1981 }
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac547c15..3330a5dc6be2 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
114#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ 114#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
115#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ 115#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
116 116
117/* Inode attributes, keep in sync with EXT2 */
118#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
119#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
120#define OCFS2_COMPR_FL (0x00000004) /* Compress file */
121#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */
122#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */
123#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */
124#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */
125#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */
126#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */
127
128#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */
129#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */
130
131/*
132 * ioctl commands
133 */
134#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long)
135#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long)
136
117/* 137/*
118 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 138 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
119 */ 139 */
@@ -399,7 +419,9 @@ struct ocfs2_dinode {
399 __le32 i_atime_nsec; 419 __le32 i_atime_nsec;
400 __le32 i_ctime_nsec; 420 __le32 i_ctime_nsec;
401 __le32 i_mtime_nsec; 421 __le32 i_mtime_nsec;
402/*70*/ __le64 i_reserved1[9]; 422 __le32 i_attr;
423 __le32 i_reserved1;
424/*70*/ __le64 i_reserved2[8];
403/*B8*/ union { 425/*B8*/ union {
404 __le64 i_pad1; /* Generic way to refer to this 426 __le64 i_pad1; /* Generic way to refer to this
405 64bit union */ 427 64bit union */
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a793326..9707ed7a3206 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
206} 206}
207 207
208/* Warning: even if it returns true, this does *not* guarantee that 208/* Warning: even if it returns true, this does *not* guarantee that
209 * the block is stored in our inode metadata cache. */ 209 * the block is stored in our inode metadata cache.
210 *
211 * This can be called under lock_buffer()
212 */
210int ocfs2_buffer_uptodate(struct inode *inode, 213int ocfs2_buffer_uptodate(struct inode *inode,
211 struct buffer_head *bh) 214 struct buffer_head *bh)
212{ 215{
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
226 return ocfs2_buffer_cached(OCFS2_I(inode), bh); 229 return ocfs2_buffer_cached(OCFS2_I(inode), bh);
227} 230}
228 231
232/*
233 * Determine whether a buffer is currently out on a read-ahead request.
234 * ip_io_sem should be held to serialize submitters with the logic here.
235 */
236int ocfs2_buffer_read_ahead(struct inode *inode,
237 struct buffer_head *bh)
238{
239 return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
240}
241
229/* Requires ip_lock */ 242/* Requires ip_lock */
230static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, 243static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
231 sector_t block) 244 sector_t block)
@@ -403,7 +416,11 @@ out_free:
403 * 416 *
404 * Note that this function may actually fail to insert the block if 417 * Note that this function may actually fail to insert the block if
405 * memory cannot be allocated. This is not fatal however (but may 418 * memory cannot be allocated. This is not fatal however (but may
406 * result in a performance penalty) */ 419 * result in a performance penalty)
420 *
421 * Readahead buffers can be passed in here before the I/O request is
422 * completed.
423 */
407void ocfs2_set_buffer_uptodate(struct inode *inode, 424void ocfs2_set_buffer_uptodate(struct inode *inode,
408 struct buffer_head *bh) 425 struct buffer_head *bh)
409{ 426{
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d26b06..2e73206059a8 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
40 struct buffer_head *bh); 40 struct buffer_head *bh);
41void ocfs2_remove_from_cache(struct inode *inode, 41void ocfs2_remove_from_cache(struct inode *inode,
42 struct buffer_head *bh); 42 struct buffer_head *bh);
43int ocfs2_buffer_read_ahead(struct inode *inode,
44 struct buffer_head *bh);
43 45
44#endif /* OCFS2_UPTODATE_H */ 46#endif /* OCFS2_UPTODATE_H */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c40f81ba9b13..34dcb43a7837 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1390,11 +1390,19 @@ xfs_vm_direct_IO(
1390 1390
1391 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); 1391 iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
1392 1392
1393 ret = blockdev_direct_IO_own_locking(rw, iocb, inode, 1393 if (rw == WRITE) {
1394 iomap.iomap_target->bt_bdev, 1394 ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
1395 iov, offset, nr_segs, 1395 iomap.iomap_target->bt_bdev,
1396 xfs_get_blocks_direct, 1396 iov, offset, nr_segs,
1397 xfs_end_io_direct); 1397 xfs_get_blocks_direct,
1398 xfs_end_io_direct);
1399 } else {
1400 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
1401 iomap.iomap_target->bt_bdev,
1402 iov, offset, nr_segs,
1403 xfs_get_blocks_direct,
1404 xfs_end_io_direct);
1405 }
1398 1406
1399 if (unlikely(ret <= 0 && iocb->private)) 1407 if (unlikely(ret <= 0 && iocb->private))
1400 xfs_destroy_ioend(iocb->private); 1408 xfs_destroy_ioend(iocb->private);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5d9cfd91ad08..ee788b1cb364 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -264,7 +264,9 @@ xfs_read(
264 dmflags, &locktype); 264 dmflags, &locktype);
265 if (ret) { 265 if (ret) {
266 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 266 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
267 goto unlock_mutex; 267 if (unlikely(ioflags & IO_ISDIRECT))
268 mutex_unlock(&inode->i_mutex);
269 return ret;
268 } 270 }
269 } 271 }
270 272
@@ -272,6 +274,9 @@ xfs_read(
272 bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), 274 bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
273 -1, FI_REMAPF_LOCKED); 275 -1, FI_REMAPF_LOCKED);
274 276
277 if (unlikely(ioflags & IO_ISDIRECT))
278 mutex_unlock(&inode->i_mutex);
279
275 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, 280 xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
276 (void *)iovp, segs, *offset, ioflags); 281 (void *)iovp, segs, *offset, ioflags);
277 ret = __generic_file_aio_read(iocb, iovp, segs, offset); 282 ret = __generic_file_aio_read(iocb, iovp, segs, offset);
@@ -281,10 +286,6 @@ xfs_read(
281 XFS_STATS_ADD(xs_read_bytes, ret); 286 XFS_STATS_ADD(xs_read_bytes, ret);
282 287
283 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 288 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
284
285unlock_mutex:
286 if (unlikely(ioflags & IO_ISDIRECT))
287 mutex_unlock(&inode->i_mutex);
288 return ret; 289 return ret;
289} 290}
290 291
@@ -390,6 +391,8 @@ xfs_splice_write(
390 xfs_inode_t *ip = XFS_BHVTOI(bdp); 391 xfs_inode_t *ip = XFS_BHVTOI(bdp);
391 xfs_mount_t *mp = ip->i_mount; 392 xfs_mount_t *mp = ip->i_mount;
392 ssize_t ret; 393 ssize_t ret;
394 struct inode *inode = outfilp->f_mapping->host;
395 xfs_fsize_t isize;
393 396
394 XFS_STATS_INC(xs_write_calls); 397 XFS_STATS_INC(xs_write_calls);
395 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 398 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -416,6 +419,20 @@ xfs_splice_write(
416 if (ret > 0) 419 if (ret > 0)
417 XFS_STATS_ADD(xs_write_bytes, ret); 420 XFS_STATS_ADD(xs_write_bytes, ret);
418 421
422 isize = i_size_read(inode);
423 if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
424 *ppos = isize;
425
426 if (*ppos > ip->i_d.di_size) {
427 xfs_ilock(ip, XFS_ILOCK_EXCL);
428 if (*ppos > ip->i_d.di_size) {
429 ip->i_d.di_size = *ppos;
430 i_size_write(inode, *ppos);
431 ip->i_update_core = 1;
432 ip->i_update_size = 1;
433 }
434 xfs_iunlock(ip, XFS_ILOCK_EXCL);
435 }
419 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 436 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
420 return ret; 437 return ret;
421} 438}
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index f137856c3261..db8872be8c87 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -203,7 +203,7 @@ xfs_qm_statvfs(
203 if (error || !vnode) 203 if (error || !vnode)
204 return error; 204 return error;
205 205
206 mp = XFS_BHVTOM(bhv); 206 mp = xfs_vfstom(bhvtovfs(bhv));
207 ip = xfs_vtoi(vnode); 207 ip = xfs_vtoi(vnode);
208 208
209 if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) 209 if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 650591f999ae..5a4256120ccc 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -44,6 +44,26 @@ typedef enum xfs_alloctype
44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ 44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
45 45
46/* 46/*
47 * In order to avoid ENOSPC-related deadlock caused by
48 * out-of-order locking of AGF buffer (PV 947395), we place
49 * constraints on the relationship among actual allocations for
50 * data blocks, freelist blocks, and potential file data bmap
51 * btree blocks. However, these restrictions may result in no
52 * actual space allocated for a delayed extent, for example, a data
53 * block in a certain AG is allocated but there is no additional
54 * block for the additional bmap btree block due to a split of the
55 * bmap btree of the file. The result of this may lead to an
56 * infinite loop in xfssyncd when the file gets flushed to disk and
57 * all delayed extents need to be actually allocated. To get around
58 * this, we explicitly set aside a few blocks which will not be
59 * reserved in delayed allocation. Considering the minimum number of
60 * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
61 * btree requires 1 fsb, so we set the number of set-aside blocks
62 * to 4 + 4*agcount.
63 */
64#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
65
66/*
47 * Argument structure for xfs_alloc routines. 67 * Argument structure for xfs_alloc routines.
48 * This is turned into a structure to avoid having 20 arguments passed 68 * This is turned into a structure to avoid having 20 arguments passed
49 * down several levels of the stack. 69 * down several levels of the stack.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 077629bab532..c064e72ada9e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
462 462
463 xfs_icsb_sync_counters_lazy(mp); 463 xfs_icsb_sync_counters_lazy(mp);
464 s = XFS_SB_LOCK(mp); 464 s = XFS_SB_LOCK(mp);
465 cnt->freedata = mp->m_sb.sb_fdblocks; 465 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
466 cnt->freertx = mp->m_sb.sb_frextents; 466 cnt->freertx = mp->m_sb.sb_frextents;
467 cnt->freeino = mp->m_sb.sb_ifree; 467 cnt->freeino = mp->m_sb.sb_ifree;
468 cnt->allocino = mp->m_sb.sb_icount; 468 cnt->allocino = mp->m_sb.sb_icount;
@@ -519,15 +519,19 @@ xfs_reserve_blocks(
519 } 519 }
520 mp->m_resblks = request; 520 mp->m_resblks = request;
521 } else { 521 } else {
522 __int64_t free;
523
524 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
522 delta = request - mp->m_resblks; 525 delta = request - mp->m_resblks;
523 lcounter = mp->m_sb.sb_fdblocks - delta; 526 lcounter = free - delta;
524 if (lcounter < 0) { 527 if (lcounter < 0) {
525 /* We can't satisfy the request, just get what we can */ 528 /* We can't satisfy the request, just get what we can */
526 mp->m_resblks += mp->m_sb.sb_fdblocks; 529 mp->m_resblks += free;
527 mp->m_resblks_avail += mp->m_sb.sb_fdblocks; 530 mp->m_resblks_avail += free;
528 mp->m_sb.sb_fdblocks = 0; 531 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
529 } else { 532 } else {
530 mp->m_sb.sb_fdblocks = lcounter; 533 mp->m_sb.sb_fdblocks =
534 lcounter + XFS_ALLOC_SET_ASIDE(mp);
531 mp->m_resblks = request; 535 mp->m_resblks = request;
532 mp->m_resblks_avail += delta; 536 mp->m_resblks_avail += delta;
533 } 537 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4be5c0b2d296..9dfae18d995f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1243 xfs_trans_log_buf(tp, bp, first, last); 1243 xfs_trans_log_buf(tp, bp, first, last);
1244} 1244}
1245 1245
1246/*
1247 * In order to avoid ENOSPC-related deadlock caused by
1248 * out-of-order locking of AGF buffer (PV 947395), we place
1249 * constraints on the relationship among actual allocations for
1250 * data blocks, freelist blocks, and potential file data bmap
1251 * btree blocks. However, these restrictions may result in no
1252 * actual space allocated for a delayed extent, for example, a data
1253 * block in a certain AG is allocated but there is no additional
1254 * block for the additional bmap btree block due to a split of the
1255 * bmap btree of the file. The result of this may lead to an
1256 * infinite loop in xfssyncd when the file gets flushed to disk and
1257 * all delayed extents need to be actually allocated. To get around
1258 * this, we explicitly set aside a few blocks which will not be
1259 * reserved in delayed allocation. Considering the minimum number of
1260 * needed freelist blocks is 4 fsbs, a potential split of file's bmap
1261 * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
1262*/
1263#define SET_ASIDE_BLOCKS 8
1264 1246
1265/* 1247/*
1266 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1248 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
@@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1306 return 0; 1288 return 0;
1307 case XFS_SBS_FDBLOCKS: 1289 case XFS_SBS_FDBLOCKS:
1308 1290
1309 lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; 1291 lcounter = (long long)
1292 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1310 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1293 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1311 1294
1312 if (delta > 0) { /* Putting blocks back */ 1295 if (delta > 0) { /* Putting blocks back */
@@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1340 } 1323 }
1341 } 1324 }
1342 1325
1343 mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; 1326 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1344 return 0; 1327 return 0;
1345 case XFS_SBS_FREXTENTS: 1328 case XFS_SBS_FREXTENTS:
1346 lcounter = (long long)mp->m_sb.sb_frextents; 1329 lcounter = (long long)mp->m_sb.sb_frextents;
@@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy(
2021 * when we get near ENOSPC. 2004 * when we get near ENOSPC.
2022 */ 2005 */
2023#define XFS_ICSB_INO_CNTR_REENABLE 64 2006#define XFS_ICSB_INO_CNTR_REENABLE 64
2024#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 2007#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2008 (512 + XFS_ALLOC_SET_ASIDE(mp))
2025STATIC void 2009STATIC void
2026xfs_icsb_balance_counter( 2010xfs_icsb_balance_counter(
2027 xfs_mount_t *mp, 2011 xfs_mount_t *mp,
@@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter(
2055 case XFS_SBS_FDBLOCKS: 2039 case XFS_SBS_FDBLOCKS:
2056 count = mp->m_sb.sb_fdblocks; 2040 count = mp->m_sb.sb_fdblocks;
2057 resid = do_div(count, weight); 2041 resid = do_div(count, weight);
2058 if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) 2042 if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp))
2059 goto out; 2043 goto out;
2060 break; 2044 break;
2061 default: 2045 default:
@@ -2110,11 +2094,11 @@ again:
2110 case XFS_SBS_FDBLOCKS: 2094 case XFS_SBS_FDBLOCKS:
2111 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); 2095 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
2112 2096
2113 lcounter = icsbp->icsb_fdblocks; 2097 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
2114 lcounter += delta; 2098 lcounter += delta;
2115 if (unlikely(lcounter < 0)) 2099 if (unlikely(lcounter < 0))
2116 goto slow_path; 2100 goto slow_path;
2117 icsbp->icsb_fdblocks = lcounter; 2101 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
2118 break; 2102 break;
2119 default: 2103 default:
2120 BUG(); 2104 BUG();
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b427d220a169..a34796e57afb 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -811,7 +811,8 @@ xfs_statvfs(
811 statp->f_bsize = sbp->sb_blocksize; 811 statp->f_bsize = sbp->sb_blocksize;
812 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 812 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
813 statp->f_blocks = sbp->sb_dblocks - lsize; 813 statp->f_blocks = sbp->sb_dblocks - lsize;
814 statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; 814 statp->f_bfree = statp->f_bavail =
815 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
815 fakeinos = statp->f_bfree << sbp->sb_inopblog; 816 fakeinos = statp->f_bfree << sbp->sb_inopblog;
816#if XFS_BIG_INUMS 817#if XFS_BIG_INUMS
817 fakeinos += mp->m_inoadd; 818 fakeinos += mp->m_inoadd;