author	Russell King <rmk@dyn-67.arm.linux.org.uk>	2009-04-02 18:22:11 -0400
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2009-04-02 18:22:11 -0400
commit	cd02938a828f4b2098a074afb7454f106f2e8df5 (patch)
tree	7b543fd6aa82a62dc3a9614c26f89daca83e77d5 /fs
parent	9d681f3a1b27fdfc17ea251cf8d5f627dab34670 (diff)
parent	172ef275444efa12d834fb9d1b1acdac92db47f7 (diff)
Merge branch 'smsc911x-armplatforms' of git://github.com/steveglen/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--	fs/afs/proc.c	1
-rw-r--r--	fs/autofs4/autofs_i.h	2
-rw-r--r--	fs/autofs4/dev-ioctl.c	29
-rw-r--r--	fs/autofs4/expire.c	27
-rw-r--r--	fs/autofs4/root.c	41
-rw-r--r--	fs/btrfs/Makefile	2
-rw-r--r--	fs/btrfs/btrfs_inode.h	31
-rw-r--r--	fs/btrfs/ctree.c	588
-rw-r--r--	fs/btrfs/ctree.h	71
-rw-r--r--	fs/btrfs/delayed-ref.c	669
-rw-r--r--	fs/btrfs/delayed-ref.h	193
-rw-r--r--	fs/btrfs/dir-item.c	3
-rw-r--r--	fs/btrfs/disk-io.c	81
-rw-r--r--	fs/btrfs/disk-io.h	1
-rw-r--r--	fs/btrfs/extent-tree.c	1674
-rw-r--r--	fs/btrfs/extent_io.c	51
-rw-r--r--	fs/btrfs/extent_io.h	3
-rw-r--r--	fs/btrfs/file-item.c	7
-rw-r--r--	fs/btrfs/file.c	50
-rw-r--r--	fs/btrfs/inode-item.c	3
-rw-r--r--	fs/btrfs/inode.c	206
-rw-r--r--	fs/btrfs/locking.c	21
-rw-r--r--	fs/btrfs/ordered-data.c	118
-rw-r--r--	fs/btrfs/ordered-data.h	4
-rw-r--r--	fs/btrfs/transaction.c	151
-rw-r--r--	fs/btrfs/transaction.h	8
-rw-r--r--	fs/btrfs/tree-defrag.c	2
-rw-r--r--	fs/btrfs/tree-log.c	444
-rw-r--r--	fs/btrfs/tree-log.h	17
-rw-r--r--	fs/buffer.c	56
-rw-r--r--	fs/cifs/cifs_debug.c	1
-rw-r--r--	fs/ecryptfs/keystore.c	3
-rw-r--r--	fs/ecryptfs/messaging.c	3
-rw-r--r--	fs/eventfd.c	26
-rw-r--r--	fs/eventpoll.c	614
-rw-r--r--	fs/ext4/balloc.c	14
-rw-r--r--	fs/ext4/dir.c	16
-rw-r--r--	fs/ext4/ext4.h	93
-rw-r--r--	fs/ext4/ext4_extents.h	1
-rw-r--r--	fs/ext4/ext4_i.h	6
-rw-r--r--	fs/ext4/ext4_sb.h	14
-rw-r--r--	fs/ext4/extents.c	127
-rw-r--r--	fs/ext4/file.c	7
-rw-r--r--	fs/ext4/ialloc.c	273
-rw-r--r--	fs/ext4/inode.c	429
-rw-r--r--	fs/ext4/ioctl.c	17
-rw-r--r--	fs/ext4/mballoc.c	158
-rw-r--r--	fs/ext4/mballoc.h	8
-rw-r--r--	fs/ext4/namei.c	164
-rw-r--r--	fs/ext4/resize.c	8
-rw-r--r--	fs/ext4/super.c	327
-rw-r--r--	fs/fcntl.c	10
-rw-r--r--	fs/fuse/file.c	6
-rw-r--r--	fs/gfs2/ops_file.c	5
-rw-r--r--	fs/hugetlbfs/inode.c	21
-rw-r--r--	fs/jbd2/commit.c	5
-rw-r--r--	fs/jbd2/revoke.c	24
-rw-r--r--	fs/jbd2/transaction.c	2
-rw-r--r--	fs/jfs/Kconfig	1
-rw-r--r--	fs/jfs/jfs_debug.c	1
-rw-r--r--	fs/jfs/jfs_extent.c	63
-rw-r--r--	fs/jfs/jfs_imap.c	10
-rw-r--r--	fs/jfs/jfs_metapage.c	18
-rw-r--r--	fs/jfs/jfs_types.h	29
-rw-r--r--	fs/jfs/jfs_xtree.c	263
-rw-r--r--	fs/jfs/jfs_xtree.h	2
-rw-r--r--	fs/jfs/super.c	4
-rw-r--r--	fs/lockd/clntlock.c	51
-rw-r--r--	fs/lockd/mon.c	8
-rw-r--r--	fs/lockd/svc.c	42
-rw-r--r--	fs/nfs/callback.c	31
-rw-r--r--	fs/nfs/callback.h	1
-rw-r--r--	fs/nfs/client.c	118
-rw-r--r--	fs/nfs/dir.c	9
-rw-r--r--	fs/nfs/file.c	37
-rw-r--r--	fs/nfs/getroot.c	4
-rw-r--r--	fs/nfs/inode.c	309
-rw-r--r--	fs/nfs/internal.h	4
-rw-r--r--	fs/nfs/nfs2xdr.c	9
-rw-r--r--	fs/nfs/nfs3proc.c	1
-rw-r--r--	fs/nfs/nfs3xdr.c	37
-rw-r--r--	fs/nfs/nfs4proc.c	47
-rw-r--r--	fs/nfs/nfs4state.c	10
-rw-r--r--	fs/nfs/nfs4xdr.c	213
-rw-r--r--	fs/nfs/pagelist.c	11
-rw-r--r--	fs/nfs/proc.c	1
-rw-r--r--	fs/nfs/super.c	4
-rw-r--r--	fs/nfs/write.c	53
-rw-r--r--	fs/nfsd/nfsctl.c	6
-rw-r--r--	fs/nfsd/nfssvc.c	5
-rw-r--r--	fs/ntfs/dir.c	4
-rw-r--r--	fs/ntfs/inode.c	3
-rw-r--r--	fs/ntfs/layout.h	329
-rw-r--r--	fs/ntfs/logfile.h	6
-rw-r--r--	fs/ntfs/mft.c	2
-rw-r--r--	fs/ntfs/super.c	50
-rw-r--r--	fs/ntfs/usnjrnl.h	48
-rw-r--r--	fs/ocfs2/mmap.c	6
-rw-r--r--	fs/proc/generic.c	63
-rw-r--r--	fs/proc/inode-alloc.txt	14
-rw-r--r--	fs/proc/inode.c	21
-rw-r--r--	fs/proc/internal.h	1
-rw-r--r--	fs/proc/proc_tty.c	13
-rw-r--r--	fs/proc/task_mmu.c	8
-rw-r--r--	fs/proc/uptime.c	38
-rw-r--r--	fs/ramfs/file-nommu.c	15
-rw-r--r--	fs/ramfs/inode.c	94
-rw-r--r--	fs/reiserfs/Makefile	4
-rw-r--r--	fs/reiserfs/README	4
-rw-r--r--	fs/reiserfs/bitmap.c	72
-rw-r--r--	fs/reiserfs/dir.c	28
-rw-r--r--	fs/reiserfs/do_balan.c	313
-rw-r--r--	fs/reiserfs/file.c	34
-rw-r--r--	fs/reiserfs/fix_node.c	1021
-rw-r--r--	fs/reiserfs/hashes.c	2
-rw-r--r--	fs/reiserfs/ibalance.c	22
-rw-r--r--	fs/reiserfs/inode.c	203
-rw-r--r--	fs/reiserfs/ioctl.c	2
-rw-r--r--	fs/reiserfs/item_ops.c	68
-rw-r--r--	fs/reiserfs/journal.c	1077
-rw-r--r--	fs/reiserfs/lbalance.c	66
-rw-r--r--	fs/reiserfs/namei.c	180
-rw-r--r--	fs/reiserfs/objectid.c	12
-rw-r--r--	fs/reiserfs/prints.c	134
-rw-r--r--	fs/reiserfs/procfs.c	16
-rw-r--r--	fs/reiserfs/resize.c	6
-rw-r--r--	fs/reiserfs/stree.c	1168
-rw-r--r--	fs/reiserfs/super.c	303
-rw-r--r--	fs/reiserfs/tail_conversion.c	96
-rw-r--r--	fs/reiserfs/xattr.c	1375
-rw-r--r--	fs/reiserfs/xattr_acl.c	257
-rw-r--r--	fs/reiserfs/xattr_security.c	80
-rw-r--r--	fs/reiserfs/xattr_trusted.c	45
-rw-r--r--	fs/reiserfs/xattr_user.c	31
-rw-r--r--	fs/seq_file.c	2
-rw-r--r--	fs/sysfs/bin.c	8
-rw-r--r--	fs/ubifs/file.c	9
-rw-r--r--	fs/xfs/linux-2.6/xfs_file.c	4
138 files changed, 8568 insertions(+), 7127 deletions(-)
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 7578c1ab9e0b..8630615e57fe 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -146,7 +146,6 @@ int afs_proc_init(void)
 	proc_afs = proc_mkdir("fs/afs", NULL);
 	if (!proc_afs)
 		goto error_dir;
-	proc_afs->owner = THIS_MODULE;
 
 	p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
 	if (!p)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index a76803108d06..b7ff33c63101 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -186,6 +186,8 @@ int autofs4_expire_wait(struct dentry *dentry);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *,
 			struct autofs_packet_expire __user *);
+int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			    struct autofs_sb_info *sbi, int when);
 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *, int __user *);
 struct dentry *autofs4_expire_direct(struct super_block *sb,
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 025e105bffea..9e5ae8a4f5c8 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -525,40 +525,13 @@ static int autofs_dev_ioctl_expire(struct file *fp,
 				      struct autofs_sb_info *sbi,
 				      struct autofs_dev_ioctl *param)
 {
-	struct dentry *dentry;
 	struct vfsmount *mnt;
-	int err = -EAGAIN;
 	int how;
 
 	how = param->expire.how;
 	mnt = fp->f_path.mnt;
 
-	if (autofs_type_trigger(sbi->type))
-		dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
-	else
-		dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
-
-	if (dentry) {
-		struct autofs_info *ino = autofs4_dentry_ino(dentry);
-
-		/*
-		 * This is synchronous because it makes the daemon a
-		 * little easier
-		 */
-		err = autofs4_wait(sbi, dentry, NFY_EXPIRE);
-
-		spin_lock(&sbi->fs_lock);
-		if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
-			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
-			sbi->sb->s_root->d_mounted++;
-		}
-		ino->flags &= ~AUTOFS_INF_EXPIRING;
-		complete_all(&ino->expire_complete);
-		spin_unlock(&sbi->fs_lock);
-		dput(dentry);
-	}
-
-	return err;
+	return autofs4_do_expire_multi(sbi->sb, mnt, sbi, how);
 }
 
 /* Check if autofs mount point is in use */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index e3bd50776f9e..75f7ddacf7d6 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -478,22 +478,16 @@ int autofs4_expire_run(struct super_block *sb,
 	return ret;
 }
 
-/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
-   more to be done */
-int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
-			struct autofs_sb_info *sbi, int __user *arg)
+int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			    struct autofs_sb_info *sbi, int when)
 {
 	struct dentry *dentry;
 	int ret = -EAGAIN;
-	int do_now = 0;
-
-	if (arg && get_user(do_now, arg))
-		return -EFAULT;
 
 	if (autofs_type_trigger(sbi->type))
-		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
+		dentry = autofs4_expire_direct(sb, mnt, sbi, when);
 	else
-		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
+		dentry = autofs4_expire_indirect(sb, mnt, sbi, when);
 
 	if (dentry) {
 		struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -516,3 +510,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	return ret;
 }
 
+/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
+   more to be done */
+int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			struct autofs_sb_info *sbi, int __user *arg)
+{
+	int do_now = 0;
+
+	if (arg && get_user(do_now, arg))
+		return -EFAULT;
+
+	return autofs4_do_expire_multi(sb, mnt, sbi, do_now);
+}
+
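The comment above spells out the userspace contract: keep calling the expire ioctl until it returns -EAGAIN. Below is a minimal daemon-side sketch of that loop, assuming the stock AUTOFS_IOC_EXPIRE_MULTI ioctl is the entry point that lands in autofs4_expire_multi(); the helper name expire_all() is invented for illustration.

/* hedged sketch (userspace, not kernel code): drive the expire path
 * described above until the kernel reports nothing more to be done */
#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/auto_fs4.h>

static void expire_all(int ioctlfd, int when)
{
	/* the ioctl copies 'when' in with get_user() and then calls
	 * autofs4_do_expire_multi(); failure with EAGAIN means done */
	while (ioctl(ioctlfd, AUTOFS_IOC_EXPIRE_MULTI, &when) == 0)
		;
	if (errno != EAGAIN)
		perror("expire");
}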
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 74b1469a9504..e383bf0334f1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -485,22 +485,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
-	expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
-	if (expiring) {
-		/*
-		 * If we are racing with expire the request might not
-		 * be quite complete but the directory has been removed
-		 * so it must have been successful, so just wait for it.
-		 */
-		ino = autofs4_dentry_ino(expiring);
-		autofs4_expire_wait(expiring);
-		spin_lock(&sbi->lookup_lock);
-		if (!list_empty(&ino->expiring))
-			list_del_init(&ino->expiring);
-		spin_unlock(&sbi->lookup_lock);
-		dput(expiring);
-	}
-
 	unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
 	if (unhashed)
 		dentry = unhashed;
@@ -538,14 +522,31 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	}
 
 	if (!oz_mode) {
+		mutex_unlock(&dir->i_mutex);
+		expiring = autofs4_lookup_expiring(sbi,
+						   dentry->d_parent,
+						   &dentry->d_name);
+		if (expiring) {
+			/*
+			 * If we are racing with expire the request might not
+			 * be quite complete but the directory has been removed
+			 * so it must have been successful, so just wait for it.
+			 */
+			ino = autofs4_dentry_ino(expiring);
+			autofs4_expire_wait(expiring);
+			spin_lock(&sbi->lookup_lock);
+			if (!list_empty(&ino->expiring))
+				list_del_init(&ino->expiring);
+			spin_unlock(&sbi->lookup_lock);
+			dput(expiring);
+		}
+
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 		spin_unlock(&dentry->d_lock);
-		if (dentry->d_op && dentry->d_op->d_revalidate) {
-			mutex_unlock(&dir->i_mutex);
+		if (dentry->d_op && dentry->d_op->d_revalidate)
 			(dentry->d_op->d_revalidate)(dentry, nd);
 			mutex_lock(&dir->i_mutex);
-		}
 	}
 
 	/*
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d2cf5a54a4b8..9adf5e4f7e96 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
 	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
-	   compression.o
+	   compression.o delayed-ref.o
 else
 
 # Normal Makefile
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 72677ce2b74f..b30986f00b9d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,12 @@ struct btrfs_inode {
 	 */
 	struct list_head delalloc_inodes;
 
+	/*
+	 * list for tracking inodes that must be sent to disk before a
+	 * rename or truncate commit
+	 */
+	struct list_head ordered_operations;
+
 	/* the space_info for where this inode's data allocations are done */
 	struct btrfs_space_info *space_info;
 
@@ -86,12 +92,6 @@ struct btrfs_inode {
 	 */
 	u64 logged_trans;
 
-	/*
-	 * trans that last made a change that should be fully fsync'd.  This
-	 * gets reset to zero each time the inode is logged
-	 */
-	u64 log_dirty_trans;
-
 	/* total number of bytes pending delalloc, used by stat to calc the
 	 * real block usage of the file
 	 */
@@ -121,6 +121,25 @@ struct btrfs_inode {
 	/* the start of block group preferred for allocations. */
 	u64 block_group;
 
+	/* the fsync log has some corner cases that mean we have to check
+	 * directories to see if any unlinks have been done before
+	 * the directory was logged.  See tree-log.c for all the
+	 * details
+	 */
+	u64 last_unlink_trans;
+
+	/*
+	 * ordered_data_close is set by truncate when a file that used
+	 * to have good data has been truncated to zero.  When it is set
+	 * the btrfs file release call will add this inode to the
+	 * ordered operations list so that we make sure to flush out any
+	 * new data the application may have written before commit.
+	 *
+	 * yes, its silly to have a single bitflag, but we might grow more
+	 * of these.
+	 */
+	unsigned ordered_data_close:1;
+
 	struct inode vfs_inode;
 };
 
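The two comment blocks above describe a protocol more than a data structure: truncate-to-zero latches ordered_data_close, and the release path consumes the latch to decide whether the inode still needs a pre-commit flush. A toy, userspace-only model of that intent follows, with every name fabricated; the real consumers land in fs/btrfs/file.c and fs/btrfs/inode.c in this series.

/* toy model (not kernel code) of the ordered_data_close latch */
#include <stdio.h>

struct toy_inode {
	long bytes_since_truncate;
	unsigned ordered_data_close:1;	/* single bitflag, as in the diff */
};

static void toy_truncate_to_zero(struct toy_inode *i)
{
	i->bytes_since_truncate = 0;
	i->ordered_data_close = 1;	/* file used to have good data */
}

static void toy_release(struct toy_inode *i)
{
	if (i->ordered_data_close) {
		i->ordered_data_close = 0;
		printf("queue on ordered operations list, flush %ld bytes\n",
		       i->bytes_since_truncate);
	}
}

int main(void)
{
	struct toy_inode i = { 0, 0 };

	toy_truncate_to_zero(&i);
	i.bytes_since_truncate = 4096;	/* app rewrites the file */
	toy_release(&i);		/* close() flushes before commit */
	return 0;
}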
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 37f31b5529aa..dbb724124633 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -254,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
  * empty_size -- a hint that you plan on doing more cow.  This is the size in
  * bytes the allocator should try to find free next to the block it returns.
  * This is just a hint and may be ignored by the allocator.
- *
- * prealloc_dest -- if you have already reserved a destination for the cow,
- * this uses that block instead of allocating a new one.
- * btrfs_alloc_reserved_extent is used to finish the allocation.
  */
 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct extent_buffer *buf,
 			     struct extent_buffer *parent, int parent_slot,
 			     struct extent_buffer **cow_ret,
-			     u64 search_start, u64 empty_size,
-			     u64 prealloc_dest)
+			     u64 search_start, u64 empty_size)
 {
 	u64 parent_start;
 	struct extent_buffer *cow;
@@ -291,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(buf);
 	nritems = btrfs_header_nritems(buf);
 
-	if (prealloc_dest) {
-		struct btrfs_key ins;
-
-		ins.objectid = prealloc_dest;
-		ins.offset = buf->len;
-		ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-		ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
-						  root->root_key.objectid,
-						  trans->transid, level, &ins);
-		BUG_ON(ret);
-		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
-					    buf->len, level);
-	} else {
-		cow = btrfs_alloc_free_block(trans, root, buf->len,
-					     parent_start,
-					     root->root_key.objectid,
-					     trans->transid, level,
-					     search_start, empty_size);
-	}
+	cow = btrfs_alloc_free_block(trans, root, buf->len,
+				     parent_start, root->root_key.objectid,
+				     trans->transid, level,
+				     search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -413,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, struct extent_buffer *buf,
 		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest)
+		    struct extent_buffer **cow_ret)
 {
 	u64 search_start;
 	int ret;
@@ -436,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	    btrfs_header_owner(buf) == root->root_key.objectid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 		*cow_ret = buf;
-		WARN_ON(prealloc_dest);
 		return 0;
 	}
 
@@ -447,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	btrfs_set_lock_blocking(buf);
 
 	ret = __btrfs_cow_block(trans, root, buf, parent,
-				 parent_slot, cow_ret, search_start, 0,
-				 prealloc_dest);
+				 parent_slot, cow_ret, search_start, 0);
 	return ret;
 }
 
@@ -617,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		err = __btrfs_cow_block(trans, root, cur, parent, i,
 					&cur, search_start,
 					min(16 * blocksize,
-					    (end_slot - i) * blocksize), 0);
+					    (end_slot - i) * blocksize));
 		if (err) {
 			btrfs_tree_unlock(cur);
 			free_extent_buffer(cur);
@@ -937,7 +914,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		BUG_ON(!child);
 		btrfs_tree_lock(child);
 		btrfs_set_lock_blocking(child);
-		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
 		BUG_ON(ret);
 
 		spin_lock(&root->node_lock);
@@ -945,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		spin_unlock(&root->node_lock);
 
 		ret = btrfs_update_extent_ref(trans, root, child->start,
+					      child->len,
 					      mid->start, child->start,
 					      root->root_key.objectid,
 					      trans->transid, level - 1);
@@ -971,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
+	if (trans->transaction->delayed_refs.flushing &&
+	    btrfs_header_nritems(mid) > 2)
+		return 0;
+
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;
 
@@ -979,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(left);
 		btrfs_set_lock_blocking(left);
 		wret = btrfs_cow_block(trans, root, left,
-				       parent, pslot - 1, &left, 0);
+				       parent, pslot - 1, &left);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -990,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		btrfs_tree_lock(right);
 		btrfs_set_lock_blocking(right);
 		wret = btrfs_cow_block(trans, root, right,
-				       parent, pslot + 1, &right, 0);
+				       parent, pslot + 1, &right);
 		if (wret) {
 			ret = wret;
 			goto enospc;
@@ -1171,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 			wret = 1;
 		} else {
 			ret = btrfs_cow_block(trans, root, left, parent,
-					      pslot - 1, &left, 0);
+					      pslot - 1, &left);
 			if (ret)
 				wret = 1;
 			else {
@@ -1222,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 		} else {
 			ret = btrfs_cow_block(trans, root, right,
 					      parent, pslot + 1,
-					      &right, 0);
+					      &right);
 			if (ret)
 				wret = 1;
 			else {
@@ -1492,7 +1474,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	u8 lowest_level = 0;
 	u64 blocknr;
 	u64 gen;
-	struct btrfs_key prealloc_block;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1501,8 +1482,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (ins_len < 0)
 		lowest_unlock = 2;
 
-	prealloc_block.objectid = 0;
-
 again:
 	if (p->skip_locking)
 		b = btrfs_root_node(root);
@@ -1529,44 +1508,11 @@ again:
 		    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
 			goto cow_done;
 		}
-
-		/* ok, we have to cow, is our old prealloc the right
-		 * size?
-		 */
-		if (prealloc_block.objectid &&
-		    prealloc_block.offset != b->len) {
-			btrfs_release_path(root, p);
-			btrfs_free_reserved_extent(root,
-					   prealloc_block.objectid,
-					   prealloc_block.offset);
-			prealloc_block.objectid = 0;
-			goto again;
-		}
-
-		/*
-		 * for higher level blocks, try not to allocate blocks
-		 * with the block and the parent locks held.
-		 */
-		if (level > 0 && !prealloc_block.objectid) {
-			u32 size = b->len;
-			u64 hint = b->start;
-
-			btrfs_release_path(root, p);
-			ret = btrfs_reserve_extent(trans, root,
-						   size, size, 0,
-						   hint, (u64)-1,
-						   &prealloc_block, 0);
-			BUG_ON(ret);
-			goto again;
-		}
-
 		btrfs_set_path_blocking(p);
 
 		wret = btrfs_cow_block(trans, root, b,
 				       p->nodes[level + 1],
-				       p->slots[level + 1],
-				       &b, prealloc_block.objectid);
-		prealloc_block.objectid = 0;
+				       p->slots[level + 1], &b);
 		if (wret) {
 			free_extent_buffer(b);
 			ret = wret;
@@ -1742,12 +1688,8 @@ done:
 	 * we don't really know what they plan on doing with the path
 	 * from here on, so for now just mark it as blocking
 	 */
-	btrfs_set_path_blocking(p);
-	if (prealloc_block.objectid) {
-		btrfs_free_reserved_extent(root,
-					   prealloc_block.objectid,
-					   prealloc_block.offset);
-	}
+	if (!p->leave_spinning)
+		btrfs_set_path_blocking(p);
 	return ret;
 }
 
@@ -1768,7 +1710,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
 	int ret;
 
 	eb = btrfs_lock_root_node(root);
-	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
+	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
 	BUG_ON(ret);
 
 	btrfs_set_lock_blocking(eb);
@@ -1826,7 +1768,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
 		}
 
 		ret = btrfs_cow_block(trans, root, eb, parent, slot,
-				      &eb, 0);
+				      &eb);
 		BUG_ON(ret);
 
 		if (root->root_key.objectid ==
@@ -2139,7 +2081,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	spin_unlock(&root->node_lock);
 
 	ret = btrfs_update_extent_ref(trans, root, lower->start,
-				      lower->start, c->start,
+				      lower->len, lower->start, c->start,
 				      root->root_key.objectid,
 				      trans->transid, level - 1);
 	BUG_ON(ret);
@@ -2221,7 +2163,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 		ret = insert_new_root(trans, root, path, level + 1);
 		if (ret)
 			return ret;
-	} else {
+	} else if (!trans->transaction->delayed_refs.flushing) {
 		ret = push_nodes_for_insert(trans, root, path, level);
 		c = path->nodes[level];
 		if (!ret && btrfs_header_nritems(c) <
@@ -2329,66 +2271,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
 	return ret;
 }
 
-/*
- * push some data in the path leaf to the right, trying to free up at
- * least data_size bytes.  returns zero if the push worked, nonzero otherwise
- *
- * returns 1 if the push failed because the other node didn't have enough
- * room, 0 if everything worked out and < 0 if there were major errors.
- */
-static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+				      struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      int data_size, int empty,
+				      struct extent_buffer *right,
+				      int free_space, u32 left_nritems)
 {
 	struct extent_buffer *left = path->nodes[0];
-	struct extent_buffer *right;
-	struct extent_buffer *upper;
+	struct extent_buffer *upper = path->nodes[1];
 	struct btrfs_disk_key disk_key;
 	int slot;
 	u32 i;
-	int free_space;
 	int push_space = 0;
 	int push_items = 0;
 	struct btrfs_item *item;
-	u32 left_nritems;
 	u32 nr;
 	u32 right_nritems;
 	u32 data_end;
 	u32 this_item_size;
 	int ret;
 
-	slot = path->slots[1];
-	if (!path->nodes[1])
-		return 1;
-
-	upper = path->nodes[1];
-	if (slot >= btrfs_header_nritems(upper) - 1)
-		return 1;
-
-	btrfs_assert_tree_locked(path->nodes[1]);
-
-	right = read_node_slot(root, upper, slot + 1);
-	btrfs_tree_lock(right);
-	btrfs_set_lock_blocking(right);
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, right, upper,
-			      slot + 1, &right, 0);
-	if (ret)
-		goto out_unlock;
-
-	free_space = btrfs_leaf_free_space(root, right);
-	if (free_space < data_size)
-		goto out_unlock;
-
-	left_nritems = btrfs_header_nritems(left);
-	if (left_nritems == 0)
-		goto out_unlock;
-
 	if (empty)
 		nr = 0;
 	else
@@ -2397,6 +2300,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	if (path->slots[0] >= left_nritems)
 		push_space += data_size;
 
+	slot = path->slots[1];
 	i = left_nritems - 1;
 	while (i >= nr) {
 		item = btrfs_item_nr(left, i);
@@ -2528,24 +2432,82 @@ out_unlock:
 }
 
 /*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * returns 1 if the push failed because the other node didn't have enough
+ * room, 0 if everything worked out and < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+			   *root, struct btrfs_path *path, int data_size,
+			   int empty)
+{
+	struct extent_buffer *left = path->nodes[0];
+	struct extent_buffer *right;
+	struct extent_buffer *upper;
+	int slot;
+	int free_space;
+	u32 left_nritems;
+	int ret;
+
+	if (!path->nodes[1])
+		return 1;
+
+	slot = path->slots[1];
+	upper = path->nodes[1];
+	if (slot >= btrfs_header_nritems(upper) - 1)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	right = read_node_slot(root, upper, slot + 1);
+	btrfs_tree_lock(right);
+	btrfs_set_lock_blocking(right);
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, right, upper,
+			      slot + 1, &right);
+	if (ret)
+		goto out_unlock;
+
+	free_space = btrfs_leaf_free_space(root, right);
+	if (free_space < data_size)
+		goto out_unlock;
+
+	left_nritems = btrfs_header_nritems(left);
+	if (left_nritems == 0)
+		goto out_unlock;
+
+	return __push_leaf_right(trans, root, path, data_size, empty,
+				 right, free_space, left_nritems);
+out_unlock:
+	btrfs_tree_unlock(right);
+	free_extent_buffer(right);
+	return 1;
+}
+
+/*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
  */
-static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root,
+				     struct btrfs_path *path, int data_size,
+				     int empty, struct extent_buffer *left,
+				     int free_space, int right_nritems)
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *right = path->nodes[0];
-	struct extent_buffer *left;
 	int slot;
 	int i;
-	int free_space;
 	int push_space = 0;
 	int push_items = 0;
 	struct btrfs_item *item;
 	u32 old_left_nritems;
-	u32 right_nritems;
 	u32 nr;
 	int ret = 0;
 	int wret;
@@ -2553,41 +2515,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 	u32 old_left_item_size;
 
 	slot = path->slots[1];
-	if (slot == 0)
-		return 1;
-	if (!path->nodes[1])
-		return 1;
-
-	right_nritems = btrfs_header_nritems(right);
-	if (right_nritems == 0)
-		return 1;
-
-	btrfs_assert_tree_locked(path->nodes[1]);
-
-	left = read_node_slot(root, path->nodes[1], slot - 1);
-	btrfs_tree_lock(left);
-	btrfs_set_lock_blocking(left);
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
-
-	/* cow and double check */
-	ret = btrfs_cow_block(trans, root, left,
-			      path->nodes[1], slot - 1, &left, 0);
-	if (ret) {
-		/* we hit -ENOSPC, but it isn't fatal here */
-		ret = 1;
-		goto out;
-	}
-
-	free_space = btrfs_leaf_free_space(root, left);
-	if (free_space < data_size) {
-		ret = 1;
-		goto out;
-	}
 
 	if (empty)
 		nr = right_nritems;
@@ -2755,6 +2682,154 @@ out:
 }
 
 /*
+ * push some data in the path leaf to the left, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+			  *root, struct btrfs_path *path, int data_size,
+			  int empty)
+{
+	struct extent_buffer *right = path->nodes[0];
+	struct extent_buffer *left;
+	int slot;
+	int free_space;
+	u32 right_nritems;
+	int ret = 0;
+
+	slot = path->slots[1];
+	if (slot == 0)
+		return 1;
+	if (!path->nodes[1])
+		return 1;
+
+	right_nritems = btrfs_header_nritems(right);
+	if (right_nritems == 0)
+		return 1;
+
+	btrfs_assert_tree_locked(path->nodes[1]);
+
+	left = read_node_slot(root, path->nodes[1], slot - 1);
+	btrfs_tree_lock(left);
+	btrfs_set_lock_blocking(left);
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	/* cow and double check */
+	ret = btrfs_cow_block(trans, root, left,
+			      path->nodes[1], slot - 1, &left);
+	if (ret) {
+		/* we hit -ENOSPC, but it isn't fatal here */
+		ret = 1;
+		goto out;
+	}
+
+	free_space = btrfs_leaf_free_space(root, left);
+	if (free_space < data_size) {
+		ret = 1;
+		goto out;
+	}
+
+	return __push_leaf_left(trans, root, path, data_size,
+				empty, left, free_space, right_nritems);
+out:
+	btrfs_tree_unlock(left);
+	free_extent_buffer(left);
+	return ret;
+}
+
+/*
+ * split the path's leaf in two, making sure there is at least data_size
+ * available for the resulting leaf level of the path.
+ *
+ * returns 0 if all went well and < 0 on failure.
+ */
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root,
+			       struct btrfs_path *path,
+			       struct extent_buffer *l,
+			       struct extent_buffer *right,
+			       int slot, int mid, int nritems)
+{
+	int data_copy_size;
+	int rt_data_off;
+	int i;
+	int ret = 0;
+	int wret;
+	struct btrfs_disk_key disk_key;
+
+	nritems = nritems - mid;
+	btrfs_set_header_nritems(right, nritems);
+	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+			   btrfs_item_nr_offset(mid),
+			   nritems * sizeof(struct btrfs_item));
+
+	copy_extent_buffer(right, l,
+		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+		     data_copy_size, btrfs_leaf_data(l) +
+		     leaf_data_end(root, l), data_copy_size);
+
+	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+		      btrfs_item_end_nr(l, mid);
+
+	for (i = 0; i < nritems; i++) {
+		struct btrfs_item *item = btrfs_item_nr(right, i);
+		u32 ioff;
+
+		if (!right->map_token) {
+			map_extent_buffer(right, (unsigned long)item,
+					sizeof(struct btrfs_item),
+					&right->map_token, &right->kaddr,
+					&right->map_start, &right->map_len,
+					KM_USER1);
+		}
+
+		ioff = btrfs_item_offset(right, item);
+		btrfs_set_item_offset(right, item, ioff + rt_data_off);
+	}
+
+	if (right->map_token) {
+		unmap_extent_buffer(right, right->map_token, KM_USER1);
+		right->map_token = NULL;
+	}
+
+	btrfs_set_header_nritems(l, mid);
+	ret = 0;
+	btrfs_item_key(right, &disk_key, 0);
+	wret = insert_ptr(trans, root, path, &disk_key, right->start,
+			  path->slots[1] + 1, 1);
+	if (wret)
+		ret = wret;
+
+	btrfs_mark_buffer_dirty(right);
+	btrfs_mark_buffer_dirty(l);
+	BUG_ON(path->slots[0] != slot);
+
+	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	BUG_ON(ret);
+
+	if (mid <= slot) {
+		btrfs_tree_unlock(path->nodes[0]);
+		free_extent_buffer(path->nodes[0]);
+		path->nodes[0] = right;
+		path->slots[0] -= mid;
+		path->slots[1] += 1;
+	} else {
+		btrfs_tree_unlock(right);
+		free_extent_buffer(right);
+	}
+
+	BUG_ON(path->slots[0] < 0);
+
+	return ret;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2771,17 +2846,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
 	int mid;
 	int slot;
 	struct extent_buffer *right;
-	int data_copy_size;
-	int rt_data_off;
-	int i;
 	int ret = 0;
 	int wret;
 	int double_split;
 	int num_doubles = 0;
-	struct btrfs_disk_key disk_key;
 
 	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
+	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
+	    !trans->transaction->delayed_refs.flushing) {
 		wret = push_leaf_right(trans, root, path, data_size, 0);
 		if (wret < 0)
 			return wret;
@@ -2830,11 +2902,14 @@ again:
 	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
 			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
 			    BTRFS_UUID_SIZE);
+
 	if (mid <= slot) {
 		if (nritems == 1 ||
 		    leaf_space_used(l, mid, nritems - mid) + data_size >
 		    BTRFS_LEAF_DATA_SIZE(root)) {
 			if (slot >= nritems) {
+				struct btrfs_disk_key disk_key;
+
 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
 				btrfs_set_header_nritems(right, 0);
 				wret = insert_ptr(trans, root, path,
@@ -2862,6 +2937,8 @@ again:
 		if (leaf_space_used(l, 0, mid) + data_size >
 		    BTRFS_LEAF_DATA_SIZE(root)) {
 			if (!extend && data_size && slot == 0) {
+				struct btrfs_disk_key disk_key;
+
 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
 				btrfs_set_header_nritems(right, 0);
 				wret = insert_ptr(trans, root, path,
@@ -2894,76 +2971,16 @@ again:
 			}
 		}
 	}
-	nritems = nritems - mid;
-	btrfs_set_header_nritems(right, nritems);
-	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
-
-	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
-			   btrfs_item_nr_offset(mid),
-			   nritems * sizeof(struct btrfs_item));
-
-	copy_extent_buffer(right, l,
-		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
-		     data_copy_size, btrfs_leaf_data(l) +
-		     leaf_data_end(root, l), data_copy_size);
-
-	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
-		      btrfs_item_end_nr(l, mid);
-
-	for (i = 0; i < nritems; i++) {
-		struct btrfs_item *item = btrfs_item_nr(right, i);
-		u32 ioff;
-
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
-		ioff = btrfs_item_offset(right, item);
-		btrfs_set_item_offset(right, item, ioff + rt_data_off);
-	}
-
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
-	btrfs_set_header_nritems(l, mid);
-	ret = 0;
-	btrfs_item_key(right, &disk_key, 0);
-	wret = insert_ptr(trans, root, path, &disk_key, right->start,
-			  path->slots[1] + 1, 1);
-	if (wret)
-		ret = wret;
-
-	btrfs_mark_buffer_dirty(right);
-	btrfs_mark_buffer_dirty(l);
-	BUG_ON(path->slots[0] != slot);
 
-	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
 	BUG_ON(ret);
 
-	if (mid <= slot) {
-		btrfs_tree_unlock(path->nodes[0]);
-		free_extent_buffer(path->nodes[0]);
-		path->nodes[0] = right;
-		path->slots[0] -= mid;
-		path->slots[1] += 1;
-	} else {
-		btrfs_tree_unlock(right);
-		free_extent_buffer(right);
-	}
-
-	BUG_ON(path->slots[0] < 0);
-
 	if (double_split) {
 		BUG_ON(num_doubles != 0);
 		num_doubles++;
 		goto again;
 	}
+
 	return ret;
 }
 
@@ -3021,26 +3038,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
 		return -EAGAIN;
 	}
 
+	btrfs_set_path_blocking(path);
 	ret = split_leaf(trans, root, &orig_key, path,
 			 sizeof(struct btrfs_item), 1);
 	path->keep_locks = 0;
 	BUG_ON(ret);
 
+	btrfs_unlock_up_safe(path, 1);
+	leaf = path->nodes[0];
+	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+
+split:
 	/*
 	 * make sure any changes to the path from split_leaf leave it
 	 * in a blocking state
 	 */
 	btrfs_set_path_blocking(path);
 
-	leaf = path->nodes[0];
-	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-
-split:
 	item = btrfs_item_nr(leaf, path->slots[0]);
 	orig_offset = btrfs_item_offset(leaf, item);
 	item_size = btrfs_item_size(leaf, item);
 
-
 	buf = kmalloc(item_size, GFP_NOFS);
 	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
 			   path->slots[0]), item_size);
@@ -3445,39 +3463,27 @@ out:
 }
 
 /*
- * Given a key and some data, insert items into the tree.
- * This does all the path init required, making room in the tree if needed.
+ * this is a helper for btrfs_insert_empty_items, the main goal here is
+ * to save stack depth by doing the bulk of the work in a function
+ * that doesn't call btrfs_search_slot
  */
-int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path,
-			    struct btrfs_key *cpu_key, u32 *data_size,
-			    int nr)
+static noinline_for_stack int
+setup_items_for_insert(struct btrfs_trans_handle *trans,
+			struct btrfs_root *root, struct btrfs_path *path,
+			struct btrfs_key *cpu_key, u32 *data_size,
+			u32 total_data, u32 total_size, int nr)
 {
-	struct extent_buffer *leaf;
 	struct btrfs_item *item;
-	int ret = 0;
-	int slot;
-	int slot_orig;
 	int i;
 	u32 nritems;
-	u32 total_size = 0;
-	u32 total_data = 0;
 	unsigned int data_end;
 	struct btrfs_disk_key disk_key;
+	int ret;
+	struct extent_buffer *leaf;
+	int slot;
 
-	for (i = 0; i < nr; i++)
-		total_data += data_size[i];
-
-	total_size = total_data + (nr * sizeof(struct btrfs_item));
-	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
-	if (ret == 0)
-		return -EEXIST;
-	if (ret < 0)
-		goto out;
-
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
+	slot = path->slots[0];
 
 	nritems = btrfs_header_nritems(leaf);
 	data_end = leaf_data_end(root, leaf);
@@ -3489,9 +3495,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 		BUG();
 	}
 
-	slot = path->slots[0];
-	BUG_ON(slot < 0);
-
 	if (slot != nritems) {
 		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
 
@@ -3547,21 +3550,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 		data_end -= data_size[i];
 		btrfs_set_item_size(leaf, item, data_size[i]);
 	}
+
 	btrfs_set_header_nritems(leaf, nritems + nr);
-	btrfs_mark_buffer_dirty(leaf);
 
 	ret = 0;
 	if (slot == 0) {
+		struct btrfs_disk_key disk_key;
 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
 		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
 	}
+	btrfs_unlock_up_safe(path, 1);
+	btrfs_mark_buffer_dirty(leaf);
 
 	if (btrfs_leaf_free_space(root, leaf) < 0) {
 		btrfs_print_leaf(root, leaf);
 		BUG();
 	}
+	return ret;
+}
+
+/*
+ * Given a key and some data, insert items into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    struct btrfs_path *path,
+			    struct btrfs_key *cpu_key, u32 *data_size,
+			    int nr)
+{
+	struct extent_buffer *leaf;
+	int ret = 0;
+	int slot;
+	int i;
+	u32 total_size = 0;
+	u32 total_data = 0;
+
+	for (i = 0; i < nr; i++)
+		total_data += data_size[i];
+
+	total_size = total_data + (nr * sizeof(struct btrfs_item));
+	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+	if (ret == 0)
+		return -EEXIST;
+	if (ret < 0)
+		goto out;
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	BUG_ON(slot < 0);
+
+	ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
+				     total_data, total_size, nr);
+
 out:
-	btrfs_unlock_up_safe(path, 1);
 	return ret;
 }
 
@@ -3749,7 +3791,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	}
 
 	/* delete the leaf if it is mostly empty */
-	if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
+	if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 &&
+	    !trans->transaction->delayed_refs.flushing) {
 		/* push_leaf_left fixes the path.
 		 * make sure the path still points to our leaf
 		 * for possible call to del_ptr below
@@ -3757,6 +3800,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		slot = path->slots[1];
 		extent_buffer_get(leaf);
 
+		btrfs_set_path_blocking(path);
 		wret = push_leaf_left(trans, root, path, 1, 1);
 		if (wret < 0 && wret != -ENOSPC)
 			ret = wret;
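Several of the ctree.c hunks above share one refactoring pattern: btrfs_insert_empty_items() and the two leaf-push routines become thin wrappers, and the bulky locals move into noinline helpers (setup_items_for_insert(), __push_leaf_left(), __push_leaf_right()) so those frames are never live across btrfs_search_slot()'s deep call chain. Here is a freestanding illustration of the trick, with every name invented; it is a sketch of the pattern, not btrfs code.

/* stack-depth pattern: the searching entry point keeps a small frame;
 * the noinline helper owns the big locals and runs only after the
 * deep search has unwound */
#include <stdio.h>
#include <string.h>

#ifdef __GNUC__
#define noinline __attribute__((noinline))
#else
#define noinline
#endif

struct item { int key; char payload[16]; };

static noinline int setup_items(struct item *dst, int key)
{
	char scratch[512];		/* bulky local, lives in this frame only */

	memset(scratch, 0, sizeof(scratch));
	snprintf(dst->payload, sizeof(dst->payload), "k%d", key);
	dst->key = key;
	return 0;
}

static int search_slot(int key)		/* stands in for the deep search */
{
	return key < 0 ? -1 : 0;
}

int insert_item(struct item *dst, int key)
{
	int ret = search_slot(key);	/* small frame across this call */

	if (ret)
		return ret;
	return setup_items(dst, key);	/* big frame only after the search */
}

int main(void)
{
	struct item it;

	if (insert_item(&it, 42) == 0)
		printf("%d %s\n", it.key, it.payload);
	return 0;
}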
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e1d4e30e9d8..9417713542a2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAX_LEVEL 8
 
+/*
+ * files bigger than this get some pre-flushing when they are added
+ * to the ordered operations list.  That way we limit the total
+ * work done by the commit
+ */
+#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
+
 /* holds pointers to all of the tree roots */
 #define BTRFS_ROOT_TREE_OBJECTID 1ULL
 
@@ -401,15 +408,16 @@ struct btrfs_path {
 	int locks[BTRFS_MAX_LEVEL];
 	int reada;
 	/* keep some upper locks as we walk down */
-	int keep_locks;
-	int skip_locking;
 	int lowest_level;
 
 	/*
 	 * set by btrfs_split_item, tells search_slot to keep all locks
 	 * and to force calls to keep space in the nodes
 	 */
-	int search_for_split;
+	unsigned int search_for_split:1;
+	unsigned int keep_locks:1;
+	unsigned int skip_locking:1;
+	unsigned int leave_spinning:1;
 };
 
 /*
@@ -688,15 +696,18 @@ struct btrfs_fs_info {
 	struct rb_root block_group_cache_tree;
 
 	struct extent_io_tree pinned_extents;
-	struct extent_io_tree pending_del;
-	struct extent_io_tree extent_ins;
 
 	/* logical->physical extent mapping */
 	struct btrfs_mapping_tree mapping_tree;
 
 	u64 generation;
 	u64 last_trans_committed;
-	u64 last_trans_new_blockgroup;
+
+	/*
+	 * this is updated to the current trans every time a full commit
+	 * is required instead of the faster short fsync log commits
+	 */
+	u64 last_trans_log_full_commit;
 	u64 open_ioctl_trans;
 	unsigned long mount_opt;
 	u64 max_extent;
@@ -717,12 +728,21 @@ struct btrfs_fs_info {
 	struct mutex tree_log_mutex;
 	struct mutex transaction_kthread_mutex;
 	struct mutex cleaner_mutex;
-	struct mutex extent_ins_mutex;
 	struct mutex pinned_mutex;
 	struct mutex chunk_mutex;
 	struct mutex drop_mutex;
 	struct mutex volume_mutex;
 	struct mutex tree_reloc_mutex;
+
+	/*
+	 * this protects the ordered operations list only while we are
+	 * processing all of the entries on it.  This way we make
+	 * sure the commit code doesn't find the list temporarily empty
+	 * because another function happens to be doing non-waiting preflush
+	 * before jumping into the main commit.
+	 */
+	struct mutex ordered_operations_mutex;
+
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
@@ -737,10 +757,29 @@ struct btrfs_fs_info {
 	 * ordered extents
 	 */
 	spinlock_t ordered_extent_lock;
+
+	/*
+	 * all of the data=ordered extents pending writeback
+	 * these can span multiple transactions and basically include
+	 * every dirty data page that isn't from nodatacow
+	 */
 	struct list_head ordered_extents;
+
+	/*
+	 * all of the inodes that have delalloc bytes.  It is possible for
+	 * this list to be empty even when there is still dirty data=ordered
+	 * extents waiting to finish IO.
+	 */
 	struct list_head delalloc_inodes;
 
 	/*
+	 * special rename and truncate targets that must be on disk before
+	 * we're allowed to commit.  This is basically the ext3 style
+	 * data=ordered list.
+	 */
+	struct list_head ordered_operations;
+
+	/*
 	 * there is a pool of worker threads for checksumming during writes
 	 * and a pool for checksumming after reads.  This is because readers
 	 * can run with FS locks held, and the writers may be waiting for
@@ -781,6 +820,11 @@ struct btrfs_fs_info {
 	atomic_t throttle_gen;
 
 	u64 total_pinned;
+
+	/* protected by the delalloc lock, used to keep from writing
+	 * metadata until there is a nice batch
+	 */
+	u64 dirty_metadata_bytes;
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
@@ -1704,18 +1748,15 @@ static inline struct dentry *fdentry(struct file *file)
 }
 
 /* extent-tree.c */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs);
 int btrfs_update_pinned_extents(struct btrfs_root *root,
 				u64 bytenr, u64 num, int pin);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 objectid, u64 bytenr);
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
@@ -1777,7 +1818,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 u64 root_objectid, u64 ref_generation,
 			 u64 owner_objectid);
 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
+			    struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 			    u64 orig_parent, u64 parent,
 			    u64 root_objectid, u64 ref_generation,
 			    u64 owner_objectid);
@@ -1838,7 +1879,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, struct extent_buffer *buf,
 		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest);
+		    struct extent_buffer **cow_ret);
 int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root,
 		    struct extent_buffer *buf,
@@ -2060,7 +2101,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 unsigned long btrfs_force_ra(struct address_space *mapping,
 			     struct file_ra_state *ra, struct file *file,
 			     pgoff_t offset, pgoff_t last_index);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
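
The prototype churn above is the visible edge of the new delayed-refs machinery: btrfs_extent_post_op() and the extent-tree.c version of btrfs_lookup_extent_ref() go away, and btrfs_run_delayed_refs() becomes the entry point for flushing queued reference updates. As a minimal sketch of the intended call pattern only; the helper name below is hypothetical, and the meaning of the count argument should be checked against btrfs_run_delayed_refs() itself:

    /* hypothetical helper: flush queued ref updates before commit.
     * count == 0 is assumed here to mean "run everything queued so
     * far"; see btrfs_run_delayed_refs() for the real convention */
    static int flush_all_delayed_refs(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root)
    {
            return btrfs_run_delayed_refs(trans, root, 0);
    }
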
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
new file mode 100644
index 000000000000..cbf7dc8ae3ec
--- /dev/null
+++ b/fs/btrfs/delayed-ref.c
@@ -0,0 +1,669 @@
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/sort.h>
21#include <linux/ftrace.h>
22#include "ctree.h"
23#include "delayed-ref.h"
24#include "transaction.h"
25
26/*
27 * delayed back reference update tracking. For subvolume trees
28 * we queue up extent allocations and backref maintenance for
29 * delayed processing. This avoids deep call chains where we
30 * add extents in the middle of btrfs_search_slot, and it allows
31 * us to buffer up frequently modified backrefs in an rb tree instead
32 * of hammering updates on the extent allocation tree.
33 *
34 * Right now this code is only used for reference counted trees, but
35 * the long term goal is to get rid of the similar code for delayed
36 * extent tree modifications.
37 */
38
39/*
40 * entries in the rb tree are ordered by the byte number of the extent
41 * and by the byte number of the parent block.
42 */
43static int comp_entry(struct btrfs_delayed_ref_node *ref,
44 u64 bytenr, u64 parent)
45{
46 if (bytenr < ref->bytenr)
47 return -1;
48 if (bytenr > ref->bytenr)
49 return 1;
50 if (parent < ref->parent)
51 return -1;
52 if (parent > ref->parent)
53 return 1;
54 return 0;
55}
56
57/*
58 * insert a new ref into the rbtree. This returns the existing ref
59 * for the same (bytenr,parent) tuple if there is one, or NULL if the
60 * new node was properly inserted.
61 */
62static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
63 u64 bytenr, u64 parent,
64 struct rb_node *node)
65{
66 struct rb_node **p = &root->rb_node;
67 struct rb_node *parent_node = NULL;
68 struct btrfs_delayed_ref_node *entry;
69 int cmp;
70
71 while (*p) {
72 parent_node = *p;
73 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
74 rb_node);
75
76 cmp = comp_entry(entry, bytenr, parent);
77 if (cmp < 0)
78 p = &(*p)->rb_left;
79 else if (cmp > 0)
80 p = &(*p)->rb_right;
81 else
82 return entry;
83 }
84
85 entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
86 rb_link_node(node, parent_node, p);
87 rb_insert_color(node, root);
88 return NULL;
89}
90
91/*
92 * find an entry based on (bytenr,parent). This returns the delayed
93 * ref if it was able to find one, or NULL if nothing was in that spot
94 */
95static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
96 u64 bytenr, u64 parent,
97 struct btrfs_delayed_ref_node **last)
98{
99 struct rb_node *n = root->rb_node;
100 struct btrfs_delayed_ref_node *entry;
101 int cmp;
102
103 while (n) {
104 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
105 WARN_ON(!entry->in_tree);
106 if (last)
107 *last = entry;
108
109 cmp = comp_entry(entry, bytenr, parent);
110 if (cmp < 0)
111 n = n->rb_left;
112 else if (cmp > 0)
113 n = n->rb_right;
114 else
115 return entry;
116 }
117 return NULL;
118}
119
120int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
121 struct btrfs_delayed_ref_head *head)
122{
123 struct btrfs_delayed_ref_root *delayed_refs;
124
125 delayed_refs = &trans->transaction->delayed_refs;
126 assert_spin_locked(&delayed_refs->lock);
127 if (mutex_trylock(&head->mutex))
128 return 0;
129
130 atomic_inc(&head->node.refs);
131 spin_unlock(&delayed_refs->lock);
132
133 mutex_lock(&head->mutex);
134 spin_lock(&delayed_refs->lock);
135 if (!head->node.in_tree) {
136 mutex_unlock(&head->mutex);
137 btrfs_put_delayed_ref(&head->node);
138 return -EAGAIN;
139 }
140 btrfs_put_delayed_ref(&head->node);
141 return 0;
142}
143
144int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
145 struct list_head *cluster, u64 start)
146{
147 int count = 0;
148 struct btrfs_delayed_ref_root *delayed_refs;
149 struct rb_node *node;
150 struct btrfs_delayed_ref_node *ref;
151 struct btrfs_delayed_ref_head *head;
152
153 delayed_refs = &trans->transaction->delayed_refs;
154 if (start == 0) {
155 node = rb_first(&delayed_refs->root);
156 } else {
157 ref = NULL;
158 tree_search(&delayed_refs->root, start, (u64)-1, &ref);
159 if (ref) {
160 struct btrfs_delayed_ref_node *tmp;
161
162 node = rb_prev(&ref->rb_node);
163 while (node) {
164 tmp = rb_entry(node,
165 struct btrfs_delayed_ref_node,
166 rb_node);
167 if (tmp->bytenr < start)
168 break;
169 ref = tmp;
170 node = rb_prev(&ref->rb_node);
171 }
172 node = &ref->rb_node;
173 } else
174 node = rb_first(&delayed_refs->root);
175 }
176again:
177 while (node && count < 32) {
178 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
179 if (btrfs_delayed_ref_is_head(ref)) {
180 head = btrfs_delayed_node_to_head(ref);
181 if (list_empty(&head->cluster)) {
182 list_add_tail(&head->cluster, cluster);
183 delayed_refs->run_delayed_start =
184 head->node.bytenr;
185 count++;
186
187 WARN_ON(delayed_refs->num_heads_ready == 0);
188 delayed_refs->num_heads_ready--;
189 } else if (count) {
190 /* the goal of the clustering is to find extents
191 * that are likely to end up in the same extent
192 * leaf on disk. So, we don't want them spread
193 * all over the tree. Stop now if we've hit
194 * a head that was already in use
195 */
196 break;
197 }
198 }
199 node = rb_next(node);
200 }
201 if (count) {
202 return 0;
203 } else if (start) {
204 /*
205 * we've gone to the end of the rbtree without finding any
206 * clusters. start from the beginning and try again
207 */
208 start = 0;
209 node = rb_first(&delayed_refs->root);
210 goto again;
211 }
212 return 1;
213}
214
215/*
216 * This checks to see if there are any delayed refs in the
217 * rbtree for a given bytenr. It returns one if it finds any
218 * and zero otherwise.
219 *
220 * If it only finds a head node, it returns 0.
221 *
222 * The idea is to use this when deciding if you can safely delete an
223 * extent from the extent allocation tree. There may be a pending
224 * ref in the rbtree that adds or removes references, so as long as this
225 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
226 * allocation tree.
227 */
228int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
229{
230 struct btrfs_delayed_ref_node *ref;
231 struct btrfs_delayed_ref_root *delayed_refs;
232 struct rb_node *prev_node;
233 int ret = 0;
234
235 delayed_refs = &trans->transaction->delayed_refs;
236 spin_lock(&delayed_refs->lock);
237
238 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
239 if (ref) {
240 prev_node = rb_prev(&ref->rb_node);
241 if (!prev_node)
242 goto out;
243 ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
244 rb_node);
245 if (ref->bytenr == bytenr)
246 ret = 1;
247 }
248out:
249 spin_unlock(&delayed_refs->lock);
250 return ret;
251}
252
253/*
254 * helper function to look up the reference count
255 *
256 * the head node for a delayed ref is used to store the sum of all the
257 * reference count modifications queued up in the rbtree. This way you
258 * can check to see what the reference count would be if all of the
259 * delayed refs are processed.
260 */
261int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
262 struct btrfs_root *root, u64 bytenr,
263 u64 num_bytes, u32 *refs)
264{
265 struct btrfs_delayed_ref_node *ref;
266 struct btrfs_delayed_ref_head *head;
267 struct btrfs_delayed_ref_root *delayed_refs;
268 struct btrfs_path *path;
269 struct extent_buffer *leaf;
270 struct btrfs_extent_item *ei;
271 struct btrfs_key key;
272 u32 num_refs;
273 int ret;
274
275 path = btrfs_alloc_path();
276 if (!path)
277 return -ENOMEM;
278
279 key.objectid = bytenr;
280 key.type = BTRFS_EXTENT_ITEM_KEY;
281 key.offset = num_bytes;
282 delayed_refs = &trans->transaction->delayed_refs;
283again:
284 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
285 &key, path, 0, 0);
286 if (ret < 0)
287 goto out;
288
289 if (ret == 0) {
290 leaf = path->nodes[0];
291 ei = btrfs_item_ptr(leaf, path->slots[0],
292 struct btrfs_extent_item);
293 num_refs = btrfs_extent_refs(leaf, ei);
294 } else {
295 num_refs = 0;
296 ret = 0;
297 }
298
299 spin_lock(&delayed_refs->lock);
300 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
301 if (ref) {
302 head = btrfs_delayed_node_to_head(ref);
303 if (mutex_trylock(&head->mutex)) {
304 num_refs += ref->ref_mod;
305 mutex_unlock(&head->mutex);
306 *refs = num_refs;
307 goto out;
308 }
309
310 atomic_inc(&ref->refs);
311 spin_unlock(&delayed_refs->lock);
312
313 btrfs_release_path(root->fs_info->extent_root, path);
314
315 mutex_lock(&head->mutex);
316 mutex_unlock(&head->mutex);
317 btrfs_put_delayed_ref(ref);
318 goto again;
319 } else {
320 *refs = num_refs;
321 }
322out:
323 spin_unlock(&delayed_refs->lock);
324 btrfs_free_path(path);
325 return ret;
326}
327
328/*
329 * helper function to update an extent delayed ref in the
330 * rbtree. existing and update must both have the same
331 * bytenr and parent
332 *
333 * This may free existing if the update cancels out whatever
334 * operation it was doing.
335 */
336static noinline void
337update_existing_ref(struct btrfs_trans_handle *trans,
338 struct btrfs_delayed_ref_root *delayed_refs,
339 struct btrfs_delayed_ref_node *existing,
340 struct btrfs_delayed_ref_node *update)
341{
342 struct btrfs_delayed_ref *existing_ref;
343 struct btrfs_delayed_ref *ref;
344
345 existing_ref = btrfs_delayed_node_to_ref(existing);
346 ref = btrfs_delayed_node_to_ref(update);
347
348 if (ref->pin)
349 existing_ref->pin = 1;
350
351 if (ref->action != existing_ref->action) {
352 /*
353 * this is effectively undoing either an add or a
354 * drop. We decrement the ref_mod, and if it goes
355 * down to zero we just delete the entry without
356 * ever changing the extent allocation tree.
357 */
358 existing->ref_mod--;
359 if (existing->ref_mod == 0) {
360 rb_erase(&existing->rb_node,
361 &delayed_refs->root);
362 existing->in_tree = 0;
363 btrfs_put_delayed_ref(existing);
364 delayed_refs->num_entries--;
365 if (trans->delayed_ref_updates)
366 trans->delayed_ref_updates--;
367 }
368 } else {
369 if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
370 /* if we're adding refs, make sure all the
371 * details match up. The extent could
372 * have been totally freed and reallocated
373 * by a different owner before the delayed
374 * ref entries were removed.
375 */
376 existing_ref->owner_objectid = ref->owner_objectid;
377 existing_ref->generation = ref->generation;
378 existing_ref->root = ref->root;
379 existing->num_bytes = update->num_bytes;
380 }
381 /*
382 * the action on the existing ref matches
383 * the action on the ref we're trying to add.
384 * Bump the ref_mod by one so the backref that
385 * is eventually added/removed has the correct
386 * reference count
387 */
388 existing->ref_mod += update->ref_mod;
389 }
390}
391
392/*
393 * helper function to update the accounting in the head ref
394 * existing and update must have the same bytenr
395 */
396static noinline void
397update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
398 struct btrfs_delayed_ref_node *update)
399{
400 struct btrfs_delayed_ref_head *existing_ref;
401 struct btrfs_delayed_ref_head *ref;
402
403 existing_ref = btrfs_delayed_node_to_head(existing);
404 ref = btrfs_delayed_node_to_head(update);
405
406 if (ref->must_insert_reserved) {
407 /* if the extent was freed and then
408 * reallocated before the delayed ref
409 * entries were processed, we can end up
410 * with an existing head ref without
411 * the must_insert_reserved flag set.
412 * Set it again here
413 */
414 existing_ref->must_insert_reserved = ref->must_insert_reserved;
415
416 /*
417 * update the num_bytes so we make sure the accounting
418 * is done correctly
419 */
420 existing->num_bytes = update->num_bytes;
421
422 }
423
424 /*
425 * update the reference mod on the head to reflect this new operation
426 */
427 existing->ref_mod += update->ref_mod;
428}
429
430/*
431 * helper function to actually insert a delayed ref into the rbtree.
432 * this does all the dirty work in terms of maintaining the correct
433 * overall modification count in the head node and properly dealing
434 * with updating existing nodes as new modifications are queued.
435 */
436static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
437 struct btrfs_delayed_ref_node *ref,
438 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
439 u64 ref_generation, u64 owner_objectid, int action,
440 int pin)
441{
442 struct btrfs_delayed_ref_node *existing;
443 struct btrfs_delayed_ref *full_ref;
444 struct btrfs_delayed_ref_head *head_ref = NULL;
445 struct btrfs_delayed_ref_root *delayed_refs;
446 int count_mod = 1;
447 int must_insert_reserved = 0;
448
449 /*
450 * the head node stores the sum of all the mods, so dropping a ref
451 * should drop the sum in the head node by one.
452 */
453 if (parent == (u64)-1) {
454 if (action == BTRFS_DROP_DELAYED_REF)
455 count_mod = -1;
456 else if (action == BTRFS_UPDATE_DELAYED_HEAD)
457 count_mod = 0;
458 }
459
460 /*
461 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
462 * the reserved accounting when the extent is finally added, or
463 * if a later modification deletes the delayed ref without ever
464 * inserting the extent into the extent allocation tree.
465 * ref->must_insert_reserved is the flag used to record
466 * that accounting mods are required.
467 *
468 * Once we record must_insert_reserved, switch the action to
469 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
470 */
471 if (action == BTRFS_ADD_DELAYED_EXTENT) {
472 must_insert_reserved = 1;
473 action = BTRFS_ADD_DELAYED_REF;
474 } else {
475 must_insert_reserved = 0;
476 }
477
478
479 delayed_refs = &trans->transaction->delayed_refs;
480
481 /* first set the basic ref node struct up */
482 atomic_set(&ref->refs, 1);
483 ref->bytenr = bytenr;
484 ref->parent = parent;
485 ref->ref_mod = count_mod;
486 ref->in_tree = 1;
487 ref->num_bytes = num_bytes;
488
489 if (btrfs_delayed_ref_is_head(ref)) {
490 head_ref = btrfs_delayed_node_to_head(ref);
491 head_ref->must_insert_reserved = must_insert_reserved;
492 INIT_LIST_HEAD(&head_ref->cluster);
493 mutex_init(&head_ref->mutex);
494 } else {
495 full_ref = btrfs_delayed_node_to_ref(ref);
496 full_ref->root = ref_root;
497 full_ref->generation = ref_generation;
498 full_ref->owner_objectid = owner_objectid;
499 full_ref->pin = pin;
500 full_ref->action = action;
501 }
502
503 existing = tree_insert(&delayed_refs->root, bytenr,
504 parent, &ref->rb_node);
505
506 if (existing) {
507 if (btrfs_delayed_ref_is_head(ref))
508 update_existing_head_ref(existing, ref);
509 else
510 update_existing_ref(trans, delayed_refs, existing, ref);
511
512 /*
513 * we've updated the existing ref, free the newly
514 * allocated ref
515 */
516 kfree(ref);
517 } else {
518 if (btrfs_delayed_ref_is_head(ref)) {
519 delayed_refs->num_heads++;
520 delayed_refs->num_heads_ready++;
521 }
522 delayed_refs->num_entries++;
523 trans->delayed_ref_updates++;
524 }
525 return 0;
526}
527
528/*
529 * add a delayed ref to the tree. This does all of the accounting required
530 * to make sure the delayed ref is eventually processed before this
531 * transaction commits.
532 */
533int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
534 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
535 u64 ref_generation, u64 owner_objectid, int action,
536 int pin)
537{
538 struct btrfs_delayed_ref *ref;
539 struct btrfs_delayed_ref_head *head_ref;
540 struct btrfs_delayed_ref_root *delayed_refs;
541 int ret;
542
543 ref = kmalloc(sizeof(*ref), GFP_NOFS);
544 if (!ref)
545 return -ENOMEM;
546
547 /*
548 * the parent = 0 case comes from cases where we don't actually
549 * know the parent yet. It will get updated later via an add/drop
550 * pair.
551 */
552 if (parent == 0)
553 parent = bytenr;
554
555 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
556 if (!head_ref) {
557 kfree(ref);
558 return -ENOMEM;
559 }
560 delayed_refs = &trans->transaction->delayed_refs;
561 spin_lock(&delayed_refs->lock);
562
563 /*
564 * insert both the head node and the new ref without dropping
565 * the spin lock
566 */
567 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
568 (u64)-1, 0, 0, 0, action, pin);
569 BUG_ON(ret);
570
571 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
572 parent, ref_root, ref_generation,
573 owner_objectid, action, pin);
574 BUG_ON(ret);
575 spin_unlock(&delayed_refs->lock);
576 return 0;
577}
578
579/*
580 * this does a simple search for the head node for a given extent.
581 * It must be called with the delayed ref spinlock held, and it returns
582 * the head node if one was found, or NULL if not.
583 */
584struct btrfs_delayed_ref_head *
585btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
586{
587 struct btrfs_delayed_ref_node *ref;
588 struct btrfs_delayed_ref_root *delayed_refs;
589
590 delayed_refs = &trans->transaction->delayed_refs;
591 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
592 if (ref)
593 return btrfs_delayed_node_to_head(ref);
594 return NULL;
595}
596
597/*
598 * add a delayed ref to the tree. This does all of the accounting required
599 * to make sure the delayed ref is eventually processed before this
600 * transaction commits.
601 *
602 * The main point of this call is to add and remove a backreference in a single
603 * shot, taking the lock only once, and only searching for the head node once.
604 *
605 * It is the same as doing a ref add and delete in two separate calls.
606 */
607int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
608 u64 bytenr, u64 num_bytes, u64 orig_parent,
609 u64 parent, u64 orig_ref_root, u64 ref_root,
610 u64 orig_ref_generation, u64 ref_generation,
611 u64 owner_objectid, int pin)
612{
613 struct btrfs_delayed_ref *ref;
614 struct btrfs_delayed_ref *old_ref;
615 struct btrfs_delayed_ref_head *head_ref;
616 struct btrfs_delayed_ref_root *delayed_refs;
617 int ret;
618
619 ref = kmalloc(sizeof(*ref), GFP_NOFS);
620 if (!ref)
621 return -ENOMEM;
622
623 old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
624 if (!old_ref) {
625 kfree(ref);
626 return -ENOMEM;
627 }
628
629 /*
630 * the parent = 0 case comes from cases where we don't actually
631 * know the parent yet. It will get updated later via an add/drop
632 * pair.
633 */
634 if (parent == 0)
635 parent = bytenr;
636 if (orig_parent == 0)
637 orig_parent = bytenr;
638
639 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
640 if (!head_ref) {
641 kfree(ref);
642 kfree(old_ref);
643 return -ENOMEM;
644 }
645 delayed_refs = &trans->transaction->delayed_refs;
646 spin_lock(&delayed_refs->lock);
647
648 /*
649 * insert both the head node and the new ref without dropping
650 * the spin lock
651 */
652 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
653 (u64)-1, 0, 0, 0,
654 BTRFS_UPDATE_DELAYED_HEAD, 0);
655 BUG_ON(ret);
656
657 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
658 parent, ref_root, ref_generation,
659 owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
660 BUG_ON(ret);
661
662 ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
663 orig_parent, orig_ref_root,
664 orig_ref_generation, owner_objectid,
665 BTRFS_DROP_DELAYED_REF, pin);
666 BUG_ON(ret);
667 spin_unlock(&delayed_refs->lock);
668 return 0;
669}
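
To make the lock/retry protocol of btrfs_delayed_ref_lock() concrete, here is a hedged sketch of a caller taking one head pulled from a cluster; the actual ref processing is elided to a comment and lives in extent-tree.c:

    /* sketch only: take one head's mutex, tolerating the -EAGAIN race */
    static int process_one_head(struct btrfs_trans_handle *trans,
                                struct btrfs_delayed_ref_head *head)
    {
            struct btrfs_delayed_ref_root *delayed_refs =
                    &trans->transaction->delayed_refs;

            spin_lock(&delayed_refs->lock);
            if (btrfs_delayed_ref_lock(trans, head) == -EAGAIN) {
                    /* someone ran this head while we slept on the mutex */
                    spin_unlock(&delayed_refs->lock);
                    return 0;
            }
            /* head->mutex is held and the head is still in the rbtree */
            spin_unlock(&delayed_refs->lock);
            /* ... run the queued refs for this bytenr here ... */
            mutex_unlock(&head->mutex);
            return 0;
    }

Note that btrfs_delayed_ref_lock() re-takes delayed_refs->lock before returning, so the caller's lock/unlock pairing stays balanced on both paths.
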
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
new file mode 100644
index 000000000000..3bec2ff0b15c
--- /dev/null
+++ b/fs/btrfs/delayed-ref.h
@@ -0,0 +1,193 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#ifndef __DELAYED_REF__
19#define __DELAYED_REF__
20
21/* these are the possible values of struct btrfs_delayed_ref->action */
22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
25#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
26
27struct btrfs_delayed_ref_node {
28 struct rb_node rb_node;
29
30 /* the starting bytenr of the extent */
31 u64 bytenr;
32
33 /* the parent our backref will point to */
34 u64 parent;
35
36 /* the size of the extent */
37 u64 num_bytes;
38
39 /* ref count on this data structure */
40 atomic_t refs;
41
42 /*
43 * how many refs this entry is adding or deleting. For
44 * head refs, this may be a negative number because it is keeping
45 * track of the total mods done to the reference count.
46 * For individual refs, this will always be a positive number
47 *
48 * It may be more than one, since it is possible for a single
49 * parent to have more than one ref on an extent
50 */
51 int ref_mod;
52
53 /* is this node still in the rbtree? */
54 unsigned int in_tree:1;
55};
56
57/*
58 * the head refs are used to hold a lock on a given extent, which allows us
59 * to make sure that only one process is running the delayed refs
60 * at a time for a single extent. They also store the sum of all the
61 * reference count modifications we've queued up.
62 */
63struct btrfs_delayed_ref_head {
64 struct btrfs_delayed_ref_node node;
65
66 /*
67 * the mutex is held while running the refs, and it is also
68 * held when checking the sum of reference modifications.
69 */
70 struct mutex mutex;
71
72 struct list_head cluster;
73
74 /*
75 * when a new extent is allocated, it is just reserved in memory.
76 * The actual extent isn't inserted into the extent allocation tree
77 * until the delayed ref is processed. must_insert_reserved is
78 * used to flag a delayed ref so the accounting can be updated
79 * when a full insert is done.
80 *
81 * It is possible the extent will be freed before it is ever
82 * inserted into the extent allocation tree. In this case
83 * we need to update the in-ram accounting to properly reflect
84 * that the free has happened.
85 */
86 unsigned int must_insert_reserved:1;
87};
88
89struct btrfs_delayed_ref {
90 struct btrfs_delayed_ref_node node;
91
92 /* the root objectid our ref will point to */
93 u64 root;
94
95 /* the generation for the backref */
96 u64 generation;
97
98 /* owner_objectid of the backref */
99 u64 owner_objectid;
100
101 /* operation done by this entry in the rbtree */
102 u8 action;
103
104 /* if pin == 1, when the extent is freed it will be pinned until
105 * transaction commit
106 */
107 unsigned int pin:1;
108};
109
110struct btrfs_delayed_ref_root {
111 struct rb_root root;
112
113 /* this spin lock protects the rbtree and the entries inside */
114 spinlock_t lock;
115
116 /* how many delayed ref updates we've queued, used by the
117 * throttling code
118 */
119 unsigned long num_entries;
120
121 /* total number of head nodes in tree */
122 unsigned long num_heads;
123
124 /* total number of head nodes ready for processing */
125 unsigned long num_heads_ready;
126
127 /*
128 * set when the tree is flushing before a transaction commit,
129 * used by the throttling code to decide if new updates need
130 * to be run right away
131 */
132 int flushing;
133
134 u64 run_delayed_start;
135};
136
137static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
138{
139 WARN_ON(atomic_read(&ref->refs) == 0);
140 if (atomic_dec_and_test(&ref->refs)) {
141 WARN_ON(ref->in_tree);
142 kfree(ref);
143 }
144}
145
146int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
147 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
148 u64 ref_generation, u64 owner_objectid, int action,
149 int pin);
150
151struct btrfs_delayed_ref_head *
152btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
153int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
154int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
155 struct btrfs_root *root, u64 bytenr,
156 u64 num_bytes, u32 *refs);
157int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
158 u64 bytenr, u64 num_bytes, u64 orig_parent,
159 u64 parent, u64 orig_ref_root, u64 ref_root,
160 u64 orig_ref_generation, u64 ref_generation,
161 u64 owner_objectid, int pin);
162int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
163 struct btrfs_delayed_ref_head *head);
164int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
165 struct list_head *cluster, u64 search_start);
166/*
167 * a node might live in a head or a regular ref; this lets you
168 * test for the proper type to use.
169 */
170static inline int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
171{
172 return node->parent == (u64)-1;
173}
174
175/*
176 * helper functions to cast a node into its container
177 */
178static inline struct btrfs_delayed_ref *
179btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node)
180{
181 WARN_ON(btrfs_delayed_ref_is_head(node));
182 return container_of(node, struct btrfs_delayed_ref, node);
183
184}
185
186static inline struct btrfs_delayed_ref_head *
187btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
188{
189 WARN_ON(!btrfs_delayed_ref_is_head(node));
190 return container_of(node, struct btrfs_delayed_ref_head, node);
191
192}
193#endif
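
As a quick illustration of the head/ref split, a walker over the rbtree would dispatch on btrfs_delayed_ref_is_head() before casting; this fragment is an editorial sketch, not part of the patch:

    /* editorial sketch: type dispatch while walking delayed_refs->root */
    static void inspect_node(struct btrfs_delayed_ref_node *node)
    {
            if (btrfs_delayed_ref_is_head(node)) {
                    struct btrfs_delayed_ref_head *head;

                    head = btrfs_delayed_node_to_head(node);
                    /* head->node.ref_mod is the net count change for
                     * this bytenr across all queued modifications */
            } else {
                    struct btrfs_delayed_ref *ref;

                    ref = btrfs_delayed_node_to_ref(node);
                    /* ref->action is one of the BTRFS_*_DELAYED_* values */
            }
    }
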
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 926a0b287a7d..1d70236ba00c 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
 	key.objectid = dir;
 	btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
 	key.offset = btrfs_name_hash(name, name_len);
+
 	path = btrfs_alloc_path();
+	path->leave_spinning = 1;
+
 	data_size = sizeof(*dir_item) + name_len;
 	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
 					name, name_len);
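
The leave_spinning flag set here is understood to ask btrfs_search_slot() to hand the leaf back under a spinning lock rather than converting it to the blocking variant, which is cheaper when the caller only does short in-memory item updates. A hedged sketch of the pattern, assuming the btrfs_path API used throughout this patch:

    struct btrfs_path *path;

    path = btrfs_alloc_path();      /* NULL check omitted, as in the hunk */
    path->leave_spinning = 1;       /* keep the leaf lock spinning */
    /* ... btrfs_search_slot() plus small item edits go here ... */
    btrfs_free_path(path);
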
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6ec80c0fc869..92d73929d381 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	struct extent_buffer *eb;
+	int was_dirty;
+
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	if (!(current->flags & PF_MEMALLOC)) {
+		return extent_write_full_page(tree, page,
+					      btree_get_extent, wbc);
+	}
 
-	if (current->flags & PF_MEMALLOC) {
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
+	redirty_page_for_writepage(wbc, page);
+	eb = btrfs_find_tree_block(root, page_offset(page),
+				   PAGE_CACHE_SIZE);
+	WARN_ON(!eb);
+
+	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+	if (!was_dirty) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
+		spin_unlock(&root->fs_info->delalloc_lock);
 	}
-	return extent_write_full_page(tree, page, btree_get_extent, wbc);
+	free_extent_buffer(eb);
+
+	unlock_page(page);
+	return 0;
 }
 
 static int btree_writepages(struct address_space *mapping,
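
The rewritten btree_writepage() is one half of the new dirty_metadata_bytes accounting: only a clean-to-dirty transition of the extent buffer (test_and_set_bit() returning 0) adds to the counter, so redirtying an already-dirty buffer is never double counted. Condensed into an illustrative helper (the helper name is not from the patch):

    /* illustrative sketch of the dirty-side accounting in this patch */
    static void account_buffer_dirty(struct btrfs_fs_info *info,
                                     struct extent_buffer *eb)
    {
            if (!test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                    spin_lock(&info->delalloc_lock);
                    info->dirty_metadata_bytes += eb->len;
                    spin_unlock(&info->delalloc_lock);
            }
    }
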
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
 	struct extent_io_tree *tree;
 	tree = &BTRFS_I(mapping->host)->io_tree;
 	if (wbc->sync_mode == WB_SYNC_NONE) {
+		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
 		u64 num_dirty;
-		u64 start = 0;
 		unsigned long thresh = 32 * 1024 * 1024;
 
 		if (wbc->for_kupdate)
 			return 0;
 
-		num_dirty = count_range_bits(tree, &start, (u64)-1,
-					     thresh, EXTENT_DIRTY);
+		/* this is a bit racy, but that's ok */
+		num_dirty = root->fs_info->dirty_metadata_bytes;
 		if (num_dirty < thresh)
 			return 0;
 	}
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	    root->fs_info->running_transaction->transid) {
 		btrfs_assert_tree_locked(buf);
 
-		/* ugh, clear_extent_buffer_dirty can be expensive */
-		btrfs_set_lock_blocking(buf);
+		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (root->fs_info->dirty_metadata_bytes >= buf->len)
+				root->fs_info->dirty_metadata_bytes -= buf->len;
+			else
+				WARN_ON(1);
+			spin_unlock(&root->fs_info->delalloc_lock);
+		}
 
+		/* ugh, clear_extent_buffer_dirty needs to lock the page */
+		btrfs_set_lock_blocking(buf);
 		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
 					  buf);
 	}
@@ -1471,12 +1496,6 @@ static int transaction_kthread(void *arg)
 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
 		mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
-		if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
-			printk(KERN_INFO "btrfs: total reference cache "
-			       "size %llu\n",
-			       root->fs_info->total_ref_cache_size);
-		}
-
 		mutex_lock(&root->fs_info->trans_mutex);
 		cur = root->fs_info->running_transaction;
 		if (!cur) {
@@ -1493,6 +1512,7 @@ static int transaction_kthread(void *arg)
 		mutex_unlock(&root->fs_info->trans_mutex);
 		trans = btrfs_start_transaction(root, 1);
 		ret = btrfs_commit_transaction(trans, root);
+
 sleep:
 		wake_up_process(root->fs_info->cleaner_kthread);
 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1552,6 +1572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->hashers);
 	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
+	INIT_LIST_HEAD(&fs_info->ordered_operations);
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->new_trans_lock);
 	spin_lock_init(&fs_info->ref_cache_lock);
@@ -1611,10 +1632,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	extent_io_tree_init(&fs_info->pinned_extents,
 			    fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->pending_del,
-			    fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->extent_ins,
-			    fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
 
 	INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1627,9 +1644,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	insert_inode_hash(fs_info->btree_inode);
 
 	mutex_init(&fs_info->trans_mutex);
+	mutex_init(&fs_info->ordered_operations_mutex);
 	mutex_init(&fs_info->tree_log_mutex);
 	mutex_init(&fs_info->drop_mutex);
-	mutex_init(&fs_info->extent_ins_mutex);
 	mutex_init(&fs_info->pinned_mutex);
 	mutex_init(&fs_info->chunk_mutex);
 	mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2358,8 +2375,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
 	u64 transid = btrfs_header_generation(buf);
 	struct inode *btree_inode = root->fs_info->btree_inode;
-
-	btrfs_set_lock_blocking(buf);
+	int was_dirty;
 
 	btrfs_assert_tree_locked(buf);
 	if (transid != root->fs_info->generation) {
@@ -2370,7 +2386,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 			(unsigned long long)root->fs_info->generation);
 		WARN_ON(1);
 	}
-	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
+	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
+					    buf);
+	if (!was_dirty) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->dirty_metadata_bytes += buf->len;
+		spin_unlock(&root->fs_info->delalloc_lock);
+	}
 }
 
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2410,6 +2432,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 int btree_lock_page_hook(struct page *page)
 {
 	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_buffer *eb;
 	unsigned long len;
@@ -2425,6 +2448,16 @@ int btree_lock_page_hook(struct page *page)
 
 	btrfs_tree_lock(eb);
 	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+
+	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		if (root->fs_info->dirty_metadata_bytes >= eb->len)
+			root->fs_info->dirty_metadata_bytes -= eb->len;
+		else
+			WARN_ON(1);
+		spin_unlock(&root->fs_info->delalloc_lock);
+	}
+
 	btrfs_tree_unlock(eb);
 	free_extent_buffer(eb);
 out:
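
clean_tree_block() and btree_lock_page_hook() above form the clean side of the same accounting: the counter is decremented only when the EXTENT_BUFFER_DIRTY bit was actually set, and a would-be underflow of the u64 is caught with WARN_ON(1) instead of wrapping. An illustrative mirror of the dirty-side helper (again, the helper name is editorial):

    /* illustrative sketch: clean-side bookkeeping with underflow guard */
    static void account_buffer_clean(struct btrfs_fs_info *info,
                                     struct extent_buffer *eb)
    {
            if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                    spin_lock(&info->delalloc_lock);
                    if (info->dirty_metadata_bytes >= eb->len)
                            info->dirty_metadata_bytes -= eb->len;
                    else
                            WARN_ON(1);     /* flag and counter out of sync */
                    spin_unlock(&info->delalloc_lock);
            }
    }
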
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95029db227be..c958ecbc1916 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
+void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
 int wait_on_tree_block_writeback(struct btrfs_root *root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fefe83ad2059..f5e7cae63d80 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -49,17 +49,23 @@ struct pending_extent_op {
 	int del;
 };
 
-static int finish_current_insert(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *extent_root, int all);
-static int del_pending_extents(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *extent_root, int all);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
-			  u64 bytenr, u64 num_bytes, int is_data);
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+					 struct btrfs_root *root, u64 parent,
+					 u64 root_objectid, u64 ref_generation,
+					 u64 owner, struct btrfs_key *ins,
+					 int ref_mod);
+static int update_reserved_extents(struct btrfs_root *root,
+				   u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc,
 			      int mark_free);
+static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					u64 bytenr, u64 num_bytes, u64 parent,
+					u64 root_objectid, u64 ref_generation,
+					u64 owner_objectid, int pin,
+					int ref_to_drop);
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
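
The new ref_mod and ref_to_drop parameters reflect the batching theme of the series: duplicate updates to one (bytenr, parent) pair are merged in the delayed ref rbtree before the extent tree is ever touched. A worked example of the arithmetic, mirroring update_existing_ref() in the new delayed-ref.c: queueing ADD, ADD, DROP for the same backref leaves ref_mod at 1 + 1 - 1 == 1, so the extent tree sees a single update with a count of 1 instead of three separate searches; an ADD followed immediately by a DROP cancels to ref_mod == 0 and the node is freed without any extent tree modification at all.
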
@@ -554,262 +560,13 @@ out:
 	return ret;
 }
 
557/*
558 * updates all the backrefs that are pending on update_list for the
559 * extent_root
560 */
561static noinline int update_backrefs(struct btrfs_trans_handle *trans,
562 struct btrfs_root *extent_root,
563 struct btrfs_path *path,
564 struct list_head *update_list)
565{
566 struct btrfs_key key;
567 struct btrfs_extent_ref *ref;
568 struct btrfs_fs_info *info = extent_root->fs_info;
569 struct pending_extent_op *op;
570 struct extent_buffer *leaf;
571 int ret = 0;
572 struct list_head *cur = update_list->next;
573 u64 ref_objectid;
574 u64 ref_root = extent_root->root_key.objectid;
575
576 op = list_entry(cur, struct pending_extent_op, list);
577
578search:
579 key.objectid = op->bytenr;
580 key.type = BTRFS_EXTENT_REF_KEY;
581 key.offset = op->orig_parent;
582
583 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
584 BUG_ON(ret);
585
586 leaf = path->nodes[0];
587
588loop:
589 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
590
591 ref_objectid = btrfs_ref_objectid(leaf, ref);
592
593 if (btrfs_ref_root(leaf, ref) != ref_root ||
594 btrfs_ref_generation(leaf, ref) != op->orig_generation ||
595 (ref_objectid != op->level &&
596 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
597 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
598 "root %llu, owner %u\n",
599 (unsigned long long)op->bytenr,
600 (unsigned long long)op->orig_parent,
601 (unsigned long long)ref_root, op->level);
602 btrfs_print_leaf(extent_root, leaf);
603 BUG();
604 }
605
606 key.objectid = op->bytenr;
607 key.offset = op->parent;
608 key.type = BTRFS_EXTENT_REF_KEY;
609 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
610 BUG_ON(ret);
611 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
612 btrfs_set_ref_generation(leaf, ref, op->generation);
613
614 cur = cur->next;
615
616 list_del_init(&op->list);
617 unlock_extent(&info->extent_ins, op->bytenr,
618 op->bytenr + op->num_bytes - 1, GFP_NOFS);
619 kfree(op);
620
621 if (cur == update_list) {
622 btrfs_mark_buffer_dirty(path->nodes[0]);
623 btrfs_release_path(extent_root, path);
624 goto out;
625 }
626
627 op = list_entry(cur, struct pending_extent_op, list);
628
629 path->slots[0]++;
630 while (path->slots[0] < btrfs_header_nritems(leaf)) {
631 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
632 if (key.objectid == op->bytenr &&
633 key.type == BTRFS_EXTENT_REF_KEY)
634 goto loop;
635 path->slots[0]++;
636 }
637
638 btrfs_mark_buffer_dirty(path->nodes[0]);
639 btrfs_release_path(extent_root, path);
640 goto search;
641
642out:
643 return 0;
644}
645
646static noinline int insert_extents(struct btrfs_trans_handle *trans,
647 struct btrfs_root *extent_root,
648 struct btrfs_path *path,
649 struct list_head *insert_list, int nr)
650{
651 struct btrfs_key *keys;
652 u32 *data_size;
653 struct pending_extent_op *op;
654 struct extent_buffer *leaf;
655 struct list_head *cur = insert_list->next;
656 struct btrfs_fs_info *info = extent_root->fs_info;
657 u64 ref_root = extent_root->root_key.objectid;
658 int i = 0, last = 0, ret;
659 int total = nr * 2;
660
661 if (!nr)
662 return 0;
663
664 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
665 if (!keys)
666 return -ENOMEM;
667
668 data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
669 if (!data_size) {
670 kfree(keys);
671 return -ENOMEM;
672 }
673
674 list_for_each_entry(op, insert_list, list) {
675 keys[i].objectid = op->bytenr;
676 keys[i].offset = op->num_bytes;
677 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
678 data_size[i] = sizeof(struct btrfs_extent_item);
679 i++;
680
681 keys[i].objectid = op->bytenr;
682 keys[i].offset = op->parent;
683 keys[i].type = BTRFS_EXTENT_REF_KEY;
684 data_size[i] = sizeof(struct btrfs_extent_ref);
685 i++;
686 }
687
688 op = list_entry(cur, struct pending_extent_op, list);
689 i = 0;
690 while (i < total) {
691 int c;
692 ret = btrfs_insert_some_items(trans, extent_root, path,
693 keys+i, data_size+i, total-i);
694 BUG_ON(ret < 0);
695
696 if (last && ret > 1)
697 BUG();
698
699 leaf = path->nodes[0];
700 for (c = 0; c < ret; c++) {
701 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
702
703 /*
704 * if the first item we inserted was a backref, then
705 * the EXTENT_ITEM will be the odd c's, else it will
706 * be the even c's
707 */
708 if ((ref_first && (c % 2)) ||
709 (!ref_first && !(c % 2))) {
710 struct btrfs_extent_item *itm;
711
712 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
713 struct btrfs_extent_item);
714 btrfs_set_extent_refs(path->nodes[0], itm, 1);
715 op->del++;
716 } else {
717 struct btrfs_extent_ref *ref;
718
719 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
720 struct btrfs_extent_ref);
721 btrfs_set_ref_root(leaf, ref, ref_root);
722 btrfs_set_ref_generation(leaf, ref,
723 op->generation);
724 btrfs_set_ref_objectid(leaf, ref, op->level);
725 btrfs_set_ref_num_refs(leaf, ref, 1);
726 op->del++;
727 }
728
729 /*
730 * using del to see when its ok to free up the
731 * pending_extent_op. In the case where we insert the
732 * last item on the list in order to help do batching
733 * we need to not free the extent op until we actually
734 * insert the extent_item
735 */
736 if (op->del == 2) {
737 unlock_extent(&info->extent_ins, op->bytenr,
738 op->bytenr + op->num_bytes - 1,
739 GFP_NOFS);
740 cur = cur->next;
741 list_del_init(&op->list);
742 kfree(op);
743 if (cur != insert_list)
744 op = list_entry(cur,
745 struct pending_extent_op,
746 list);
747 }
748 }
749 btrfs_mark_buffer_dirty(leaf);
750 btrfs_release_path(extent_root, path);
751
752 /*
753 * Ok backref's and items usually go right next to eachother,
754 * but if we could only insert 1 item that means that we
755 * inserted on the end of a leaf, and we have no idea what may
756 * be on the next leaf so we just play it safe. In order to
757 * try and help this case we insert the last thing on our
758 * insert list so hopefully it will end up being the last
759 * thing on the leaf and everything else will be before it,
760 * which will let us insert a whole bunch of items at the same
761 * time.
762 */
763 if (ret == 1 && !last && (i + ret < total)) {
764 /*
765 * last: where we will pick up the next time around
766 * i: our current key to insert, will be total - 1
767 * cur: the current op we are screwing with
768 * op: duh
769 */
770 last = i + ret;
771 i = total - 1;
772 cur = insert_list->prev;
773 op = list_entry(cur, struct pending_extent_op, list);
774 } else if (last) {
775 /*
776 * ok we successfully inserted the last item on the
777 * list, lets reset everything
778 *
779 * i: our current key to insert, so where we left off
780 * last time
781 * last: done with this
782 * cur: the op we are messing with
783 * op: duh
784 * total: since we inserted the last key, we need to
785 * decrement total so we dont overflow
786 */
787 i = last;
788 last = 0;
789 total--;
790 if (i < total) {
791 cur = insert_list->next;
792 op = list_entry(cur, struct pending_extent_op,
793 list);
794 }
795 } else {
796 i += ret;
797 }
798
799 cond_resched();
800 }
801 ret = 0;
802 kfree(keys);
803 kfree(data_size);
804 return ret;
805}
806
 static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
 					  struct btrfs_path *path,
 					  u64 bytenr, u64 parent,
 					  u64 ref_root, u64 ref_generation,
-					  u64 owner_objectid)
+					  u64 owner_objectid,
+					  int refs_to_add)
 {
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
@@ -829,9 +586,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 		btrfs_set_ref_root(leaf, ref, ref_root);
 		btrfs_set_ref_generation(leaf, ref, ref_generation);
 		btrfs_set_ref_objectid(leaf, ref, owner_objectid);
-		btrfs_set_ref_num_refs(leaf, ref, 1);
+		btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
 	} else if (ret == -EEXIST) {
 		u64 existing_owner;
+
 		BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
 		leaf = path->nodes[0];
 		ref = btrfs_item_ptr(leaf, path->slots[0],
@@ -845,7 +603,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 
 		num_refs = btrfs_ref_num_refs(leaf, ref);
 		BUG_ON(num_refs == 0);
-		btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
+		btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
 
 		existing_owner = btrfs_ref_objectid(leaf, ref);
 		if (existing_owner != owner_objectid &&
@@ -857,6 +615,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
 	} else {
 		goto out;
 	}
+	btrfs_unlock_up_safe(path, 1);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 out:
 	btrfs_release_path(root, path);
@@ -865,7 +624,8 @@ out:
 
 static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
 					  struct btrfs_root *root,
-					  struct btrfs_path *path)
+					  struct btrfs_path *path,
+					  int refs_to_drop)
 {
 	struct extent_buffer *leaf;
 	struct btrfs_extent_ref *ref;
@@ -875,8 +635,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
 	leaf = path->nodes[0];
 	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
 	num_refs = btrfs_ref_num_refs(leaf, ref);
-	BUG_ON(num_refs == 0);
-	num_refs -= 1;
+	BUG_ON(num_refs < refs_to_drop);
+	num_refs -= refs_to_drop;
 	if (num_refs == 0) {
 		ret = btrfs_del_item(trans, root, path);
 	} else {
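
With refs_to_drop in hand, remove_extent_backref() can retire several queued drops in one pass: for example, a backref item recording num_refs == 3 that is asked to drop 2 is rewritten in place with num_refs == 1, and only a later drop of the final reference deletes the item outright. The BUG_ON(num_refs < refs_to_drop) guards against dropping more references than the item actually records.
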
@@ -927,332 +687,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 #endif
 }
 
930static noinline int free_extents(struct btrfs_trans_handle *trans,
931 struct btrfs_root *extent_root,
932 struct list_head *del_list)
933{
934 struct btrfs_fs_info *info = extent_root->fs_info;
935 struct btrfs_path *path;
936 struct btrfs_key key, found_key;
937 struct extent_buffer *leaf;
938 struct list_head *cur;
939 struct pending_extent_op *op;
940 struct btrfs_extent_item *ei;
941 int ret, num_to_del, extent_slot = 0, found_extent = 0;
942 u32 refs;
943 u64 bytes_freed = 0;
944
945 path = btrfs_alloc_path();
946 if (!path)
947 return -ENOMEM;
948 path->reada = 1;
949
950search:
951 /* search for the backref for the current ref we want to delete */
952 cur = del_list->next;
953 op = list_entry(cur, struct pending_extent_op, list);
954 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
955 op->orig_parent,
956 extent_root->root_key.objectid,
957 op->orig_generation, op->level, 1);
958 if (ret) {
959 printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
960 "root %llu gen %llu owner %u\n",
961 (unsigned long long)op->bytenr,
962 (unsigned long long)extent_root->root_key.objectid,
963 (unsigned long long)op->orig_generation, op->level);
964 btrfs_print_leaf(extent_root, path->nodes[0]);
965 WARN_ON(1);
966 goto out;
967 }
968
969 extent_slot = path->slots[0];
970 num_to_del = 1;
971 found_extent = 0;
972
973 /*
974 * if we aren't the first item on the leaf we can move back one and see
975 * if our ref is right next to our extent item
976 */
977 if (likely(extent_slot)) {
978 extent_slot--;
979 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
980 extent_slot);
981 if (found_key.objectid == op->bytenr &&
982 found_key.type == BTRFS_EXTENT_ITEM_KEY &&
983 found_key.offset == op->num_bytes) {
984 num_to_del++;
985 found_extent = 1;
986 }
987 }
988
989 /*
990 * if we didn't find the extent we need to delete the backref and then
991 * search for the extent item key so we can update its ref count
992 */
993 if (!found_extent) {
994 key.objectid = op->bytenr;
995 key.type = BTRFS_EXTENT_ITEM_KEY;
996 key.offset = op->num_bytes;
997
998 ret = remove_extent_backref(trans, extent_root, path);
999 BUG_ON(ret);
1000 btrfs_release_path(extent_root, path);
1001 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1002 BUG_ON(ret);
1003 extent_slot = path->slots[0];
1004 }
1005
1006 /* this is where we update the ref count for the extent */
1007 leaf = path->nodes[0];
1008 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
1009 refs = btrfs_extent_refs(leaf, ei);
1010 BUG_ON(refs == 0);
1011 refs--;
1012 btrfs_set_extent_refs(leaf, ei, refs);
1013
1014 btrfs_mark_buffer_dirty(leaf);
1015
1016 /*
1017 * This extent needs deleting. The reason cur_slot is extent_slot +
1018 * num_to_del is because extent_slot points to the slot where the extent
1019 * is, and if the backref was not right next to the extent we will be
1020 * deleting at least 1 item, and will want to start searching at the
1021 * slot directly next to extent_slot. However if we did find the
1022 * backref next to the extent item then we will be deleting at least 2
1023 * items and will want to start searching directly after the ref slot
1024 */
1025 if (!refs) {
1026 struct list_head *pos, *n, *end;
1027 int cur_slot = extent_slot+num_to_del;
1028 u64 super_used;
1029 u64 root_used;
1030
1031 path->slots[0] = extent_slot;
1032 bytes_freed = op->num_bytes;
1033
1034 mutex_lock(&info->pinned_mutex);
1035 ret = pin_down_bytes(trans, extent_root, op->bytenr,
1036 op->num_bytes, op->level >=
1037 BTRFS_FIRST_FREE_OBJECTID);
1038 mutex_unlock(&info->pinned_mutex);
1039 BUG_ON(ret < 0);
1040 op->del = ret;
1041
1042 /*
1043 * we need to see if we can delete multiple things at once, so
1044 * start looping through the list of extents we are wanting to
1045 * delete and see if their extent/backrefs are right next to
1046 * each other and the extents only have 1 ref
1047 */
1048 for (pos = cur->next; pos != del_list; pos = pos->next) {
1049 struct pending_extent_op *tmp;
1050
1051 tmp = list_entry(pos, struct pending_extent_op, list);
1052
1053 /* we only want to delete extent+ref at this stage */
1054 if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1055 break;
1056
1057 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1058 if (found_key.objectid != tmp->bytenr ||
1059 found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1060 found_key.offset != tmp->num_bytes)
1061 break;
1062
1063 /* check to make sure this extent only has one ref */
1064 ei = btrfs_item_ptr(leaf, cur_slot,
1065 struct btrfs_extent_item);
1066 if (btrfs_extent_refs(leaf, ei) != 1)
1067 break;
1068
1069 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1070 if (found_key.objectid != tmp->bytenr ||
1071 found_key.type != BTRFS_EXTENT_REF_KEY ||
1072 found_key.offset != tmp->orig_parent)
1073 break;
1074
1075 /*
1076 * the ref is right next to the extent, we can set the
1077 * ref count to 0 since we will delete them both now
1078 */
1079 btrfs_set_extent_refs(leaf, ei, 0);
1080
1081 /* pin down the bytes for this extent */
1082 mutex_lock(&info->pinned_mutex);
1083 ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1084 tmp->num_bytes, tmp->level >=
1085 BTRFS_FIRST_FREE_OBJECTID);
1086 mutex_unlock(&info->pinned_mutex);
1087 BUG_ON(ret < 0);
1088
1089 /*
1090 * use the del field to tell if we need to go ahead and
1091 * free up the extent when we delete the item or not.
1092 */
1093 tmp->del = ret;
1094 bytes_freed += tmp->num_bytes;
1095
1096 num_to_del += 2;
1097 cur_slot += 2;
1098 }
1099 end = pos;
1100
1101 /* update the free space counters */
1102 spin_lock(&info->delalloc_lock);
1103 super_used = btrfs_super_bytes_used(&info->super_copy);
1104 btrfs_set_super_bytes_used(&info->super_copy,
1105 super_used - bytes_freed);
1106
1107 root_used = btrfs_root_used(&extent_root->root_item);
1108 btrfs_set_root_used(&extent_root->root_item,
1109 root_used - bytes_freed);
1110 spin_unlock(&info->delalloc_lock);
1111
1112 /* delete the items */
1113 ret = btrfs_del_items(trans, extent_root, path,
1114 path->slots[0], num_to_del);
1115 BUG_ON(ret);
1116
1117 /*
1118 * loop through the extents we deleted and do the cleanup work
1119 * on them
1120 */
1121 for (pos = cur, n = pos->next; pos != end;
1122 pos = n, n = pos->next) {
1123 struct pending_extent_op *tmp;
1124 tmp = list_entry(pos, struct pending_extent_op, list);
1125
1126 /*
1127 * remember tmp->del tells us whether or not we pinned
1128 * down the extent
1129 */
1130 ret = update_block_group(trans, extent_root,
1131 tmp->bytenr, tmp->num_bytes, 0,
1132 tmp->del);
1133 BUG_ON(ret);
1134
1135 list_del_init(&tmp->list);
1136 unlock_extent(&info->extent_ins, tmp->bytenr,
1137 tmp->bytenr + tmp->num_bytes - 1,
1138 GFP_NOFS);
1139 kfree(tmp);
1140 }
1141 } else if (refs && found_extent) {
1142 /*
1143 * the ref and extent were right next to each other, but the
1144 * extent still has a ref, so just free the backref and keep
1145 * going
1146 */
1147 ret = remove_extent_backref(trans, extent_root, path);
1148 BUG_ON(ret);
1149
1150 list_del_init(&op->list);
1151 unlock_extent(&info->extent_ins, op->bytenr,
1152 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1153 kfree(op);
1154 } else {
1155 /*
1156 * the extent has multiple refs and the backref we were looking
1157 * for was not right next to it, so just unlock it and move on;
1158 * we're good to go
1159 */
1160 list_del_init(&op->list);
1161 unlock_extent(&info->extent_ins, op->bytenr,
1162 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1163 kfree(op);
1164 }
1165
1166 btrfs_release_path(extent_root, path);
1167 if (!list_empty(del_list))
1168 goto search;
1169
1170out:
1171 btrfs_free_path(path);
1172 return ret;
1173}
1174
1175static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 690static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1176 struct btrfs_root *root, u64 bytenr, 691 struct btrfs_root *root, u64 bytenr,
692 u64 num_bytes,
1177 u64 orig_parent, u64 parent, 693 u64 orig_parent, u64 parent,
1178 u64 orig_root, u64 ref_root, 694 u64 orig_root, u64 ref_root,
1179 u64 orig_generation, u64 ref_generation, 695 u64 orig_generation, u64 ref_generation,
1180 u64 owner_objectid) 696 u64 owner_objectid)
1181{ 697{
1182 int ret; 698 int ret;
1183 struct btrfs_root *extent_root = root->fs_info->extent_root; 699 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
1184 struct btrfs_path *path;
1185
1186 if (root == root->fs_info->extent_root) {
1187 struct pending_extent_op *extent_op;
1188 u64 num_bytes;
1189
1190 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
1191 num_bytes = btrfs_level_size(root, (int)owner_objectid);
1192 mutex_lock(&root->fs_info->extent_ins_mutex);
1193 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
1194 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
1195 u64 priv;
1196 ret = get_state_private(&root->fs_info->extent_ins,
1197 bytenr, &priv);
1198 BUG_ON(ret);
1199 extent_op = (struct pending_extent_op *)
1200 (unsigned long)priv;
1201 BUG_ON(extent_op->parent != orig_parent);
1202 BUG_ON(extent_op->generation != orig_generation);
1203 700
1204 extent_op->parent = parent; 701 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
1205 extent_op->generation = ref_generation; 702 orig_parent, parent, orig_root,
1206 } else { 703 ref_root, orig_generation,
1207 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 704 ref_generation, owner_objectid, pin);
1208 BUG_ON(!extent_op);
1209
1210 extent_op->type = PENDING_BACKREF_UPDATE;
1211 extent_op->bytenr = bytenr;
1212 extent_op->num_bytes = num_bytes;
1213 extent_op->parent = parent;
1214 extent_op->orig_parent = orig_parent;
1215 extent_op->generation = ref_generation;
1216 extent_op->orig_generation = orig_generation;
1217 extent_op->level = (int)owner_objectid;
1218 INIT_LIST_HEAD(&extent_op->list);
1219 extent_op->del = 0;
1220
1221 set_extent_bits(&root->fs_info->extent_ins,
1222 bytenr, bytenr + num_bytes - 1,
1223 EXTENT_WRITEBACK, GFP_NOFS);
1224 set_state_private(&root->fs_info->extent_ins,
1225 bytenr, (unsigned long)extent_op);
1226 }
1227 mutex_unlock(&root->fs_info->extent_ins_mutex);
1228 return 0;
1229 }
1230
1231 path = btrfs_alloc_path();
1232 if (!path)
1233 return -ENOMEM;
1234 ret = lookup_extent_backref(trans, extent_root, path,
1235 bytenr, orig_parent, orig_root,
1236 orig_generation, owner_objectid, 1);
1237 if (ret)
1238 goto out;
1239 ret = remove_extent_backref(trans, extent_root, path);
1240 if (ret)
1241 goto out;
1242 ret = insert_extent_backref(trans, extent_root, path, bytenr,
1243 parent, ref_root, ref_generation,
1244 owner_objectid);
1245 BUG_ON(ret); 705 BUG_ON(ret);
1246 finish_current_insert(trans, extent_root, 0);
1247 del_pending_extents(trans, extent_root, 0);
1248out:
1249 btrfs_free_path(path);
1250 return ret; 706 return ret;
1251} 707}
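The old body did a synchronous lookup/remove/insert on the extent tree; the new one only records the change with btrfs_update_delayed_ref and lets a later btrfs_run_delayed_refs apply it. A hedged sketch of that deferral, with invented types (the real code keeps these in an rbtree keyed by bytenr, merged per extent):

    #include <stdint.h>
    #include <stdio.h>

    struct delayed_update {
            uint64_t bytenr, num_bytes;
            uint64_t old_parent, new_parent;
            int pin;                /* metadata updates request pinning */
    };

    /* stand-in for btrfs_update_delayed_ref(): queue now, touch the tree later */
    static void queue_update(struct delayed_update *q, int *n, struct delayed_update u)
    {
            q[(*n)++] = u;
    }

    int main(void)
    {
            struct delayed_update q[8];
            int n = 0;

            queue_update(q, &n, (struct delayed_update){ .bytenr = 4096,
                        .num_bytes = 4096, .old_parent = 1, .new_parent = 2, .pin = 1 });
            printf("%d update(s) queued, extent tree untouched so far\n", n);
            return 0;
    }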
1252 708
1253int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 709int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1254 struct btrfs_root *root, u64 bytenr, 710 struct btrfs_root *root, u64 bytenr,
1255 u64 orig_parent, u64 parent, 711 u64 num_bytes, u64 orig_parent, u64 parent,
1256 u64 ref_root, u64 ref_generation, 712 u64 ref_root, u64 ref_generation,
1257 u64 owner_objectid) 713 u64 owner_objectid)
1258{ 714{
@@ -1260,20 +716,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1260 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 716 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1261 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 717 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1262 return 0; 718 return 0;
1263 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 719
1264 parent, ref_root, ref_root, 720 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
1265 ref_generation, ref_generation, 721 orig_parent, parent, ref_root,
1266 owner_objectid); 722 ref_root, ref_generation,
723 ref_generation, owner_objectid);
1267 return ret; 724 return ret;
1268} 725}
1269
1270static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 726static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1271 struct btrfs_root *root, u64 bytenr, 727 struct btrfs_root *root, u64 bytenr,
728 u64 num_bytes,
1272 u64 orig_parent, u64 parent, 729 u64 orig_parent, u64 parent,
1273 u64 orig_root, u64 ref_root, 730 u64 orig_root, u64 ref_root,
1274 u64 orig_generation, u64 ref_generation, 731 u64 orig_generation, u64 ref_generation,
1275 u64 owner_objectid) 732 u64 owner_objectid)
1276{ 733{
734 int ret;
735
736 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
737 ref_generation, owner_objectid,
738 BTRFS_ADD_DELAYED_REF, 0);
739 BUG_ON(ret);
740 return ret;
741}
742
743static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
744 struct btrfs_root *root, u64 bytenr,
745 u64 num_bytes, u64 parent, u64 ref_root,
746 u64 ref_generation, u64 owner_objectid,
747 int refs_to_add)
748{
1277 struct btrfs_path *path; 749 struct btrfs_path *path;
1278 int ret; 750 int ret;
1279 struct btrfs_key key; 751 struct btrfs_key key;
@@ -1286,17 +758,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1286 return -ENOMEM; 758 return -ENOMEM;
1287 759
1288 path->reada = 1; 760 path->reada = 1;
761 path->leave_spinning = 1;
1289 key.objectid = bytenr; 762 key.objectid = bytenr;
1290 key.type = BTRFS_EXTENT_ITEM_KEY; 763 key.type = BTRFS_EXTENT_ITEM_KEY;
1291 key.offset = (u64)-1; 764 key.offset = num_bytes;
1292 765
1293 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 766 /* first find the extent item and update its reference count */
1294 0, 1); 767 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1295 if (ret < 0) 768 path, 0, 1);
769 if (ret < 0) {
770 btrfs_set_path_blocking(path);
1296 return ret; 771 return ret;
1297 BUG_ON(ret == 0 || path->slots[0] == 0); 772 }
1298 773
1299 path->slots[0]--; 774 if (ret > 0) {
775 WARN_ON(1);
776 btrfs_free_path(path);
777 return -EIO;
778 }
1300 l = path->nodes[0]; 779 l = path->nodes[0];
1301 780
1302 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 781 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
@@ -1310,21 +789,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1310 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 789 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1311 790
1312 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 791 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
792
1313 refs = btrfs_extent_refs(l, item); 793 refs = btrfs_extent_refs(l, item);
1314 btrfs_set_extent_refs(l, item, refs + 1); 794 btrfs_set_extent_refs(l, item, refs + refs_to_add);
795 btrfs_unlock_up_safe(path, 1);
796
1315 btrfs_mark_buffer_dirty(path->nodes[0]); 797 btrfs_mark_buffer_dirty(path->nodes[0]);
1316 798
1317 btrfs_release_path(root->fs_info->extent_root, path); 799 btrfs_release_path(root->fs_info->extent_root, path);
1318 800
1319 path->reada = 1; 801 path->reada = 1;
802 path->leave_spinning = 1;
803
804 /* now insert the actual backref */
1320 ret = insert_extent_backref(trans, root->fs_info->extent_root, 805 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1321 path, bytenr, parent, 806 path, bytenr, parent,
1322 ref_root, ref_generation, 807 ref_root, ref_generation,
1323 owner_objectid); 808 owner_objectid, refs_to_add);
1324 BUG_ON(ret); 809 BUG_ON(ret);
1325 finish_current_insert(trans, root->fs_info->extent_root, 0);
1326 del_pending_extents(trans, root->fs_info->extent_root, 0);
1327
1328 btrfs_free_path(path); 810 btrfs_free_path(path);
1329 return 0; 811 return 0;
1330} 812}
@@ -1339,68 +821,278 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1339 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 821 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1340 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 822 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1341 return 0; 823 return 0;
1342 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 824
825 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
1343 0, ref_root, 0, ref_generation, 826 0, ref_root, 0, ref_generation,
1344 owner_objectid); 827 owner_objectid);
1345 return ret; 828 return ret;
1346} 829}
1347 830
1348int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 831static int drop_delayed_ref(struct btrfs_trans_handle *trans,
1349 struct btrfs_root *root) 832 struct btrfs_root *root,
833 struct btrfs_delayed_ref_node *node)
834{
835 int ret = 0;
836 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
837
838 BUG_ON(node->ref_mod == 0);
839 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
840 node->parent, ref->root, ref->generation,
841 ref->owner_objectid, ref->pin, node->ref_mod);
842
843 return ret;
844}
845
846/* helper function to actually process a single delayed ref entry */
847static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
848 struct btrfs_root *root,
849 struct btrfs_delayed_ref_node *node,
850 int insert_reserved)
1350{ 851{
1351 u64 start;
1352 u64 end;
1353 int ret; 852 int ret;
853 struct btrfs_delayed_ref *ref;
854
855 if (node->parent == (u64)-1) {
856 struct btrfs_delayed_ref_head *head;
857 /*
858 * we've hit the end of the chain and we were supposed
859 * to insert this extent into the tree. But it got
860 * deleted before we ever needed to insert it, so all
861 * we have to do is clean up the accounting
862 */
863 if (insert_reserved) {
864 update_reserved_extents(root, node->bytenr,
865 node->num_bytes, 0);
866 }
867 head = btrfs_delayed_node_to_head(node);
868 mutex_unlock(&head->mutex);
869 return 0;
870 }
1354 871
1355 while (1) { 872 ref = btrfs_delayed_node_to_ref(node);
1356 finish_current_insert(trans, root->fs_info->extent_root, 1); 873 if (ref->action == BTRFS_ADD_DELAYED_REF) {
1357 del_pending_extents(trans, root->fs_info->extent_root, 1); 874 if (insert_reserved) {
875 struct btrfs_key ins;
1358 876
1359 /* is there more work to do? */ 877 ins.objectid = node->bytenr;
1360 ret = find_first_extent_bit(&root->fs_info->pending_del, 878 ins.offset = node->num_bytes;
1361 0, &start, &end, EXTENT_WRITEBACK); 879 ins.type = BTRFS_EXTENT_ITEM_KEY;
1362 if (!ret) 880
1363 continue; 881 /* record the full extent allocation */
1364 ret = find_first_extent_bit(&root->fs_info->extent_ins, 882 ret = __btrfs_alloc_reserved_extent(trans, root,
1365 0, &start, &end, EXTENT_WRITEBACK); 883 node->parent, ref->root,
1366 if (!ret) 884 ref->generation, ref->owner_objectid,
1367 continue; 885 &ins, node->ref_mod);
1368 break; 886 update_reserved_extents(root, node->bytenr,
887 node->num_bytes, 0);
888 } else {
889 /* just add one backref */
890 ret = add_extent_ref(trans, root, node->bytenr,
891 node->num_bytes,
892 node->parent, ref->root, ref->generation,
893 ref->owner_objectid, node->ref_mod);
894 }
895 BUG_ON(ret);
896 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
897 WARN_ON(insert_reserved);
898 ret = drop_delayed_ref(trans, root, node);
1369 } 899 }
1370 return 0; 900 return 0;
1371} 901}
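run_one_delayed_ref splits into three cases: a bare head node whose extent died before it was ever inserted (only reservation accounting to unwind), an add (either the full extent insert or one more backref, depending on insert_reserved), and a drop. A compact model of just that dispatch, using stand-in enums rather than the real node types:

    #include <stdio.h>

    enum action { HEAD_ONLY, ADD_REF, DROP_REF };

    static const char *run_one(enum action a, int insert_reserved)
    {
            if (a == HEAD_ONLY)     /* end of chain: extent was never inserted */
                    return insert_reserved ? "release reservation" : "nothing to do";
            if (a == ADD_REF)
                    return insert_reserved ? "insert full extent item"
                                           : "add one backref";
            return "drop refs via __btrfs_free_extent";
    }

    int main(void)
    {
            printf("%s\n", run_one(ADD_REF, 1));
            printf("%s\n", run_one(DROP_REF, 0));
            return 0;
    }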
1372 902
1373int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 903static noinline struct btrfs_delayed_ref_node *
1374 struct btrfs_root *root, u64 bytenr, 904select_delayed_ref(struct btrfs_delayed_ref_head *head)
1375 u64 num_bytes, u32 *refs)
1376{ 905{
1377 struct btrfs_path *path; 906 struct rb_node *node;
907 struct btrfs_delayed_ref_node *ref;
908 int action = BTRFS_ADD_DELAYED_REF;
909again:
910 /*
911 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
912 * this prevents ref count from going down to zero when
913 * there are still pending delayed refs.
914 */
915 node = rb_prev(&head->node.rb_node);
916 while (1) {
917 if (!node)
918 break;
919 ref = rb_entry(node, struct btrfs_delayed_ref_node,
920 rb_node);
921 if (ref->bytenr != head->node.bytenr)
922 break;
923 if (btrfs_delayed_node_to_ref(ref)->action == action)
924 return ref;
925 node = rb_prev(node);
926 }
927 if (action == BTRFS_ADD_DELAYED_REF) {
928 action = BTRFS_DROP_DELAYED_REF;
929 goto again;
930 }
931 return NULL;
932}
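The two-pass walk above is the whole trick: adds are consumed before drops so the on-disk count can never transiently hit zero while more modifications are queued. The same selection over a plain array (illustrative only):

    #include <stdio.h>

    enum { ADD, DROP };

    static int select_ref(const int *actions, int n)
    {
            for (int want = ADD; want <= DROP; want++)      /* adds first */
                    for (int i = 0; i < n; i++)
                            if (actions[i] == want)
                                    return i;
            return -1;      /* nothing left but the head node */
    }

    int main(void)
    {
            int pending[] = { DROP, DROP, ADD };
            printf("run index %d first\n", select_ref(pending, 3)); /* 2: the ADD */
            return 0;
    }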
933
934static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
935 struct btrfs_root *root,
936 struct list_head *cluster)
937{
938 struct btrfs_delayed_ref_root *delayed_refs;
939 struct btrfs_delayed_ref_node *ref;
940 struct btrfs_delayed_ref_head *locked_ref = NULL;
1378 int ret; 941 int ret;
1379 struct btrfs_key key; 942 int count = 0;
1380 struct extent_buffer *l; 943 int must_insert_reserved = 0;
1381 struct btrfs_extent_item *item;
1382 944
1383 WARN_ON(num_bytes < root->sectorsize); 945 delayed_refs = &trans->transaction->delayed_refs;
1384 path = btrfs_alloc_path(); 946 while (1) {
1385 path->reada = 1; 947 if (!locked_ref) {
1386 key.objectid = bytenr; 948 /* pick a new head ref from the cluster list */
1387 key.offset = num_bytes; 949 if (list_empty(cluster))
1388 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 950 break;
1389 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 951
1390 0, 0); 952 locked_ref = list_entry(cluster->next,
1391 if (ret < 0) 953 struct btrfs_delayed_ref_head, cluster);
1392 goto out; 954
1393 if (ret != 0) { 955 /* grab the lock that says we are going to process
1394 btrfs_print_leaf(root, path->nodes[0]); 956 * all the refs for this head */
1395 printk(KERN_INFO "btrfs failed to find block number %llu\n", 957 ret = btrfs_delayed_ref_lock(trans, locked_ref);
1396 (unsigned long long)bytenr); 958
1397 BUG(); 959 /*
960 * we may have dropped the spin lock to get the head
961 * mutex lock, and that might have given someone else
962 * time to free the head. If that's true, it has been
963 * removed from our list and we can move on.
964 */
965 if (ret == -EAGAIN) {
966 locked_ref = NULL;
967 count++;
968 continue;
969 }
970 }
971
972 /*
973 * record the must insert reserved flag before we
974 * drop the spin lock.
975 */
976 must_insert_reserved = locked_ref->must_insert_reserved;
977 locked_ref->must_insert_reserved = 0;
978
979 /*
980 * locked_ref is the head node, so we have to go one
981 * node back for any delayed ref updates
982 */
983 ref = select_delayed_ref(locked_ref);
984 if (!ref) {
985 /* All delayed refs have been processed, so go ahead
986 * and send the head node to run_one_delayed_ref,
987 * so that any accounting fixes can happen
988 */
989 ref = &locked_ref->node;
990 list_del_init(&locked_ref->cluster);
991 locked_ref = NULL;
992 }
993
994 ref->in_tree = 0;
995 rb_erase(&ref->rb_node, &delayed_refs->root);
996 delayed_refs->num_entries--;
997 spin_unlock(&delayed_refs->lock);
998
999 ret = run_one_delayed_ref(trans, root, ref,
1000 must_insert_reserved);
1001 BUG_ON(ret);
1002 btrfs_put_delayed_ref(ref);
1003
1004 count++;
1005 cond_resched();
1006 spin_lock(&delayed_refs->lock);
1007 }
1008 return count;
1009}
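Two details here are easy to miss: btrfs_delayed_ref_lock can come back with -EAGAIN because the head was freed while the spinlock was dropped, and must_insert_reserved is read-and-cleared under the lock so exactly one ref performs the reserved insert. The latch, in miniature (hypothetical struct, not the kernel's):

    #include <stdio.h>

    struct head { int must_insert_reserved; };

    /* read-and-clear while (notionally) holding the spinlock */
    static int latch_reserved(struct head *h)
    {
            int v = h->must_insert_reserved;
            h->must_insert_reserved = 0;
            return v;
    }

    int main(void)
    {
            struct head h = { .must_insert_reserved = 1 };
            /* only the first caller sees 1; prints "1 0" */
            printf("%d %d\n", latch_reserved(&h), latch_reserved(&h));
            return 0;
    }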
1010
1011/*
1012 * this starts processing the delayed reference count updates and
1013 * extent insertions we have queued up so far. count can be
1014 * 0, which means to process everything in the tree at the start
1015 * of the run (but not newly added entries), or it can be some target
1016 * number you'd like to process.
1017 */
1018int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1019 struct btrfs_root *root, unsigned long count)
1020{
1021 struct rb_node *node;
1022 struct btrfs_delayed_ref_root *delayed_refs;
1023 struct btrfs_delayed_ref_node *ref;
1024 struct list_head cluster;
1025 int ret;
1026 int run_all = count == (unsigned long)-1;
1027 int run_most = 0;
1028
1029 if (root == root->fs_info->extent_root)
1030 root = root->fs_info->tree_root;
1031
1032 delayed_refs = &trans->transaction->delayed_refs;
1033 INIT_LIST_HEAD(&cluster);
1034again:
1035 spin_lock(&delayed_refs->lock);
1036 if (count == 0) {
1037 count = delayed_refs->num_entries * 2;
1038 run_most = 1;
1039 }
1040 while (1) {
1041 if (!(run_all || run_most) &&
1042 delayed_refs->num_heads_ready < 64)
1043 break;
1044
1045 /*
1046 * go find something we can process in the rbtree. We start at
1047 * the beginning of the tree, and then build a cluster
1048 * of refs to process starting at the first one we are able to
1049 * lock
1050 */
1051 ret = btrfs_find_ref_cluster(trans, &cluster,
1052 delayed_refs->run_delayed_start);
1053 if (ret)
1054 break;
1055
1056 ret = run_clustered_refs(trans, root, &cluster);
1057 BUG_ON(ret < 0);
1058
1059 count -= min_t(unsigned long, ret, count);
1060
1061 if (count == 0)
1062 break;
1063 }
1064
1065 if (run_all) {
1066 node = rb_first(&delayed_refs->root);
1067 if (!node)
1068 goto out;
1069 count = (unsigned long)-1;
1070
1071 while (node) {
1072 ref = rb_entry(node, struct btrfs_delayed_ref_node,
1073 rb_node);
1074 if (btrfs_delayed_ref_is_head(ref)) {
1075 struct btrfs_delayed_ref_head *head;
1076
1077 head = btrfs_delayed_node_to_head(ref);
1078 atomic_inc(&ref->refs);
1079
1080 spin_unlock(&delayed_refs->lock);
1081 mutex_lock(&head->mutex);
1082 mutex_unlock(&head->mutex);
1083
1084 btrfs_put_delayed_ref(ref);
1085 cond_resched();
1086 goto again;
1087 }
1088 node = rb_next(node);
1089 }
1090 spin_unlock(&delayed_refs->lock);
1091 schedule_timeout(1);
1092 goto again;
1398 } 1093 }
1399 l = path->nodes[0];
1400 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1401 *refs = btrfs_extent_refs(l, item);
1402out: 1094out:
1403 btrfs_free_path(path); 1095 spin_unlock(&delayed_refs->lock);
1404 return 0; 1096 return 0;
1405} 1097}
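So count has three shapes: (unsigned long)-1 drains the whole tree including heads that need the mutex dance, 0 means roughly the work that existed when the call started (num_entries * 2, via run_most), and any other value is a literal target. A small model of that normalisation, assuming the same convention:

    #include <stdio.h>

    static unsigned long normalize(unsigned long count, unsigned long num_entries,
                                   int *run_all, int *run_most)
    {
            *run_all = (count == (unsigned long)-1);
            *run_most = 0;
            if (count == 0) {               /* snapshot of the current backlog */
                    count = num_entries * 2;
                    *run_most = 1;
            }
            return count;
    }

    int main(void)
    {
            int all, most;
            printf("%lu\n", normalize(0, 100, &all, &most)); /* 200, run_most set */
            printf("%d\n", all);                             /* 0 */
            return 0;
    }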
1406 1098
@@ -1624,7 +1316,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1624 int refi = 0; 1316 int refi = 0;
1625 int slot; 1317 int slot;
1626 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1318 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1627 u64, u64, u64, u64, u64, u64, u64, u64); 1319 u64, u64, u64, u64, u64, u64, u64, u64, u64);
1628 1320
1629 ref_root = btrfs_header_owner(buf); 1321 ref_root = btrfs_header_owner(buf);
1630 ref_generation = btrfs_header_generation(buf); 1322 ref_generation = btrfs_header_generation(buf);
@@ -1696,12 +1388,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1696 1388
1697 if (level == 0) { 1389 if (level == 0) {
1698 btrfs_item_key_to_cpu(buf, &key, slot); 1390 btrfs_item_key_to_cpu(buf, &key, slot);
1391 fi = btrfs_item_ptr(buf, slot,
1392 struct btrfs_file_extent_item);
1393
1394 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1395 if (bytenr == 0)
1396 continue;
1699 1397
1700 ret = process_func(trans, root, bytenr, 1398 ret = process_func(trans, root, bytenr,
1701 orig_buf->start, buf->start, 1399 btrfs_file_extent_disk_num_bytes(buf, fi),
1702 orig_root, ref_root, 1400 orig_buf->start, buf->start,
1703 orig_generation, ref_generation, 1401 orig_root, ref_root,
1704 key.objectid); 1402 orig_generation, ref_generation,
1403 key.objectid);
1705 1404
1706 if (ret) { 1405 if (ret) {
1707 faili = slot; 1406 faili = slot;
@@ -1709,7 +1408,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1709 goto fail; 1408 goto fail;
1710 } 1409 }
1711 } else { 1410 } else {
1712 ret = process_func(trans, root, bytenr, 1411 ret = process_func(trans, root, bytenr, buf->len,
1713 orig_buf->start, buf->start, 1412 orig_buf->start, buf->start,
1714 orig_root, ref_root, 1413 orig_root, ref_root,
1715 orig_generation, ref_generation, 1414 orig_generation, ref_generation,
@@ -1786,17 +1485,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans,
1786 if (bytenr == 0) 1485 if (bytenr == 0)
1787 continue; 1486 continue;
1788 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1487 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1789 orig_buf->start, buf->start, 1488 btrfs_file_extent_disk_num_bytes(buf, fi),
1790 orig_root, ref_root, 1489 orig_buf->start, buf->start,
1791 orig_generation, ref_generation, 1490 orig_root, ref_root, orig_generation,
1792 key.objectid); 1491 ref_generation, key.objectid);
1793 if (ret) 1492 if (ret)
1794 goto fail; 1493 goto fail;
1795 } else { 1494 } else {
1796 bytenr = btrfs_node_blockptr(buf, slot); 1495 bytenr = btrfs_node_blockptr(buf, slot);
1797 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1496 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1798 orig_buf->start, buf->start, 1497 buf->len, orig_buf->start,
1799 orig_root, ref_root, 1498 buf->start, orig_root, ref_root,
1800 orig_generation, ref_generation, 1499 orig_generation, ref_generation,
1801 level - 1); 1500 level - 1);
1802 if (ret) 1501 if (ret)
@@ -1815,7 +1514,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1815 struct btrfs_block_group_cache *cache) 1514 struct btrfs_block_group_cache *cache)
1816{ 1515{
1817 int ret; 1516 int ret;
1818 int pending_ret;
1819 struct btrfs_root *extent_root = root->fs_info->extent_root; 1517 struct btrfs_root *extent_root = root->fs_info->extent_root;
1820 unsigned long bi; 1518 unsigned long bi;
1821 struct extent_buffer *leaf; 1519 struct extent_buffer *leaf;
@@ -1831,12 +1529,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1831 btrfs_mark_buffer_dirty(leaf); 1529 btrfs_mark_buffer_dirty(leaf);
1832 btrfs_release_path(extent_root, path); 1530 btrfs_release_path(extent_root, path);
1833fail: 1531fail:
1834 finish_current_insert(trans, extent_root, 0);
1835 pending_ret = del_pending_extents(trans, extent_root, 0);
1836 if (ret) 1532 if (ret)
1837 return ret; 1533 return ret;
1838 if (pending_ret)
1839 return pending_ret;
1840 return 0; 1534 return 0;
1841 1535
1842} 1536}
@@ -2361,6 +2055,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2361 clear_extent_dirty(&fs_info->pinned_extents, 2055 clear_extent_dirty(&fs_info->pinned_extents,
2362 bytenr, bytenr + num - 1, GFP_NOFS); 2056 bytenr, bytenr + num - 1, GFP_NOFS);
2363 } 2057 }
2058 mutex_unlock(&root->fs_info->pinned_mutex);
2059
2364 while (num > 0) { 2060 while (num > 0) {
2365 cache = btrfs_lookup_block_group(fs_info, bytenr); 2061 cache = btrfs_lookup_block_group(fs_info, bytenr);
2366 BUG_ON(!cache); 2062 BUG_ON(!cache);
@@ -2452,8 +2148,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2452 u64 end; 2148 u64 end;
2453 int ret; 2149 int ret;
2454 2150
2455 mutex_lock(&root->fs_info->pinned_mutex);
2456 while (1) { 2151 while (1) {
2152 mutex_lock(&root->fs_info->pinned_mutex);
2457 ret = find_first_extent_bit(unpin, 0, &start, &end, 2153 ret = find_first_extent_bit(unpin, 0, &start, &end,
2458 EXTENT_DIRTY); 2154 EXTENT_DIRTY);
2459 if (ret) 2155 if (ret)
@@ -2461,209 +2157,21 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2461 2157
2462 ret = btrfs_discard_extent(root, start, end + 1 - start); 2158 ret = btrfs_discard_extent(root, start, end + 1 - start);
2463 2159
2160 /* unlocks the pinned mutex */
2464 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 2161 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
2465 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2162 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2466 2163
2467 if (need_resched()) { 2164 cond_resched();
2468 mutex_unlock(&root->fs_info->pinned_mutex);
2469 cond_resched();
2470 mutex_lock(&root->fs_info->pinned_mutex);
2471 }
2472 } 2165 }
2473 mutex_unlock(&root->fs_info->pinned_mutex); 2166 mutex_unlock(&root->fs_info->pinned_mutex);
2474 return ret; 2167 return ret;
2475} 2168}
2476 2169
2477static int finish_current_insert(struct btrfs_trans_handle *trans,
2478 struct btrfs_root *extent_root, int all)
2479{
2480 u64 start;
2481 u64 end;
2482 u64 priv;
2483 u64 search = 0;
2484 struct btrfs_fs_info *info = extent_root->fs_info;
2485 struct btrfs_path *path;
2486 struct pending_extent_op *extent_op, *tmp;
2487 struct list_head insert_list, update_list;
2488 int ret;
2489 int num_inserts = 0, max_inserts, restart = 0;
2490
2491 path = btrfs_alloc_path();
2492 INIT_LIST_HEAD(&insert_list);
2493 INIT_LIST_HEAD(&update_list);
2494
2495 max_inserts = extent_root->leafsize /
2496 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2497 sizeof(struct btrfs_extent_ref) +
2498 sizeof(struct btrfs_extent_item));
2499again:
2500 mutex_lock(&info->extent_ins_mutex);
2501 while (1) {
2502 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2503 &end, EXTENT_WRITEBACK);
2504 if (ret) {
2505 if (restart && !num_inserts &&
2506 list_empty(&update_list)) {
2507 restart = 0;
2508 search = 0;
2509 continue;
2510 }
2511 break;
2512 }
2513
2514 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2515 if (!ret) {
2516 if (all)
2517 restart = 1;
2518 search = end + 1;
2519 if (need_resched()) {
2520 mutex_unlock(&info->extent_ins_mutex);
2521 cond_resched();
2522 mutex_lock(&info->extent_ins_mutex);
2523 }
2524 continue;
2525 }
2526
2527 ret = get_state_private(&info->extent_ins, start, &priv);
2528 BUG_ON(ret);
2529 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2530
2531 if (extent_op->type == PENDING_EXTENT_INSERT) {
2532 num_inserts++;
2533 list_add_tail(&extent_op->list, &insert_list);
2534 search = end + 1;
2535 if (num_inserts == max_inserts) {
2536 restart = 1;
2537 break;
2538 }
2539 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2540 list_add_tail(&extent_op->list, &update_list);
2541 search = end + 1;
2542 } else {
2543 BUG();
2544 }
2545 }
2546
2547 /*
2548 * process the update list, clear the writeback bit for it, and if
2549 * somebody marked this thing for deletion, then just unlock it and be
2550 * done; free_extents will handle it
2551 */
2552 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2553 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2554 extent_op->bytenr + extent_op->num_bytes - 1,
2555 EXTENT_WRITEBACK, GFP_NOFS);
2556 if (extent_op->del) {
2557 list_del_init(&extent_op->list);
2558 unlock_extent(&info->extent_ins, extent_op->bytenr,
2559 extent_op->bytenr + extent_op->num_bytes
2560 - 1, GFP_NOFS);
2561 kfree(extent_op);
2562 }
2563 }
2564 mutex_unlock(&info->extent_ins_mutex);
2565
2566 /*
2567 * still have things left on the update list, so go ahead and update
2568 * everything
2569 */
2570 if (!list_empty(&update_list)) {
2571 ret = update_backrefs(trans, extent_root, path, &update_list);
2572 BUG_ON(ret);
2573
2574 /* we may have COW'ed new blocks, so lets start over */
2575 if (all)
2576 restart = 1;
2577 }
2578
2579 /*
2580 * if no inserts need to be done, but we skipped some extents and we
2581 * need to make sure everything is cleaned then reset everything and
2582 * go back to the beginning
2583 */
2584 if (!num_inserts && restart) {
2585 search = 0;
2586 restart = 0;
2587 INIT_LIST_HEAD(&update_list);
2588 INIT_LIST_HEAD(&insert_list);
2589 goto again;
2590 } else if (!num_inserts) {
2591 goto out;
2592 }
2593
2594 /*
2595 * process the insert extents list. Again if we are deleting this
2596 * extent, then just unlock it, pin down the bytes if need be, and be
2597 * done with it. Saves us from having to actually insert the extent
2598 * into the tree and then subsequently come along and delete it
2599 */
2600 mutex_lock(&info->extent_ins_mutex);
2601 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2602 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2603 extent_op->bytenr + extent_op->num_bytes - 1,
2604 EXTENT_WRITEBACK, GFP_NOFS);
2605 if (extent_op->del) {
2606 u64 used;
2607 list_del_init(&extent_op->list);
2608 unlock_extent(&info->extent_ins, extent_op->bytenr,
2609 extent_op->bytenr + extent_op->num_bytes
2610 - 1, GFP_NOFS);
2611
2612 mutex_lock(&extent_root->fs_info->pinned_mutex);
2613 ret = pin_down_bytes(trans, extent_root,
2614 extent_op->bytenr,
2615 extent_op->num_bytes, 0);
2616 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2617
2618 spin_lock(&info->delalloc_lock);
2619 used = btrfs_super_bytes_used(&info->super_copy);
2620 btrfs_set_super_bytes_used(&info->super_copy,
2621 used - extent_op->num_bytes);
2622 used = btrfs_root_used(&extent_root->root_item);
2623 btrfs_set_root_used(&extent_root->root_item,
2624 used - extent_op->num_bytes);
2625 spin_unlock(&info->delalloc_lock);
2626
2627 ret = update_block_group(trans, extent_root,
2628 extent_op->bytenr,
2629 extent_op->num_bytes,
2630 0, ret > 0);
2631 BUG_ON(ret);
2632 kfree(extent_op);
2633 num_inserts--;
2634 }
2635 }
2636 mutex_unlock(&info->extent_ins_mutex);
2637
2638 ret = insert_extents(trans, extent_root, path, &insert_list,
2639 num_inserts);
2640 BUG_ON(ret);
2641
2642 /*
2643 * if restart is set for whatever reason we need to go back and start
2644 * searching through the pending list again.
2645 *
2646 * We just inserted some extents, which could have resulted in new
2647 * blocks being allocated, which would result in new blocks needing
2648 * updates, so if all is set we _must_ restart to get the updated
2649 * blocks.
2650 */
2651 if (restart || all) {
2652 INIT_LIST_HEAD(&insert_list);
2653 INIT_LIST_HEAD(&update_list);
2654 search = 0;
2655 restart = 0;
2656 num_inserts = 0;
2657 goto again;
2658 }
2659out:
2660 btrfs_free_path(path);
2661 return 0;
2662}
2663
2664static int pin_down_bytes(struct btrfs_trans_handle *trans, 2170static int pin_down_bytes(struct btrfs_trans_handle *trans,
2665 struct btrfs_root *root, 2171 struct btrfs_root *root,
2666 u64 bytenr, u64 num_bytes, int is_data) 2172 struct btrfs_path *path,
2173 u64 bytenr, u64 num_bytes, int is_data,
2174 struct extent_buffer **must_clean)
2667{ 2175{
2668 int err = 0; 2176 int err = 0;
2669 struct extent_buffer *buf; 2177 struct extent_buffer *buf;
@@ -2686,17 +2194,19 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2686 u64 header_transid = btrfs_header_generation(buf); 2194 u64 header_transid = btrfs_header_generation(buf);
2687 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2195 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2688 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2196 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2197 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2689 header_transid == trans->transid && 2198 header_transid == trans->transid &&
2690 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2199 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2691 clean_tree_block(NULL, root, buf); 2200 *must_clean = buf;
2692 btrfs_tree_unlock(buf);
2693 free_extent_buffer(buf);
2694 return 1; 2201 return 1;
2695 } 2202 }
2696 btrfs_tree_unlock(buf); 2203 btrfs_tree_unlock(buf);
2697 } 2204 }
2698 free_extent_buffer(buf); 2205 free_extent_buffer(buf);
2699pinit: 2206pinit:
2207 btrfs_set_path_blocking(path);
2208 mutex_lock(&root->fs_info->pinned_mutex);
2209 /* unlocks the pinned mutex */
2700 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2210 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2701 2211
2702 BUG_ON(err < 0); 2212 BUG_ON(err < 0);
@@ -2710,7 +2220,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2710 struct btrfs_root *root, 2220 struct btrfs_root *root,
2711 u64 bytenr, u64 num_bytes, u64 parent, 2221 u64 bytenr, u64 num_bytes, u64 parent,
2712 u64 root_objectid, u64 ref_generation, 2222 u64 root_objectid, u64 ref_generation,
2713 u64 owner_objectid, int pin, int mark_free) 2223 u64 owner_objectid, int pin, int mark_free,
2224 int refs_to_drop)
2714{ 2225{
2715 struct btrfs_path *path; 2226 struct btrfs_path *path;
2716 struct btrfs_key key; 2227 struct btrfs_key key;
@@ -2732,6 +2243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2732 return -ENOMEM; 2243 return -ENOMEM;
2733 2244
2734 path->reada = 1; 2245 path->reada = 1;
2246 path->leave_spinning = 1;
2735 ret = lookup_extent_backref(trans, extent_root, path, 2247 ret = lookup_extent_backref(trans, extent_root, path,
2736 bytenr, parent, root_objectid, 2248 bytenr, parent, root_objectid,
2737 ref_generation, owner_objectid, 1); 2249 ref_generation, owner_objectid, 1);
@@ -2753,9 +2265,11 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2753 break; 2265 break;
2754 } 2266 }
2755 if (!found_extent) { 2267 if (!found_extent) {
2756 ret = remove_extent_backref(trans, extent_root, path); 2268 ret = remove_extent_backref(trans, extent_root, path,
2269 refs_to_drop);
2757 BUG_ON(ret); 2270 BUG_ON(ret);
2758 btrfs_release_path(extent_root, path); 2271 btrfs_release_path(extent_root, path);
2272 path->leave_spinning = 1;
2759 ret = btrfs_search_slot(trans, extent_root, 2273 ret = btrfs_search_slot(trans, extent_root,
2760 &key, path, -1, 1); 2274 &key, path, -1, 1);
2761 if (ret) { 2275 if (ret) {
@@ -2771,8 +2285,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2771 btrfs_print_leaf(extent_root, path->nodes[0]); 2285 btrfs_print_leaf(extent_root, path->nodes[0]);
2772 WARN_ON(1); 2286 WARN_ON(1);
2773 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2287 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2774 "root %llu gen %llu owner %llu\n", 2288 "parent %llu root %llu gen %llu owner %llu\n",
2775 (unsigned long long)bytenr, 2289 (unsigned long long)bytenr,
2290 (unsigned long long)parent,
2776 (unsigned long long)root_objectid, 2291 (unsigned long long)root_objectid,
2777 (unsigned long long)ref_generation, 2292 (unsigned long long)ref_generation,
2778 (unsigned long long)owner_objectid); 2293 (unsigned long long)owner_objectid);
@@ -2782,17 +2297,23 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2782 ei = btrfs_item_ptr(leaf, extent_slot, 2297 ei = btrfs_item_ptr(leaf, extent_slot,
2783 struct btrfs_extent_item); 2298 struct btrfs_extent_item);
2784 refs = btrfs_extent_refs(leaf, ei); 2299 refs = btrfs_extent_refs(leaf, ei);
2785 BUG_ON(refs == 0);
2786 refs -= 1;
2787 btrfs_set_extent_refs(leaf, ei, refs);
2788 2300
2301 /*
2302 * we're not allowed to delete the extent item if there
2303 * are other delayed ref updates pending
2304 */
2305
2306 BUG_ON(refs < refs_to_drop);
2307 refs -= refs_to_drop;
2308 btrfs_set_extent_refs(leaf, ei, refs);
2789 btrfs_mark_buffer_dirty(leaf); 2309 btrfs_mark_buffer_dirty(leaf);
2790 2310
2791 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2311 if (refs == 0 && found_extent &&
2312 path->slots[0] == extent_slot + 1) {
2792 struct btrfs_extent_ref *ref; 2313 struct btrfs_extent_ref *ref;
2793 ref = btrfs_item_ptr(leaf, path->slots[0], 2314 ref = btrfs_item_ptr(leaf, path->slots[0],
2794 struct btrfs_extent_ref); 2315 struct btrfs_extent_ref);
2795 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2316 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
2796 /* if the back ref and the extent are next to each other 2317 /* if the back ref and the extent are next to each other
2797 * they get deleted below in one shot 2318 * they get deleted below in one shot
2798 */ 2319 */
@@ -2800,11 +2321,13 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2800 num_to_del = 2; 2321 num_to_del = 2;
2801 } else if (found_extent) { 2322 } else if (found_extent) {
2802 /* otherwise delete the extent back ref */ 2323 /* otherwise delete the extent back ref */
2803 ret = remove_extent_backref(trans, extent_root, path); 2324 ret = remove_extent_backref(trans, extent_root, path,
2325 refs_to_drop);
2804 BUG_ON(ret); 2326 BUG_ON(ret);
2805 /* if refs are 0, we need to setup the path for deletion */ 2327 /* if refs are 0, we need to setup the path for deletion */
2806 if (refs == 0) { 2328 if (refs == 0) {
2807 btrfs_release_path(extent_root, path); 2329 btrfs_release_path(extent_root, path);
2330 path->leave_spinning = 1;
2808 ret = btrfs_search_slot(trans, extent_root, &key, path, 2331 ret = btrfs_search_slot(trans, extent_root, &key, path,
2809 -1, 1); 2332 -1, 1);
2810 BUG_ON(ret); 2333 BUG_ON(ret);
@@ -2814,16 +2337,18 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2814 if (refs == 0) { 2337 if (refs == 0) {
2815 u64 super_used; 2338 u64 super_used;
2816 u64 root_used; 2339 u64 root_used;
2340 struct extent_buffer *must_clean = NULL;
2817 2341
2818 if (pin) { 2342 if (pin) {
2819 mutex_lock(&root->fs_info->pinned_mutex); 2343 ret = pin_down_bytes(trans, root, path,
2820 ret = pin_down_bytes(trans, root, bytenr, num_bytes, 2344 bytenr, num_bytes,
2821 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); 2345 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
2822 mutex_unlock(&root->fs_info->pinned_mutex); 2346 &must_clean);
2823 if (ret > 0) 2347 if (ret > 0)
2824 mark_free = 1; 2348 mark_free = 1;
2825 BUG_ON(ret < 0); 2349 BUG_ON(ret < 0);
2826 } 2350 }
2351
2827 /* block accounting for super block */ 2352 /* block accounting for super block */
2828 spin_lock(&info->delalloc_lock); 2353 spin_lock(&info->delalloc_lock);
2829 super_used = btrfs_super_bytes_used(&info->super_copy); 2354 super_used = btrfs_super_bytes_used(&info->super_copy);
@@ -2835,14 +2360,34 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2835 btrfs_set_root_used(&root->root_item, 2360 btrfs_set_root_used(&root->root_item,
2836 root_used - num_bytes); 2361 root_used - num_bytes);
2837 spin_unlock(&info->delalloc_lock); 2362 spin_unlock(&info->delalloc_lock);
2363
2364 /*
2365 * it is going to be very rare for someone to be waiting
2366 * on the block we're freeing. del_items might need to
2367 * schedule, so rather than get fancy, just force it
2368 * to blocking here
2369 */
2370 if (must_clean)
2371 btrfs_set_lock_blocking(must_clean);
2372
2838 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 2373 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2839 num_to_del); 2374 num_to_del);
2840 BUG_ON(ret); 2375 BUG_ON(ret);
2841 btrfs_release_path(extent_root, path); 2376 btrfs_release_path(extent_root, path);
2842 2377
2378 if (must_clean) {
2379 clean_tree_block(NULL, root, must_clean);
2380 btrfs_tree_unlock(must_clean);
2381 free_extent_buffer(must_clean);
2382 }
2383
2843 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2384 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2844 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 2385 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2845 BUG_ON(ret); 2386 BUG_ON(ret);
2387 } else {
2388 invalidate_mapping_pages(info->btree_inode->i_mapping,
2389 bytenr >> PAGE_CACHE_SHIFT,
2390 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
2846 } 2391 }
2847 2392
2848 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 2393 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
@@ -2850,218 +2395,103 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2850 BUG_ON(ret); 2395 BUG_ON(ret);
2851 } 2396 }
2852 btrfs_free_path(path); 2397 btrfs_free_path(path);
2853 finish_current_insert(trans, extent_root, 0);
2854 return ret; 2398 return ret;
2855} 2399}
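The must_clean handoff above enforces a strict order: flip the buffer's lock to blocking (btrfs_del_items may schedule), finish the tree deletions, and only then clean and release the block. Reduced to a toy trace (printf stand-ins for the real calls):

    #include <stdio.h>

    struct buffer { const char *name; };

    static void free_one(struct buffer *must_clean)
    {
            if (must_clean)
                    printf("set %s lock blocking\n", must_clean->name);
            printf("btrfs_del_items() (may sleep)\n");      /* tree surgery first */
            if (must_clean)                                 /* cleanup strictly after */
                    printf("clean_tree_block(%s); unlock; free\n", must_clean->name);
    }

    int main(void)
    {
            struct buffer eb = { "eb" };
            free_one(&eb);
            return 0;
    }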
2856 2400
2857/* 2401/*
2858 * find all the blocks marked as pending in the radix tree and remove 2402 * remove an extent from the root, returns 0 on success
2859 * them from the extent map
2860 */ 2403 */
2861static int del_pending_extents(struct btrfs_trans_handle *trans, 2404static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2862 struct btrfs_root *extent_root, int all) 2405 struct btrfs_root *root,
2406 u64 bytenr, u64 num_bytes, u64 parent,
2407 u64 root_objectid, u64 ref_generation,
2408 u64 owner_objectid, int pin,
2409 int refs_to_drop)
2863{ 2410{
2864 int ret; 2411 WARN_ON(num_bytes < root->sectorsize);
2865 int err = 0;
2866 u64 start;
2867 u64 end;
2868 u64 priv;
2869 u64 search = 0;
2870 int nr = 0, skipped = 0;
2871 struct extent_io_tree *pending_del;
2872 struct extent_io_tree *extent_ins;
2873 struct pending_extent_op *extent_op;
2874 struct btrfs_fs_info *info = extent_root->fs_info;
2875 struct list_head delete_list;
2876
2877 INIT_LIST_HEAD(&delete_list);
2878 extent_ins = &extent_root->fs_info->extent_ins;
2879 pending_del = &extent_root->fs_info->pending_del;
2880
2881again:
2882 mutex_lock(&info->extent_ins_mutex);
2883 while (1) {
2884 ret = find_first_extent_bit(pending_del, search, &start, &end,
2885 EXTENT_WRITEBACK);
2886 if (ret) {
2887 if (all && skipped && !nr) {
2888 search = 0;
2889 skipped = 0;
2890 continue;
2891 }
2892 mutex_unlock(&info->extent_ins_mutex);
2893 break;
2894 }
2895
2896 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2897 if (!ret) {
2898 search = end+1;
2899 skipped = 1;
2900
2901 if (need_resched()) {
2902 mutex_unlock(&info->extent_ins_mutex);
2903 cond_resched();
2904 mutex_lock(&info->extent_ins_mutex);
2905 }
2906
2907 continue;
2908 }
2909 BUG_ON(ret < 0);
2910
2911 ret = get_state_private(pending_del, start, &priv);
2912 BUG_ON(ret);
2913 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2914
2915 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2916 GFP_NOFS);
2917 if (!test_range_bit(extent_ins, start, end,
2918 EXTENT_WRITEBACK, 0)) {
2919 list_add_tail(&extent_op->list, &delete_list);
2920 nr++;
2921 } else {
2922 kfree(extent_op);
2923
2924 ret = get_state_private(&info->extent_ins, start,
2925 &priv);
2926 BUG_ON(ret);
2927 extent_op = (struct pending_extent_op *)
2928 (unsigned long)priv;
2929
2930 clear_extent_bits(&info->extent_ins, start, end,
2931 EXTENT_WRITEBACK, GFP_NOFS);
2932
2933 if (extent_op->type == PENDING_BACKREF_UPDATE) {
2934 list_add_tail(&extent_op->list, &delete_list);
2935 search = end + 1;
2936 nr++;
2937 continue;
2938 }
2939
2940 mutex_lock(&extent_root->fs_info->pinned_mutex);
2941 ret = pin_down_bytes(trans, extent_root, start,
2942 end + 1 - start, 0);
2943 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2944
2945 ret = update_block_group(trans, extent_root, start,
2946 end + 1 - start, 0, ret > 0);
2947
2948 unlock_extent(extent_ins, start, end, GFP_NOFS);
2949 BUG_ON(ret);
2950 kfree(extent_op);
2951 }
2952 if (ret)
2953 err = ret;
2954
2955 search = end + 1;
2956
2957 if (need_resched()) {
2958 mutex_unlock(&info->extent_ins_mutex);
2959 cond_resched();
2960 mutex_lock(&info->extent_ins_mutex);
2961 }
2962 }
2963 2412
2964 if (nr) { 2413 /*
2965 ret = free_extents(trans, extent_root, &delete_list); 2414 * if metadata, always pin
2966 BUG_ON(ret); 2415 * if data, pin when any transaction has committed this extent
2967 } 2416 */
2417 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
2418 ref_generation != trans->transid)
2419 pin = 1;
2968 2420
2969 if (all && skipped) { 2421 if (ref_generation != trans->transid)
2970 INIT_LIST_HEAD(&delete_list); 2422 pin = 1;
2971 search = 0;
2972 nr = 0;
2973 goto again;
2974 }
2975 2423
2976 if (!err) 2424 return __free_extent(trans, root, bytenr, num_bytes, parent,
2977 finish_current_insert(trans, extent_root, 0); 2425 root_objectid, ref_generation,
2978 return err; 2426 owner_objectid, pin, pin == 0, refs_to_drop);
2979} 2427}
2980 2428
2981/* 2429/*
2982 * remove an extent from the root, returns 0 on success 2430 * when we free an extent, it is possible (and likely) that we free the last
2431 * delayed ref for that extent as well. This searches the delayed ref tree for
2432 * a given extent, and if there are no other delayed refs to be processed, it
2433 * removes it from the tree.
2983 */ 2434 */
2984static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2435static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2985 struct btrfs_root *root, 2436 struct btrfs_root *root, u64 bytenr)
2986 u64 bytenr, u64 num_bytes, u64 parent,
2987 u64 root_objectid, u64 ref_generation,
2988 u64 owner_objectid, int pin)
2989{ 2437{
2990 struct btrfs_root *extent_root = root->fs_info->extent_root; 2438 struct btrfs_delayed_ref_head *head;
2991 int pending_ret; 2439 struct btrfs_delayed_ref_root *delayed_refs;
2440 struct btrfs_delayed_ref_node *ref;
2441 struct rb_node *node;
2992 int ret; 2442 int ret;
2993 2443
2994 WARN_ON(num_bytes < root->sectorsize); 2444 delayed_refs = &trans->transaction->delayed_refs;
2995 if (root == extent_root) { 2445 spin_lock(&delayed_refs->lock);
2996 struct pending_extent_op *extent_op = NULL; 2446 head = btrfs_find_delayed_ref_head(trans, bytenr);
2997 2447 if (!head)
2998 mutex_lock(&root->fs_info->extent_ins_mutex); 2448 goto out;
2999 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
3000 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
3001 u64 priv;
3002 ret = get_state_private(&root->fs_info->extent_ins,
3003 bytenr, &priv);
3004 BUG_ON(ret);
3005 extent_op = (struct pending_extent_op *)
3006 (unsigned long)priv;
3007 2449
3008 extent_op->del = 1; 2450 node = rb_prev(&head->node.rb_node);
3009 if (extent_op->type == PENDING_EXTENT_INSERT) { 2451 if (!node)
3010 mutex_unlock(&root->fs_info->extent_ins_mutex); 2452 goto out;
3011 return 0;
3012 }
3013 }
3014 2453
3015 if (extent_op) { 2454 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3016 ref_generation = extent_op->orig_generation;
3017 parent = extent_op->orig_parent;
3018 }
3019 2455
3020 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2456 /* there are still entries for this ref, we can't drop it */
3021 BUG_ON(!extent_op); 2457 if (ref->bytenr == bytenr)
3022 2458 goto out;
3023 extent_op->type = PENDING_EXTENT_DELETE;
3024 extent_op->bytenr = bytenr;
3025 extent_op->num_bytes = num_bytes;
3026 extent_op->parent = parent;
3027 extent_op->orig_parent = parent;
3028 extent_op->generation = ref_generation;
3029 extent_op->orig_generation = ref_generation;
3030 extent_op->level = (int)owner_objectid;
3031 INIT_LIST_HEAD(&extent_op->list);
3032 extent_op->del = 0;
3033
3034 set_extent_bits(&root->fs_info->pending_del,
3035 bytenr, bytenr + num_bytes - 1,
3036 EXTENT_WRITEBACK, GFP_NOFS);
3037 set_state_private(&root->fs_info->pending_del,
3038 bytenr, (unsigned long)extent_op);
3039 mutex_unlock(&root->fs_info->extent_ins_mutex);
3040 return 0;
3041 }
3042 /* if metadata always pin */
3043 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3044 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3045 mutex_lock(&root->fs_info->pinned_mutex);
3046 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3047 mutex_unlock(&root->fs_info->pinned_mutex);
3048 update_reserved_extents(root, bytenr, num_bytes, 0);
3049 return 0;
3050 }
3051 pin = 1;
3052 }
3053 2459
3054 /* if data pin when any transaction has committed this */ 2460 /*
3055 if (ref_generation != trans->transid) 2461 * waiting for the lock here would deadlock. If someone else has it
3056 pin = 1; 2462 * locked they are already in the process of dropping it anyway
2463 */
2464 if (!mutex_trylock(&head->mutex))
2465 goto out;
3057 2466
3058 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 2467 /*
3059 root_objectid, ref_generation, 2468 * at this point we have a head with no other entries. Go
3060 owner_objectid, pin, pin == 0); 2469 * ahead and process it.
2470 */
2471 head->node.in_tree = 0;
2472 rb_erase(&head->node.rb_node, &delayed_refs->root);
3061 2473
3062 finish_current_insert(trans, root->fs_info->extent_root, 0); 2474 delayed_refs->num_entries--;
3063 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); 2475
3064 return ret ? ret : pending_ret; 2476 /*
2477 * we don't take a ref on the node because we're removing it from the
2478 * tree, so we just steal the ref the tree was holding.
2479 */
2480 delayed_refs->num_heads--;
2481 if (list_empty(&head->cluster))
2482 delayed_refs->num_heads_ready--;
2483
2484 list_del_init(&head->cluster);
2485 spin_unlock(&delayed_refs->lock);
2486
2487 ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
2488 &head->node, head->must_insert_reserved);
2489 BUG_ON(ret);
2490 btrfs_put_delayed_ref(&head->node);
2491 return 0;
2492out:
2493 spin_unlock(&delayed_refs->lock);
2494 return 0;
3065} 2495}
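check_ref_cleanup only fires when the head is the sole node left for that bytenr and its mutex is free; the mutex_trylock is deliberate, since blocking would deadlock against whoever is already dropping the ref. The control flow, stripped to pthread stand-ins for the kernel primitives:

    #include <pthread.h>
    #include <stdio.h>

    static int try_cleanup(pthread_mutex_t *head_mutex, int other_entries)
    {
            if (other_entries)
                    return 0;       /* more refs pending: leave the head queued */
            if (pthread_mutex_trylock(head_mutex))
                    return 0;       /* contended: the owner is handling it */
            /* ...unlink the head and run it immediately... */
            pthread_mutex_unlock(head_mutex);
            return 1;
    }

    int main(void)
    {
            pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
            printf("cleaned: %d\n", try_cleanup(&m, 0));    /* 1 */
            return 0;
    }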
3066 2496
3067int btrfs_free_extent(struct btrfs_trans_handle *trans, 2497int btrfs_free_extent(struct btrfs_trans_handle *trans,
@@ -3072,9 +2502,30 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3072{ 2502{
3073 int ret; 2503 int ret;
3074 2504
3075 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 2505 /*
3076 root_objectid, ref_generation, 2506 * tree log blocks never actually go into the extent allocation
3077 owner_objectid, pin); 2507 * tree, just update pinning info and exit early.
2508 *
2509 * data extents referenced by the tree log do need to have
2510 * their reference counts bumped.
2511 */
2512 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
2513 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2514 mutex_lock(&root->fs_info->pinned_mutex);
2515
2516 /* unlocks the pinned mutex */
2517 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2518 update_reserved_extents(root, bytenr, num_bytes, 0);
2519 ret = 0;
2520 } else {
2521 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
2522 root_objectid, ref_generation,
2523 owner_objectid,
2524 BTRFS_DROP_DELAYED_REF, 1);
2525 BUG_ON(ret);
2526 ret = check_ref_cleanup(trans, root, bytenr);
2527 BUG_ON(ret);
2528 }
3078 return ret; 2529 return ret;
3079} 2530}
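The free path is now a two-way branch: log-tree metadata is pinned on the spot, everything else becomes a queued drop plus an opportunistic cleanup. A sketch of the predicate, with made-up constant values standing in for the real objectids:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG_ROOT_ID     100ULL  /* invented values, illustration only */
    #define FIRST_FREE_ID   256ULL

    /* 1: pin immediately (log-tree metadata), 0: queue a delayed drop */
    static int pin_immediately(uint64_t root_objectid, uint64_t owner_objectid)
    {
            return root_objectid == LOG_ROOT_ID &&
                   owner_objectid < FIRST_FREE_ID;
    }

    int main(void)
    {
            printf("%d\n", pin_immediately(LOG_ROOT_ID, 1)); /* 1: pin now */
            printf("%d\n", pin_immediately(7, 1));           /* 0: delayed */
            return 0;
    }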
3080 2531
@@ -3475,10 +2926,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3475static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 2926static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3476 struct btrfs_root *root, u64 parent, 2927 struct btrfs_root *root, u64 parent,
3477 u64 root_objectid, u64 ref_generation, 2928 u64 root_objectid, u64 ref_generation,
3478 u64 owner, struct btrfs_key *ins) 2929 u64 owner, struct btrfs_key *ins,
2930 int ref_mod)
3479{ 2931{
3480 int ret; 2932 int ret;
3481 int pending_ret;
3482 u64 super_used; 2933 u64 super_used;
3483 u64 root_used; 2934 u64 root_used;
3484 u64 num_bytes = ins->offset; 2935 u64 num_bytes = ins->offset;
@@ -3503,33 +2954,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3503 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 2954 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
3504 spin_unlock(&info->delalloc_lock); 2955 spin_unlock(&info->delalloc_lock);
3505 2956
3506 if (root == extent_root) {
3507 struct pending_extent_op *extent_op;
3508
3509 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3510 BUG_ON(!extent_op);
3511
3512 extent_op->type = PENDING_EXTENT_INSERT;
3513 extent_op->bytenr = ins->objectid;
3514 extent_op->num_bytes = ins->offset;
3515 extent_op->parent = parent;
3516 extent_op->orig_parent = 0;
3517 extent_op->generation = ref_generation;
3518 extent_op->orig_generation = 0;
3519 extent_op->level = (int)owner;
3520 INIT_LIST_HEAD(&extent_op->list);
3521 extent_op->del = 0;
3522
3523 mutex_lock(&root->fs_info->extent_ins_mutex);
3524 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
3525 ins->objectid + ins->offset - 1,
3526 EXTENT_WRITEBACK, GFP_NOFS);
3527 set_state_private(&root->fs_info->extent_ins,
3528 ins->objectid, (unsigned long)extent_op);
3529 mutex_unlock(&root->fs_info->extent_ins_mutex);
3530 goto update_block;
3531 }
3532
3533 memcpy(&keys[0], ins, sizeof(*ins)); 2957 memcpy(&keys[0], ins, sizeof(*ins));
3534 keys[1].objectid = ins->objectid; 2958 keys[1].objectid = ins->objectid;
3535 keys[1].type = BTRFS_EXTENT_REF_KEY; 2959 keys[1].type = BTRFS_EXTENT_REF_KEY;
@@ -3540,37 +2964,31 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3540 path = btrfs_alloc_path(); 2964 path = btrfs_alloc_path();
3541 BUG_ON(!path); 2965 BUG_ON(!path);
3542 2966
2967 path->leave_spinning = 1;
3543 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 2968 ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
3544 sizes, 2); 2969 sizes, 2);
3545 BUG_ON(ret); 2970 BUG_ON(ret);
3546 2971
3547 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2972 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3548 struct btrfs_extent_item); 2973 struct btrfs_extent_item);
3549 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 2974 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
3550 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 2975 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3551 struct btrfs_extent_ref); 2976 struct btrfs_extent_ref);
3552 2977
3553 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 2978 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
3554 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 2979 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
3555 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 2980 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
3556 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 2981 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
3557 2982
3558 btrfs_mark_buffer_dirty(path->nodes[0]); 2983 btrfs_mark_buffer_dirty(path->nodes[0]);
3559 2984
3560 trans->alloc_exclude_start = 0; 2985 trans->alloc_exclude_start = 0;
3561 trans->alloc_exclude_nr = 0; 2986 trans->alloc_exclude_nr = 0;
3562 btrfs_free_path(path); 2987 btrfs_free_path(path);
3563 finish_current_insert(trans, extent_root, 0);
3564 pending_ret = del_pending_extents(trans, extent_root, 0);
3565 2988
3566 if (ret) 2989 if (ret)
3567 goto out; 2990 goto out;
3568 if (pending_ret) {
3569 ret = pending_ret;
3570 goto out;
3571 }
3572 2991
3573update_block:
3574 ret = update_block_group(trans, root, ins->objectid, 2992 ret = update_block_group(trans, root, ins->objectid,
3575 ins->offset, 1, 0); 2993 ins->offset, 1, 0);
3576 if (ret) { 2994 if (ret) {
@@ -3592,9 +3010,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3592 3010
3593 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3011 if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3594 return 0; 3012 return 0;
3595 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3013
3596 ref_generation, owner, ins); 3014 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3597 update_reserved_extents(root, ins->objectid, ins->offset, 0); 3015 ins->offset, parent, root_objectid,
3016 ref_generation, owner,
3017 BTRFS_ADD_DELAYED_EXTENT, 0);
3018 BUG_ON(ret);
3598 return ret; 3019 return ret;
3599} 3020}
3600 3021
@@ -3621,7 +3042,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3621 BUG_ON(ret); 3042 BUG_ON(ret);
3622 put_block_group(block_group); 3043 put_block_group(block_group);
3623 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3044 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3624 ref_generation, owner, ins); 3045 ref_generation, owner, ins, 1);
3625 return ret; 3046 return ret;
3626} 3047}
3627 3048
@@ -3640,20 +3061,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3640 u64 search_end, struct btrfs_key *ins, u64 data) 3061 u64 search_end, struct btrfs_key *ins, u64 data)
3641{ 3062{
3642 int ret; 3063 int ret;
3643
3644 ret = __btrfs_reserve_extent(trans, root, num_bytes, 3064 ret = __btrfs_reserve_extent(trans, root, num_bytes,
3645 min_alloc_size, empty_size, hint_byte, 3065 min_alloc_size, empty_size, hint_byte,
3646 search_end, ins, data); 3066 search_end, ins, data);
3647 BUG_ON(ret); 3067 BUG_ON(ret);
3648 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 3068 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3649 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 3069 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3650 root_objectid, ref_generation, 3070 ins->offset, parent, root_objectid,
3651 owner_objectid, ins); 3071 ref_generation, owner_objectid,
3072 BTRFS_ADD_DELAYED_EXTENT, 0);
3652 BUG_ON(ret); 3073 BUG_ON(ret);
3653
3654 } else {
3655 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3656 } 3074 }
3075 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3657 return ret; 3076 return ret;
3658} 3077}
3659 3078
@@ -3789,7 +3208,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3789 3208
3790 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3209 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3791 3210
3792 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3211 ret = btrfs_free_extent(trans, root, disk_bytenr,
3793 btrfs_file_extent_disk_num_bytes(leaf, fi), 3212 btrfs_file_extent_disk_num_bytes(leaf, fi),
3794 leaf->start, leaf_owner, leaf_generation, 3213 leaf->start, leaf_owner, leaf_generation,
3795 key.objectid, 0); 3214 key.objectid, 0);
@@ -3829,7 +3248,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3829 */ 3248 */
3830 for (i = 0; i < ref->nritems; i++) { 3249 for (i = 0; i < ref->nritems; i++) {
3831 info = ref->extents + sorted[i].slot; 3250 info = ref->extents + sorted[i].slot;
3832 ret = __btrfs_free_extent(trans, root, info->bytenr, 3251 ret = btrfs_free_extent(trans, root, info->bytenr,
3833 info->num_bytes, ref->bytenr, 3252 info->num_bytes, ref->bytenr,
3834 ref->owner, ref->generation, 3253 ref->owner, ref->generation,
3835 info->objectid, 0); 3254 info->objectid, 0);
@@ -3846,12 +3265,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3846 return 0; 3265 return 0;
3847} 3266}
3848 3267
3849static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3268static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3269 struct btrfs_root *root, u64 start,
3850 u64 len, u32 *refs) 3270 u64 len, u32 *refs)
3851{ 3271{
3852 int ret; 3272 int ret;
3853 3273
3854 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3274 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
3855 BUG_ON(ret); 3275 BUG_ON(ret);
3856 3276
3857#if 0 /* some debugging code in case we see problems here */ 3277#if 0 /* some debugging code in case we see problems here */
@@ -3959,7 +3379,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
3959 * we just decrement it below and don't update any 3379 * we just decrement it below and don't update any
3960 * of the refs the leaf points to. 3380 * of the refs the leaf points to.
3961 */ 3381 */
3962 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3382 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3383 blocksize, &refs);
3963 BUG_ON(ret); 3384 BUG_ON(ret);
3964 if (refs != 1) 3385 if (refs != 1)
3965 continue; 3386 continue;
@@ -4010,7 +3431,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4010 */ 3431 */
4011 for (i = 0; i < refi; i++) { 3432 for (i = 0; i < refi; i++) {
4012 bytenr = sorted[i].bytenr; 3433 bytenr = sorted[i].bytenr;
4013 ret = __btrfs_free_extent(trans, root, bytenr, 3434 ret = btrfs_free_extent(trans, root, bytenr,
4014 blocksize, eb->start, 3435 blocksize, eb->start,
4015 root_owner, root_gen, 0, 1); 3436 root_owner, root_gen, 0, 1);
4016 BUG_ON(ret); 3437 BUG_ON(ret);
@@ -4053,7 +3474,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4053 3474
4054 WARN_ON(*level < 0); 3475 WARN_ON(*level < 0);
4055 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3476 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4056 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 3477 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4057 path->nodes[*level]->len, &refs); 3478 path->nodes[*level]->len, &refs);
4058 BUG_ON(ret); 3479 BUG_ON(ret);
4059 if (refs > 1) 3480 if (refs > 1)
@@ -4104,7 +3525,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4104 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3525 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4105 blocksize = btrfs_level_size(root, *level - 1); 3526 blocksize = btrfs_level_size(root, *level - 1);
4106 3527
4107 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3528 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3529 blocksize, &refs);
4108 BUG_ON(ret); 3530 BUG_ON(ret);
4109 3531
4110 /* 3532 /*
@@ -4119,7 +3541,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4119 root_gen = btrfs_header_generation(parent); 3541 root_gen = btrfs_header_generation(parent);
4120 path->slots[*level]++; 3542 path->slots[*level]++;
4121 3543
4122 ret = __btrfs_free_extent(trans, root, bytenr, 3544 ret = btrfs_free_extent(trans, root, bytenr,
4123 blocksize, parent->start, 3545 blocksize, parent->start,
4124 root_owner, root_gen, 3546 root_owner, root_gen,
4125 *level - 1, 1); 3547 *level - 1, 1);
@@ -4165,7 +3587,7 @@ out:
4165 * cleanup and free the reference on the last node 3587 * cleanup and free the reference on the last node
4166 * we processed 3588 * we processed
4167 */ 3589 */
4168 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3590 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4169 parent->start, root_owner, root_gen, 3591 parent->start, root_owner, root_gen,
4170 *level, 1); 3592 *level, 1);
4171 free_extent_buffer(path->nodes[*level]); 3593 free_extent_buffer(path->nodes[*level]);
@@ -4354,6 +3776,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4354 struct btrfs_path *path; 3776 struct btrfs_path *path;
4355 int i; 3777 int i;
4356 int orig_level; 3778 int orig_level;
3779 int update_count;
4357 struct btrfs_root_item *root_item = &root->root_item; 3780 struct btrfs_root_item *root_item = &root->root_item;
4358 3781
4359 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 3782 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
@@ -4395,6 +3818,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4395 } 3818 }
4396 } 3819 }
4397 while (1) { 3820 while (1) {
3821 unsigned long update;
4398 wret = walk_down_tree(trans, root, path, &level); 3822 wret = walk_down_tree(trans, root, path, &level);
4399 if (wret > 0) 3823 if (wret > 0)
4400 break; 3824 break;
@@ -4407,12 +3831,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
4407 break; 3831 break;
4408 if (wret < 0) 3832 if (wret < 0)
4409 ret = wret; 3833 ret = wret;
4410 if (trans->transaction->in_commit) { 3834 if (trans->transaction->in_commit ||
3835 trans->transaction->delayed_refs.flushing) {
4411 ret = -EAGAIN; 3836 ret = -EAGAIN;
4412 break; 3837 break;
4413 } 3838 }
4414 atomic_inc(&root->fs_info->throttle_gen); 3839 atomic_inc(&root->fs_info->throttle_gen);
4415 wake_up(&root->fs_info->transaction_throttle); 3840 wake_up(&root->fs_info->transaction_throttle);
3841 for (update_count = 0; update_count < 16; update_count++) {
3842 update = trans->delayed_ref_updates;
3843 trans->delayed_ref_updates = 0;
3844 if (update)
3845 btrfs_run_delayed_refs(trans, root, update);
3846 else
3847 break;
3848 }
4416 } 3849 }
4417 for (i = 0; i <= orig_level; i++) { 3850 for (i = 0; i <= orig_level; i++) {
4418 if (path->nodes[i]) { 3851 if (path->nodes[i]) {
@@ -5457,6 +4890,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
5457 root->root_key.objectid, 4890 root->root_key.objectid,
5458 trans->transid, key.objectid); 4891 trans->transid, key.objectid);
5459 BUG_ON(ret); 4892 BUG_ON(ret);
4893
5460 ret = btrfs_free_extent(trans, root, 4894 ret = btrfs_free_extent(trans, root,
5461 bytenr, num_bytes, leaf->start, 4895 bytenr, num_bytes, leaf->start,
5462 btrfs_header_owner(leaf), 4896 btrfs_header_owner(leaf),
@@ -5768,9 +5202,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
5768 ref_path, NULL, NULL); 5202 ref_path, NULL, NULL);
5769 BUG_ON(ret); 5203 BUG_ON(ret);
5770 5204
5771 if (root == root->fs_info->extent_root)
5772 btrfs_extent_post_op(trans, root);
5773
5774 return 0; 5205 return 0;
5775} 5206}
5776 5207
@@ -6038,6 +5469,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
6038 if (!path) 5469 if (!path)
6039 return -ENOMEM; 5470 return -ENOMEM;
6040 5471
5472 path->leave_spinning = 1;
6041 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 5473 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
6042 if (ret) 5474 if (ret)
6043 goto out; 5475 goto out;
@@ -6208,6 +5640,9 @@ again:
6208 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 5640 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
6209 mutex_unlock(&root->fs_info->cleaner_mutex); 5641 mutex_unlock(&root->fs_info->cleaner_mutex);
6210 5642
5643 trans = btrfs_start_transaction(info->tree_root, 1);
5644 btrfs_commit_transaction(trans, info->tree_root);
5645
6211 while (1) { 5646 while (1) {
6212 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5647 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6213 if (ret < 0) 5648 if (ret < 0)
@@ -6466,7 +5901,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6466 5901
6467 extent_root = root->fs_info->extent_root; 5902 extent_root = root->fs_info->extent_root;
6468 5903
6469 root->fs_info->last_trans_new_blockgroup = trans->transid; 5904 root->fs_info->last_trans_log_full_commit = trans->transid;
6470 5905
6471 cache = kzalloc(sizeof(*cache), GFP_NOFS); 5906 cache = kzalloc(sizeof(*cache), GFP_NOFS);
6472 if (!cache) 5907 if (!cache)
@@ -6500,9 +5935,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6500 sizeof(cache->item)); 5935 sizeof(cache->item));
6501 BUG_ON(ret); 5936 BUG_ON(ret);
6502 5937
6503 finish_current_insert(trans, extent_root, 0);
6504 ret = del_pending_extents(trans, extent_root, 0);
6505 BUG_ON(ret);
6506 set_avail_alloc_bits(extent_root->fs_info, type); 5938 set_avail_alloc_bits(extent_root->fs_info, type);
6507 5939
6508 return 0; 5940 return 0;
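
The extent-tree.c hunks above replace the old pending-extent bookkeeping with btrfs_add_delayed_ref(), which queues reference-count changes so btrfs_run_delayed_refs() can apply them in batches later. The standalone C sketch below illustrates that accumulate-then-apply pattern only; every name in it is illustrative, not the kernel's API (the real code keys a per-transaction rbtree by bytenr so repeated updates to one extent collapse into a single tree modification).

/* Standalone sketch of batched reference-count updates, in the spirit
 * of the delayed-ref queue introduced above. Illustrative names only. */
#include <stdio.h>
#include <stdlib.h>

struct delayed_ref {
	unsigned long long bytenr;	/* which extent */
	int delta;			/* +1 to add a ref, -1 to drop one */
	struct delayed_ref *next;
};

static struct delayed_ref *pending;

/* queue a ref-count change instead of touching the extent tree now */
static void add_delayed_ref(unsigned long long bytenr, int delta)
{
	struct delayed_ref *ref = malloc(sizeof(*ref));

	ref->bytenr = bytenr;
	ref->delta = delta;
	ref->next = pending;
	pending = ref;
}

/* run all queued updates in one pass, as a commit would */
static void run_delayed_refs(void)
{
	while (pending) {
		struct delayed_ref *ref = pending;

		pending = ref->next;
		printf("extent %llu: ref count %+d\n", ref->bytenr, ref->delta);
		free(ref);
	}
}

int main(void)
{
	add_delayed_ref(4096, 1);
	add_delayed_ref(8192, 1);
	add_delayed_ref(4096, -1);
	run_delayed_refs();
	return 0;
}
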
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ebe6b29e6069..08085af089e2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3124,20 +3124,15 @@ void free_extent_buffer(struct extent_buffer *eb)
 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			      struct extent_buffer *eb)
 {
-	int set;
 	unsigned long i;
 	unsigned long num_pages;
 	struct page *page;
 
-	u64 start = eb->start;
-	u64 end = start + eb->len - 1;
-
-	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
 	num_pages = num_extent_pages(eb->start, eb->len);
 
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
-		if (!set && !PageDirty(page))
+		if (!PageDirty(page))
 			continue;
 
 		lock_page(page);
@@ -3146,22 +3141,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 		else
 			set_page_private(page, EXTENT_PAGE_PRIVATE);
 
-		/*
-		 * if we're on the last page or the first page and the
-		 * block isn't aligned on a page boundary, do extra checks
-		 * to make sure we don't clean page that is partially dirty
-		 */
-		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
-		    ((i == num_pages - 1) &&
-		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
-			start = (u64)page->index << PAGE_CACHE_SHIFT;
-			end = start + PAGE_CACHE_SIZE - 1;
-			if (test_range_bit(tree, start, end,
-					   EXTENT_DIRTY, 0)) {
-				unlock_page(page);
-				continue;
-			}
-		}
 		clear_page_dirty_for_io(page);
 		spin_lock_irq(&page->mapping->tree_lock);
 		if (!PageDirty(page)) {
@@ -3187,29 +3166,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 {
 	unsigned long i;
 	unsigned long num_pages;
+	int was_dirty = 0;
 
+	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = 0; i < num_pages; i++) {
-		struct page *page = extent_buffer_page(eb, i);
-		/* writepage may need to do something special for the
-		 * first page, we have to make sure page->private is
-		 * properly set.  releasepage may drop page->private
-		 * on us if the page isn't already dirty.
-		 */
-		lock_page(page);
-		if (i == 0) {
-			set_page_extent_head(page, eb->len);
-		} else if (PagePrivate(page) &&
-			   page->private != EXTENT_PAGE_PRIVATE) {
-			set_page_extent_mapped(page);
-		}
+	for (i = 0; i < num_pages; i++)
 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
-		set_extent_dirty(tree, page_offset(page),
-				 page_offset(page) + PAGE_CACHE_SIZE - 1,
-				 GFP_NOFS);
-		unlock_page(page);
-	}
-	return 0;
+	return was_dirty;
 }
 
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
@@ -3789,6 +3752,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 		ret = 0;
 		goto out;
 	}
+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+		ret = 0;
+		goto out;
+	}
 	/* at this point we can safely release the extent buffer */
 	num_pages = num_extent_pages(eb->start, eb->len);
 	for (i = 0; i < num_pages; i++)
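
set_extent_buffer_dirty() above now tracks dirtiness with a single EXTENT_BUFFER_DIRTY bit in eb->bflags instead of ranges in the extent_io tree, and test_and_set_bit() both marks the buffer dirty and reports whether it already was, in one atomic step. A userspace sketch of that idiom follows; the struct and helper are illustrative, and the atomic is modeled with a GCC builtin rather than the kernel's bitops.

/* Sketch of flag-based dirty tracking as used above; not kernel code. */
#include <stdio.h>

#define EXTENT_BUFFER_DIRTY 2

struct extent_buffer_sketch {
	unsigned long bflags;
};

/* returns nonzero if the buffer was already dirty */
static int mark_dirty(struct extent_buffer_sketch *eb)
{
	unsigned long mask = 1UL << EXTENT_BUFFER_DIRTY;
	unsigned long old = __atomic_fetch_or(&eb->bflags, mask,
					      __ATOMIC_SEQ_CST);

	return (old & mask) != 0;
}

int main(void)
{
	struct extent_buffer_sketch eb = { 0 };

	printf("was dirty: %d\n", mark_dirty(&eb));	/* prints 0 */
	printf("was dirty: %d\n", mark_dirty(&eb));	/* prints 1 */
	return 0;
}
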
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1f9df88afbf6..5bc20abf3f3d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -25,6 +25,7 @@
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_UPTODATE 0
 #define EXTENT_BUFFER_BLOCKING 1
+#define EXTENT_BUFFER_DIRTY 2
 
 /*
  * page->private values.  Every page that is controlled by the extent
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			      struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
 			     struct extent_buffer *eb);
+int test_extent_buffer_dirty(struct extent_io_tree *tree,
+			     struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 			       struct extent_buffer *eb);
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 964652435fd1..9b99886562d0 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 	file_key.offset = pos;
 	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
 
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      sizeof(*item));
 	if (ret < 0)
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		key.offset = end_byte - 1;
 		key.type = BTRFS_EXTENT_CSUM_KEY;
 
+		path->leave_spinning = 1;
 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 		if (ret > 0) {
 			if (path->slots[0] == 0)
@@ -757,8 +759,10 @@ insert:
 	} else {
 		ins_size = csum_size;
 	}
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
 				      ins_size);
+	path->leave_spinning = 0;
 	if (ret < 0)
 		goto fail_unlock;
 	if (ret != 0) {
@@ -776,7 +780,6 @@ found:
 	item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
 				      btrfs_item_size_nr(leaf, path->slots[0]));
 	eb_token = NULL;
-	cond_resched();
 next_sector:
 
 	if (!eb_token ||
@@ -817,9 +820,9 @@ next_sector:
 		eb_token = NULL;
 	}
 	btrfs_mark_buffer_dirty(path->nodes[0]);
-	cond_resched();
 	if (total_bytes < sums->len) {
 		btrfs_release_path(root, path);
+		cond_resched();
 		goto again;
 	}
 out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dc78954861b3..9c9fb46ccd08 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -606,6 +606,7 @@ next_slot:
 		btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
 
 		btrfs_release_path(root, path);
+		path->leave_spinning = 1;
 		ret = btrfs_insert_empty_item(trans, root, path, &ins,
 					      sizeof(*extent));
 		BUG_ON(ret);
@@ -639,17 +640,22 @@ next_slot:
 					       ram_bytes);
 		btrfs_set_file_extent_type(leaf, extent, found_type);
 
+		btrfs_unlock_up_safe(path, 1);
 		btrfs_mark_buffer_dirty(path->nodes[0]);
+		btrfs_set_lock_blocking(path->nodes[0]);
 
 		if (disk_bytenr != 0) {
 			ret = btrfs_update_extent_ref(trans, root,
-				    disk_bytenr, orig_parent,
+				    disk_bytenr,
+				    le64_to_cpu(old.disk_num_bytes),
+				    orig_parent,
 				    leaf->start,
 				    root->root_key.objectid,
 				    trans->transid, ins.objectid);
 
 			BUG_ON(ret);
 		}
+		path->leave_spinning = 0;
 		btrfs_release_path(root, path);
 		if (disk_bytenr != 0)
 			inode_add_bytes(inode, extent_end - end);
@@ -912,7 +918,7 @@ again:
 		btrfs_set_file_extent_other_encoding(leaf, fi, 0);
 
 		if (orig_parent != leaf->start) {
-			ret = btrfs_update_extent_ref(trans, root, bytenr,
+			ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
 						      orig_parent, leaf->start,
 						      root->root_key.objectid,
 						      trans->transid, inode->i_ino);
@@ -1155,6 +1161,20 @@ out_nolock:
 		page_cache_release(pinned[1]);
 	*ppos = pos;
 
+	/*
+	 * we want to make sure fsync finds this change
+	 * but we haven't joined a transaction running right now.
+	 *
+	 * Later on, someone is sure to update the inode and get the
+	 * real transid recorded.
+	 *
+	 * We set last_trans now to the fs_info generation + 1,
+	 * this will either be one more than the running transaction
+	 * or the generation used for the next transaction if there isn't
+	 * one running right now.
+	 */
+	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+
 	if (num_written > 0 && will_write) {
 		struct btrfs_trans_handle *trans;
 
@@ -1167,8 +1187,11 @@ out_nolock:
 			ret = btrfs_log_dentry_safe(trans, root,
 						    file->f_dentry);
 			if (ret == 0) {
-				btrfs_sync_log(trans, root);
-				btrfs_end_transaction(trans, root);
+				ret = btrfs_sync_log(trans, root);
+				if (ret == 0)
+					btrfs_end_transaction(trans, root);
+				else
+					btrfs_commit_transaction(trans, root);
 			} else {
 				btrfs_commit_transaction(trans, root);
 			}
@@ -1185,6 +1208,18 @@ out_nolock:
 
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
+	/*
+	 * ordered_data_close is set by settattr when we are about to truncate
+	 * a file from a non-zero size to a zero size.  This tries to
+	 * flush down new bytes that may have been written if the
+	 * application were using truncate to replace a file in place.
+	 */
+	if (BTRFS_I(inode)->ordered_data_close) {
+		BTRFS_I(inode)->ordered_data_close = 0;
+		btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
+		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+			filemap_flush(inode->i_mapping);
+	}
 	if (filp->private_data)
 		btrfs_ioctl_trans_end(filp);
 	return 0;
@@ -1260,8 +1295,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	if (ret > 0) {
 		ret = btrfs_commit_transaction(trans, root);
 	} else {
-		btrfs_sync_log(trans, root);
-		ret = btrfs_end_transaction(trans, root);
+		ret = btrfs_sync_log(trans, root);
+		if (ret == 0)
+			ret = btrfs_end_transaction(trans, root);
+		else
+			ret = btrfs_commit_transaction(trans, root);
 	}
 	mutex_lock(&dentry->d_inode->i_mutex);
 out:
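
Both fsync paths above now check the return value of btrfs_sync_log() and fall back to a full btrfs_commit_transaction() when the tree-log commit cannot be used. A small sketch of that try-the-cheap-path-first pattern, with stand-in functions that are not btrfs API:

/* Sketch of the fallback pattern above: try the fast log commit, and
 * fall back to the expensive-but-safe full commit if it fails. */
#include <stdio.h>

static int try_log_commit(void)
{
	return -1;	/* pretend the log is unusable this time */
}

static int full_commit(void)
{
	printf("falling back to full transaction commit\n");
	return 0;
}

static int sync_file(void)
{
	int ret = try_log_commit();

	if (ret == 0)
		return 0;	/* cheap path worked */
	return full_commit();	/* safe path always works */
}

int main(void)
{
	return sync_file();
}
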
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 3d46fa1f29a4..6b627c611808 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+	path->leave_spinning = 1;
+
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret > 0) {
 		ret = -ENOENT;
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, root, path, &key,
 				      ins_len);
 	if (ret == -EEXIST) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7d4f948bc22a..06d8db5afb08 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+	path->leave_spinning = 1;
 	btrfs_set_trans_block_group(trans, inode);
 
 	key.objectid = inode->i_ino;
@@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 			cur_size = min_t(unsigned long, compressed_size,
 				       PAGE_CACHE_SIZE);
 
-			kaddr = kmap(cpage);
+			kaddr = kmap_atomic(cpage, KM_USER0);
 			write_extent_buffer(leaf, kaddr, ptr, cur_size);
-			kunmap(cpage);
+			kunmap_atomic(kaddr, KM_USER0);
 
 			i++;
 			ptr += cur_size;
@@ -204,7 +205,7 @@ fail:
  * does the checks required to make sure the data is small enough
  * to fit as an inline extent.
  */
-static int cow_file_range_inline(struct btrfs_trans_handle *trans,
+static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct inode *inode, u64 start, u64 end,
 				 size_t compressed_size,
@@ -854,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 	u64 cur_end;
 	int limit = 10 * 1024 * 1042;
 
-	if (!btrfs_test_opt(root, COMPRESS)) {
-		return cow_file_range(inode, locked_page, start, end,
-				      page_started, nr_written, 1);
-	}
-
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
 			 EXTENT_DELALLOC, 1, 0, GFP_NOFS);
 	while (start < end) {
@@ -935,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
  * If no cow copies or snapshots exist, we write directly to the existing
  * blocks on disk
  */
-static int run_delalloc_nocow(struct inode *inode, struct page *locked_page,
+static noinline int run_delalloc_nocow(struct inode *inode,
+				       struct page *locked_page,
 			      u64 start, u64 end, int *page_started, int force,
 			      unsigned long *nr_written)
 {
@@ -1133,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 			      unsigned long *nr_written)
 {
 	int ret;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 
 	if (btrfs_test_flag(inode, NODATACOW))
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
@@ -1140,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 	else if (btrfs_test_flag(inode, PREALLOC))
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
+	else if (!btrfs_test_opt(root, COMPRESS))
+		ret = cow_file_range(inode, locked_page, start, end,
+				     page_started, nr_written, 1);
 	else
 		ret = cow_file_range_async(inode, locked_page, start, end,
 					   page_started, nr_written);
-
 	return ret;
 }
 
@@ -1453,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	path->leave_spinning = 1;
 	ret = btrfs_drop_extents(trans, root, inode, file_pos,
 				 file_pos + num_bytes, file_pos, &hint);
 	BUG_ON(ret);
@@ -1475,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_set_file_extent_compression(leaf, fi, compression);
 	btrfs_set_file_extent_encryption(leaf, fi, encryption);
 	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+
+	btrfs_unlock_up_safe(path, 1);
+	btrfs_set_lock_blocking(leaf);
+
 	btrfs_mark_buffer_dirty(leaf);
 
 	inode_add_bytes(inode, num_bytes);
@@ -1487,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 					  root->root_key.objectid,
 					  trans->transid, inode->i_ino, &ins);
 	BUG_ON(ret);
-
 	btrfs_free_path(path);
+
 	return 0;
 }
 
+/*
+ * helper function for btrfs_finish_ordered_io, this
+ * just reads in some of the csum leaves to prime them into ram
+ * before we start the transaction.  It limits the amount of btree
+ * reads required while inside the transaction.
+ */
+static noinline void reada_csum(struct btrfs_root *root,
+				struct btrfs_path *path,
+				struct btrfs_ordered_extent *ordered_extent)
+{
+	struct btrfs_ordered_sum *sum;
+	u64 bytenr;
+
+	sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
+			 list);
+	bytenr = sum->sums[0].bytenr;
+
+	/*
+	 * we don't care about the results, the point of this search is
+	 * just to get the btree leaves into ram
+	 */
+	btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
+}
+
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -1500,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
-	struct btrfs_ordered_extent *ordered_extent;
+	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct btrfs_path *path;
 	int compressed = 0;
 	int ret;
 
@@ -1509,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	if (!ret)
 		return 0;
 
+	/*
+	 * before we join the transaction, try to do some of our IO.
+	 * This will limit the amount of IO that we have to do with
+	 * the transaction running.  We're unlikely to need to do any
+	 * IO if the file extents are new, the disk_i_size checks
+	 * covers the most common case.
+	 */
+	if (start < BTRFS_I(inode)->disk_i_size) {
+		path = btrfs_alloc_path();
+		if (path) {
+			ret = btrfs_lookup_file_extent(NULL, root, path,
+						       inode->i_ino,
+						       start, 0);
+			ordered_extent = btrfs_lookup_ordered_extent(inode,
								     start);
+			if (!list_empty(&ordered_extent->list)) {
+				btrfs_release_path(root, path);
+				reada_csum(root, path, ordered_extent);
+			}
+			btrfs_free_path(path);
+		}
+	}
+
 	trans = btrfs_join_transaction(root, 1);
 
-	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+	if (!ordered_extent)
+		ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 	BUG_ON(!ordered_extent);
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
 		goto nocow;
@@ -2101,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
+	path->leave_spinning = 1;
 	ret = btrfs_lookup_inode(trans, root, path,
 				 &BTRFS_I(inode)->location, 1);
 	if (ret) {
@@ -2147,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 		goto err;
 	}
 
+	path->leave_spinning = 1;
 	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
 				    name, name_len, -1);
 	if (IS_ERR(di)) {
@@ -2190,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
 					 inode, dir->i_ino);
 	BUG_ON(ret != 0 && ret != -ENOENT);
-	if (ret != -ENOENT)
-		BTRFS_I(dir)->log_dirty_trans = trans->transid;
 
 	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
 					   dir, index);
@@ -2224,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
 	trans = btrfs_start_transaction(root, 1);
 
 	btrfs_set_trans_block_group(trans, dir);
+
+	btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+
 	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
 				 dentry->d_name.name, dentry->d_name.len);
 
@@ -2498,6 +2555,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 	key.type = (u8)-1;
 
 search_again:
+	path->leave_spinning = 1;
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret < 0)
 		goto error;
@@ -2644,6 +2702,7 @@ delete:
 			break;
 		}
 		if (found_extent) {
+			btrfs_set_path_blocking(path);
 			ret = btrfs_free_extent(trans, root, extent_start,
 						extent_num_bytes,
 						leaf->start, root_owner,
@@ -2848,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
-	if (S_ISREG(inode->i_mode) &&
-	    attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
-		err = btrfs_cont_expand(inode, attr->ia_size);
-		if (err)
-			return err;
+	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+		if (attr->ia_size > inode->i_size) {
+			err = btrfs_cont_expand(inode, attr->ia_size);
+			if (err)
+				return err;
+		} else if (inode->i_size > 0 &&
+			   attr->ia_size == 0) {
+
+			/* we're truncating a file that used to have good
+			 * data down to zero.  Make sure it gets into
+			 * the ordered flush list so that any new writes
+			 * get down to disk quickly.
+			 */
+			BTRFS_I(inode)->ordered_data_close = 1;
+		}
 	}
 
 	err = inode_setattr(inode, attr);
@@ -2984,13 +3053,14 @@ static noinline void init_btrfs_i(struct inode *inode)
 	bi->disk_i_size = 0;
 	bi->flags = 0;
 	bi->index_cnt = (u64)-1;
-	bi->log_dirty_trans = 0;
+	bi->last_unlink_trans = 0;
 	extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
 	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
 			     inode->i_mapping, GFP_NOFS);
 	extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
 			     inode->i_mapping, GFP_NOFS);
 	INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
+	INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
 	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
 	mutex_init(&BTRFS_I(inode)->extent_mutex);
 	mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3449,6 +3519,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	sizes[0] = sizeof(struct btrfs_inode_item);
 	sizes[1] = name_len + sizeof(*ref);
 
+	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
 	if (ret != 0)
 		goto fail;
@@ -3727,6 +3798,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		drop_inode = 1;
 
 	nr = trans->blocks_used;
+
+	btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
 	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
@@ -4292,8 +4365,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  */
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = fdentry(vma->vm_file)->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -4306,10 +4380,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	u64 page_end;
 
 	ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-	if (ret)
+	if (ret) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
 		goto out;
+	}
 
-	ret = -EINVAL;
+	ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 again:
 	lock_page(page);
 	size = i_size_read(inode);
@@ -4357,6 +4436,8 @@ again:
 	}
 	ClearPageChecked(page);
 	set_page_dirty(page);
+
+	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
 	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
 out_unlock:
@@ -4382,6 +4463,27 @@ static void btrfs_truncate(struct inode *inode)
 	btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
 
 	trans = btrfs_start_transaction(root, 1);
+
+	/*
+	 * setattr is responsible for setting the ordered_data_close flag,
+	 * but that is only tested during the last file release.  That
+	 * could happen well after the next commit, leaving a great big
+	 * window where new writes may get lost if someone chooses to write
+	 * to this file after truncating to zero
+	 *
+	 * The inode doesn't have any dirty data here, and so if we commit
+	 * this is a noop.  If someone immediately starts writing to the inode
+	 * it is very likely we'll catch some of their writes in this
+	 * transaction, and the commit will find this file on the ordered
+	 * data list with good things to send down.
+	 *
+	 * This is a best effort solution, there is still a window where
+	 * using truncate to replace the contents of the file will
+	 * end up with a zero length file after a crash.
+	 */
+	if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
+		btrfs_add_ordered_operation(trans, root, inode);
+
 	btrfs_set_trans_block_group(trans, inode);
 	btrfs_i_size_write(inode, inode->i_size);
 
@@ -4458,12 +4560,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->i_acl = BTRFS_ACL_NOT_CACHED;
 	ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
 	INIT_LIST_HEAD(&ei->i_orphan);
+	INIT_LIST_HEAD(&ei->ordered_operations);
 	return &ei->vfs_inode;
 }
 
 void btrfs_destroy_inode(struct inode *inode)
 {
 	struct btrfs_ordered_extent *ordered;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
 	WARN_ON(!list_empty(&inode->i_dentry));
 	WARN_ON(inode->i_data.nrpages);
 
@@ -4474,13 +4579,24 @@ void btrfs_destroy_inode(struct inode *inode)
 	    BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
 		posix_acl_release(BTRFS_I(inode)->i_default_acl);
 
-	spin_lock(&BTRFS_I(inode)->root->list_lock);
+	/*
+	 * Make sure we're properly removed from the ordered operation
+	 * lists.
+	 */
+	smp_mb();
+	if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
+		spin_lock(&root->fs_info->ordered_extent_lock);
+		list_del_init(&BTRFS_I(inode)->ordered_operations);
+		spin_unlock(&root->fs_info->ordered_extent_lock);
+	}
+
+	spin_lock(&root->list_lock);
 	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
 		printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
 		       " list\n", inode->i_ino);
 		dump_stack();
 	}
-	spin_unlock(&BTRFS_I(inode)->root->list_lock);
+	spin_unlock(&root->list_lock);
 
 	while (1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -4605,8 +4721,36 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (ret)
 		goto out_unlock;
 
+	/*
+	 * we're using rename to replace one file with another.
+	 * and the replacement file is large.  Start IO on it now so
+	 * we don't add too much work to the end of the transaction
+	 */
+	if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
+	    new_inode->i_size &&
+	    old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+		filemap_flush(old_inode->i_mapping);
+
 	trans = btrfs_start_transaction(root, 1);
 
+	/*
+	 * make sure the inode gets flushed if it is replacing
+	 * something.
+	 */
+	if (new_inode && new_inode->i_size &&
+	    old_inode && S_ISREG(old_inode->i_mode)) {
+		btrfs_add_ordered_operation(trans, root, old_inode);
+	}
+
+	/*
+	 * this is an ugly little race, but the rename is required to make
+	 * sure that if we crash, the inode is either at the old name
+	 * or the new one.  pinning the log transaction lets us make sure
+	 * we don't allow a log commit to come in after we unlink the
+	 * name but before we add the new name back in.
+	 */
+	btrfs_pin_log_trans(root);
+
 	btrfs_set_trans_block_group(trans, new_dir);
 
 	btrfs_inc_nlink(old_dentry->d_inode);
@@ -4614,6 +4758,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	new_dir->i_ctime = new_dir->i_mtime = ctime;
 	old_inode->i_ctime = ctime;
 
+	if (old_dentry->d_parent != new_dentry->d_parent)
+		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+
 	ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
 				 old_dentry->d_name.name,
 				 old_dentry->d_name.len);
@@ -4645,7 +4792,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (ret)
 		goto out_fail;
 
+	btrfs_log_new_name(trans, old_inode, old_dir,
+			   new_dentry->d_parent);
out_fail:
+
+	/* this btrfs_end_log_trans just allows the current
+	 * log-sub transaction to complete
+	 */
+	btrfs_end_log_trans(root);
 	btrfs_end_transaction_throttle(trans, root);
out_unlock:
 	return ret;
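
btrfs_page_mkwrite() above switches from returning raw errnos to the VM_FAULT_* codes the fault path expects: -ENOMEM becomes VM_FAULT_OOM, other errors become VM_FAULT_SIGBUS, and VM_FAULT_NOPAGE asks the VM to retry the fault. A small sketch of that translation; the constants here are stand-ins, not the kernel's definitions:

/* Sketch of the errno -> VM_FAULT_* mapping done above. */
#include <errno.h>
#include <stdio.h>

#define VM_FAULT_OOM    0x0001
#define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_NOPAGE 0x0100

static unsigned int fault_code(int err)
{
	if (err == 0)
		return VM_FAULT_NOPAGE;	/* make the VM retry the fault */
	if (err == -ENOMEM)
		return VM_FAULT_OOM;
	return VM_FAULT_SIGBUS;		/* -ENOSPC, -EIO, etc */
}

int main(void)
{
	printf("%#x %#x %#x\n", fault_code(0), fault_code(-ENOMEM),
	       fault_code(-ENOSPC));
	return 0;
}
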
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 47b0a88c12a2..a5310c0f41e2 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -71,12 +71,13 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb)
 static int btrfs_spin_on_block(struct extent_buffer *eb)
 {
 	int i;
+
 	for (i = 0; i < 512; i++) {
-		cpu_relax();
 		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
 			return 1;
 		if (need_resched())
 			break;
+		cpu_relax();
 	}
 	return 0;
 }
@@ -95,13 +96,15 @@ int btrfs_try_spin_lock(struct extent_buffer *eb)
 {
 	int i;
 
-	spin_nested(eb);
-	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
-		return 1;
-	spin_unlock(&eb->lock);
-
+	if (btrfs_spin_on_block(eb)) {
+		spin_nested(eb);
+		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+			return 1;
+		spin_unlock(&eb->lock);
+	}
 	/* spin for a bit on the BLOCKING flag */
 	for (i = 0; i < 2; i++) {
+		cpu_relax();
 		if (!btrfs_spin_on_block(eb))
 			break;
 
@@ -148,6 +151,9 @@ int btrfs_tree_lock(struct extent_buffer *eb)
 	DEFINE_WAIT(wait);
 	wait.func = btrfs_wake_function;
 
+	if (!btrfs_spin_on_block(eb))
+		goto sleep;
+
 	while(1) {
 		spin_nested(eb);
 
@@ -165,9 +171,10 @@ int btrfs_tree_lock(struct extent_buffer *eb)
 		 * spin for a bit, and if the blocking flag goes away,
 		 * loop around
 		 */
+		cpu_relax();
 		if (btrfs_spin_on_block(eb))
 			continue;
-
+sleep:
 		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
 					  TASK_UNINTERRUPTIBLE);
 
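
btrfs_spin_on_block() above bounds the busy-wait: poll the blocking flag a few hundred iterations, then give up and sleep on the wait queue instead of burning CPU. A userspace sketch of that bounded spin, illustrative only (the kernel would also call cpu_relax() and check need_resched() in the loop):

/* Sketch of the bounded spin-before-sleep pattern used above. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int blocking = 1;

/* returns 1 if the flag cleared while we spun, 0 if we should sleep */
static int spin_on_block(void)
{
	int i;

	for (i = 0; i < 512; i++) {
		if (!atomic_load(&blocking))
			return 1;
	}
	return 0;
}

int main(void)
{
	if (!spin_on_block())
		printf("would sleep on the wait queue now\n");
	return 0;
}
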
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 77c2411a5f0f..53c87b197d70 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode,
310 310
311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
312 list_del_init(&entry->root_extent_list); 312 list_del_init(&entry->root_extent_list);
313
314 /*
315 * we have no more ordered extents for this inode and
316 * no dirty pages. We can safely remove it from the
317 * list of ordered extents
318 */
319 if (RB_EMPTY_ROOT(&tree->tree) &&
320 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
321 list_del_init(&BTRFS_I(inode)->ordered_operations);
322 }
313 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 323 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
314 324
315 mutex_unlock(&tree->mutex); 325 mutex_unlock(&tree->mutex);
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
370} 380}
371 381
372/* 382/*
383 * this is used during transaction commit to write all the inodes
384 * added to the ordered operation list. These files must be fully on
385 * disk before the transaction commits.
386 *
387 * we have two modes here, one is to just start the IO via filemap_flush
388 * and the other is to wait for all the io. When we wait, we have an
389 * extra check to make sure the ordered operation list really is empty
390 * before we return
391 */
392int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
393{
394 struct btrfs_inode *btrfs_inode;
395 struct inode *inode;
396 struct list_head splice;
397
398 INIT_LIST_HEAD(&splice);
399
400 mutex_lock(&root->fs_info->ordered_operations_mutex);
401 spin_lock(&root->fs_info->ordered_extent_lock);
402again:
403 list_splice_init(&root->fs_info->ordered_operations, &splice);
404
405 while (!list_empty(&splice)) {
406 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
407 ordered_operations);
408
409 inode = &btrfs_inode->vfs_inode;
410
411 list_del_init(&btrfs_inode->ordered_operations);
412
413 /*
414 * the inode may be getting freed (in sys_unlink path).
415 */
416 inode = igrab(inode);
417
418 if (!wait && inode) {
419 list_add_tail(&BTRFS_I(inode)->ordered_operations,
420 &root->fs_info->ordered_operations);
421 }
422 spin_unlock(&root->fs_info->ordered_extent_lock);
423
424 if (inode) {
425 if (wait)
426 btrfs_wait_ordered_range(inode, 0, (u64)-1);
427 else
428 filemap_flush(inode->i_mapping);
429 iput(inode);
430 }
431
432 cond_resched();
433 spin_lock(&root->fs_info->ordered_extent_lock);
434 }
435 if (wait && !list_empty(&root->fs_info->ordered_operations))
436 goto again;
437
438 spin_unlock(&root->fs_info->ordered_extent_lock);
439 mutex_unlock(&root->fs_info->ordered_operations_mutex);
440
441 return 0;
442}
443
444/*
373 * Used to start IO or wait for a given ordered extent to finish. 445 * Used to start IO or wait for a given ordered extent to finish.
374 * 446 *
375 * If wait is one, this effectively waits on page writeback for all the pages 447 * If wait is one, this effectively waits on page writeback for all the pages
@@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
726 798
727 return ret; 799 return ret;
728} 800}
801
802/*
803 * add a given inode to the list of inodes that must be fully on
804 * disk before a transaction commit finishes.
805 *
806 * This basically gives us the ext3 style data=ordered mode, and it is mostly
807 * used to make sure renamed files are fully on disk.
808 *
809 * It is a noop if the inode is already fully on disk.
810 *
811 * If trans is not null, we'll do a friendly check for a transaction that
812 * is already flushing things and force the IO down ourselves.
813 */
814int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
815 struct btrfs_root *root,
816 struct inode *inode)
817{
818 u64 last_mod;
819
820 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
821
822 /*
823 * if this file hasn't been changed since the last transaction
824 * commit, we can safely return without doing anything
825 */
826 if (last_mod < root->fs_info->last_trans_committed)
827 return 0;
828
829 /*
830 * the transaction is already committing. Just start the IO and
831 * don't bother with all of this list nonsense
832 */
833 if (trans && root->fs_info->running_transaction->blocked) {
834 btrfs_wait_ordered_range(inode, 0, (u64)-1);
835 return 0;
836 }
837
838 spin_lock(&root->fs_info->ordered_extent_lock);
839 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
840 list_add_tail(&BTRFS_I(inode)->ordered_operations,
841 &root->fs_info->ordered_operations);
842 }
843 spin_unlock(&root->fs_info->ordered_extent_lock);
844
845 return 0;
846}
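
A hedged usage sketch: in this series the rename path (in fs/btrfs/inode.c, outside this section) records the inode right after changing its name so the commit-time flush above picks it up; the wrapper below is illustrative only:

static int sketch_record_renamed_inode(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct inode *inode)
{
	/* no-op if the inode is already fully on disk; forces the IO
	 * itself if the running transaction is already flushing */
	return btrfs_add_ordered_operation(trans, root, inode);
}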
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ab66d5e8d6d6..3d31c8827b01 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, 155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
156 loff_t end, int sync_mode); 156 loff_t end, int sync_mode);
157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); 157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
158int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
159int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
160 struct btrfs_root *root,
161 struct inode *inode);
158#endif 162#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4112d53d4f4d..664782c6a2df 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -65,6 +65,15 @@ static noinline int join_transaction(struct btrfs_root *root)
65 cur_trans->use_count = 1; 65 cur_trans->use_count = 1;
66 cur_trans->commit_done = 0; 66 cur_trans->commit_done = 0;
67 cur_trans->start_time = get_seconds(); 67 cur_trans->start_time = get_seconds();
68
69 cur_trans->delayed_refs.root.rb_node = NULL;
70 cur_trans->delayed_refs.num_entries = 0;
71 cur_trans->delayed_refs.num_heads_ready = 0;
72 cur_trans->delayed_refs.num_heads = 0;
73 cur_trans->delayed_refs.flushing = 0;
74 cur_trans->delayed_refs.run_delayed_start = 0;
75 spin_lock_init(&cur_trans->delayed_refs.lock);
76
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 77 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 78 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 extent_io_tree_init(&cur_trans->dirty_pages, 79 extent_io_tree_init(&cur_trans->dirty_pages,
@@ -182,6 +191,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
182 h->block_group = 0; 191 h->block_group = 0;
183 h->alloc_exclude_nr = 0; 192 h->alloc_exclude_nr = 0;
184 h->alloc_exclude_start = 0; 193 h->alloc_exclude_start = 0;
194 h->delayed_ref_updates = 0;
195
185 root->fs_info->running_transaction->use_count++; 196 root->fs_info->running_transaction->use_count++;
186 mutex_unlock(&root->fs_info->trans_mutex); 197 mutex_unlock(&root->fs_info->trans_mutex);
187 return h; 198 return h;
@@ -271,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root)
271 if (!root->fs_info->open_ioctl_trans) 282 if (!root->fs_info->open_ioctl_trans)
272 wait_current_trans(root); 283 wait_current_trans(root);
273 mutex_unlock(&root->fs_info->trans_mutex); 284 mutex_unlock(&root->fs_info->trans_mutex);
274
275 throttle_on_drops(root); 285 throttle_on_drops(root);
276} 286}
277 287
@@ -280,6 +290,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
280{ 290{
281 struct btrfs_transaction *cur_trans; 291 struct btrfs_transaction *cur_trans;
282 struct btrfs_fs_info *info = root->fs_info; 292 struct btrfs_fs_info *info = root->fs_info;
293 int count = 0;
294
295 while (count < 4) {
296 unsigned long cur = trans->delayed_ref_updates;
297 trans->delayed_ref_updates = 0;
298 if (cur &&
299 trans->transaction->delayed_refs.num_heads_ready > 64) {
300 trans->delayed_ref_updates = 0;
301
302 /*
303 * do a full flush if the transaction is trying
304 * to close
305 */
306 if (trans->transaction->delayed_refs.flushing)
307 cur = 0;
308 btrfs_run_delayed_refs(trans, root, cur);
309 } else {
310 break;
311 }
312 count++;
313 }
283 314
284 mutex_lock(&info->trans_mutex); 315 mutex_lock(&info->trans_mutex);
285 cur_trans = info->running_transaction; 316 cur_trans = info->running_transaction;
@@ -424,9 +455,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
424 u64 old_root_bytenr; 455 u64 old_root_bytenr;
425 struct btrfs_root *tree_root = root->fs_info->tree_root; 456 struct btrfs_root *tree_root = root->fs_info->tree_root;
426 457
427 btrfs_extent_post_op(trans, root);
428 btrfs_write_dirty_block_groups(trans, root); 458 btrfs_write_dirty_block_groups(trans, root);
429 btrfs_extent_post_op(trans, root); 459
460 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
461 BUG_ON(ret);
430 462
431 while (1) { 463 while (1) {
432 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 464 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
@@ -438,14 +470,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
438 btrfs_header_level(root->node)); 470 btrfs_header_level(root->node));
439 btrfs_set_root_generation(&root->root_item, trans->transid); 471 btrfs_set_root_generation(&root->root_item, trans->transid);
440 472
441 btrfs_extent_post_op(trans, root);
442
443 ret = btrfs_update_root(trans, tree_root, 473 ret = btrfs_update_root(trans, tree_root,
444 &root->root_key, 474 &root->root_key,
445 &root->root_item); 475 &root->root_item);
446 BUG_ON(ret); 476 BUG_ON(ret);
447 btrfs_write_dirty_block_groups(trans, root); 477 btrfs_write_dirty_block_groups(trans, root);
448 btrfs_extent_post_op(trans, root); 478
479 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
480 BUG_ON(ret);
449 } 481 }
450 return 0; 482 return 0;
451} 483}
@@ -459,15 +491,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
459 struct btrfs_fs_info *fs_info = root->fs_info; 491 struct btrfs_fs_info *fs_info = root->fs_info;
460 struct list_head *next; 492 struct list_head *next;
461 struct extent_buffer *eb; 493 struct extent_buffer *eb;
494 int ret;
462 495
463 btrfs_extent_post_op(trans, fs_info->tree_root); 496 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
497 BUG_ON(ret);
464 498
465 eb = btrfs_lock_root_node(fs_info->tree_root); 499 eb = btrfs_lock_root_node(fs_info->tree_root);
466 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); 500 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
467 btrfs_tree_unlock(eb); 501 btrfs_tree_unlock(eb);
468 free_extent_buffer(eb); 502 free_extent_buffer(eb);
469 503
470 btrfs_extent_post_op(trans, fs_info->tree_root); 504 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
505 BUG_ON(ret);
471 506
472 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 507 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
473 next = fs_info->dirty_cowonly_roots.next; 508 next = fs_info->dirty_cowonly_roots.next;
@@ -475,6 +510,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
475 root = list_entry(next, struct btrfs_root, dirty_list); 510 root = list_entry(next, struct btrfs_root, dirty_list);
476 511
477 update_cowonly_root(trans, root); 512 update_cowonly_root(trans, root);
513
514 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
515 BUG_ON(ret);
478 } 516 }
479 return 0; 517 return 0;
480} 518}
@@ -635,6 +673,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
635} 673}
636 674
637/* 675/*
676 * when dropping snapshots, we generate a ton of delayed refs, and it makes
677 * sense not to join the transaction while it is trying to flush the current
678 * queue of delayed refs out.
679 *
680 * This is used by the drop snapshot code only
681 */
682static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
683{
684 DEFINE_WAIT(wait);
685
686 mutex_lock(&info->trans_mutex);
687 while (info->running_transaction &&
688 info->running_transaction->delayed_refs.flushing) {
689 prepare_to_wait(&info->transaction_wait, &wait,
690 TASK_UNINTERRUPTIBLE);
691 mutex_unlock(&info->trans_mutex);
692 schedule();
693 mutex_lock(&info->trans_mutex);
694 finish_wait(&info->transaction_wait, &wait);
695 }
696 mutex_unlock(&info->trans_mutex);
697 return 0;
698}
699
700/*
638 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 701 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
639 * all of them 702 * all of them
640 */ 703 */
@@ -661,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
661 atomic_inc(&root->fs_info->throttles); 724 atomic_inc(&root->fs_info->throttles);
662 725
663 while (1) { 726 while (1) {
727 /*
728 * we don't want to jump in and create a bunch of
729 * delayed refs if the transaction is starting to close
730 */
731 wait_transaction_pre_flush(tree_root->fs_info);
664 trans = btrfs_start_transaction(tree_root, 1); 732 trans = btrfs_start_transaction(tree_root, 1);
733
734 /*
735 * we've joined a transaction, make sure it isn't
736 * closing right now
737 */
738 if (trans->transaction->delayed_refs.flushing) {
739 btrfs_end_transaction(trans, tree_root);
740 continue;
741 }
742
665 mutex_lock(&root->fs_info->drop_mutex); 743 mutex_lock(&root->fs_info->drop_mutex);
666 ret = btrfs_drop_snapshot(trans, dirty->root); 744 ret = btrfs_drop_snapshot(trans, dirty->root);
667 if (ret != -EAGAIN) 745 if (ret != -EAGAIN)
@@ -766,7 +844,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
766 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 844 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
767 845
768 old = btrfs_lock_root_node(root); 846 old = btrfs_lock_root_node(root);
769 btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); 847 btrfs_cow_block(trans, root, old, NULL, 0, &old);
770 848
771 btrfs_copy_root(trans, root, old, &tmp, objectid); 849 btrfs_copy_root(trans, root, old, &tmp, objectid);
772 btrfs_tree_unlock(old); 850 btrfs_tree_unlock(old);
@@ -894,12 +972,31 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
894 struct extent_io_tree *pinned_copy; 972 struct extent_io_tree *pinned_copy;
895 DEFINE_WAIT(wait); 973 DEFINE_WAIT(wait);
896 int ret; 974 int ret;
975 int should_grow = 0;
976 unsigned long now = get_seconds();
977
978 btrfs_run_ordered_operations(root, 0);
979
980 /* make a pass through all the delayed refs we have so far
 981 * any running procs may add more while we are here
982 */
983 ret = btrfs_run_delayed_refs(trans, root, 0);
984 BUG_ON(ret);
985
986 cur_trans = trans->transaction;
987 /*
988 * set the flushing flag so procs in this transaction have to
989 * start sending their work down.
990 */
991 cur_trans->delayed_refs.flushing = 1;
992
993 ret = btrfs_run_delayed_refs(trans, root, 0);
994 BUG_ON(ret);
897 995
898 INIT_LIST_HEAD(&dirty_fs_roots);
899 mutex_lock(&root->fs_info->trans_mutex); 996 mutex_lock(&root->fs_info->trans_mutex);
900 if (trans->transaction->in_commit) { 997 INIT_LIST_HEAD(&dirty_fs_roots);
901 cur_trans = trans->transaction; 998 if (cur_trans->in_commit) {
902 trans->transaction->use_count++; 999 cur_trans->use_count++;
903 mutex_unlock(&root->fs_info->trans_mutex); 1000 mutex_unlock(&root->fs_info->trans_mutex);
904 btrfs_end_transaction(trans, root); 1001 btrfs_end_transaction(trans, root);
905 1002
@@ -922,7 +1019,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
922 1019
923 trans->transaction->in_commit = 1; 1020 trans->transaction->in_commit = 1;
924 trans->transaction->blocked = 1; 1021 trans->transaction->blocked = 1;
925 cur_trans = trans->transaction;
926 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1022 if (cur_trans->list.prev != &root->fs_info->trans_list) {
927 prev_trans = list_entry(cur_trans->list.prev, 1023 prev_trans = list_entry(cur_trans->list.prev,
928 struct btrfs_transaction, list); 1024 struct btrfs_transaction, list);
@@ -937,6 +1033,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
937 } 1033 }
938 } 1034 }
939 1035
1036 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
1037 should_grow = 1;
1038
940 do { 1039 do {
941 int snap_pending = 0; 1040 int snap_pending = 0;
942 joined = cur_trans->num_joined; 1041 joined = cur_trans->num_joined;
@@ -949,7 +1048,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
949 1048
950 if (cur_trans->num_writers > 1) 1049 if (cur_trans->num_writers > 1)
951 timeout = MAX_SCHEDULE_TIMEOUT; 1050 timeout = MAX_SCHEDULE_TIMEOUT;
952 else 1051 else if (should_grow)
953 timeout = 1; 1052 timeout = 1;
954 1053
955 mutex_unlock(&root->fs_info->trans_mutex); 1054 mutex_unlock(&root->fs_info->trans_mutex);
@@ -959,16 +1058,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
959 BUG_ON(ret); 1058 BUG_ON(ret);
960 } 1059 }
961 1060
962 schedule_timeout(timeout); 1061 /*
1062 * rename doesn't use btrfs_join_transaction, so once we
1063 * set the transaction to blocked above, we aren't going
1064 * to get any new ordered operations. We can safely run
1065 * it here and know for sure that nothing new will be added
1066 * to the list
1067 */
1068 btrfs_run_ordered_operations(root, 1);
1069
1070 smp_mb();
1071 if (cur_trans->num_writers > 1 || should_grow)
1072 schedule_timeout(timeout);
963 1073
964 mutex_lock(&root->fs_info->trans_mutex); 1074 mutex_lock(&root->fs_info->trans_mutex);
965 finish_wait(&cur_trans->writer_wait, &wait); 1075 finish_wait(&cur_trans->writer_wait, &wait);
966 } while (cur_trans->num_writers > 1 || 1076 } while (cur_trans->num_writers > 1 ||
967 (cur_trans->num_joined != joined)); 1077 (should_grow && cur_trans->num_joined != joined));
968 1078
969 ret = create_pending_snapshots(trans, root->fs_info); 1079 ret = create_pending_snapshots(trans, root->fs_info);
970 BUG_ON(ret); 1080 BUG_ON(ret);
971 1081
1082 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1083 BUG_ON(ret);
1084
972 WARN_ON(cur_trans != trans->transaction); 1085 WARN_ON(cur_trans != trans->transaction);
973 1086
974 /* btrfs_commit_tree_roots is responsible for getting the 1087 /* btrfs_commit_tree_roots is responsible for getting the
@@ -1032,6 +1145,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1032 btrfs_copy_pinned(root, pinned_copy); 1145 btrfs_copy_pinned(root, pinned_copy);
1033 1146
1034 trans->transaction->blocked = 0; 1147 trans->transaction->blocked = 0;
1148
1035 wake_up(&root->fs_info->transaction_throttle); 1149 wake_up(&root->fs_info->transaction_throttle);
1036 wake_up(&root->fs_info->transaction_wait); 1150 wake_up(&root->fs_info->transaction_wait);
1037 1151
@@ -1058,6 +1172,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1058 mutex_lock(&root->fs_info->trans_mutex); 1172 mutex_lock(&root->fs_info->trans_mutex);
1059 1173
1060 cur_trans->commit_done = 1; 1174 cur_trans->commit_done = 1;
1175
1061 root->fs_info->last_trans_committed = cur_trans->transid; 1176 root->fs_info->last_trans_committed = cur_trans->transid;
1062 wake_up(&cur_trans->commit_wait); 1177 wake_up(&cur_trans->commit_wait);
1063 1178
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ea292117f882..94f5bde2b58d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -19,10 +19,16 @@
19#ifndef __BTRFS_TRANSACTION__ 19#ifndef __BTRFS_TRANSACTION__
20#define __BTRFS_TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "delayed-ref.h"
22 23
23struct btrfs_transaction { 24struct btrfs_transaction {
24 u64 transid; 25 u64 transid;
26 /*
 27 * total writers in this transaction; it must be zero before the
28 * transaction can end
29 */
25 unsigned long num_writers; 30 unsigned long num_writers;
31
26 unsigned long num_joined; 32 unsigned long num_joined;
27 int in_commit; 33 int in_commit;
28 int use_count; 34 int use_count;
@@ -34,6 +40,7 @@ struct btrfs_transaction {
34 wait_queue_head_t writer_wait; 40 wait_queue_head_t writer_wait;
35 wait_queue_head_t commit_wait; 41 wait_queue_head_t commit_wait;
36 struct list_head pending_snapshots; 42 struct list_head pending_snapshots;
43 struct btrfs_delayed_ref_root delayed_refs;
37}; 44};
38 45
39struct btrfs_trans_handle { 46struct btrfs_trans_handle {
@@ -44,6 +51,7 @@ struct btrfs_trans_handle {
44 u64 block_group; 51 u64 block_group;
45 u64 alloc_exclude_start; 52 u64 alloc_exclude_start;
46 u64 alloc_exclude_nr; 53 u64 alloc_exclude_nr;
54 unsigned long delayed_ref_updates;
47}; 55};
48 56
49struct btrfs_pending_snapshot { 57struct btrfs_pending_snapshot {
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 98d25fa4570e..b10eacdb1620 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
124 } 124 }
125 125
126 btrfs_release_path(root, path); 126 btrfs_release_path(root, path);
127 if (is_extent)
128 btrfs_extent_post_op(trans, root);
129out: 127out:
130 if (path) 128 if (path)
131 btrfs_free_path(path); 129 btrfs_free_path(path);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9c462fbd60fa..fc9b87a7975b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -35,6 +35,49 @@
35#define LOG_INODE_EXISTS 1 35#define LOG_INODE_EXISTS 1
36 36
37/* 37/*
38 * directory trouble cases
39 *
40 * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
41 * log, we must force a full commit before doing an fsync of the directory
42 * where the unlink was done.
43 * ---> record transid of last unlink/rename per directory
44 *
45 * mkdir foo/some_dir
46 * normal commit
47 * rename foo/some_dir foo2/some_dir
48 * mkdir foo/some_dir
49 * fsync foo/some_dir/some_file
50 *
51 * The fsync above will unlink the original some_dir without recording
52 * it in its new location (foo2). After a crash, some_dir will be gone
53 * unless the fsync of some_file forces a full commit
54 *
55 * 2) we must log any new names for any file or dir that is in the fsync
56 * log. ---> check inode while renaming/linking.
57 *
58 * 2a) we must log any new names for any file or dir during rename
59 * when the directory they are being removed from was logged.
60 * ---> check inode and old parent dir during rename
61 *
 62 * 2a is actually the more important variant. Without the extra logging
63 * a crash might unlink the old name without recreating the new one
64 *
65 * 3) after a crash, we must go through any directories with a link count
66 * of zero and redo the rm -rf
67 *
68 * mkdir f1/foo
69 * normal commit
70 * rm -rf f1/foo
71 * fsync(f1)
72 *
73 * The directory f1 was fully removed from the FS, but fsync was never
74 * called on f1, only its parent dir. After a crash the rm -rf must
75 * be replayed. This must be able to recurse down the entire
76 * directory tree. The inode link count fixup code takes care of the
77 * ugly details.
78 */
79
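To make case 1 concrete: the per-directory transid recording boils down to a comparison like the sketch below (editor's illustration; the helper name is hypothetical, and the real check lives in check_parent_dirs_for_sync later in this file):

static int sketch_dir_needs_full_commit(struct inode *dir)
{
	u64 last_committed = BTRFS_I(dir)->root->fs_info->last_trans_committed;

	/* an unlink or rename in this dir since the last commit means
	 * the tree log alone cannot make an fsync of it safe */
	return BTRFS_I(dir)->last_unlink_trans > last_committed;
}
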
80/*
38 * stages for the tree walking. The first 81 * stages for the tree walking. The first
39 * stage (0) is to only pin down the blocks we find 82 * stage (0) is to only pin down the blocks we find
40 * the second stage (1) is to make sure that all the inodes 83 * the second stage (1) is to make sure that all the inodes
@@ -47,12 +90,17 @@
47#define LOG_WALK_REPLAY_INODES 1 90#define LOG_WALK_REPLAY_INODES 1
48#define LOG_WALK_REPLAY_ALL 2 91#define LOG_WALK_REPLAY_ALL 2
49 92
50static int __btrfs_log_inode(struct btrfs_trans_handle *trans, 93static int btrfs_log_inode(struct btrfs_trans_handle *trans,
51 struct btrfs_root *root, struct inode *inode, 94 struct btrfs_root *root, struct inode *inode,
52 int inode_only); 95 int inode_only);
53static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 96static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, 97 struct btrfs_root *root,
55 struct btrfs_path *path, u64 objectid); 98 struct btrfs_path *path, u64 objectid);
99static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
100 struct btrfs_root *root,
101 struct btrfs_root *log,
102 struct btrfs_path *path,
103 u64 dirid, int del_all);
56 104
57/* 105/*
58 * tree logging is a special write ahead log used to make sure that 106 * tree logging is a special write ahead log used to make sure that
@@ -133,10 +181,25 @@ static int join_running_log_trans(struct btrfs_root *root)
133} 181}
134 182
135/* 183/*
184 * This either makes the current running log transaction wait
185 * until you call btrfs_end_log_trans() or it makes any future
186 * log transactions wait until you call btrfs_end_log_trans()
187 */
188int btrfs_pin_log_trans(struct btrfs_root *root)
189{
190 int ret = -ENOENT;
191
192 mutex_lock(&root->log_mutex);
193 atomic_inc(&root->log_writers);
194 mutex_unlock(&root->log_mutex);
195 return ret;
196}
197
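A short sketch of the intended pairing (editor's illustration; in this series the rename path pins the log while names are updated so a concurrent fsync cannot sync a half-updated log):

static void sketch_pinned_log_section(struct btrfs_root *root)
{
	btrfs_pin_log_trans(root);

	/* ... update names/backrefs; no log sync can complete here ... */

	btrfs_end_log_trans(root);
}
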
198/*
136 * indicate we're done making changes to the log tree 199 * indicate we're done making changes to the log tree
137 * and wake up anyone waiting to do a sync 200 * and wake up anyone waiting to do a sync
138 */ 201 */
139static int end_log_trans(struct btrfs_root *root) 202int btrfs_end_log_trans(struct btrfs_root *root)
140{ 203{
141 if (atomic_dec_and_test(&root->log_writers)) { 204 if (atomic_dec_and_test(&root->log_writers)) {
142 smp_mb(); 205 smp_mb();
@@ -203,7 +266,6 @@ static int process_one_buffer(struct btrfs_root *log,
203 mutex_lock(&log->fs_info->pinned_mutex); 266 mutex_lock(&log->fs_info->pinned_mutex);
204 btrfs_update_pinned_extents(log->fs_info->extent_root, 267 btrfs_update_pinned_extents(log->fs_info->extent_root,
205 eb->start, eb->len, 1); 268 eb->start, eb->len, 1);
206 mutex_unlock(&log->fs_info->pinned_mutex);
207 } 269 }
208 270
209 if (btrfs_buffer_uptodate(eb, gen)) { 271 if (btrfs_buffer_uptodate(eb, gen)) {
@@ -603,6 +665,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
603 665
604 ret = link_to_fixup_dir(trans, root, path, location.objectid); 666 ret = link_to_fixup_dir(trans, root, path, location.objectid);
605 BUG_ON(ret); 667 BUG_ON(ret);
668
606 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 669 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
607 BUG_ON(ret); 670 BUG_ON(ret);
608 kfree(name); 671 kfree(name);
@@ -804,6 +867,7 @@ conflict_again:
804 victim_name_len)) { 867 victim_name_len)) {
805 btrfs_inc_nlink(inode); 868 btrfs_inc_nlink(inode);
806 btrfs_release_path(root, path); 869 btrfs_release_path(root, path);
870
807 ret = btrfs_unlink_inode(trans, root, dir, 871 ret = btrfs_unlink_inode(trans, root, dir,
808 inode, victim_name, 872 inode, victim_name,
809 victim_name_len); 873 victim_name_len);
@@ -922,13 +986,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
922 key.offset--; 986 key.offset--;
923 btrfs_release_path(root, path); 987 btrfs_release_path(root, path);
924 } 988 }
925 btrfs_free_path(path); 989 btrfs_release_path(root, path);
926 if (nlink != inode->i_nlink) { 990 if (nlink != inode->i_nlink) {
927 inode->i_nlink = nlink; 991 inode->i_nlink = nlink;
928 btrfs_update_inode(trans, root, inode); 992 btrfs_update_inode(trans, root, inode);
929 } 993 }
930 BTRFS_I(inode)->index_cnt = (u64)-1; 994 BTRFS_I(inode)->index_cnt = (u64)-1;
931 995
996 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) {
997 ret = replay_dir_deletes(trans, root, NULL, path,
998 inode->i_ino, 1);
999 BUG_ON(ret);
1000 }
1001 btrfs_free_path(path);
1002
932 return 0; 1003 return 0;
933} 1004}
934 1005
@@ -971,9 +1042,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
971 1042
972 iput(inode); 1043 iput(inode);
973 1044
974 if (key.offset == 0) 1045 /*
975 break; 1046 * fixup on a directory may create new entries,
976 key.offset--; 1047 * make sure we always look for the highset possible
1048 * offset
1049 */
1050 key.offset = (u64)-1;
977 } 1051 }
978 btrfs_release_path(root, path); 1052 btrfs_release_path(root, path);
979 return 0; 1053 return 0;
@@ -1313,11 +1387,11 @@ again:
1313 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1387 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1314 name_len); 1388 name_len);
1315 log_di = NULL; 1389 log_di = NULL;
1316 if (dir_key->type == BTRFS_DIR_ITEM_KEY) { 1390 if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
1317 log_di = btrfs_lookup_dir_item(trans, log, log_path, 1391 log_di = btrfs_lookup_dir_item(trans, log, log_path,
1318 dir_key->objectid, 1392 dir_key->objectid,
1319 name, name_len, 0); 1393 name, name_len, 0);
1320 } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { 1394 } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
1321 log_di = btrfs_lookup_dir_index_item(trans, log, 1395 log_di = btrfs_lookup_dir_index_item(trans, log,
1322 log_path, 1396 log_path,
1323 dir_key->objectid, 1397 dir_key->objectid,
@@ -1378,7 +1452,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
1378 struct btrfs_root *root, 1452 struct btrfs_root *root,
1379 struct btrfs_root *log, 1453 struct btrfs_root *log,
1380 struct btrfs_path *path, 1454 struct btrfs_path *path,
1381 u64 dirid) 1455 u64 dirid, int del_all)
1382{ 1456{
1383 u64 range_start; 1457 u64 range_start;
1384 u64 range_end; 1458 u64 range_end;
@@ -1408,10 +1482,14 @@ again:
1408 range_start = 0; 1482 range_start = 0;
1409 range_end = 0; 1483 range_end = 0;
1410 while (1) { 1484 while (1) {
1411 ret = find_dir_range(log, path, dirid, key_type, 1485 if (del_all)
1412 &range_start, &range_end); 1486 range_end = (u64)-1;
1413 if (ret != 0) 1487 else {
1414 break; 1488 ret = find_dir_range(log, path, dirid, key_type,
1489 &range_start, &range_end);
1490 if (ret != 0)
1491 break;
1492 }
1415 1493
1416 dir_key.offset = range_start; 1494 dir_key.offset = range_start;
1417 while (1) { 1495 while (1) {
@@ -1437,7 +1515,8 @@ again:
1437 break; 1515 break;
1438 1516
1439 ret = check_item_in_log(trans, root, log, path, 1517 ret = check_item_in_log(trans, root, log, path,
1440 log_path, dir, &found_key); 1518 log_path, dir,
1519 &found_key);
1441 BUG_ON(ret); 1520 BUG_ON(ret);
1442 if (found_key.offset == (u64)-1) 1521 if (found_key.offset == (u64)-1)
1443 break; 1522 break;
@@ -1514,7 +1593,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1514 mode = btrfs_inode_mode(eb, inode_item); 1593 mode = btrfs_inode_mode(eb, inode_item);
1515 if (S_ISDIR(mode)) { 1594 if (S_ISDIR(mode)) {
1516 ret = replay_dir_deletes(wc->trans, 1595 ret = replay_dir_deletes(wc->trans,
1517 root, log, path, key.objectid); 1596 root, log, path, key.objectid, 0);
1518 BUG_ON(ret); 1597 BUG_ON(ret);
1519 } 1598 }
1520 ret = overwrite_item(wc->trans, root, path, 1599 ret = overwrite_item(wc->trans, root, path,
@@ -1533,6 +1612,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1533 root, inode, inode->i_size, 1612 root, inode, inode->i_size,
1534 BTRFS_EXTENT_DATA_KEY); 1613 BTRFS_EXTENT_DATA_KEY);
1535 BUG_ON(ret); 1614 BUG_ON(ret);
1615
1616 /* if the nlink count is zero here, the iput
1617 * will free the inode. We bump it to make
1618 * sure it doesn't get freed until the link
1619 * count fixup is done
1620 */
1621 if (inode->i_nlink == 0) {
1622 btrfs_inc_nlink(inode);
1623 btrfs_update_inode(wc->trans,
1624 root, inode);
1625 }
1536 iput(inode); 1626 iput(inode);
1537 } 1627 }
1538 ret = link_to_fixup_dir(wc->trans, root, 1628 ret = link_to_fixup_dir(wc->trans, root,
@@ -1840,7 +1930,8 @@ static int update_log_root(struct btrfs_trans_handle *trans,
1840 return ret; 1930 return ret;
1841} 1931}
1842 1932
1843static int wait_log_commit(struct btrfs_root *root, unsigned long transid) 1933static int wait_log_commit(struct btrfs_trans_handle *trans,
1934 struct btrfs_root *root, unsigned long transid)
1844{ 1935{
1845 DEFINE_WAIT(wait); 1936 DEFINE_WAIT(wait);
1846 int index = transid % 2; 1937 int index = transid % 2;
@@ -1854,9 +1945,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
1854 prepare_to_wait(&root->log_commit_wait[index], 1945 prepare_to_wait(&root->log_commit_wait[index],
1855 &wait, TASK_UNINTERRUPTIBLE); 1946 &wait, TASK_UNINTERRUPTIBLE);
1856 mutex_unlock(&root->log_mutex); 1947 mutex_unlock(&root->log_mutex);
1857 if (root->log_transid < transid + 2 && 1948
1949 if (root->fs_info->last_trans_log_full_commit !=
1950 trans->transid && root->log_transid < transid + 2 &&
1858 atomic_read(&root->log_commit[index])) 1951 atomic_read(&root->log_commit[index]))
1859 schedule(); 1952 schedule();
1953
1860 finish_wait(&root->log_commit_wait[index], &wait); 1954 finish_wait(&root->log_commit_wait[index], &wait);
1861 mutex_lock(&root->log_mutex); 1955 mutex_lock(&root->log_mutex);
1862 } while (root->log_transid < transid + 2 && 1956 } while (root->log_transid < transid + 2 &&
@@ -1864,14 +1958,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
1864 return 0; 1958 return 0;
1865} 1959}
1866 1960
1867static int wait_for_writer(struct btrfs_root *root) 1961static int wait_for_writer(struct btrfs_trans_handle *trans,
1962 struct btrfs_root *root)
1868{ 1963{
1869 DEFINE_WAIT(wait); 1964 DEFINE_WAIT(wait);
1870 while (atomic_read(&root->log_writers)) { 1965 while (atomic_read(&root->log_writers)) {
1871 prepare_to_wait(&root->log_writer_wait, 1966 prepare_to_wait(&root->log_writer_wait,
1872 &wait, TASK_UNINTERRUPTIBLE); 1967 &wait, TASK_UNINTERRUPTIBLE);
1873 mutex_unlock(&root->log_mutex); 1968 mutex_unlock(&root->log_mutex);
1874 if (atomic_read(&root->log_writers)) 1969 if (root->fs_info->last_trans_log_full_commit !=
1970 trans->transid && atomic_read(&root->log_writers))
1875 schedule(); 1971 schedule();
1876 mutex_lock(&root->log_mutex); 1972 mutex_lock(&root->log_mutex);
1877 finish_wait(&root->log_writer_wait, &wait); 1973 finish_wait(&root->log_writer_wait, &wait);
@@ -1882,7 +1978,14 @@ static int wait_for_writer(struct btrfs_root *root)
1882/* 1978/*
1883 * btrfs_sync_log sends a given tree log down to the disk and 1979 * btrfs_sync_log sends a given tree log down to the disk and
1884 * updates the super blocks to record it. When this call is done, 1980 * updates the super blocks to record it. When this call is done,
1885 * you know that any inodes previously logged are safely on disk 1981 * you know that any inodes previously logged are safely on disk only
1982 * if it returns 0.
1983 *
1984 * Any other return value means you need to call btrfs_commit_transaction.
1985 * Some of the edge cases for fsyncing directories that have had unlinks
1986 * or renames done in the past mean that sometimes the only safe
1987 * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN,
1988 * that has happened.
1886 */ 1989 */
1887int btrfs_sync_log(struct btrfs_trans_handle *trans, 1990int btrfs_sync_log(struct btrfs_trans_handle *trans,
1888 struct btrfs_root *root) 1991 struct btrfs_root *root)
@@ -1896,7 +1999,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1896 mutex_lock(&root->log_mutex); 1999 mutex_lock(&root->log_mutex);
1897 index1 = root->log_transid % 2; 2000 index1 = root->log_transid % 2;
1898 if (atomic_read(&root->log_commit[index1])) { 2001 if (atomic_read(&root->log_commit[index1])) {
1899 wait_log_commit(root, root->log_transid); 2002 wait_log_commit(trans, root, root->log_transid);
1900 mutex_unlock(&root->log_mutex); 2003 mutex_unlock(&root->log_mutex);
1901 return 0; 2004 return 0;
1902 } 2005 }
@@ -1904,18 +2007,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1904 2007
1905 /* wait for previous tree log sync to complete */ 2008 /* wait for previous tree log sync to complete */
1906 if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 2009 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
1907 wait_log_commit(root, root->log_transid - 1); 2010 wait_log_commit(trans, root, root->log_transid - 1);
1908 2011
1909 while (1) { 2012 while (1) {
1910 unsigned long batch = root->log_batch; 2013 unsigned long batch = root->log_batch;
1911 mutex_unlock(&root->log_mutex); 2014 mutex_unlock(&root->log_mutex);
1912 schedule_timeout_uninterruptible(1); 2015 schedule_timeout_uninterruptible(1);
1913 mutex_lock(&root->log_mutex); 2016 mutex_lock(&root->log_mutex);
1914 wait_for_writer(root); 2017
2018 wait_for_writer(trans, root);
1915 if (batch == root->log_batch) 2019 if (batch == root->log_batch)
1916 break; 2020 break;
1917 } 2021 }
1918 2022
2023 /* bail out if we need to do a full commit */
2024 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2025 ret = -EAGAIN;
2026 mutex_unlock(&root->log_mutex);
2027 goto out;
2028 }
2029
1919 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2030 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
1920 BUG_ON(ret); 2031 BUG_ON(ret);
1921 2032
@@ -1951,16 +2062,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1951 2062
1952 index2 = log_root_tree->log_transid % 2; 2063 index2 = log_root_tree->log_transid % 2;
1953 if (atomic_read(&log_root_tree->log_commit[index2])) { 2064 if (atomic_read(&log_root_tree->log_commit[index2])) {
1954 wait_log_commit(log_root_tree, log_root_tree->log_transid); 2065 wait_log_commit(trans, log_root_tree,
2066 log_root_tree->log_transid);
1955 mutex_unlock(&log_root_tree->log_mutex); 2067 mutex_unlock(&log_root_tree->log_mutex);
1956 goto out; 2068 goto out;
1957 } 2069 }
1958 atomic_set(&log_root_tree->log_commit[index2], 1); 2070 atomic_set(&log_root_tree->log_commit[index2], 1);
1959 2071
1960 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) 2072 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
1961 wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); 2073 wait_log_commit(trans, log_root_tree,
2074 log_root_tree->log_transid - 1);
2075 }
2076
2077 wait_for_writer(trans, log_root_tree);
1962 2078
1963 wait_for_writer(log_root_tree); 2079 /*
2080 * now that we've moved on to the tree of log tree roots,
2081 * check the full commit flag again
2082 */
2083 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2084 mutex_unlock(&log_root_tree->log_mutex);
2085 ret = -EAGAIN;
2086 goto out_wake_log_root;
2087 }
1964 2088
1965 ret = btrfs_write_and_wait_marked_extents(log_root_tree, 2089 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
1966 &log_root_tree->dirty_log_pages); 2090 &log_root_tree->dirty_log_pages);
@@ -1985,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
1985 * in and cause problems either. 2109 * in and cause problems either.
1986 */ 2110 */
1987 write_ctree_super(trans, root->fs_info->tree_root, 2); 2111 write_ctree_super(trans, root->fs_info->tree_root, 2);
2112 ret = 0;
1988 2113
2114out_wake_log_root:
1989 atomic_set(&log_root_tree->log_commit[index2], 0); 2115 atomic_set(&log_root_tree->log_commit[index2], 0);
1990 smp_mb(); 2116 smp_mb();
1991 if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) 2117 if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
@@ -1998,7 +2124,8 @@ out:
1998 return 0; 2124 return 0;
1999} 2125}
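
A hedged sketch of the caller contract described in the comment above btrfs_sync_log: the fsync path treats any nonzero return as "fall back to a full transaction commit". The flow mirrors the btrfs_sync_file() changes elsewhere in this series (not shown in this section), so take the exact shape as illustrative:

static int sketch_fsync_tail(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, struct dentry *dentry)
{
	int ret = btrfs_log_dentry_safe(trans, root, dentry);

	if (ret == 0)
		ret = btrfs_sync_log(trans, root);
	if (ret == 0)
		return btrfs_end_transaction(trans, root);

	/* logging refused (1) or bailed with -EAGAIN: commit the world */
	return btrfs_commit_transaction(trans, root);
}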
2000 2126
2001/* * free all the extents used by the tree log. This should be called 2127/*
2128 * free all the extents used by the tree log. This should be called
2002 * at commit time of the full transaction 2129 * at commit time of the full transaction
2003 */ 2130 */
2004int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) 2131int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
@@ -2132,7 +2259,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2132 2259
2133 btrfs_free_path(path); 2260 btrfs_free_path(path);
2134 mutex_unlock(&BTRFS_I(dir)->log_mutex); 2261 mutex_unlock(&BTRFS_I(dir)->log_mutex);
2135 end_log_trans(root); 2262 btrfs_end_log_trans(root);
2136 2263
2137 return 0; 2264 return 0;
2138} 2265}
@@ -2159,7 +2286,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
2159 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, 2286 ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
2160 dirid, &index); 2287 dirid, &index);
2161 mutex_unlock(&BTRFS_I(inode)->log_mutex); 2288 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2162 end_log_trans(root); 2289 btrfs_end_log_trans(root);
2163 2290
2164 return ret; 2291 return ret;
2165} 2292}
@@ -2559,7 +2686,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2559 * 2686 *
2560 * This handles both files and directories. 2687 * This handles both files and directories.
2561 */ 2688 */
2562static int __btrfs_log_inode(struct btrfs_trans_handle *trans, 2689static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2563 struct btrfs_root *root, struct inode *inode, 2690 struct btrfs_root *root, struct inode *inode,
2564 int inode_only) 2691 int inode_only)
2565{ 2692{
@@ -2585,28 +2712,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans,
2585 min_key.offset = 0; 2712 min_key.offset = 0;
2586 2713
2587 max_key.objectid = inode->i_ino; 2714 max_key.objectid = inode->i_ino;
2715
2716 /* today the code can only do partial logging of directories */
2717 if (!S_ISDIR(inode->i_mode))
2718 inode_only = LOG_INODE_ALL;
2719
2588 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) 2720 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
2589 max_key.type = BTRFS_XATTR_ITEM_KEY; 2721 max_key.type = BTRFS_XATTR_ITEM_KEY;
2590 else 2722 else
2591 max_key.type = (u8)-1; 2723 max_key.type = (u8)-1;
2592 max_key.offset = (u64)-1; 2724 max_key.offset = (u64)-1;
2593 2725
2594 /*
2595 * if this inode has already been logged and we're in inode_only
2596 * mode, we don't want to delete the things that have already
2597 * been written to the log.
2598 *
2599 * But, if the inode has been through an inode_only log,
2600 * the logged_trans field is not set. This allows us to catch
2601 * any new names for this inode in the backrefs by logging it
2602 * again
2603 */
2604 if (inode_only == LOG_INODE_EXISTS &&
2605 BTRFS_I(inode)->logged_trans == trans->transid) {
2606 btrfs_free_path(path);
2607 btrfs_free_path(dst_path);
2608 goto out;
2609 }
2610 mutex_lock(&BTRFS_I(inode)->log_mutex); 2726 mutex_lock(&BTRFS_I(inode)->log_mutex);
2611 2727
2612 /* 2728 /*
@@ -2693,7 +2809,6 @@ next_slot:
2693 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 2809 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2694 btrfs_release_path(root, path); 2810 btrfs_release_path(root, path);
2695 btrfs_release_path(log, dst_path); 2811 btrfs_release_path(log, dst_path);
2696 BTRFS_I(inode)->log_dirty_trans = 0;
2697 ret = log_directory_changes(trans, root, inode, path, dst_path); 2812 ret = log_directory_changes(trans, root, inode, path, dst_path);
2698 BUG_ON(ret); 2813 BUG_ON(ret);
2699 } 2814 }
@@ -2702,19 +2817,69 @@ next_slot:
2702 2817
2703 btrfs_free_path(path); 2818 btrfs_free_path(path);
2704 btrfs_free_path(dst_path); 2819 btrfs_free_path(dst_path);
2705out:
2706 return 0; 2820 return 0;
2707} 2821}
2708 2822
2709int btrfs_log_inode(struct btrfs_trans_handle *trans, 2823/*
2710 struct btrfs_root *root, struct inode *inode, 2824 * follow the dentry parent pointers up the chain and see if any
2711 int inode_only) 2825 * of the directories in it require a full commit before they can
2826 * be logged. Returns zero if nothing special needs to be done or 1 if
2827 * a full commit is required.
2828 */
2829static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2830 struct inode *inode,
2831 struct dentry *parent,
2832 struct super_block *sb,
2833 u64 last_committed)
2712{ 2834{
2713 int ret; 2835 int ret = 0;
2836 struct btrfs_root *root;
2714 2837
2715 start_log_trans(trans, root); 2838 /*
2716 ret = __btrfs_log_inode(trans, root, inode, inode_only); 2839 * for regular files, if its inode is already on disk, we don't
2717 end_log_trans(root); 2840 * have to worry about the parents at all. This is because
2841 * we can use the last_unlink_trans field to record renames
2842 * and other fun in this file.
2843 */
2844 if (S_ISREG(inode->i_mode) &&
2845 BTRFS_I(inode)->generation <= last_committed &&
2846 BTRFS_I(inode)->last_unlink_trans <= last_committed)
2847 goto out;
2848
2849 if (!S_ISDIR(inode->i_mode)) {
2850 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2851 goto out;
2852 inode = parent->d_inode;
2853 }
2854
2855 while (1) {
2856 BTRFS_I(inode)->logged_trans = trans->transid;
2857 smp_mb();
2858
2859 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
2860 root = BTRFS_I(inode)->root;
2861
2862 /*
2863 * make sure any commits to the log are forced
2864 * to be full commits
2865 */
2866 root->fs_info->last_trans_log_full_commit =
2867 trans->transid;
2868 ret = 1;
2869 break;
2870 }
2871
2872 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2873 break;
2874
2875 if (parent == sb->s_root)
2876 break;
2877
2878 parent = parent->d_parent;
2879 inode = parent->d_inode;
2880
2881 }
2882out:
2718 return ret; 2883 return ret;
2719} 2884}
2720 2885
@@ -2724,31 +2889,65 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans,
2724 * only logging is done of any parent directories that are older than 2889 * only logging is done of any parent directories that are older than
2725 * the last committed transaction 2890 * the last committed transaction
2726 */ 2891 */
2727int btrfs_log_dentry(struct btrfs_trans_handle *trans, 2892int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2728 struct btrfs_root *root, struct dentry *dentry) 2893 struct btrfs_root *root, struct inode *inode,
2894 struct dentry *parent, int exists_only)
2729{ 2895{
2730 int inode_only = LOG_INODE_ALL; 2896 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
2731 struct super_block *sb; 2897 struct super_block *sb;
2732 int ret; 2898 int ret = 0;
2899 u64 last_committed = root->fs_info->last_trans_committed;
2900
2901 sb = inode->i_sb;
2902
2903 if (root->fs_info->last_trans_log_full_commit >
2904 root->fs_info->last_trans_committed) {
2905 ret = 1;
2906 goto end_no_trans;
2907 }
2908
2909 ret = check_parent_dirs_for_sync(trans, inode, parent,
2910 sb, last_committed);
2911 if (ret)
2912 goto end_no_trans;
2733 2913
2734 start_log_trans(trans, root); 2914 start_log_trans(trans, root);
2735 sb = dentry->d_inode->i_sb;
2736 while (1) {
2737 ret = __btrfs_log_inode(trans, root, dentry->d_inode,
2738 inode_only);
2739 BUG_ON(ret);
2740 inode_only = LOG_INODE_EXISTS;
2741 2915
2742 dentry = dentry->d_parent; 2916 ret = btrfs_log_inode(trans, root, inode, inode_only);
2743 if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) 2917 BUG_ON(ret);
2918
2919 /*
2920 * for regular files, if its inode is already on disk, we don't
2921 * have to worry about the parents at all. This is because
2922 * we can use the last_unlink_trans field to record renames
2923 * and other fun in this file.
2924 */
2925 if (S_ISREG(inode->i_mode) &&
2926 BTRFS_I(inode)->generation <= last_committed &&
2927 BTRFS_I(inode)->last_unlink_trans <= last_committed)
2928 goto no_parent;
2929
2930 inode_only = LOG_INODE_EXISTS;
2931 while (1) {
2932 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
2744 break; 2933 break;
2745 2934
2746 if (BTRFS_I(dentry->d_inode)->generation <= 2935 inode = parent->d_inode;
2747 root->fs_info->last_trans_committed) 2936 if (BTRFS_I(inode)->generation >
2937 root->fs_info->last_trans_committed) {
2938 ret = btrfs_log_inode(trans, root, inode, inode_only);
2939 BUG_ON(ret);
2940 }
2941 if (parent == sb->s_root)
2748 break; 2942 break;
2943
2944 parent = parent->d_parent;
2749 } 2945 }
2750 end_log_trans(root); 2946no_parent:
2751 return 0; 2947 ret = 0;
2948 btrfs_end_log_trans(root);
2949end_no_trans:
2950 return ret;
2752} 2951}
2753 2952
2754/* 2953/*
@@ -2760,12 +2959,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans,
2760int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 2959int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
2761 struct btrfs_root *root, struct dentry *dentry) 2960 struct btrfs_root *root, struct dentry *dentry)
2762{ 2961{
2763 u64 gen; 2962 return btrfs_log_inode_parent(trans, root, dentry->d_inode,
2764 gen = root->fs_info->last_trans_new_blockgroup; 2963 dentry->d_parent, 0);
2765 if (gen > root->fs_info->last_trans_committed)
2766 return 1;
2767 else
2768 return btrfs_log_dentry(trans, root, dentry);
2769} 2964}
2770 2965
2771/* 2966/*
@@ -2884,3 +3079,94 @@ again:
2884 kfree(log_root_tree); 3079 kfree(log_root_tree);
2885 return 0; 3080 return 0;
2886} 3081}
3082
3083/*
3084 * there are some corner cases where we want to force a full
3085 * commit instead of allowing a directory to be logged.
3086 *
3087 * They revolve around files that were unlinked from the directory, and
3088 * this function updates the parent directory so that a full commit is
3089 * properly done if it is fsync'd later after the unlinks are done.
3090 */
3091void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
3092 struct inode *dir, struct inode *inode,
3093 int for_rename)
3094{
3095 /*
3096 * when we're logging a file, if it hasn't been renamed
3097 * or unlinked, and its inode is fully committed on disk,
3098 * we don't have to worry about walking up the directory chain
3099 * to log its parents.
3100 *
3101 * So, we use the last_unlink_trans field to put this transid
3102 * into the file. When the file is logged we check it and
3103 * don't log the parents if the file is fully on disk.
3104 */
3105 if (S_ISREG(inode->i_mode))
3106 BTRFS_I(inode)->last_unlink_trans = trans->transid;
3107
3108 /*
3109 * if this directory was already logged any new
3110 * names for this file/dir will get recorded
3111 */
3112 smp_mb();
3113 if (BTRFS_I(dir)->logged_trans == trans->transid)
3114 return;
3115
3116 /*
3117 * if the inode we're about to unlink was logged,
3118 * the log will be properly updated for any new names
3119 */
3120 if (BTRFS_I(inode)->logged_trans == trans->transid)
3121 return;
3122
3123 /*
3124 * when renaming files across directories, if the directory
3125 * we're unlinking from gets fsync'd later on, there's
3126 * no way to find the destination directory later and fsync it
3127 * properly. So, we have to be conservative and force commits
3128 * so the new name gets discovered.
3129 */
3130 if (for_rename)
3131 goto record;
3132
3133 /* we can safely do the unlink without any special recording */
3134 return;
3135
3136record:
3137 BTRFS_I(dir)->last_unlink_trans = trans->transid;
3138}
3139
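A sketch of where this hook sits in an unlink, using only helpers visible in this patch (the actual call site is in fs/btrfs/inode.c, outside this section, so treat the placement as illustrative):

	/* record the directory state before the name goes away */
	btrfs_record_unlink_dir(trans, dir, dentry->d_inode,
				0 /* not a rename */);

	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
				 dentry->d_name.name, dentry->d_name.len);
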
3140/*
3141 * Call this after adding a new name for a file and it will properly
3142 * update the log to reflect the new name.
3143 *
3144 * It will return zero if all goes well, and it will return 1 if a
3145 * full transaction commit is required.
3146 */
3147int btrfs_log_new_name(struct btrfs_trans_handle *trans,
3148 struct inode *inode, struct inode *old_dir,
3149 struct dentry *parent)
3150{
3151 struct btrfs_root *root = BTRFS_I(inode)->root;
3152
3153 /*
3154 * this will force the logging code to walk the dentry chain
3155 * up for the file
3156 */
3157 if (S_ISREG(inode->i_mode))
3158 BTRFS_I(inode)->last_unlink_trans = trans->transid;
3159
3160 /*
3161 * if this inode hasn't been logged and the directory we're renaming it
3162 * from hasn't been logged, we don't need to log it
3163 */
3164 if (BTRFS_I(inode)->logged_trans <=
3165 root->fs_info->last_trans_committed &&
3166 (!old_dir || BTRFS_I(old_dir)->logged_trans <=
3167 root->fs_info->last_trans_committed))
3168 return 0;
3169
3170 return btrfs_log_inode_parent(trans, root, inode, parent, 1);
3171}
3172
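Taken together with btrfs_record_unlink_dir() above, a rename would use the pair roughly like this (sketch only; the real sequence is in btrfs_rename() in fs/btrfs/inode.c, outside this section):

	/* before the old name is removed: force conservative logging */
	btrfs_record_unlink_dir(trans, old_dir, old_inode, 1 /* rename */);

	/* ... move the name in the tree ... */

	/* after the new name exists: update the log, or learn that a
	 * full transaction commit is required (return value 1) */
	ret = btrfs_log_new_name(trans, old_inode, old_dir,
				 new_dentry->d_parent);
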
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index b9409b32ed02..d09c7609e16b 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -22,14 +22,9 @@
22int btrfs_sync_log(struct btrfs_trans_handle *trans, 22int btrfs_sync_log(struct btrfs_trans_handle *trans,
23 struct btrfs_root *root); 23 struct btrfs_root *root);
24int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); 24int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
25int btrfs_log_dentry(struct btrfs_trans_handle *trans,
26 struct btrfs_root *root, struct dentry *dentry);
27int btrfs_recover_log_trees(struct btrfs_root *tree_root); 25int btrfs_recover_log_trees(struct btrfs_root *tree_root);
28int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 26int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
29 struct btrfs_root *root, struct dentry *dentry); 27 struct btrfs_root *root, struct dentry *dentry);
30int btrfs_log_inode(struct btrfs_trans_handle *trans,
31 struct btrfs_root *root, struct inode *inode,
32 int inode_only);
33int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 28int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root, 29 struct btrfs_root *root,
35 const char *name, int name_len, 30 const char *name, int name_len,
@@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 33 struct btrfs_root *root,
39 const char *name, int name_len, 34 const char *name, int name_len,
40 struct inode *inode, u64 dirid); 35 struct inode *inode, u64 dirid);
36int btrfs_join_running_log_trans(struct btrfs_root *root);
37int btrfs_end_log_trans(struct btrfs_root *root);
38int btrfs_pin_log_trans(struct btrfs_root *root);
39int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root, struct inode *inode,
41 struct dentry *parent, int exists_only);
42void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
43 struct inode *dir, struct inode *inode,
44 int for_rename);
45int btrfs_log_new_name(struct btrfs_trans_handle *trans,
46 struct inode *inode, struct inode *old_dir,
47 struct dentry *parent);
41#endif 48#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index a2fd743d97cb..f5f8b15a6e40 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -290,7 +290,7 @@ static void free_more_memory(void)
290 &zone); 290 &zone);
291 if (zone) 291 if (zone)
292 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0, 292 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
293 GFP_NOFS); 293 GFP_NOFS, NULL);
294 } 294 }
295} 295}
296 296
@@ -547,6 +547,39 @@ repeat:
547 return err; 547 return err;
548} 548}
549 549
550void do_thaw_all(unsigned long unused)
551{
552 struct super_block *sb;
553 char b[BDEVNAME_SIZE];
554
555 spin_lock(&sb_lock);
556restart:
557 list_for_each_entry(sb, &super_blocks, s_list) {
558 sb->s_count++;
559 spin_unlock(&sb_lock);
560 down_read(&sb->s_umount);
561 while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
562 printk(KERN_WARNING "Emergency Thaw on %s\n",
563 bdevname(sb->s_bdev, b));
564 up_read(&sb->s_umount);
565 spin_lock(&sb_lock);
566 if (__put_super_and_need_restart(sb))
567 goto restart;
568 }
569 spin_unlock(&sb_lock);
570 printk(KERN_WARNING "Emergency Thaw complete\n");
571}
572
573/**
574 * emergency_thaw_all -- forcibly thaw every frozen filesystem
575 *
576 * Used for emergency unfreeze of all filesystems via SysRq
577 */
578void emergency_thaw_all(void)
579{
580 pdflush_operation(do_thaw_all, 0);
581}
582
550/** 583/**
551 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers 584 * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
552 * @mapping: the mapping which wants those buffers written 585 * @mapping: the mapping which wants those buffers written
@@ -621,14 +654,7 @@ static void __set_page_dirty(struct page *page,
621 spin_lock_irq(&mapping->tree_lock); 654 spin_lock_irq(&mapping->tree_lock);
622 if (page->mapping) { /* Race with truncate? */ 655 if (page->mapping) { /* Race with truncate? */
623 WARN_ON_ONCE(warn && !PageUptodate(page)); 656 WARN_ON_ONCE(warn && !PageUptodate(page));
624 657 account_page_dirtied(page, mapping);
625 if (mapping_cap_account_dirty(mapping)) {
626 __inc_zone_page_state(page, NR_FILE_DIRTY);
627 __inc_bdi_stat(mapping->backing_dev_info,
628 BDI_RECLAIMABLE);
629 task_dirty_inc(current);
630 task_io_account_write(PAGE_CACHE_SIZE);
631 }
632 radix_tree_tag_set(&mapping->page_tree, 658 radix_tree_tag_set(&mapping->page_tree,
633 page_index(page), PAGECACHE_TAG_DIRTY); 659 page_index(page), PAGECACHE_TAG_DIRTY);
634 } 660 }
@@ -2320,13 +2346,14 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2320 * unlock the page. 2346 * unlock the page.
2321 */ 2347 */
2322int 2348int
2323block_page_mkwrite(struct vm_area_struct *vma, struct page *page, 2349block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2324 get_block_t get_block) 2350 get_block_t get_block)
2325{ 2351{
2352 struct page *page = vmf->page;
2326 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 2353 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2327 unsigned long end; 2354 unsigned long end;
2328 loff_t size; 2355 loff_t size;
2329 int ret = -EINVAL; 2356 int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
2330 2357
2331 lock_page(page); 2358 lock_page(page);
2332 size = i_size_read(inode); 2359 size = i_size_read(inode);
@@ -2346,6 +2373,13 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
2346 if (!ret) 2373 if (!ret)
2347 ret = block_commit_write(page, 0, end); 2374 ret = block_commit_write(page, 0, end);
2348 2375
2376 if (unlikely(ret)) {
2377 if (ret == -ENOMEM)
2378 ret = VM_FAULT_OOM;
2379 else /* -ENOSPC, -EIO, etc */
2380 ret = VM_FAULT_SIGBUS;
2381 }
2382
2349out_unlock: 2383out_unlock:
2350 unlock_page(page); 2384 unlock_page(page);
2351 return ret; 2385 return ret;
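
The prototype change above ripples into every ->page_mkwrite implementation (the gfs2 and ocfs2 hunks in this diffstat are real examples). A minimal hypothetical hook now looks like this, where myfs_page_mkwrite and myfs_get_block are stand-ins, not kernel symbols:

static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* the helper takes the vm_fault itself and already maps errors
	 * to VM_FAULT_* codes, so the hook can just pass it through */
	return block_page_mkwrite(vma, vmf, myfs_get_block);
}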
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 877e4d9a1159..7f19fefd3d45 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -404,7 +404,6 @@ cifs_proc_init(void)
404 if (proc_fs_cifs == NULL) 404 if (proc_fs_cifs == NULL)
405 return; 405 return;
406 406
407 proc_fs_cifs->owner = THIS_MODULE;
408 proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops); 407 proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
409 408
410#ifdef CONFIG_CIFS_STATS 409#ifdef CONFIG_CIFS_STATS
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e4a6223c3145..af737bb56cb7 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -740,8 +740,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
740out_release_free_unlock: 740out_release_free_unlock:
741 crypto_free_hash(s->hash_desc.tfm); 741 crypto_free_hash(s->hash_desc.tfm);
742out_free_unlock: 742out_free_unlock:
743 memset(s->block_aligned_filename, 0, s->block_aligned_filename_size); 743 kzfree(s->block_aligned_filename);
744 kfree(s->block_aligned_filename);
745out_unlock: 744out_unlock:
746 mutex_unlock(s->tfm_mutex); 745 mutex_unlock(s->tfm_mutex);
747out: 746out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 96ef51489e01..295e7fa56755 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -291,8 +291,7 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
291 if (daemon->user_ns) 291 if (daemon->user_ns)
292 put_user_ns(daemon->user_ns); 292 put_user_ns(daemon->user_ns);
293 mutex_unlock(&daemon->mux); 293 mutex_unlock(&daemon->mux);
294 memset(daemon, 0, sizeof(*daemon)); 294 kzfree(daemon);
295 kfree(daemon);
296out: 295out:
297 return rc; 296 return rc;
298} 297}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 5de2c2db3aa2..2a701d593d35 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -28,6 +28,7 @@ struct eventfd_ctx {
28 * issue a wakeup. 28 * issue a wakeup.
29 */ 29 */
30 __u64 count; 30 __u64 count;
31 unsigned int flags;
31}; 32};
32 33
33/* 34/*
@@ -50,7 +51,7 @@ int eventfd_signal(struct file *file, int n)
50 n = (int) (ULLONG_MAX - ctx->count); 51 n = (int) (ULLONG_MAX - ctx->count);
51 ctx->count += n; 52 ctx->count += n;
52 if (waitqueue_active(&ctx->wqh)) 53 if (waitqueue_active(&ctx->wqh))
53 wake_up_locked(&ctx->wqh); 54 wake_up_locked_poll(&ctx->wqh, POLLIN);
54 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 55 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
55 56
56 return n; 57 return n;
@@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
87{ 88{
88 struct eventfd_ctx *ctx = file->private_data; 89 struct eventfd_ctx *ctx = file->private_data;
89 ssize_t res; 90 ssize_t res;
90 __u64 ucnt; 91 __u64 ucnt = 0;
91 DECLARE_WAITQUEUE(wait, current); 92 DECLARE_WAITQUEUE(wait, current);
92 93
93 if (count < sizeof(ucnt)) 94 if (count < sizeof(ucnt))
94 return -EINVAL; 95 return -EINVAL;
95 spin_lock_irq(&ctx->wqh.lock); 96 spin_lock_irq(&ctx->wqh.lock);
96 res = -EAGAIN; 97 res = -EAGAIN;
97 ucnt = ctx->count; 98 if (ctx->count > 0)
98 if (ucnt > 0)
99 res = sizeof(ucnt); 99 res = sizeof(ucnt);
100 else if (!(file->f_flags & O_NONBLOCK)) { 100 else if (!(file->f_flags & O_NONBLOCK)) {
101 __add_wait_queue(&ctx->wqh, &wait); 101 __add_wait_queue(&ctx->wqh, &wait);
102 for (res = 0;;) { 102 for (res = 0;;) {
103 set_current_state(TASK_INTERRUPTIBLE); 103 set_current_state(TASK_INTERRUPTIBLE);
104 if (ctx->count > 0) { 104 if (ctx->count > 0) {
105 ucnt = ctx->count;
106 res = sizeof(ucnt); 105 res = sizeof(ucnt);
107 break; 106 break;
108 } 107 }
@@ -117,10 +116,11 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
117 __remove_wait_queue(&ctx->wqh, &wait); 116 __remove_wait_queue(&ctx->wqh, &wait);
118 __set_current_state(TASK_RUNNING); 117 __set_current_state(TASK_RUNNING);
119 } 118 }
120 if (res > 0) { 119 if (likely(res > 0)) {
121 ctx->count = 0; 120 ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
121 ctx->count -= ucnt;
122 if (waitqueue_active(&ctx->wqh)) 122 if (waitqueue_active(&ctx->wqh))
123 wake_up_locked(&ctx->wqh); 123 wake_up_locked_poll(&ctx->wqh, POLLOUT);
124 } 124 }
125 spin_unlock_irq(&ctx->wqh.lock); 125 spin_unlock_irq(&ctx->wqh.lock);
126 if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) 126 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
@@ -166,10 +166,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
166 __remove_wait_queue(&ctx->wqh, &wait); 166 __remove_wait_queue(&ctx->wqh, &wait);
167 __set_current_state(TASK_RUNNING); 167 __set_current_state(TASK_RUNNING);
168 } 168 }
169 if (res > 0) { 169 if (likely(res > 0)) {
170 ctx->count += ucnt; 170 ctx->count += ucnt;
171 if (waitqueue_active(&ctx->wqh)) 171 if (waitqueue_active(&ctx->wqh))
172 wake_up_locked(&ctx->wqh); 172 wake_up_locked_poll(&ctx->wqh, POLLIN);
173 } 173 }
174 spin_unlock_irq(&ctx->wqh.lock); 174 spin_unlock_irq(&ctx->wqh.lock);
175 175
@@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC); 207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK); 208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209 209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK)) 210 if (flags & ~EFD_FLAGS_SET)
211 return -EINVAL; 211 return -EINVAL;
212 212
213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
@@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
216 216
217 init_waitqueue_head(&ctx->wqh); 217 init_waitqueue_head(&ctx->wqh);
218 ctx->count = count; 218 ctx->count = count;
219 ctx->flags = flags;
219 220
220 /* 221 /*
221 * When we call this, the initialization must be complete, since 222 * When we call this, the initialization must be complete, since
222 * anon_inode_getfd() will install the fd. 223 * anon_inode_getfd() will install the fd.
223 */ 224 */
224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx, 225 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK)); 226 flags & EFD_SHARED_FCNTL_FLAGS);
226 if (fd < 0) 227 if (fd < 0)
227 kfree(ctx); 228 kfree(ctx);
228 return fd; 229 return fd;
@@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count)
232{ 233{
233 return sys_eventfd2(count, 0); 234 return sys_eventfd2(count, 0);
234} 235}
236
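
The eventfd changes introduce EFD_SEMAPHORE semantics: each read() now returns 1 and decrements the counter by 1, instead of returning the whole counter and resetting it to 0. A small userspace sketch of the new behaviour (EFD_SEMAPHORE's value comes from the include/linux/eventfd.h side of this patch, which is not shown in this section):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/eventfd.h>

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE 1
#endif

int main(void)
{
	uint64_t v;
	int fd = eventfd(3, EFD_SEMAPHORE);	/* counter starts at 3 */

	if (fd < 0) {
		perror("eventfd");
		return 1;
	}
	read(fd, &v, sizeof(v));	/* v == 1, counter is now 2 */
	printf("first read: %llu\n", (unsigned long long)v);
	read(fd, &v, sizeof(v));	/* v == 1, counter is now 1 */
	printf("second read: %llu\n", (unsigned long long)v);
	close(fd);
	return 0;
}

Without the flag, the first read() would have returned 3 and left the counter at zero, so the second read() would block (or fail with EAGAIN).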
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c5c424f23fd5..a89f370fadb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * fs/eventpoll.c (Efficent event polling implementation) 2 * fs/eventpoll.c (Efficient event retrieval implementation)
3 * Copyright (C) 2001,...,2007 Davide Libenzi 3 * Copyright (C) 2001,...,2009 Davide Libenzi
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -71,29 +71,11 @@
71 * a better scalability. 71 * a better scalability.
72 */ 72 */
73 73
74#define DEBUG_EPOLL 0
75
76#if DEBUG_EPOLL > 0
77#define DPRINTK(x) printk x
78#define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0)
79#else /* #if DEBUG_EPOLL > 0 */
80#define DPRINTK(x) (void) 0
81#define DNPRINTK(n, x) (void) 0
82#endif /* #if DEBUG_EPOLL > 0 */
83
84#define DEBUG_EPI 0
85
86#if DEBUG_EPI != 0
87#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */)
88#else /* #if DEBUG_EPI != 0 */
89#define EPI_SLAB_DEBUG 0
90#endif /* #if DEBUG_EPI != 0 */
91
92/* Epoll private bits inside the event mask */ 74/* Epoll private bits inside the event mask */
93#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) 75#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
94 76
95/* Maximum number of poll wake up nests we are allowing */ 77/* Maximum nesting level allowed inside epoll sets */
96#define EP_MAX_POLLWAKE_NESTS 4 78#define EP_MAX_NESTS 4
97 79
98/* Maximum msec timeout value storable in a long int */ 80/* Maximum msec timeout value storable in a long int */
99#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) 81#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
@@ -110,24 +92,21 @@ struct epoll_filefd {
110}; 92};
111 93
112/* 94/*
113 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". 95 * Structure used to track possible nested calls, to catch recursions
114 * It is used to keep track on all tasks that are currently inside the wake_up() code 96 * that are too deep and loop cycles.
115 * to 1) short-circuit the one coming from the same task and same wait queue head
116 * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting
117 * 3) let go the ones coming from other tasks.
118 */ 97 */
119struct wake_task_node { 98struct nested_call_node {
120 struct list_head llink; 99 struct list_head llink;
121 struct task_struct *task; 100 void *cookie;
122 wait_queue_head_t *wq; 101 int cpu;
123}; 102};
124 103
125/* 104/*
126 * This is used to implement the safe poll wake up avoiding to reenter 105 * This structure is used as a collector for nested calls, to check for
127 * the poll callback from inside wake_up(). 106 * the maximum recursion depth and loop cycles.
128 */ 107 */
129struct poll_safewake { 108struct nested_calls {
130 struct list_head wake_task_list; 109 struct list_head tasks_call_list;
131 spinlock_t lock; 110 spinlock_t lock;
132}; 111};
133 112
@@ -213,7 +192,7 @@ struct eppoll_entry {
213 struct list_head llink; 192 struct list_head llink;
214 193
215 /* The "base" pointer is set to the container "struct epitem" */ 194 /* The "base" pointer is set to the container "struct epitem" */
216 void *base; 195 struct epitem *base;
217 196
218 /* 197 /*
219 * Wait queue item that will be linked to the target file wait 198 * Wait queue item that will be linked to the target file wait
@@ -231,6 +210,12 @@ struct ep_pqueue {
231 struct epitem *epi; 210 struct epitem *epi;
232}; 211};
233 212
213/* Used by the ep_send_events() function as callback private data */
214struct ep_send_events_data {
215 int maxevents;
216 struct epoll_event __user *events;
217};
218
234/* 219/*
235 * Configuration options available inside /proc/sys/fs/epoll/ 220 * Configuration options available inside /proc/sys/fs/epoll/
236 */ 221 */
@@ -242,8 +227,11 @@ static int max_user_watches __read_mostly;
242 */ 227 */
243static DEFINE_MUTEX(epmutex); 228static DEFINE_MUTEX(epmutex);
244 229
245/* Safe wake up implementation */ 230/* Used for safe wake up implementation */
246static struct poll_safewake psw; 231static struct nested_calls poll_safewake_ncalls;
232
233/* Used to call file's f_op->poll() under the nested calls boundaries */
234static struct nested_calls poll_readywalk_ncalls;
247 235
248/* Slab cache used to allocate "struct epitem" */ 236/* Slab cache used to allocate "struct epitem" */
249static struct kmem_cache *epi_cache __read_mostly; 237static struct kmem_cache *epi_cache __read_mostly;
@@ -312,89 +300,230 @@ static inline int ep_op_has_event(int op)
312} 300}
313 301
314/* Initialize the poll safe wake up structure */ 302/* Initialize the poll safe wake up structure */
315static void ep_poll_safewake_init(struct poll_safewake *psw) 303static void ep_nested_calls_init(struct nested_calls *ncalls)
316{ 304{
317 305 INIT_LIST_HEAD(&ncalls->tasks_call_list);
318 INIT_LIST_HEAD(&psw->wake_task_list); 306 spin_lock_init(&ncalls->lock);
319 spin_lock_init(&psw->lock);
320} 307}
321 308
322/* 309/**
323 * Perform a safe wake up of the poll wait list. The problem is that 310 * ep_call_nested - Perform a bound (possibly) nested call, by checking
324 * with the new callback'd wake up system, it is possible that the 311 * that the recursion limit is not exceeded, and that
325 * poll callback is reentered from inside the call to wake_up() done 312 * the same nested call (identified by its cookie) is
326 * on the poll wait queue head. The rule is that we cannot reenter the 313 * not re-entered.
327 * wake up code from the same task more than EP_MAX_POLLWAKE_NESTS times, 314 *
328 * and we cannot reenter the same wait queue head at all. This will 315 * @ncalls: Pointer to the nested_calls structure to be used for this call.
329 * enable to have a hierarchy of epoll file descriptor of no more than 316 * @max_nests: Maximum number of allowed nesting calls.
330 * EP_MAX_POLLWAKE_NESTS deep. We need the irq version of the spin lock 317 * @nproc: Nested call core function pointer.
331 * because this one gets called by the poll callback, that in turn is called 318 * @priv: Opaque data to be passed to the @nproc callback.
332 * from inside a wake_up(), that might be called from irq context. 319 * @cookie: Cookie to be used to identify this nested call.
320 *
321 * Returns: The code returned by the @nproc callback, or -1 if
322 * the maximum recursion limit has been exceeded.
333 */ 323 */
334static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) 324static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
325 int (*nproc)(void *, void *, int), void *priv,
326 void *cookie)
335{ 327{
336 int wake_nests = 0; 328 int error, call_nests = 0;
337 unsigned long flags; 329 unsigned long flags;
338 struct task_struct *this_task = current; 330 int this_cpu = get_cpu();
339 struct list_head *lsthead = &psw->wake_task_list; 331 struct list_head *lsthead = &ncalls->tasks_call_list;
340 struct wake_task_node *tncur; 332 struct nested_call_node *tncur;
341 struct wake_task_node tnode; 333 struct nested_call_node tnode;
342 334
343 spin_lock_irqsave(&psw->lock, flags); 335 spin_lock_irqsave(&ncalls->lock, flags);
344 336
345 /* Try to see if the current task is already inside this wakeup call */ 337 /*
338 * Try to see if the current task is already inside this wakeup call.
339 * We use a list here, since the number of entries in this set is
340 * always very small.
341 */
346 list_for_each_entry(tncur, lsthead, llink) { 342 list_for_each_entry(tncur, lsthead, llink) {
347 343 if (tncur->cpu == this_cpu &&
348 if (tncur->wq == wq || 344 (tncur->cookie == cookie || ++call_nests > max_nests)) {
349 (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
350 /* 345 /*
351 * Oops ... loop detected or maximum nest level reached. 346 * Oops ... loop detected or maximum nest level reached.
352 * We abort this wake by breaking the cycle itself. 347 * We abort this wake by breaking the cycle itself.
353 */ 348 */
354 spin_unlock_irqrestore(&psw->lock, flags); 349 error = -1;
355 return; 350 goto out_unlock;
356 } 351 }
357 } 352 }
358 353
359 /* Add the current task to the list */ 354 /* Add the current task and cookie to the list */
360 tnode.task = this_task; 355 tnode.cpu = this_cpu;
361 tnode.wq = wq; 356 tnode.cookie = cookie;
362 list_add(&tnode.llink, lsthead); 357 list_add(&tnode.llink, lsthead);
363 358
364 spin_unlock_irqrestore(&psw->lock, flags); 359 spin_unlock_irqrestore(&ncalls->lock, flags);
365 360
366 /* Do really wake up now */ 361 /* Call the nested function */
367 wake_up_nested(wq, 1 + wake_nests); 362 error = (*nproc)(priv, cookie, call_nests);
368 363
369 /* Remove the current task from the list */ 364 /* Remove the current task from the list */
370 spin_lock_irqsave(&psw->lock, flags); 365 spin_lock_irqsave(&ncalls->lock, flags);
371 list_del(&tnode.llink); 366 list_del(&tnode.llink);
372 spin_unlock_irqrestore(&psw->lock, flags); 367 out_unlock:
368 spin_unlock_irqrestore(&ncalls->lock, flags);
369
370 put_cpu();
371 return error;
372}
373
374#ifdef CONFIG_DEBUG_LOCK_ALLOC
375static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
376 unsigned long events, int subclass)
377{
378 unsigned long flags;
379
380 spin_lock_irqsave_nested(&wqueue->lock, flags, subclass);
381 wake_up_locked_poll(wqueue, events);
382 spin_unlock_irqrestore(&wqueue->lock, flags);
383}
384#else
385static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
386 unsigned long events, int subclass)
387{
388 wake_up_poll(wqueue, events);
389}
390#endif
391
392static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
393{
394 ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
395 1 + call_nests);
396 return 0;
397}
398
399/*
400 * Perform a safe wake up of the poll wait list. The problem is that
401 * with the new callback'd wake up system, it is possible that the
402 * poll callback is reentered from inside the call to wake_up() done
403 * on the poll wait queue head. The rule is that we cannot reenter the
404 * wake up code from the same task more than EP_MAX_NESTS times,
405 * and we cannot reenter the same wait queue head at all. This makes
406 * it possible to have a hierarchy of epoll file descriptors no more
407 * than EP_MAX_NESTS deep.
408 */
409static void ep_poll_safewake(wait_queue_head_t *wq)
410{
411 ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
412 ep_poll_wakeup_proc, NULL, wq);
373} 413}
374 414
375/* 415/*
376 * This function unregister poll callbacks from the associated file descriptor. 416 * This function unregisters poll callbacks from the associated file
377 * Since this must be called without holding "ep->lock" the atomic exchange trick 417 * descriptor. Must be called with "mtx" held (or "epmutex" if called from
378 * will protect us from multiple unregister. 418 * ep_free).
379 */ 419 */
380static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) 420static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
381{ 421{
382 int nwait;
383 struct list_head *lsthead = &epi->pwqlist; 422 struct list_head *lsthead = &epi->pwqlist;
384 struct eppoll_entry *pwq; 423 struct eppoll_entry *pwq;
385 424
386 /* This is called without locks, so we need the atomic exchange */ 425 while (!list_empty(lsthead)) {
387 nwait = xchg(&epi->nwait, 0); 426 pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
388 427
389 if (nwait) { 428 list_del(&pwq->llink);
390 while (!list_empty(lsthead)) { 429 remove_wait_queue(pwq->whead, &pwq->wait);
391 pwq = list_first_entry(lsthead, struct eppoll_entry, llink); 430 kmem_cache_free(pwq_cache, pwq);
431 }
432}
392 433
393 list_del_init(&pwq->llink); 434/**
394 remove_wait_queue(pwq->whead, &pwq->wait); 435 * ep_scan_ready_list - Scans the ready list in a way that makes possible for
395 kmem_cache_free(pwq_cache, pwq); 436 * the scan code, to call f_op->poll(). Also allows for
396 } 437 * O(NumReady) performance.
438 *
439 * @ep: Pointer to the epoll private data structure.
440 * @sproc: Pointer to the scan callback.
441 * @priv: Private opaque data passed to the @sproc callback.
442 *
443 * Returns: The same integer error code returned by the @sproc callback.
444 */
445static int ep_scan_ready_list(struct eventpoll *ep,
446 int (*sproc)(struct eventpoll *,
447 struct list_head *, void *),
448 void *priv)
449{
450 int error, pwake = 0;
451 unsigned long flags;
452 struct epitem *epi, *nepi;
453 LIST_HEAD(txlist);
454
455 /*
456 * We need to lock this because we could be hit by
457 * eventpoll_release_file() and epoll_ctl().
458 */
459 mutex_lock(&ep->mtx);
460
461 /*
462 * Steal the ready list, and re-init the original one to the
463 * empty list. Also, set ep->ovflist to NULL so that events
464 * happening while looping without locks are not lost. We cannot
465 * let the poll callback queue directly on ep->rdllist,
466 * because we want the "sproc" callback to be able to do it
467 * in a lockless way.
468 */
469 spin_lock_irqsave(&ep->lock, flags);
470 list_splice_init(&ep->rdllist, &txlist);
471 ep->ovflist = NULL;
472 spin_unlock_irqrestore(&ep->lock, flags);
473
474 /*
475 * Now call the callback function.
476 */
477 error = (*sproc)(ep, &txlist, priv);
478
479 spin_lock_irqsave(&ep->lock, flags);
480 /*
481 * During the time we spent inside the "sproc" callback, some
482 * other events might have been queued by the poll callback.
483 * We re-insert them inside the main ready-list here.
484 */
485 for (nepi = ep->ovflist; (epi = nepi) != NULL;
486 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
487 /*
488 * We need to check if the item is already in the list.
489 * During the "sproc" callback execution time, items are
490 * queued into ->ovflist but the "txlist" might already
491 * contain them, and the list_splice() below takes care of them.
492 */
493 if (!ep_is_linked(&epi->rdllink))
494 list_add_tail(&epi->rdllink, &ep->rdllist);
495 }
496 /*
497 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
498 * releasing the lock, events will be queued in the normal way inside
499 * ep->rdllist.
500 */
501 ep->ovflist = EP_UNACTIVE_PTR;
502
503 /*
504 * Quickly re-inject items left on "txlist".
505 */
506 list_splice(&txlist, &ep->rdllist);
507
508 if (!list_empty(&ep->rdllist)) {
509 /*
510 * Wake up (if active) both the eventpoll wait list and
511 * the ->poll() wait list (delayed after we release the lock).
512 */
513 if (waitqueue_active(&ep->wq))
514 wake_up_locked(&ep->wq);
515 if (waitqueue_active(&ep->poll_wait))
516 pwake++;
397 } 517 }
518 spin_unlock_irqrestore(&ep->lock, flags);
519
520 mutex_unlock(&ep->mtx);
521
522 /* We have to call this outside the lock */
523 if (pwake)
524 ep_poll_safewake(&ep->poll_wait);
525
526 return error;
398} 527}
399 528
400/* 529/*
@@ -434,9 +563,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
434 563
435 atomic_dec(&ep->user->epoll_watches); 564 atomic_dec(&ep->user->epoll_watches);
436 565
437 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
438 current, ep, file));
439
440 return 0; 566 return 0;
441} 567}
442 568
@@ -447,7 +573,7 @@ static void ep_free(struct eventpoll *ep)
447 573
448 /* We need to release all tasks waiting for these file */ 574 /* We need to release all tasks waiting for these file */
449 if (waitqueue_active(&ep->poll_wait)) 575 if (waitqueue_active(&ep->poll_wait))
450 ep_poll_safewake(&psw, &ep->poll_wait); 576 ep_poll_safewake(&ep->poll_wait);
451 577
452 /* 578 /*
453 * We need to lock this because we could be hit by 579 * We need to lock this because we could be hit by
@@ -492,26 +618,54 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
492 if (ep) 618 if (ep)
493 ep_free(ep); 619 ep_free(ep);
494 620
495 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
496 return 0; 621 return 0;
497} 622}
498 623
624static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
625 void *priv)
626{
627 struct epitem *epi, *tmp;
628
629 list_for_each_entry_safe(epi, tmp, head, rdllink) {
630 if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
631 epi->event.events)
632 return POLLIN | POLLRDNORM;
633 else {
634 /*
635 * Item has been dropped into the ready list by the poll
636 * callback, but it's not actually ready, as far as the
637 * caller-requested events go. We can remove it here.
638 */
639 list_del_init(&epi->rdllink);
640 }
641 }
642
643 return 0;
644}
645
646static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
647{
648 return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
649}
650
499static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) 651static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
500{ 652{
501 unsigned int pollflags = 0; 653 int pollflags;
502 unsigned long flags;
503 struct eventpoll *ep = file->private_data; 654 struct eventpoll *ep = file->private_data;
504 655
505 /* Insert inside our poll wait queue */ 656 /* Insert inside our poll wait queue */
506 poll_wait(file, &ep->poll_wait, wait); 657 poll_wait(file, &ep->poll_wait, wait);
507 658
508 /* Check our condition */ 659 /*
509 spin_lock_irqsave(&ep->lock, flags); 660 * Proceed to find out if wanted events are really available inside
510 if (!list_empty(&ep->rdllist)) 661 * the ready list. This needs to be done under ep_call_nested()
511 pollflags = POLLIN | POLLRDNORM; 662 * supervision, since the call to f_op->poll() done on listed files
512 spin_unlock_irqrestore(&ep->lock, flags); 663 * could re-enter here.
664 */
665 pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
666 ep_poll_readyevents_proc, ep, ep);
513 667
514 return pollflags; 668 return pollflags != -1 ? pollflags : 0;
515} 669}
516 670
517/* File callbacks that implement the eventpoll file behaviour */ 671/* File callbacks that implement the eventpoll file behaviour */
@@ -541,7 +695,7 @@ void eventpoll_release_file(struct file *file)
541 * We don't want to get "file->f_lock" because it is not 695 * We don't want to get "file->f_lock" because it is not
542 * necessary. It is not necessary because we're in the "struct file" 696 * necessary. It is not necessary because we're in the "struct file"
543 * cleanup path, and this means that no one is using this file anymore. 697 * cleanup path, and this means that no one is using this file anymore.
544 * So, for example, epoll_ctl() cannot hit here sicne if we reach this 698 * So, for example, epoll_ctl() cannot hit here since if we reach this
545 * point, the file counter already went to zero and fget() would fail. 699 * point, the file counter already went to zero and fget() would fail.
546 * The only hit might come from ep_free() but by holding the mutex 700 * The only hit might come from ep_free() but by holding the mutex
547 * will correctly serialize the operation. We do need to acquire 701 * will correctly serialize the operation. We do need to acquire
@@ -588,8 +742,6 @@ static int ep_alloc(struct eventpoll **pep)
588 742
589 *pep = ep; 743 *pep = ep;
590 744
591 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
592 current, ep));
593 return 0; 745 return 0;
594 746
595free_uid: 747free_uid:
@@ -623,9 +775,6 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
623 } 775 }
624 } 776 }
625 777
626 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
627 current, file, epir));
628
629 return epir; 778 return epir;
630} 779}
631 780
@@ -641,9 +790,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
641 struct epitem *epi = ep_item_from_wait(wait); 790 struct epitem *epi = ep_item_from_wait(wait);
642 struct eventpoll *ep = epi->ep; 791 struct eventpoll *ep = epi->ep;
643 792
644 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
645 current, epi->ffd.file, epi, ep));
646
647 spin_lock_irqsave(&ep->lock, flags); 793 spin_lock_irqsave(&ep->lock, flags);
648 794
649 /* 795 /*
@@ -656,6 +802,15 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
656 goto out_unlock; 802 goto out_unlock;
657 803
658 /* 804 /*
805 * Check the events coming with the callback. At this stage, not
806 * every device reports the events in the "key" parameter of the
807 * callback. We need to be able to handle both cases here, hence the
808 * test for "key" != NULL before the event match test.
809 */
810 if (key && !((unsigned long) key & epi->event.events))
811 goto out_unlock;
812
813 /*
659 * If we are transferring events to userspace, we can hold no locks 814 * If we are transferring events to userspace, we can hold no locks
660 * (because we're accessing user memory, and because of linux f_op->poll() 815 * (because we're accessing user memory, and because of linux f_op->poll()
661 * semantics). All the events that happen during that period of time are 816 * semantics). All the events that happen during that period of time are
@@ -670,12 +825,9 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
670 } 825 }
671 826
672 /* If this file is already in the ready list we exit soon */ 827 /* If this file is already in the ready list we exit soon */
673 if (ep_is_linked(&epi->rdllink)) 828 if (!ep_is_linked(&epi->rdllink))
674 goto is_linked; 829 list_add_tail(&epi->rdllink, &ep->rdllist);
675
676 list_add_tail(&epi->rdllink, &ep->rdllist);
677 830
678is_linked:
679 /* 831 /*
680 * Wake up ( if active ) both the eventpoll wait list and the ->poll() 832 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
681 * wait list. 833 * wait list.
@@ -690,7 +842,7 @@ out_unlock:
690 842
691 /* We have to call this outside the lock */ 843 /* We have to call this outside the lock */
692 if (pwake) 844 if (pwake)
693 ep_poll_safewake(&psw, &ep->poll_wait); 845 ep_poll_safewake(&ep->poll_wait);
694 846
695 return 1; 847 return 1;
696} 848}
@@ -817,10 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
817 969
818 /* We have to call this outside the lock */ 970 /* We have to call this outside the lock */
819 if (pwake) 971 if (pwake)
820 ep_poll_safewake(&psw, &ep->poll_wait); 972 ep_poll_safewake(&ep->poll_wait);
821
822 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
823 current, ep, tfile, fd));
824 973
825 return 0; 974 return 0;
826 975
@@ -851,15 +1000,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
851{ 1000{
852 int pwake = 0; 1001 int pwake = 0;
853 unsigned int revents; 1002 unsigned int revents;
854 unsigned long flags;
855 1003
856 /* 1004 /*
857 * Set the new event interest mask before calling f_op->poll(), otherwise 1005 * Set the new event interest mask before calling f_op->poll();
858 * a potential race might occur. In fact if we do this operation inside 1006 * otherwise we might miss an event that happens between the
859 * the lock, an event might happen between the f_op->poll() call and the 1007 * f_op->poll() call and the new event set registering.
860 * new event set registering.
861 */ 1008 */
862 epi->event.events = event->events; 1009 epi->event.events = event->events;
1010 epi->event.data = event->data; /* protected by mtx */
863 1011
864 /* 1012 /*
865 * Get current event bits. We can safely use the file* here because 1013 * Get current event bits. We can safely use the file* here because
@@ -867,16 +1015,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
867 */ 1015 */
868 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 1016 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
869 1017
870 spin_lock_irqsave(&ep->lock, flags);
871
872 /* Copy the data member from inside the lock */
873 epi->event.data = event->data;
874
875 /* 1018 /*
876 * If the item is "hot" and it is not registered inside the ready 1019 * If the item is "hot" and it is not registered inside the ready
877 * list, push it inside. 1020 * list, push it inside.
878 */ 1021 */
879 if (revents & event->events) { 1022 if (revents & event->events) {
1023 spin_lock_irq(&ep->lock);
880 if (!ep_is_linked(&epi->rdllink)) { 1024 if (!ep_is_linked(&epi->rdllink)) {
881 list_add_tail(&epi->rdllink, &ep->rdllist); 1025 list_add_tail(&epi->rdllink, &ep->rdllist);
882 1026
@@ -886,142 +1030,84 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
886 if (waitqueue_active(&ep->poll_wait)) 1030 if (waitqueue_active(&ep->poll_wait))
887 pwake++; 1031 pwake++;
888 } 1032 }
1033 spin_unlock_irq(&ep->lock);
889 } 1034 }
890 spin_unlock_irqrestore(&ep->lock, flags);
891 1035
892 /* We have to call this outside the lock */ 1036 /* We have to call this outside the lock */
893 if (pwake) 1037 if (pwake)
894 ep_poll_safewake(&psw, &ep->poll_wait); 1038 ep_poll_safewake(&ep->poll_wait);
895 1039
896 return 0; 1040 return 0;
897} 1041}
898 1042
899static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, 1043static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
900 int maxevents) 1044 void *priv)
901{ 1045{
902 int eventcnt, error = -EFAULT, pwake = 0; 1046 struct ep_send_events_data *esed = priv;
1047 int eventcnt;
903 unsigned int revents; 1048 unsigned int revents;
904 unsigned long flags; 1049 struct epitem *epi;
905 struct epitem *epi, *nepi; 1050 struct epoll_event __user *uevent;
906 struct list_head txlist;
907
908 INIT_LIST_HEAD(&txlist);
909
910 /*
911 * We need to lock this because we could be hit by
912 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
913 */
914 mutex_lock(&ep->mtx);
915
916 /*
917 * Steal the ready list, and re-init the original one to the
918 * empty list. Also, set ep->ovflist to NULL so that events
919 * happening while looping w/out locks, are not lost. We cannot
920 * have the poll callback to queue directly on ep->rdllist,
921 * because we are doing it in the loop below, in a lockless way.
922 */
923 spin_lock_irqsave(&ep->lock, flags);
924 list_splice(&ep->rdllist, &txlist);
925 INIT_LIST_HEAD(&ep->rdllist);
926 ep->ovflist = NULL;
927 spin_unlock_irqrestore(&ep->lock, flags);
928 1051
929 /* 1052 /*
930 * We can loop without lock because this is a task private list. 1053 * We can loop without lock because we are passed a task private list.
931 * We just splice'd out the ep->rdllist in ep_collect_ready_items(). 1054 * Items cannot vanish during the loop because ep_scan_ready_list() is
932 * Items cannot vanish during the loop because we are holding "mtx". 1055 * holding "mtx" during this call.
933 */ 1056 */
934 for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { 1057 for (eventcnt = 0, uevent = esed->events;
935 epi = list_first_entry(&txlist, struct epitem, rdllink); 1058 !list_empty(head) && eventcnt < esed->maxevents;) {
1059 epi = list_first_entry(head, struct epitem, rdllink);
936 1060
937 list_del_init(&epi->rdllink); 1061 list_del_init(&epi->rdllink);
938 1062
939 /* 1063 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
940 * Get the ready file event set. We can safely use the file 1064 epi->event.events;
941 * because we are holding the "mtx" and this will guarantee
942 * that both the file and the item will not vanish.
943 */
944 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
945 revents &= epi->event.events;
946 1065
947 /* 1066 /*
948 * Is the event mask intersect the caller-requested one, 1067 * If the event mask intersects the caller-requested one,
949 * deliver the event to userspace. Again, we are holding 1068 * deliver the event to userspace. Again, ep_scan_ready_list()
950 * "mtx", so no operations coming from userspace can change 1069 * is holding "mtx", so no operations coming from userspace
951 * the item. 1070 * can change the item.
952 */ 1071 */
953 if (revents) { 1072 if (revents) {
954 if (__put_user(revents, 1073 if (__put_user(revents, &uevent->events) ||
955 &events[eventcnt].events) || 1074 __put_user(epi->event.data, &uevent->data)) {
956 __put_user(epi->event.data, 1075 list_add(&epi->rdllink, head);
957 &events[eventcnt].data)) 1076 return eventcnt ? eventcnt : -EFAULT;
958 goto errxit; 1077 }
1078 eventcnt++;
1079 uevent++;
959 if (epi->event.events & EPOLLONESHOT) 1080 if (epi->event.events & EPOLLONESHOT)
960 epi->event.events &= EP_PRIVATE_BITS; 1081 epi->event.events &= EP_PRIVATE_BITS;
961 eventcnt++; 1082 else if (!(epi->event.events & EPOLLET)) {
1083 /*
1084 * If this file has been added in Level-
1085 * Triggered mode, we need to insert it back
1086 * into the ready list, so that the next call to
1087 * epoll_wait() will check the events'
1088 * availability again. At this point, no one can
1089 * insert into ep->rdllist besides us. The epoll_ctl()
1090 * callers are locked out by
1091 * ep_scan_ready_list() holding "mtx" and the
1092 * poll callback will queue them in ep->ovflist.
1093 */
1094 list_add_tail(&epi->rdllink, &ep->rdllist);
1095 }
962 } 1096 }
963 /*
964 * At this point, noone can insert into ep->rdllist besides
965 * us. The epoll_ctl() callers are locked out by us holding
966 * "mtx" and the poll callback will queue them in ep->ovflist.
967 */
968 if (!(epi->event.events & EPOLLET) &&
969 (revents & epi->event.events))
970 list_add_tail(&epi->rdllink, &ep->rdllist);
971 }
972 error = 0;
973
974errxit:
975
976 spin_lock_irqsave(&ep->lock, flags);
977 /*
978 * During the time we spent in the loop above, some other events
979 * might have been queued by the poll callback. We re-insert them
980 * inside the main ready-list here.
981 */
982 for (nepi = ep->ovflist; (epi = nepi) != NULL;
983 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
984 /*
985 * If the above loop quit with errors, the epoll item might still
986 * be linked to "txlist", and the list_splice() done below will
987 * take care of those cases.
988 */
989 if (!ep_is_linked(&epi->rdllink))
990 list_add_tail(&epi->rdllink, &ep->rdllist);
991 } 1097 }
992 /*
993 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
994 * releasing the lock, events will be queued in the normal way inside
995 * ep->rdllist.
996 */
997 ep->ovflist = EP_UNACTIVE_PTR;
998 1098
999 /* 1099 return eventcnt;
1000 * In case of error in the event-send loop, or in case the number of 1100}
1001 * ready events exceeds the userspace limit, we need to splice the
1002 * "txlist" back inside ep->rdllist.
1003 */
1004 list_splice(&txlist, &ep->rdllist);
1005
1006 if (!list_empty(&ep->rdllist)) {
1007 /*
1008 * Wake up (if active) both the eventpoll wait list and the ->poll()
1009 * wait list (delayed after we release the lock).
1010 */
1011 if (waitqueue_active(&ep->wq))
1012 wake_up_locked(&ep->wq);
1013 if (waitqueue_active(&ep->poll_wait))
1014 pwake++;
1015 }
1016 spin_unlock_irqrestore(&ep->lock, flags);
1017 1101
1018 mutex_unlock(&ep->mtx); 1102static int ep_send_events(struct eventpoll *ep,
1103 struct epoll_event __user *events, int maxevents)
1104{
1105 struct ep_send_events_data esed;
1019 1106
1020 /* We have to call this outside the lock */ 1107 esed.maxevents = maxevents;
1021 if (pwake) 1108 esed.events = events;
1022 ep_poll_safewake(&psw, &ep->poll_wait);
1023 1109
1024 return eventcnt == 0 ? error: eventcnt; 1110 return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
1025} 1111}
1026 1112
1027static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1113static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
@@ -1033,7 +1119,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1033 wait_queue_t wait; 1119 wait_queue_t wait;
1034 1120
1035 /* 1121 /*
1036 * Calculate the timeout by checking for the "infinite" value ( -1 ) 1122 * Calculate the timeout by checking for the "infinite" value (-1)
1037 * and the overflow condition. The passed timeout is in milliseconds, 1123 * and the overflow condition. The passed timeout is in milliseconds,
1038 * hence the (t * HZ) / 1000. 1124 * hence the (t * HZ) / 1000.
1039 */ 1125 */
@@ -1076,9 +1162,8 @@ retry:
1076 1162
1077 set_current_state(TASK_RUNNING); 1163 set_current_state(TASK_RUNNING);
1078 } 1164 }
1079
1080 /* Is it worth trying to dig for events? */ 1165 /* Is it worth trying to dig for events? */
1081 eavail = !list_empty(&ep->rdllist); 1166 eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
1082 1167
1083 spin_unlock_irqrestore(&ep->lock, flags); 1168 spin_unlock_irqrestore(&ep->lock, flags);
1084 1169
@@ -1099,41 +1184,30 @@ retry:
1099 */ 1184 */
1100SYSCALL_DEFINE1(epoll_create1, int, flags) 1185SYSCALL_DEFINE1(epoll_create1, int, flags)
1101{ 1186{
1102 int error, fd = -1; 1187 int error;
1103 struct eventpoll *ep; 1188 struct eventpoll *ep = NULL;
1104 1189
1105 /* Check the EPOLL_* constant for consistency. */ 1190 /* Check the EPOLL_* constant for consistency. */
1106 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); 1191 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1107 1192
1108 if (flags & ~EPOLL_CLOEXEC) 1193 if (flags & ~EPOLL_CLOEXEC)
1109 return -EINVAL; 1194 return -EINVAL;
1110
1111 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1112 current, flags));
1113
1114 /* 1195 /*
1115 * Create the internal data structure ( "struct eventpoll" ). 1196 * Create the internal data structure ("struct eventpoll").
1116 */ 1197 */
1117 error = ep_alloc(&ep); 1198 error = ep_alloc(&ep);
1118 if (error < 0) { 1199 if (error < 0)
1119 fd = error; 1200 return error;
1120 goto error_return;
1121 }
1122
1123 /* 1201 /*
1124 * Creates all the items needed to set up an eventpoll file. That is, 1202 * Creates all the items needed to set up an eventpoll file. That is,
1125 * a file structure and a free file descriptor. 1203 * a file structure and a free file descriptor.
1126 */ 1204 */
1127 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep, 1205 error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1128 flags & O_CLOEXEC); 1206 flags & O_CLOEXEC);
1129 if (fd < 0) 1207 if (error < 0)
1130 ep_free(ep); 1208 ep_free(ep);
1131 1209
1132error_return: 1210 return error;
1133 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1134 current, flags, fd));
1135
1136 return fd;
1137} 1211}
1138 1212
1139SYSCALL_DEFINE1(epoll_create, int, size) 1213SYSCALL_DEFINE1(epoll_create, int, size)
@@ -1158,9 +1232,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1158 struct epitem *epi; 1232 struct epitem *epi;
1159 struct epoll_event epds; 1233 struct epoll_event epds;
1160 1234
1161 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n",
1162 current, epfd, op, fd, event));
1163
1164 error = -EFAULT; 1235 error = -EFAULT;
1165 if (ep_op_has_event(op) && 1236 if (ep_op_has_event(op) &&
1166 copy_from_user(&epds, event, sizeof(struct epoll_event))) 1237 copy_from_user(&epds, event, sizeof(struct epoll_event)))
@@ -1211,7 +1282,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1211 case EPOLL_CTL_ADD: 1282 case EPOLL_CTL_ADD:
1212 if (!epi) { 1283 if (!epi) {
1213 epds.events |= POLLERR | POLLHUP; 1284 epds.events |= POLLERR | POLLHUP;
1214
1215 error = ep_insert(ep, &epds, tfile, fd); 1285 error = ep_insert(ep, &epds, tfile, fd);
1216 } else 1286 } else
1217 error = -EEXIST; 1287 error = -EEXIST;
@@ -1237,8 +1307,6 @@ error_tgt_fput:
1237error_fput: 1307error_fput:
1238 fput(file); 1308 fput(file);
1239error_return: 1309error_return:
1240 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n",
1241 current, epfd, op, fd, event, error));
1242 1310
1243 return error; 1311 return error;
1244} 1312}
@@ -1254,9 +1322,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1254 struct file *file; 1322 struct file *file;
1255 struct eventpoll *ep; 1323 struct eventpoll *ep;
1256 1324
1257 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
1258 current, epfd, events, maxevents, timeout));
1259
1260 /* The maximum number of events must be greater than zero */ 1325 /* The maximum number of events must be greater than zero */
1261 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) 1326 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
1262 return -EINVAL; 1327 return -EINVAL;
@@ -1293,8 +1358,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1293error_fput: 1358error_fput:
1294 fput(file); 1359 fput(file);
1295error_return: 1360error_return:
1296 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
1297 current, epfd, events, maxevents, timeout, error));
1298 1361
1299 return error; 1362 return error;
1300} 1363}
@@ -1359,17 +1422,18 @@ static int __init eventpoll_init(void)
1359 EP_ITEM_COST; 1422 EP_ITEM_COST;
1360 1423
1361 /* Initialize the structure used to perform safe poll wait head wake ups */ 1424 /* Initialize the structure used to perform safe poll wait head wake ups */
1362 ep_poll_safewake_init(&psw); 1425 ep_nested_calls_init(&poll_safewake_ncalls);
1426
1427 /* Initialize the structure used to perform file's f_op->poll() calls */
1428 ep_nested_calls_init(&poll_readywalk_ncalls);
1363 1429
1364 /* Allocates slab cache used to allocate "struct epitem" items */ 1430 /* Allocates slab cache used to allocate "struct epitem" items */
1365 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 1431 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1366 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, 1432 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1367 NULL);
1368 1433
1369 /* Allocates slab cache used to allocate "struct eppoll_entry" */ 1434 /* Allocates slab cache used to allocate "struct eppoll_entry" */
1370 pwq_cache = kmem_cache_create("eventpoll_pwq", 1435 pwq_cache = kmem_cache_create("eventpoll_pwq",
1371 sizeof(struct eppoll_entry), 0, 1436 sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
1372 EPI_SLAB_DEBUG|SLAB_PANIC, NULL);
1373 1437
1374 return 0; 1438 return 0;
1375} 1439}
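
One user-visible property of the rewritten send path is worth spelling out: ep_send_events_proc() re-inserts level-triggered items into the ready list after delivering them, so a still-ready descriptor fires on every epoll_wait() call, while an EPOLLET item fires once per edge. A small userspace sketch of that behaviour (nothing here is kernel API, it just exercises the code above):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN }, out;
	int ep = epoll_create1(0);
	int efd = eventfd(1, 0);	/* counter non-zero: always readable */
	int i;

	epoll_ctl(ep, EPOLL_CTL_ADD, efd, &ev);
	for (i = 0; i < 2; i++) {
		int n = epoll_wait(ep, &out, 1, 0);
		/*
		 * Prints "ready" both times: the level-triggered item is
		 * put back on ep->rdllist after being handed to userspace.
		 */
		printf("pass %d: %s\n", i, n == 1 ? "ready" : "idle");
	}
	close(efd);
	close(ep);
	return 0;
}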
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 38f40d55899c..53c72ad85877 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -55,7 +55,8 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
55} 55}
56 56
57static int ext4_group_used_meta_blocks(struct super_block *sb, 57static int ext4_group_used_meta_blocks(struct super_block *sb,
58 ext4_group_t block_group) 58 ext4_group_t block_group,
59 struct ext4_group_desc *gdp)
59{ 60{
60 ext4_fsblk_t tmp; 61 ext4_fsblk_t tmp;
61 struct ext4_sb_info *sbi = EXT4_SB(sb); 62 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -63,10 +64,6 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
63 int used_blocks = sbi->s_itb_per_group + 2; 64 int used_blocks = sbi->s_itb_per_group + 2;
64 65
65 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 66 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
66 struct ext4_group_desc *gdp;
67 struct buffer_head *bh;
68
69 gdp = ext4_get_group_desc(sb, block_group, &bh);
70 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 67 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
71 block_group)) 68 block_group))
72 used_blocks--; 69 used_blocks--;
@@ -177,7 +174,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
177 */ 174 */
178 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 175 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
179 } 176 }
180 return free_blocks - ext4_group_used_meta_blocks(sb, block_group); 177 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
181} 178}
182 179
183 180
@@ -473,9 +470,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
473 470
474 if (sbi->s_log_groups_per_flex) { 471 if (sbi->s_log_groups_per_flex) {
475 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 472 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
476 spin_lock(sb_bgl_lock(sbi, flex_group)); 473 atomic_add(blocks_freed,
477 sbi->s_flex_groups[flex_group].free_blocks += blocks_freed; 474 &sbi->s_flex_groups[flex_group].free_blocks);
478 spin_unlock(sb_bgl_lock(sbi, flex_group));
479 } 475 }
480 /* 476 /*
481 * request to reload the buddy with the 477 * request to reload the buddy with the
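
The removed sb_bgl_lock() pair is possible because this patch turns the flex_groups counters into atomic_t (see the ext4.h hunk further down). Illustrative only, the reader side of the same counters:

/*
 * With atomic_t fields, readers snapshot each counter without taking
 * sb_bgl_lock(); the three values may be mutually stale, which is
 * acceptable for allocation heuristics.
 */
ext4_group_t g = ext4_flex_group(sbi, block_group);
int free_blocks = atomic_read(&sbi->s_flex_groups[g].free_blocks);
int free_inodes = atomic_read(&sbi->s_flex_groups[g].free_inodes);
int used_dirs   = atomic_read(&sbi->s_flex_groups[g].used_dirs);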
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2df2e40b01af..b64789929a65 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -67,7 +67,8 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
67 unsigned int offset) 67 unsigned int offset)
68{ 68{
69 const char *error_msg = NULL; 69 const char *error_msg = NULL;
70 const int rlen = ext4_rec_len_from_disk(de->rec_len); 70 const int rlen = ext4_rec_len_from_disk(de->rec_len,
71 dir->i_sb->s_blocksize);
71 72
72 if (rlen < EXT4_DIR_REC_LEN(1)) 73 if (rlen < EXT4_DIR_REC_LEN(1))
73 error_msg = "rec_len is smaller than minimal"; 74 error_msg = "rec_len is smaller than minimal";
@@ -178,10 +179,11 @@ revalidate:
178 * least that it is non-zero. A 179 * least that it is non-zero. A
179 * failure will be detected in the 180 * failure will be detected in the
180 * dirent test below. */ 181 * dirent test below. */
181 if (ext4_rec_len_from_disk(de->rec_len) 182 if (ext4_rec_len_from_disk(de->rec_len,
182 < EXT4_DIR_REC_LEN(1)) 183 sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
183 break; 184 break;
184 i += ext4_rec_len_from_disk(de->rec_len); 185 i += ext4_rec_len_from_disk(de->rec_len,
186 sb->s_blocksize);
185 } 187 }
186 offset = i; 188 offset = i;
187 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) 189 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@@ -203,7 +205,8 @@ revalidate:
203 ret = stored; 205 ret = stored;
204 goto out; 206 goto out;
205 } 207 }
206 offset += ext4_rec_len_from_disk(de->rec_len); 208 offset += ext4_rec_len_from_disk(de->rec_len,
209 sb->s_blocksize);
207 if (le32_to_cpu(de->inode)) { 210 if (le32_to_cpu(de->inode)) {
208 /* We might block in the next section 211 /* We might block in the next section
209 * if the data destination is 212 * if the data destination is
@@ -225,7 +228,8 @@ revalidate:
225 goto revalidate; 228 goto revalidate;
226 stored++; 229 stored++;
227 } 230 }
228 filp->f_pos += ext4_rec_len_from_disk(de->rec_len); 231 filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
232 sb->s_blocksize);
229 } 233 }
230 offset = 0; 234 offset = 0;
231 brelse(bh); 235 brelse(bh);
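
Each ext4_rec_len_from_disk() call above now passes the block size because a directory record spanning a whole 64KB block cannot be represented directly in the 16-bit rec_len field. A hedged sketch of the relocated helper (the namei.c side sits outside this section, and the real encoding may carry additional bits):

unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{
	unsigned len = le16_to_cpu(dlen);

	/*
	 * On disk, 0 and EXT4_MAX_REC_LEN both stand for "the whole
	 * block", whatever the block size happens to be.
	 */
	if (len == EXT4_MAX_REC_LEN || len == 0)
		return blocksize;
	return len;
}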
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6083bb38057b..d0f15ef56de1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -33,14 +33,6 @@
33#undef EXT4FS_DEBUG 33#undef EXT4FS_DEBUG
34 34
35/* 35/*
36 * Define EXT4_RESERVATION to reserve data blocks for expanding files
37 */
38#define EXT4_DEFAULT_RESERVE_BLOCKS 8
39/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
40#define EXT4_MAX_RESERVE_BLOCKS 1027
41#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
42
43/*
44 * Debug code 36 * Debug code
45 */ 37 */
46#ifdef EXT4FS_DEBUG 38#ifdef EXT4FS_DEBUG
@@ -54,8 +46,6 @@
54#define ext4_debug(f, a...) do {} while (0) 46#define ext4_debug(f, a...) do {} while (0)
55#endif 47#endif
56 48
57#define EXT4_MULTIBLOCK_ALLOCATOR 1
58
59/* prefer goal again. length */ 49/* prefer goal again. length */
60#define EXT4_MB_HINT_MERGE 1 50#define EXT4_MB_HINT_MERGE 1
61/* blocks already reserved */ 51/* blocks already reserved */
@@ -180,8 +170,9 @@ struct ext4_group_desc
180 */ 170 */
181 171
182struct flex_groups { 172struct flex_groups {
183 __u32 free_inodes; 173 atomic_t free_inodes;
184 __u32 free_blocks; 174 atomic_t free_blocks;
175 atomic_t used_dirs;
185}; 176};
186 177
187#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ 178#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
@@ -249,6 +240,30 @@ struct flex_groups {
249#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 240#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
250#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ 241#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
251 242
243/* Flags that should be inherited by new inodes from their parent. */
244#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
245 EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
246 EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
247 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
248 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
249
250/* Flags that are appropriate for regular files (all but dir-specific ones). */
251#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
252
253/* Flags appropriate for inodes that are neither directories nor regular files. */
254#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
255
256/* Mask out flags that are inappropriate for the given type of inode. */
257static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
258{
259 if (S_ISDIR(mode))
260 return flags;
261 else if (S_ISREG(mode))
262 return flags & EXT4_REG_FLMASK;
263 else
264 return flags & EXT4_OTHER_FLMASK;
265}
266
252/* 267/*
253 * Inode dynamic state flags 268 * Inode dynamic state flags
254 */ 269 */
@@ -256,6 +271,7 @@ struct flex_groups {
256#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ 271#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
257#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ 272#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
258#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ 273#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
274#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
259 275
260/* Used to pass group descriptor data when online resize is done */ 276/* Used to pass group descriptor data when online resize is done */
261struct ext4_new_group_input { 277struct ext4_new_group_input {
@@ -303,7 +319,9 @@ struct ext4_new_group_data {
303#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) 319#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
304#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) 320#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
305#define EXT4_IOC_MIGRATE _IO('f', 9) 321#define EXT4_IOC_MIGRATE _IO('f', 9)
322 /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
306 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 323 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
324#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
307 325
308/* 326/*
309 * ioctl commands in 32 bit emulation 327 * ioctl commands in 32 bit emulation
@@ -531,7 +549,7 @@ do { \
531#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ 549#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
532#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ 550#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
533#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ 551#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
534#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ 552#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
535#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ 553#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
536#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ 554#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
537#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 555#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
@@ -666,7 +684,8 @@ struct ext4_super_block {
666 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 684 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
667 __u8 s_reserved_char_pad2; 685 __u8 s_reserved_char_pad2;
668 __le16 s_reserved_pad; 686 __le16 s_reserved_pad;
669 __u32 s_reserved[162]; /* Padding to the end of the block */ 687 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
688 __u32 s_reserved[160]; /* Padding to the end of the block */
670}; 689};
671 690
672#ifdef __KERNEL__ 691#ifdef __KERNEL__
@@ -814,6 +833,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
814#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ 833#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
815 834
816/* 835/*
836 * Minimum number of groups in a flexgroup before we separate out
837 * directories into the first block group of a flexgroup
838 */
839#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
840
841/*
817 * Structure of a directory entry 842 * Structure of a directory entry
818 */ 843 */
819#define EXT4_NAME_LEN 255 844#define EXT4_NAME_LEN 255
@@ -865,24 +890,6 @@ struct ext4_dir_entry_2 {
865 ~EXT4_DIR_ROUND) 890 ~EXT4_DIR_ROUND)
866#define EXT4_MAX_REC_LEN ((1<<16)-1) 891#define EXT4_MAX_REC_LEN ((1<<16)-1)
867 892
868static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
869{
870 unsigned len = le16_to_cpu(dlen);
871
872 if (len == EXT4_MAX_REC_LEN || len == 0)
873 return 1 << 16;
874 return len;
875}
876
877static inline __le16 ext4_rec_len_to_disk(unsigned len)
878{
879 if (len == (1 << 16))
880 return cpu_to_le16(EXT4_MAX_REC_LEN);
881 else if (len > (1 << 16))
882 BUG();
883 return cpu_to_le16(len);
884}
885
886/* 893/*
887 * Hash Tree Directory indexing 894 * Hash Tree Directory indexing
888 * (c) Daniel Phillips, 2001 895 * (c) Daniel Phillips, 2001
@@ -970,22 +977,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
970 977
971extern struct proc_dir_entry *ext4_proc_root; 978extern struct proc_dir_entry *ext4_proc_root;
972 979
973#ifdef CONFIG_PROC_FS
974extern const struct file_operations ext4_ui_proc_fops;
975
976#define EXT4_PROC_HANDLER(name, var) \
977do { \
978 proc = proc_create_data(name, mode, sbi->s_proc, \
979 &ext4_ui_proc_fops, &sbi->s_##var); \
980 if (proc == NULL) { \
981 printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
982 goto err_out; \
983 } \
984} while (0)
985#else
986#define EXT4_PROC_HANDLER(name, var)
987#endif
988
989/* 980/*
990 * Function prototypes 981 * Function prototypes
991 */ 982 */
@@ -1092,13 +1083,14 @@ extern int ext4_can_truncate(struct inode *inode);
1092extern void ext4_truncate(struct inode *); 1083extern void ext4_truncate(struct inode *);
1093extern void ext4_set_inode_flags(struct inode *); 1084extern void ext4_set_inode_flags(struct inode *);
1094extern void ext4_get_inode_flags(struct ext4_inode_info *); 1085extern void ext4_get_inode_flags(struct ext4_inode_info *);
1086extern int ext4_alloc_da_blocks(struct inode *inode);
1095extern void ext4_set_aops(struct inode *inode); 1087extern void ext4_set_aops(struct inode *inode);
1096extern int ext4_writepage_trans_blocks(struct inode *); 1088extern int ext4_writepage_trans_blocks(struct inode *);
1097extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); 1089extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1098extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1090extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1099extern int ext4_block_truncate_page(handle_t *handle, 1091extern int ext4_block_truncate_page(handle_t *handle,
1100 struct address_space *mapping, loff_t from); 1092 struct address_space *mapping, loff_t from);
1101extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); 1093extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1102extern qsize_t ext4_get_reserved_space(struct inode *inode); 1094extern qsize_t ext4_get_reserved_space(struct inode *inode);
1103 1095
1104/* ioctl.c */ 1096/* ioctl.c */
@@ -1107,7 +1099,10 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
1107 1099
1108/* migrate.c */ 1100/* migrate.c */
1109extern int ext4_ext_migrate(struct inode *); 1101extern int ext4_ext_migrate(struct inode *);
1102
1110/* namei.c */ 1103/* namei.c */
1104extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
1105extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
1111extern int ext4_orphan_add(handle_t *, struct inode *); 1106extern int ext4_orphan_add(handle_t *, struct inode *);
1112extern int ext4_orphan_del(handle_t *, struct inode *); 1107extern int ext4_orphan_del(handle_t *, struct inode *);
1113extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, 1108extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
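
Aside on the relocated rec_len helpers: the inlines removed above encode the one value a 16-bit on-disk field cannot hold, a directory entry spanning a whole 64KB block. Below is a minimal userspace sketch of that wraparound, mirroring the removed inlines; the __le16 conversions and the blocksize parameter of the new namei.c prototypes are elided, so treat the bodies as an assumption (only the prototypes appear in this diff).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EXT4_MAX_REC_LEN ((1 << 16) - 1)

/* mirrors the removed ext4_rec_len_from_disk() */
static unsigned rec_len_from_disk(uint16_t dlen)
{
	if (dlen == EXT4_MAX_REC_LEN || dlen == 0)
		return 1 << 16;		/* a directory entry filling a 64k block */
	return dlen;
}

/* mirrors the removed ext4_rec_len_to_disk() */
static uint16_t rec_len_to_disk(unsigned len)
{
	if (len == (1 << 16))
		return EXT4_MAX_REC_LEN; /* 65536 does not fit in 16 bits */
	assert(len < (1 << 16));	 /* stands in for BUG() */
	return (uint16_t)len;
}

int main(void)
{
	uint16_t on_disk = rec_len_to_disk(1 << 16);
	printf("in-core 65536 <-> on-disk 0x%x <-> in-core %u\n",
	       on_disk, rec_len_from_disk(on_disk));
	return 0;
}
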
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 18cb67b2cbbc..f0c3ec85bd48 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -241,5 +241,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
241extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, 241extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
242 ext4_lblk_t *, ext4_fsblk_t *); 242 ext4_lblk_t *, ext4_fsblk_t *);
243extern void ext4_ext_drop_refs(struct ext4_ext_path *); 243extern void ext4_ext_drop_refs(struct ext4_ext_path *);
244extern int ext4_ext_check_inode(struct inode *inode);
244#endif /* _EXT4_EXTENTS */ 245#endif /* _EXT4_EXTENTS */
245 246
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index e69acc16f5c4..4ce2187123aa 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -33,9 +33,6 @@ typedef __u32 ext4_lblk_t;
33/* data type for block group number */ 33/* data type for block group number */
34typedef unsigned int ext4_group_t; 34typedef unsigned int ext4_group_t;
35 35
36#define rsv_start rsv_window._rsv_start
37#define rsv_end rsv_window._rsv_end
38
39/* 36/*
40 * storage for cached extent 37 * storage for cached extent
41 */ 38 */
@@ -125,6 +122,9 @@ struct ext4_inode_info {
125 struct list_head i_prealloc_list; 122 struct list_head i_prealloc_list;
126 spinlock_t i_prealloc_lock; 123 spinlock_t i_prealloc_lock;
127 124
125 /* ialloc */
126 ext4_group_t i_last_alloc_group;
127
128 /* allocation reservation info for delalloc */ 128 /* allocation reservation info for delalloc */
129 unsigned int i_reserved_data_blocks; 129 unsigned int i_reserved_data_blocks;
130 unsigned int i_reserved_meta_blocks; 130 unsigned int i_reserved_meta_blocks;
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 039b6ea1a042..57b71fefbccf 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -62,12 +62,10 @@ struct ext4_sb_info {
62 struct percpu_counter s_freeinodes_counter; 62 struct percpu_counter s_freeinodes_counter;
63 struct percpu_counter s_dirs_counter; 63 struct percpu_counter s_dirs_counter;
64 struct percpu_counter s_dirtyblocks_counter; 64 struct percpu_counter s_dirtyblocks_counter;
65 struct blockgroup_lock s_blockgroup_lock; 65 struct blockgroup_lock *s_blockgroup_lock;
66 struct proc_dir_entry *s_proc; 66 struct proc_dir_entry *s_proc;
67 67 struct kobject s_kobj;
68 /* root of the per fs reservation window tree */ 68 struct completion s_kobj_unregister;
69 spinlock_t s_rsv_window_lock;
70 struct rb_root s_rsv_window_root;
71 69
72 /* Journaling */ 70 /* Journaling */
73 struct inode *s_journal_inode; 71 struct inode *s_journal_inode;
@@ -146,6 +144,10 @@ struct ext4_sb_info {
146 /* locality groups */ 144 /* locality groups */
147 struct ext4_locality_group *s_locality_groups; 145 struct ext4_locality_group *s_locality_groups;
148 146
147 /* for write statistics */
148 unsigned long s_sectors_written_start;
149 u64 s_kbytes_written;
150
149 unsigned int s_log_groups_per_flex; 151 unsigned int s_log_groups_per_flex;
150 struct flex_groups *s_flex_groups; 152 struct flex_groups *s_flex_groups;
151}; 153};
@@ -153,7 +155,7 @@ struct ext4_sb_info {
153static inline spinlock_t * 155static inline spinlock_t *
154sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) 156sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
155{ 157{
156 return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group); 158 return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
157} 159}
158 160
159#endif /* _EXT4_SB */ 161#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e0aa4fe4f596..ac77d8b8251d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
152 ext4_fsblk_t bg_start; 152 ext4_fsblk_t bg_start;
153 ext4_fsblk_t last_block; 153 ext4_fsblk_t last_block;
154 ext4_grpblk_t colour; 154 ext4_grpblk_t colour;
155 ext4_group_t block_group;
156 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
155 int depth; 157 int depth;
156 158
157 if (path) { 159 if (path) {
@@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
170 } 172 }
171 173
172 /* OK. use inode's group */ 174 /* OK. use inode's group */
173 bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + 175 block_group = ei->i_block_group;
176 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
177 /*
178 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
179 * block groups per flexgroup, reserve the first block
180 * group for directories and special files. Regular
181 * files will start at the second block group. This
182 * tends to speed up directory access and improves
183 * fsck times.
184 */
185 block_group &= ~(flex_size-1);
186 if (S_ISREG(inode->i_mode))
187 block_group++;
188 }
189 bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
174 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); 190 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
175 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 191 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
176 192
193 /*
194 * If we are doing delayed allocation, we don't need to take
195 * colour into account.
196 */
197 if (test_opt(inode->i_sb, DELALLOC))
198 return bg_start;
199
177 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) 200 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
178 colour = (current->pid % 16) * 201 colour = (current->pid % 16) *
179 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 202 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
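
The flexgroup goal logic added in this hunk relies on flex_size being a power of two, so masking with ~(flex_size - 1) rounds a block group number down to the first group of its flexgroup; directories stay there and regular files are nudged to the next group. A small standalone illustration of just that arithmetic, with invented values:

#include <stdio.h>

int main(void)
{
	unsigned flex_size = 16;	/* block groups per flexgroup (power of 2) */
	unsigned groups[] = { 0, 5, 16, 37 };

	for (unsigned i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		/* round down to the first group of the flexgroup */
		unsigned start = groups[i] & ~(flex_size - 1);
		printf("group %2u -> dir goal %2u, regular-file goal %2u\n",
		       groups[i], start, start + 1);
	}
	return 0;
}
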
@@ -301,7 +324,64 @@ ext4_ext_max_entries(struct inode *inode, int depth)
301 return max; 324 return max;
302} 325}
303 326
304static int __ext4_ext_check_header(const char *function, struct inode *inode, 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{
329 ext4_fsblk_t block = ext_pblock(ext);
330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
333 ((block + len) > ext4_blocks_count(es))))
334 return 0;
335 else
336 return 1;
337}
338
339static int ext4_valid_extent_idx(struct inode *inode,
340 struct ext4_extent_idx *ext_idx)
341{
342 ext4_fsblk_t block = idx_pblock(ext_idx);
343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
345 (block > ext4_blocks_count(es))))
346 return 0;
347 else
348 return 1;
349}
350
351static int ext4_valid_extent_entries(struct inode *inode,
352 struct ext4_extent_header *eh,
353 int depth)
354{
355 struct ext4_extent *ext;
356 struct ext4_extent_idx *ext_idx;
357 unsigned short entries;
358 if (eh->eh_entries == 0)
359 return 1;
360
361 entries = le16_to_cpu(eh->eh_entries);
362
363 if (depth == 0) {
364 /* leaf entries */
365 ext = EXT_FIRST_EXTENT(eh);
366 while (entries) {
367 if (!ext4_valid_extent(inode, ext))
368 return 0;
369 ext++;
370 entries--;
371 }
372 } else {
373 ext_idx = EXT_FIRST_INDEX(eh);
374 while (entries) {
375 if (!ext4_valid_extent_idx(inode, ext_idx))
376 return 0;
377 ext_idx++;
378 entries--;
379 }
380 }
381 return 1;
382}
383
384static int __ext4_ext_check(const char *function, struct inode *inode,
305 struct ext4_extent_header *eh, 385 struct ext4_extent_header *eh,
306 int depth) 386 int depth)
307{ 387{
@@ -329,11 +409,15 @@ static int __ext4_ext_check_header(const char *function, struct inode *inode,
329 error_msg = "invalid eh_entries"; 409 error_msg = "invalid eh_entries";
330 goto corrupted; 410 goto corrupted;
331 } 411 }
412 if (!ext4_valid_extent_entries(inode, eh, depth)) {
413 error_msg = "invalid extent entries";
414 goto corrupted;
415 }
332 return 0; 416 return 0;
333 417
334corrupted: 418corrupted:
335 ext4_error(inode->i_sb, function, 419 ext4_error(inode->i_sb, function,
336 "bad header in inode #%lu: %s - magic %x, " 420 "bad header/extent in inode #%lu: %s - magic %x, "
337 "entries %u, max %u(%u), depth %u(%u)", 421 "entries %u, max %u(%u), depth %u(%u)",
338 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), 422 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
339 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 423 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
@@ -342,8 +426,13 @@ corrupted:
342 return -EIO; 426 return -EIO;
343} 427}
344 428
345#define ext4_ext_check_header(inode, eh, depth) \ 429#define ext4_ext_check(inode, eh, depth) \
346 __ext4_ext_check_header(__func__, inode, eh, depth) 430 __ext4_ext_check(__func__, inode, eh, depth)
431
432int ext4_ext_check_inode(struct inode *inode)
433{
434 return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
435}
347 436
348#ifdef EXT_DEBUG 437#ifdef EXT_DEBUG
349static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 438static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -547,9 +636,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
547 636
548 eh = ext_inode_hdr(inode); 637 eh = ext_inode_hdr(inode);
549 depth = ext_depth(inode); 638 depth = ext_depth(inode);
550 if (ext4_ext_check_header(inode, eh, depth))
551 return ERR_PTR(-EIO);
552
553 639
554 /* account possible depth increase */ 640 /* account possible depth increase */
555 if (!path) { 641 if (!path) {
@@ -565,6 +651,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
565 i = depth; 651 i = depth;
566 /* walk through the tree */ 652 /* walk through the tree */
567 while (i) { 653 while (i) {
654 int need_to_validate = 0;
655
568 ext_debug("depth %d: num %d, max %d\n", 656 ext_debug("depth %d: num %d, max %d\n",
569 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 657 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
570 658
@@ -573,10 +661,17 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
573 path[ppos].p_depth = i; 661 path[ppos].p_depth = i;
574 path[ppos].p_ext = NULL; 662 path[ppos].p_ext = NULL;
575 663
576 bh = sb_bread(inode->i_sb, path[ppos].p_block); 664 bh = sb_getblk(inode->i_sb, path[ppos].p_block);
577 if (!bh) 665 if (unlikely(!bh))
578 goto err; 666 goto err;
579 667 if (!bh_uptodate_or_lock(bh)) {
668 if (bh_submit_read(bh) < 0) {
669 put_bh(bh);
670 goto err;
671 }
672 /* validate the extent entries */
673 need_to_validate = 1;
674 }
580 eh = ext_block_hdr(bh); 675 eh = ext_block_hdr(bh);
581 ppos++; 676 ppos++;
582 BUG_ON(ppos > depth); 677 BUG_ON(ppos > depth);
@@ -584,7 +679,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
584 path[ppos].p_hdr = eh; 679 path[ppos].p_hdr = eh;
585 i--; 680 i--;
586 681
587 if (ext4_ext_check_header(inode, eh, i)) 682 if (need_to_validate && ext4_ext_check(inode, eh, i))
588 goto err; 683 goto err;
589 } 684 }
590 685
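
The sb_bread() to sb_getblk() switch above means a buffer already in the cache is trusted, and the comparatively expensive ext4_ext_check() pass runs only when bh_submit_read() actually pulled the block off disk. A toy userspace analogue of that read-then-validate-once pattern; the struct and names here are stand-ins, not kernel API:

#include <stdio.h>
#include <string.h>

struct block {
	int uptodate;		/* like buffer_uptodate() */
	char data[16];
};

/* pretend disk I/O, like bh_submit_read() */
static int submit_read(struct block *b)
{
	strcpy(b->data, "extent header");
	b->uptodate = 1;
	return 0;
}

/* cheap sanity check, like ext4_ext_check() */
static int validate(struct block *b)
{
	return strncmp(b->data, "extent", 6) == 0 ? 0 : -1;
}

int main(void)
{
	struct block blk = { 0 };

	for (int pass = 0; pass < 2; pass++) {
		int need_to_validate = 0;

		if (!blk.uptodate) {	/* cold cache: read and flag */
			if (submit_read(&blk) < 0)
				return 1;
			need_to_validate = 1;
		}
		/* warm cache hits skip the check entirely */
		if (need_to_validate && validate(&blk) < 0)
			return 1;
		printf("pass %d: \"%s\" (validated=%d)\n",
		       pass, blk.data, need_to_validate);
	}
	return 0;
}
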
@@ -1181,7 +1276,7 @@ got_index:
1181 return -EIO; 1276 return -EIO;
1182 eh = ext_block_hdr(bh); 1277 eh = ext_block_hdr(bh);
1183 /* subtract from p_depth to get proper eh_depth */ 1278 /* subtract from p_depth to get proper eh_depth */
1184 if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { 1279 if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
1185 put_bh(bh); 1280 put_bh(bh);
1186 return -EIO; 1281 return -EIO;
1187 } 1282 }
@@ -1194,7 +1289,7 @@ got_index:
1194 if (bh == NULL) 1289 if (bh == NULL)
1195 return -EIO; 1290 return -EIO;
1196 eh = ext_block_hdr(bh); 1291 eh = ext_block_hdr(bh);
1197 if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) { 1292 if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
1198 put_bh(bh); 1293 put_bh(bh);
1199 return -EIO; 1294 return -EIO;
1200 } 1295 }
@@ -2137,7 +2232,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2137 return -ENOMEM; 2232 return -ENOMEM;
2138 } 2233 }
2139 path[0].p_hdr = ext_inode_hdr(inode); 2234 path[0].p_hdr = ext_inode_hdr(inode);
2140 if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) { 2235 if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
2141 err = -EIO; 2236 err = -EIO;
2142 goto out; 2237 goto out;
2143 } 2238 }
@@ -2191,7 +2286,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2191 err = -EIO; 2286 err = -EIO;
2192 break; 2287 break;
2193 } 2288 }
2194 if (ext4_ext_check_header(inode, ext_block_hdr(bh), 2289 if (ext4_ext_check(inode, ext_block_hdr(bh),
2195 depth - i - 1)) { 2290 depth - i - 1)) {
2196 err = -EIO; 2291 err = -EIO;
2197 break; 2292 break;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index f731cb545a03..588af8c77246 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -33,9 +33,14 @@
33 */ 33 */
34static int ext4_release_file(struct inode *inode, struct file *filp) 34static int ext4_release_file(struct inode *inode, struct file *filp)
35{ 35{
36 if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
37 ext4_alloc_da_blocks(inode);
38 EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
39 }
36 /* if we are the last writer on the inode, drop the block reservation */ 40 /* if we are the last writer on the inode, drop the block reservation */
37 if ((filp->f_mode & FMODE_WRITE) && 41 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 42 (atomic_read(&inode->i_writecount) == 1) &&
43 !EXT4_I(inode)->i_reserved_data_blocks)
39 { 44 {
40 down_write(&EXT4_I(inode)->i_data_sem); 45 down_write(&EXT4_I(inode)->i_data_sem);
41 ext4_discard_preallocations(inode); 46 ext4_discard_preallocations(inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fb51b40e3e8f..47b84e8df568 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -189,7 +189,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
189 struct ext4_super_block *es; 189 struct ext4_super_block *es;
190 struct ext4_sb_info *sbi; 190 struct ext4_sb_info *sbi;
191 int fatal = 0, err, count, cleared; 191 int fatal = 0, err, count, cleared;
192 ext4_group_t flex_group;
193 192
194 if (atomic_read(&inode->i_count) > 1) { 193 if (atomic_read(&inode->i_count) > 1) {
195 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", 194 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
@@ -268,6 +267,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
268 if (is_directory) { 267 if (is_directory) {
269 count = ext4_used_dirs_count(sb, gdp) - 1; 268 count = ext4_used_dirs_count(sb, gdp) - 1;
270 ext4_used_dirs_set(sb, gdp, count); 269 ext4_used_dirs_set(sb, gdp, count);
270 if (sbi->s_log_groups_per_flex) {
271 ext4_group_t f;
272
273 f = ext4_flex_group(sbi, block_group);
274 atomic_dec(&sbi->s_flex_groups[f].used_dirs);
275 }
276
271 } 277 }
272 gdp->bg_checksum = ext4_group_desc_csum(sbi, 278 gdp->bg_checksum = ext4_group_desc_csum(sbi,
273 block_group, gdp); 279 block_group, gdp);
@@ -277,10 +283,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
277 percpu_counter_dec(&sbi->s_dirs_counter); 283 percpu_counter_dec(&sbi->s_dirs_counter);
278 284
279 if (sbi->s_log_groups_per_flex) { 285 if (sbi->s_log_groups_per_flex) {
280 flex_group = ext4_flex_group(sbi, block_group); 286 ext4_group_t f;
281 spin_lock(sb_bgl_lock(sbi, flex_group)); 287
282 sbi->s_flex_groups[flex_group].free_inodes++; 288 f = ext4_flex_group(sbi, block_group);
283 spin_unlock(sb_bgl_lock(sbi, flex_group)); 289 atomic_inc(&sbi->s_flex_groups[f].free_inodes);
284 } 290 }
285 } 291 }
286 BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); 292 BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
@@ -360,9 +366,9 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
360 sbi->s_log_groups_per_flex; 366 sbi->s_log_groups_per_flex;
361 367
362find_close_to_parent: 368find_close_to_parent:
363 flexbg_free_blocks = flex_group[best_flex].free_blocks; 369 flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
364 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; 370 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
365 if (flex_group[best_flex].free_inodes && 371 if (atomic_read(&flex_group[best_flex].free_inodes) &&
366 flex_freeb_ratio > free_block_ratio) 372 flex_freeb_ratio > free_block_ratio)
367 goto found_flexbg; 373 goto found_flexbg;
368 374
@@ -375,24 +381,24 @@ find_close_to_parent:
375 if (i == parent_fbg_group || i == parent_fbg_group - 1) 381 if (i == parent_fbg_group || i == parent_fbg_group - 1)
376 continue; 382 continue;
377 383
378 flexbg_free_blocks = flex_group[i].free_blocks; 384 flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
379 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; 385 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
380 386
381 if (flex_freeb_ratio > free_block_ratio && 387 if (flex_freeb_ratio > free_block_ratio &&
382 flex_group[i].free_inodes) { 388 (atomic_read(&flex_group[i].free_inodes))) {
383 best_flex = i; 389 best_flex = i;
384 goto found_flexbg; 390 goto found_flexbg;
385 } 391 }
386 392
387 if (flex_group[best_flex].free_inodes == 0 || 393 if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
388 (flex_group[i].free_blocks > 394 ((atomic_read(&flex_group[i].free_blocks) >
389 flex_group[best_flex].free_blocks && 395 atomic_read(&flex_group[best_flex].free_blocks)) &&
390 flex_group[i].free_inodes)) 396 atomic_read(&flex_group[i].free_inodes)))
391 best_flex = i; 397 best_flex = i;
392 } 398 }
393 399
394 if (!flex_group[best_flex].free_inodes || 400 if (!atomic_read(&flex_group[best_flex].free_inodes) ||
395 !flex_group[best_flex].free_blocks) 401 !atomic_read(&flex_group[best_flex].free_blocks))
396 return -1; 402 return -1;
397 403
398found_flexbg: 404found_flexbg:
@@ -410,6 +416,42 @@ out:
410 return 0; 416 return 0;
411} 417}
412 418
419struct orlov_stats {
420 __u32 free_inodes;
421 __u32 free_blocks;
422 __u32 used_dirs;
423};
424
425/*
426 * Helper function for Orlov's allocator; returns critical information
427 * for a particular block group or flex_bg. If flex_size is 1, then g
428 * is a block group number; otherwise it is the flex_bg number.
429 */
430void get_orlov_stats(struct super_block *sb, ext4_group_t g,
431 int flex_size, struct orlov_stats *stats)
432{
433 struct ext4_group_desc *desc;
434 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
435
436 if (flex_size > 1) {
437 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
438 stats->free_blocks = atomic_read(&flex_group[g].free_blocks);
439 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
440 return;
441 }
442
443 desc = ext4_get_group_desc(sb, g, NULL);
444 if (desc) {
445 stats->free_inodes = ext4_free_inodes_count(sb, desc);
446 stats->free_blocks = ext4_free_blks_count(sb, desc);
447 stats->used_dirs = ext4_used_dirs_count(sb, desc);
448 } else {
449 stats->free_inodes = 0;
450 stats->free_blocks = 0;
451 stats->used_dirs = 0;
452 }
453}
454
413/* 455/*
414 * Orlov's allocator for directories. 456 * Orlov's allocator for directories.
415 * 457 *
@@ -425,35 +467,34 @@ out:
425 * it has too many directories already (max_dirs) or 467 * it has too many directories already (max_dirs) or
426 * it has too few free inodes left (min_inodes) or 468 * it has too few free inodes left (min_inodes) or
427 * it has too few free blocks left (min_blocks) or 469 * it has too few free blocks left (min_blocks) or
428 * it's already running too large debt (max_debt).
429 * Parent's group is preferred, if it doesn't satisfy these 470 * Parent's group is preferred, if it doesn't satisfy these
430 * conditions we search cyclically through the rest. If none 471 * conditions we search cyclically through the rest. If none
431 * of the groups look good we just look for a group with more 472 * of the groups look good we just look for a group with more
432 * free inodes than average (starting at parent's group). 473 * free inodes than average (starting at parent's group).
433 *
434 * Debt is incremented each time we allocate a directory and decremented
435 * when we allocate an inode, within 0--255.
436 */ 474 */
437 475
438#define INODE_COST 64
439#define BLOCK_COST 256
440
441static int find_group_orlov(struct super_block *sb, struct inode *parent, 476static int find_group_orlov(struct super_block *sb, struct inode *parent,
442 ext4_group_t *group) 477 ext4_group_t *group, int mode)
443{ 478{
444 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
445 struct ext4_sb_info *sbi = EXT4_SB(sb); 480 struct ext4_sb_info *sbi = EXT4_SB(sb);
446 struct ext4_super_block *es = sbi->s_es;
447 ext4_group_t ngroups = sbi->s_groups_count; 481 ext4_group_t ngroups = sbi->s_groups_count;
448 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
449 unsigned int freei, avefreei; 483 unsigned int freei, avefreei;
450 ext4_fsblk_t freeb, avefreeb; 484 ext4_fsblk_t freeb, avefreeb;
451 ext4_fsblk_t blocks_per_dir;
452 unsigned int ndirs; 485 unsigned int ndirs;
453 int max_debt, max_dirs, min_inodes; 486 int max_dirs, min_inodes;
454 ext4_grpblk_t min_blocks; 487 ext4_grpblk_t min_blocks;
455 ext4_group_t i; 488 ext4_group_t i, grp, g;
456 struct ext4_group_desc *desc; 489 struct ext4_group_desc *desc;
490 struct orlov_stats stats;
491 int flex_size = ext4_flex_bg_size(sbi);
492
493 if (flex_size > 1) {
494 ngroups = (ngroups + flex_size - 1) >>
495 sbi->s_log_groups_per_flex;
496 parent_group >>= sbi->s_log_groups_per_flex;
497 }
457 498
458 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); 499 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
459 avefreei = freei / ngroups; 500 avefreei = freei / ngroups;
@@ -462,71 +503,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
462 do_div(avefreeb, ngroups); 503 do_div(avefreeb, ngroups);
463 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); 504 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
464 505
465 if ((parent == sb->s_root->d_inode) || 506 if (S_ISDIR(mode) &&
466 (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { 507 ((parent == sb->s_root->d_inode) ||
508 (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
467 int best_ndir = inodes_per_group; 509 int best_ndir = inodes_per_group;
468 ext4_group_t grp;
469 int ret = -1; 510 int ret = -1;
470 511
471 get_random_bytes(&grp, sizeof(grp)); 512 get_random_bytes(&grp, sizeof(grp));
472 parent_group = (unsigned)grp % ngroups; 513 parent_group = (unsigned)grp % ngroups;
473 for (i = 0; i < ngroups; i++) { 514 for (i = 0; i < ngroups; i++) {
474 grp = (parent_group + i) % ngroups; 515 g = (parent_group + i) % ngroups;
475 desc = ext4_get_group_desc(sb, grp, NULL); 516 get_orlov_stats(sb, g, flex_size, &stats);
476 if (!desc || !ext4_free_inodes_count(sb, desc)) 517 if (!stats.free_inodes)
477 continue; 518 continue;
478 if (ext4_used_dirs_count(sb, desc) >= best_ndir) 519 if (stats.used_dirs >= best_ndir)
479 continue; 520 continue;
480 if (ext4_free_inodes_count(sb, desc) < avefreei) 521 if (stats.free_inodes < avefreei)
481 continue; 522 continue;
482 if (ext4_free_blks_count(sb, desc) < avefreeb) 523 if (stats.free_blocks < avefreeb)
483 continue; 524 continue;
484 *group = grp; 525 grp = g;
485 ret = 0; 526 ret = 0;
486 best_ndir = ext4_used_dirs_count(sb, desc); 527 best_ndir = stats.used_dirs;
528 }
529 if (ret)
530 goto fallback;
531 found_flex_bg:
532 if (flex_size == 1) {
533 *group = grp;
534 return 0;
535 }
536
537 /*
538 * We pack inodes at the beginning of the flexgroup's
539 * inode tables. Block allocation decisions will do
540 * something similar, although regular files will
541 * start at the 2nd block group of the flexgroup. See
542 * ext4_ext_find_goal() and ext4_find_near().
543 */
544 grp *= flex_size;
545 for (i = 0; i < flex_size; i++) {
546 if (grp+i >= sbi->s_groups_count)
547 break;
548 desc = ext4_get_group_desc(sb, grp+i, NULL);
549 if (desc && ext4_free_inodes_count(sb, desc)) {
550 *group = grp+i;
551 return 0;
552 }
487 } 553 }
488 if (ret == 0)
489 return ret;
490 goto fallback; 554 goto fallback;
491 } 555 }
492 556
493 blocks_per_dir = ext4_blocks_count(es) - freeb;
494 do_div(blocks_per_dir, ndirs);
495
496 max_dirs = ndirs / ngroups + inodes_per_group / 16; 557 max_dirs = ndirs / ngroups + inodes_per_group / 16;
497 min_inodes = avefreei - inodes_per_group / 4; 558 min_inodes = avefreei - inodes_per_group*flex_size / 4;
498 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; 559 if (min_inodes < 1)
499 560 min_inodes = 1;
500 max_debt = EXT4_BLOCKS_PER_GROUP(sb); 561 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
501 max_debt /= max_t(int, blocks_per_dir, BLOCK_COST); 562
502 if (max_debt * INODE_COST > inodes_per_group) 563 /*
503 max_debt = inodes_per_group / INODE_COST; 564 * Start looking in the flex group where we last allocated an
504 if (max_debt > 255) 565 * inode for this parent directory
505 max_debt = 255; 566 */
506 if (max_debt == 0) 567 if (EXT4_I(parent)->i_last_alloc_group != ~0) {
507 max_debt = 1; 568 parent_group = EXT4_I(parent)->i_last_alloc_group;
569 if (flex_size > 1)
570 parent_group >>= sbi->s_log_groups_per_flex;
571 }
508 572
509 for (i = 0; i < ngroups; i++) { 573 for (i = 0; i < ngroups; i++) {
510 *group = (parent_group + i) % ngroups; 574 grp = (parent_group + i) % ngroups;
511 desc = ext4_get_group_desc(sb, *group, NULL); 575 get_orlov_stats(sb, grp, flex_size, &stats);
512 if (!desc || !ext4_free_inodes_count(sb, desc)) 576 if (stats.used_dirs >= max_dirs)
513 continue;
514 if (ext4_used_dirs_count(sb, desc) >= max_dirs)
515 continue; 577 continue;
516 if (ext4_free_inodes_count(sb, desc) < min_inodes) 578 if (stats.free_inodes < min_inodes)
517 continue; 579 continue;
518 if (ext4_free_blks_count(sb, desc) < min_blocks) 580 if (stats.free_blocks < min_blocks)
519 continue; 581 continue;
520 return 0; 582 goto found_flex_bg;
521 } 583 }
522 584
523fallback: 585fallback:
586 ngroups = sbi->s_groups_count;
587 avefreei = freei / ngroups;
588 parent_group = EXT4_I(parent)->i_block_group;
524 for (i = 0; i < ngroups; i++) { 589 for (i = 0; i < ngroups; i++) {
525 *group = (parent_group + i) % ngroups; 590 grp = (parent_group + i) % ngroups;
526 desc = ext4_get_group_desc(sb, *group, NULL); 591 desc = ext4_get_group_desc(sb, grp, NULL);
527 if (desc && ext4_free_inodes_count(sb, desc) && 592 if (desc && ext4_free_inodes_count(sb, desc) &&
528 ext4_free_inodes_count(sb, desc) >= avefreei) 593 ext4_free_inodes_count(sb, desc) >= avefreei) {
594 *group = grp;
529 return 0; 595 return 0;
596 }
530 } 597 }
531 598
532 if (avefreei) { 599 if (avefreei) {
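
To see how the reworked Orlov loop consumes get_orlov_stats(), here is a self-contained sketch of the threshold filtering with invented numbers; the real code above applies the same three tests per (flex)group before taking the fallback path:

#include <stdio.h>

struct orlov_stats {
	unsigned free_inodes, free_blocks, used_dirs;
};

int main(void)
{
	struct orlov_stats g[] = {
		{ .free_inodes = 10, .free_blocks = 900, .used_dirs = 40 },
		{ .free_inodes = 80, .free_blocks =  50, .used_dirs =  5 },
		{ .free_inodes = 70, .free_blocks = 800, .used_dirs =  8 },
	};
	unsigned max_dirs = 20, min_inodes = 32, min_blocks = 100;

	for (unsigned i = 0; i < sizeof(g) / sizeof(g[0]); i++) {
		if (g[i].used_dirs >= max_dirs)		/* too many dirs */
			continue;
		if (g[i].free_inodes < min_inodes)	/* too few inodes */
			continue;
		if (g[i].free_blocks < min_blocks)	/* too few blocks */
			continue;
		printf("would allocate in (flex)group %u\n", i);
		return 0;
	}
	printf("no group passed; fall back\n");
	return 0;
}
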
@@ -542,12 +609,51 @@ fallback:
542} 609}
543 610
544static int find_group_other(struct super_block *sb, struct inode *parent, 611static int find_group_other(struct super_block *sb, struct inode *parent,
545 ext4_group_t *group) 612 ext4_group_t *group, int mode)
546{ 613{
547 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 614 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
548 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 615 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
549 struct ext4_group_desc *desc; 616 struct ext4_group_desc *desc;
550 ext4_group_t i; 617 ext4_group_t i, last;
618 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
619
620 /*
621 * Try to place the inode in the same flex group as its
622 * parent. If we can't find space, use the Orlov algorithm to
623 * find another flex group, and store that information in the
624 * parent directory's inode information so that we use that flex
625 * group for future allocations.
626 */
627 if (flex_size > 1) {
628 int retry = 0;
629
630 try_again:
631 parent_group &= ~(flex_size-1);
632 last = parent_group + flex_size;
633 if (last > ngroups)
634 last = ngroups;
635 for (i = parent_group; i < last; i++) {
636 desc = ext4_get_group_desc(sb, i, NULL);
637 if (desc && ext4_free_inodes_count(sb, desc)) {
638 *group = i;
639 return 0;
640 }
641 }
642 if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
643 retry = 1;
644 parent_group = EXT4_I(parent)->i_last_alloc_group;
645 goto try_again;
646 }
647 /*
648 * If this didn't work, use the Orlov search algorithm
649 * to find a new flex group; we pass in the mode to
650 * avoid the topdir algorithms.
651 */
652 *group = parent_group + flex_size;
653 if (*group > ngroups)
654 *group = 0;
655 return find_group_orlov(sb, parent, group, mode);
656 }
551 657
552 /* 658 /*
553 * Try to place the inode in its parent directory 659 * Try to place the inode in its parent directory
@@ -665,6 +771,11 @@ static int ext4_claim_inode(struct super_block *sb,
665 if (S_ISDIR(mode)) { 771 if (S_ISDIR(mode)) {
666 count = ext4_used_dirs_count(sb, gdp) + 1; 772 count = ext4_used_dirs_count(sb, gdp) + 1;
667 ext4_used_dirs_set(sb, gdp, count); 773 ext4_used_dirs_set(sb, gdp, count);
774 if (sbi->s_log_groups_per_flex) {
775 ext4_group_t f = ext4_flex_group(sbi, group);
776
777 atomic_inc(&sbi->s_flex_groups[f].used_dirs);
778 }
668 } 779 }
669 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 780 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
670err_ret: 781err_ret:
@@ -716,10 +827,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
716 sbi = EXT4_SB(sb); 827 sbi = EXT4_SB(sb);
717 es = sbi->s_es; 828 es = sbi->s_es;
718 829
719 if (sbi->s_log_groups_per_flex) { 830 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
720 ret2 = find_group_flex(sb, dir, &group); 831 ret2 = find_group_flex(sb, dir, &group);
721 if (ret2 == -1) { 832 if (ret2 == -1) {
722 ret2 = find_group_other(sb, dir, &group); 833 ret2 = find_group_other(sb, dir, &group, mode);
723 if (ret2 == 0 && once) 834 if (ret2 == 0 && once)
724 once = 0; 835 once = 0;
725 printk(KERN_NOTICE "ext4: find_group_flex " 836 printk(KERN_NOTICE "ext4: find_group_flex "
@@ -733,11 +844,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
733 if (test_opt(sb, OLDALLOC)) 844 if (test_opt(sb, OLDALLOC))
734 ret2 = find_group_dir(sb, dir, &group); 845 ret2 = find_group_dir(sb, dir, &group);
735 else 846 else
736 ret2 = find_group_orlov(sb, dir, &group); 847 ret2 = find_group_orlov(sb, dir, &group, mode);
737 } else 848 } else
738 ret2 = find_group_other(sb, dir, &group); 849 ret2 = find_group_other(sb, dir, &group, mode);
739 850
740got_group: 851got_group:
852 EXT4_I(dir)->i_last_alloc_group = group;
741 err = -ENOSPC; 853 err = -ENOSPC;
742 if (ret2 == -1) 854 if (ret2 == -1)
743 goto out; 855 goto out;
@@ -858,9 +970,7 @@ got:
858 970
859 if (sbi->s_log_groups_per_flex) { 971 if (sbi->s_log_groups_per_flex) {
860 flex_group = ext4_flex_group(sbi, group); 972 flex_group = ext4_flex_group(sbi, group);
861 spin_lock(sb_bgl_lock(sbi, flex_group)); 973 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
862 sbi->s_flex_groups[flex_group].free_inodes--;
863 spin_unlock(sb_bgl_lock(sbi, flex_group));
864 } 974 }
865 975
866 inode->i_uid = current_fsuid(); 976 inode->i_uid = current_fsuid();
@@ -885,19 +995,16 @@ got:
885 ei->i_disksize = 0; 995 ei->i_disksize = 0;
886 996
887 /* 997 /*
888 * Don't inherit extent flag from directory. We set extent flag on 998 * Don't inherit extent flag from directory, amongst others. We set
889 * newly created directory and file only if -o extent mount option is 999 * extent flag on newly created directory and file only if -o extent
890 * specified 1000 * mount option is specified
891 */ 1001 */
892 ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL); 1002 ei->i_flags =
893 if (S_ISLNK(mode)) 1003 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
894 ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
895 /* dirsync only applies to directories */
896 if (!S_ISDIR(mode))
897 ei->i_flags &= ~EXT4_DIRSYNC_FL;
898 ei->i_file_acl = 0; 1004 ei->i_file_acl = 0;
899 ei->i_dtime = 0; 1005 ei->i_dtime = 0;
900 ei->i_block_group = group; 1006 ei->i_block_group = group;
1007 ei->i_last_alloc_group = ~0;
901 1008
902 ext4_set_inode_flags(inode); 1009 ext4_set_inode_flags(inode);
903 if (IS_DIRSYNC(inode)) 1010 if (IS_DIRSYNC(inode))
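
The inheritance hunk above replaces the hand-rolled flag stripping with ext4_mask_flags() over EXT4_FL_INHERITED. A rough userspace sketch of the shape such a helper plausibly has; the mask values are placeholders, since the real EXT4_REG_FLMASK/EXT4_OTHER_FLMASK definitions are outside this diff:

#include <stdio.h>
#include <sys/stat.h>

/* placeholder masks, not the real ext4 definitions */
#define REG_FLMASK   0x0f
#define OTHER_FLMASK 0x03

static unsigned mask_flags(mode_t mode, unsigned flags)
{
	if (S_ISDIR(mode))
		return flags;		   /* directories keep everything */
	if (S_ISREG(mode))
		return flags & REG_FLMASK; /* e.g. drop DIRSYNC-style bits */
	return flags & OTHER_FLMASK;	   /* symlinks, devices, ... */
}

int main(void)
{
	unsigned inherited = 0xff;	/* pretend parent-directory flags */

	printf("dir=%#x reg=%#x other=%#x\n",
	       mask_flags(S_IFDIR, inherited),
	       mask_flags(S_IFREG, inherited),
	       mask_flags(S_IFLNK, inherited));
	return 0;
}
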
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71d3ecd5db79..a2e7952bc5f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode,
371 return n; 371 return n;
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 __le32 *p, unsigned int max) {
376
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 __le32 *bref = p;
379 while (bref < p+max) {
380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
381 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) "
383 "in inode #%lu, offset=%d",
384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p));
386 return -EIO;
387 }
388 bref++;
389 }
390 return 0;
391}
392
393
394#define ext4_check_indirect_blockref(inode, bh) \
395 __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
396 EXT4_ADDR_PER_BLOCK((inode)->i_sb))
397
398#define ext4_check_inode_blockref(inode) \
399 __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
400 EXT4_NDIR_BLOCKS)
401
374/** 402/**
375 * ext4_get_branch - read the chain of indirect blocks leading to data 403 * ext4_get_branch - read the chain of indirect blocks leading to data
376 * @inode: inode in question 404 * @inode: inode in question
@@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
415 if (!p->key) 443 if (!p->key)
416 goto no_block; 444 goto no_block;
417 while (--depth) { 445 while (--depth) {
418 bh = sb_bread(sb, le32_to_cpu(p->key)); 446 bh = sb_getblk(sb, le32_to_cpu(p->key));
419 if (!bh) 447 if (unlikely(!bh))
420 goto failure; 448 goto failure;
449
450 if (!bh_uptodate_or_lock(bh)) {
451 if (bh_submit_read(bh) < 0) {
452 put_bh(bh);
453 goto failure;
454 }
455 /* validate block references */
456 if (ext4_check_indirect_blockref(inode, bh)) {
457 put_bh(bh);
458 goto failure;
459 }
460 }
461
421 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); 462 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
422 /* Reader: end */ 463 /* Reader: end */
423 if (!p->key) 464 if (!p->key)
@@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
459 ext4_fsblk_t bg_start; 500 ext4_fsblk_t bg_start;
460 ext4_fsblk_t last_block; 501 ext4_fsblk_t last_block;
461 ext4_grpblk_t colour; 502 ext4_grpblk_t colour;
503 ext4_group_t block_group;
504 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
462 505
463 /* Try to find previous block */ 506 /* Try to find previous block */
464 for (p = ind->p - 1; p >= start; p--) { 507 for (p = ind->p - 1; p >= start; p--) {
@@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
474 * It is going to be referred to from the inode itself? OK, just put it 517 * It is going to be referred to from the inode itself? OK, just put it
475 * into the same cylinder group then. 518 * into the same cylinder group then.
476 */ 519 */
477 bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); 520 block_group = ei->i_block_group;
521 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
522 block_group &= ~(flex_size-1);
523 if (S_ISREG(inode->i_mode))
524 block_group++;
525 }
526 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
478 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; 527 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
479 528
529 /*
530 * If we are doing delayed allocation, we don't need to take
531 * colour into account.
532 */
533 if (test_opt(inode->i_sb, DELALLOC))
534 return bg_start;
535
480 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) 536 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
481 colour = (current->pid % 16) * 537 colour = (current->pid % 16) *
482 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 538 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
@@ -1052,9 +1108,16 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1052 /* 1108 /*
1053 * free those over-booking quota for metadata blocks 1109 * free those over-booking quota for metadata blocks
1054 */ 1110 */
1055
1056 if (mdb_free) 1111 if (mdb_free)
1057 vfs_dq_release_reservation_block(inode, mdb_free); 1112 vfs_dq_release_reservation_block(inode, mdb_free);
1113
1114 /*
1115 * If we have done all the pending block allocations and if
1116 * there aren't any writers on the inode, we can discard the
1117 * inode's preallocations.
1118 */
1119 if (!total && (atomic_read(&inode->i_writecount) == 0))
1120 ext4_discard_preallocations(inode);
1058} 1121}
1059 1122
1060/* 1123/*
@@ -1688,9 +1751,10 @@ static void ext4_da_page_release_reservation(struct page *page,
1688 1751
1689struct mpage_da_data { 1752struct mpage_da_data {
1690 struct inode *inode; 1753 struct inode *inode;
1691 struct buffer_head lbh; /* extent of blocks */ 1754 sector_t b_blocknr; /* start block number of extent */
1755 size_t b_size; /* size of extent */
1756 unsigned long b_state; /* state of the extent */
1692 unsigned long first_page, next_page; /* extent of pages */ 1757 unsigned long first_page, next_page; /* extent of pages */
1693 get_block_t *get_block;
1694 struct writeback_control *wbc; 1758 struct writeback_control *wbc;
1695 int io_done; 1759 int io_done;
1696 int pages_written; 1760 int pages_written;
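
With the embedded struct buffer_head gone, mpage_da_data carries the extent state as a bare bitmask, and the merge test later in this file compares (b_state & BH_FLAGS) directly. A compact demo of that comparison; bit positions are illustrative, not the kernel's enum bh_state_bits:

#include <stdio.h>

enum { BH_Uptodate, BH_Dirty, BH_Mapped, BH_Delay };
#define BH_FLAGS ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_Delay))

int main(void)
{
	/* state accumulated so far in the in-flight extent */
	unsigned long extent_state = (1UL << BH_Mapped) | (1UL << BH_Delay);
	/* state of the next buffer head under consideration */
	unsigned long bh_state = (1UL << BH_Dirty) | (1UL << BH_Mapped) |
				 (1UL << BH_Delay);

	/* only the BH_FLAGS subset decides whether blocks can merge */
	if ((bh_state & BH_FLAGS) == extent_state)
		printf("merge block into the current extent\n");
	else
		printf("flush the extent and start a new one\n");
	return 0;
}
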
@@ -1704,7 +1768,6 @@ struct mpage_da_data {
1704 * @mpd->inode: inode 1768 * @mpd->inode: inode
1705 * @mpd->first_page: first page of the extent 1769 * @mpd->first_page: first page of the extent
1706 * @mpd->next_page: page after the last page of the extent 1770 * @mpd->next_page: page after the last page of the extent
1707 * @mpd->get_block: the filesystem's block mapper function
1708 * 1771 *
1709 * By the time mpage_da_submit_io() is called we expect all blocks 1772 * By the time mpage_da_submit_io() is called we expect all blocks
1710 * to be allocated. this may be wrong if allocation failed. 1773 * to be allocated. this may be wrong if allocation failed.
@@ -1724,7 +1787,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1724 /* 1787 /*
1725 * We need to start from the first_page to the next_page - 1 1788 * We need to start from the first_page to the next_page - 1
1726 * to make sure we also write the mapped dirty buffer_heads. 1789 * to make sure we also write the mapped dirty buffer_heads.
1727 * If we look at mpd->lbh.b_blocknr we would only be looking 1790 * If we look at mpd->b_blocknr we would only be looking
1728 * at the currently mapped buffer_heads. 1791 * at the currently mapped buffer_heads.
1729 */ 1792 */
1730 index = mpd->first_page; 1793 index = mpd->first_page;
@@ -1914,68 +1977,111 @@ static void ext4_print_free_blocks(struct inode *inode)
1914 return; 1977 return;
1915} 1978}
1916 1979
1980#define EXT4_DELALLOC_RSVED 1
1981static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
1982 struct buffer_head *bh_result, int create)
1983{
1984 int ret;
1985 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1986 loff_t disksize = EXT4_I(inode)->i_disksize;
1987 handle_t *handle = NULL;
1988
1989 handle = ext4_journal_current_handle();
1990 BUG_ON(!handle);
1991 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
1992 bh_result, create, 0, EXT4_DELALLOC_RSVED);
1993 if (ret <= 0)
1994 return ret;
1995
1996 bh_result->b_size = (ret << inode->i_blkbits);
1997
1998 if (ext4_should_order_data(inode)) {
1999 int retval;
2000 retval = ext4_jbd2_file_inode(handle, inode);
2001 if (retval)
2002 /*
2003 * Failed to add inode for ordered mode. Don't
2004 * update file size
2005 */
2006 return retval;
2007 }
2008
2009 /*
2010 * Update on-disk size along with block allocation; we don't
2011 * use 'extend_disksize' as size may change within an already
2012 * allocated block -bzzz
2013 */
2014 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2015 if (disksize > i_size_read(inode))
2016 disksize = i_size_read(inode);
2017 if (disksize > EXT4_I(inode)->i_disksize) {
2018 ext4_update_i_disksize(inode, disksize);
2019 ret = ext4_mark_inode_dirty(handle, inode);
2020 return ret;
2021 }
2022 return 0;
2023}
2024
1917/* 2025/*
1918 * mpage_da_map_blocks - go through given space 2026 * mpage_da_map_blocks - go through given space
1919 * 2027 *
1920 * @mpd->lbh - bh describing space 2028 * @mpd - extent of blocks to map
1921 * @mpd->get_block - the filesystem's block mapper function
1922 * 2029 *
1923 * The function skips space we know is already mapped to disk blocks. 2030 * The function skips space we know is already mapped to disk blocks.
1924 * 2031 *
1925 */ 2032 */
1926static int mpage_da_map_blocks(struct mpage_da_data *mpd) 2033static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1927{ 2034{
1928 int err = 0; 2035 int err = 0;
1929 struct buffer_head new; 2036 struct buffer_head new;
1930 struct buffer_head *lbh = &mpd->lbh;
1931 sector_t next; 2037 sector_t next;
1932 2038
1933 /* 2039 /*
1934 * We consider only non-mapped and non-allocated blocks 2040 * We consider only non-mapped and non-allocated blocks
1935 */ 2041 */
1936 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 2042 if ((mpd->b_state & (1 << BH_Mapped)) &&
2043 !(mpd->b_state & (1 << BH_Delay)))
1937 return 0; 2044 return 0;
1938 new.b_state = lbh->b_state; 2045 new.b_state = mpd->b_state;
1939 new.b_blocknr = 0; 2046 new.b_blocknr = 0;
1940 new.b_size = lbh->b_size; 2047 new.b_size = mpd->b_size;
1941 next = lbh->b_blocknr; 2048 next = mpd->b_blocknr;
1942 /* 2049 /*
1943 * If we didn't accumulate anything 2050 * If we didn't accumulate anything
1944 * to write simply return 2051 * to write simply return
1945 */ 2052 */
1946 if (!new.b_size) 2053 if (!new.b_size)
1947 return 0; 2054 return 0;
1948 err = mpd->get_block(mpd->inode, next, &new, 1);
1949 if (err) {
1950 2055
1951 /* If get block returns with error 2056 err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
1952 * we simply return. Later writepage 2057 if (err) {
1953 * will redirty the page and writepages 2058 /*
1954 * will find the dirty page again 2059 * If get block returns with error we simply
2060 * return. Later writepage will redirty the page and
2061 * writepages will find the dirty page again
1955 */ 2062 */
1956 if (err == -EAGAIN) 2063 if (err == -EAGAIN)
1957 return 0; 2064 return 0;
1958 2065
1959 if (err == -ENOSPC && 2066 if (err == -ENOSPC &&
1960 ext4_count_free_blocks(mpd->inode->i_sb)) { 2067 ext4_count_free_blocks(mpd->inode->i_sb)) {
1961 mpd->retval = err; 2068 mpd->retval = err;
1962 return 0; 2069 return 0;
1963 } 2070 }
1964 2071
1965 /* 2072 /*
1966 * get block failure will cause us 2073 * get block failure will cause us to loop in
1967 * to loop in writepages. Because 2074 * writepages, because a_ops->writepage won't be able
1968 * a_ops->writepage won't be able to 2075 * to make progress. The page will be redirtied by
1969 * make progress. The page will be redirtied 2076 * writepage and writepages will again try to write
1970 * by writepage and writepages will again 2077 * the same.
1971 * try to write the same.
1972 */ 2078 */
1973 printk(KERN_EMERG "%s block allocation failed for inode %lu " 2079 printk(KERN_EMERG "%s block allocation failed for inode %lu "
1974 "at logical offset %llu with max blocks " 2080 "at logical offset %llu with max blocks "
1975 "%zd with error %d\n", 2081 "%zd with error %d\n",
1976 __func__, mpd->inode->i_ino, 2082 __func__, mpd->inode->i_ino,
1977 (unsigned long long)next, 2083 (unsigned long long)next,
1978 lbh->b_size >> mpd->inode->i_blkbits, err); 2084 mpd->b_size >> mpd->inode->i_blkbits, err);
1979 printk(KERN_EMERG "This should not happen!! " 2085 printk(KERN_EMERG "This should not happen!! "
1980 "Data will be lost\n"); 2086 "Data will be lost\n");
1981 if (err == -ENOSPC) { 2087 if (err == -ENOSPC) {
@@ -1983,7 +2089,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1983 } 2089 }
1984 /* invalidate all the pages */ 2090 /* invalidate all the pages */
1985 ext4_da_block_invalidatepages(mpd, next, 2091 ext4_da_block_invalidatepages(mpd, next,
1986 lbh->b_size >> mpd->inode->i_blkbits); 2092 mpd->b_size >> mpd->inode->i_blkbits);
1987 return err; 2093 return err;
1988 } 2094 }
1989 BUG_ON(new.b_size == 0); 2095 BUG_ON(new.b_size == 0);
@@ -1995,7 +2101,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
1995 * If blocks are delayed marked, we need to 2101 * If blocks are delayed marked, we need to
1996 * put actual blocknr and drop delayed bit 2102 * put actual blocknr and drop delayed bit
1997 */ 2103 */
1998 if (buffer_delay(lbh) || buffer_unwritten(lbh)) 2104 if ((mpd->b_state & (1 << BH_Delay)) ||
2105 (mpd->b_state & (1 << BH_Unwritten)))
1999 mpage_put_bnr_to_bhs(mpd, next, &new); 2106 mpage_put_bnr_to_bhs(mpd, next, &new);
2000 2107
2001 return 0; 2108 return 0;
@@ -2014,12 +2121,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2014 * the function is used to collect contig. blocks in same state 2121 * the function is used to collect contig. blocks in same state
2015 */ 2122 */
2016static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 2123static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
2017 sector_t logical, struct buffer_head *bh) 2124 sector_t logical, size_t b_size,
2125 unsigned long b_state)
2018{ 2126{
2019 sector_t next; 2127 sector_t next;
2020 size_t b_size = bh->b_size; 2128 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
2021 struct buffer_head *lbh = &mpd->lbh;
2022 int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
2023 2129
2024 /* check if the reserved journal credits might overflow */ 2130 /* check if the reserved journal credits might overflow */
2025 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { 2131 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
@@ -2046,19 +2152,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
2046 /* 2152 /*
2047 * First block in the extent 2153 * First block in the extent
2048 */ 2154 */
2049 if (lbh->b_size == 0) { 2155 if (mpd->b_size == 0) {
2050 lbh->b_blocknr = logical; 2156 mpd->b_blocknr = logical;
2051 lbh->b_size = b_size; 2157 mpd->b_size = b_size;
2052 lbh->b_state = bh->b_state & BH_FLAGS; 2158 mpd->b_state = b_state & BH_FLAGS;
2053 return; 2159 return;
2054 } 2160 }
2055 2161
2056 next = lbh->b_blocknr + nrblocks; 2162 next = mpd->b_blocknr + nrblocks;
2057 /* 2163 /*
2058 * Can we merge the block to our big extent? 2164 * Can we merge the block to our big extent?
2059 */ 2165 */
2060 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { 2166 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
2061 lbh->b_size += b_size; 2167 mpd->b_size += b_size;
2062 return; 2168 return;
2063 } 2169 }
2064 2170
@@ -2087,7 +2193,7 @@ static int __mpage_da_writepage(struct page *page,
2087{ 2193{
2088 struct mpage_da_data *mpd = data; 2194 struct mpage_da_data *mpd = data;
2089 struct inode *inode = mpd->inode; 2195 struct inode *inode = mpd->inode;
2090 struct buffer_head *bh, *head, fake; 2196 struct buffer_head *bh, *head;
2091 sector_t logical; 2197 sector_t logical;
2092 2198
2093 if (mpd->io_done) { 2199 if (mpd->io_done) {
@@ -2129,9 +2235,9 @@ static int __mpage_da_writepage(struct page *page,
2129 /* 2235 /*
2130 * ... and blocks 2236 * ... and blocks
2131 */ 2237 */
2132 mpd->lbh.b_size = 0; 2238 mpd->b_size = 0;
2133 mpd->lbh.b_state = 0; 2239 mpd->b_state = 0;
2134 mpd->lbh.b_blocknr = 0; 2240 mpd->b_blocknr = 0;
2135 } 2241 }
2136 2242
2137 mpd->next_page = page->index + 1; 2243 mpd->next_page = page->index + 1;
@@ -2139,16 +2245,8 @@ static int __mpage_da_writepage(struct page *page,
2139 (PAGE_CACHE_SHIFT - inode->i_blkbits); 2245 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2140 2246
2141 if (!page_has_buffers(page)) { 2247 if (!page_has_buffers(page)) {
2142 /* 2248 mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
2143 * There are no attached buffer heads yet (mmap?)
2144 * we treat the page as full of dirty blocks
2145 */
2146 bh = &fake;
2147 bh->b_size = PAGE_CACHE_SIZE;
2148 bh->b_state = 0;
2149 set_buffer_dirty(bh);
2150 set_buffer_uptodate(bh);
2151 mpage_add_bh_to_extent(mpd, logical, bh);
2152 if (mpd->io_done) 2250 if (mpd->io_done)
2153 return MPAGE_DA_EXTENT_TAIL; 2251 return MPAGE_DA_EXTENT_TAIL;
2154 } else { 2252 } else {
@@ -2166,8 +2264,10 @@ static int __mpage_da_writepage(struct page *page,
2166 * with the page in ext4_da_writepage 2264 * with the page in ext4_da_writepage
2167 */ 2265 */
2168 if (buffer_dirty(bh) && 2266 if (buffer_dirty(bh) &&
2169 (!buffer_mapped(bh) || buffer_delay(bh))) { 2267 (!buffer_mapped(bh) || buffer_delay(bh))) {
2170 mpage_add_bh_to_extent(mpd, logical, bh); 2268 mpage_add_bh_to_extent(mpd, logical,
2269 bh->b_size,
2270 bh->b_state);
2171 if (mpd->io_done) 2271 if (mpd->io_done)
2172 return MPAGE_DA_EXTENT_TAIL; 2272 return MPAGE_DA_EXTENT_TAIL;
2173 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { 2273 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
@@ -2179,9 +2279,8 @@ static int __mpage_da_writepage(struct page *page,
2179 * unmapped buffer_head later we need to 2279 * unmapped buffer_head later we need to
2180 * use the b_state flag of that buffer_head. 2280 * use the b_state flag of that buffer_head.
2181 */ 2281 */
2182 if (mpd->lbh.b_size == 0) 2282 if (mpd->b_size == 0)
2183 mpd->lbh.b_state = 2283 mpd->b_state = bh->b_state & BH_FLAGS;
2184 bh->b_state & BH_FLAGS;
2185 } 2284 }
2186 logical++; 2285 logical++;
2187 } while ((bh = bh->b_this_page) != head); 2286 } while ((bh = bh->b_this_page) != head);
@@ -2191,51 +2290,6 @@ static int __mpage_da_writepage(struct page *page,
2191} 2290}
2192 2291
2193/* 2292/*
2194 * mpage_da_writepages - walk the list of dirty pages of the given
2195 * address space, allocates non-allocated blocks, maps newly-allocated
2196 * blocks to existing bhs and issue IO them
2197 *
2198 * @mapping: address space structure to write
2199 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2200 * @get_block: the filesystem's block mapper function.
2201 *
2202 * This is a library function, which implements the writepages()
2203 * address_space_operation.
2204 */
2205static int mpage_da_writepages(struct address_space *mapping,
2206 struct writeback_control *wbc,
2207 struct mpage_da_data *mpd)
2208{
2209 int ret;
2210
2211 if (!mpd->get_block)
2212 return generic_writepages(mapping, wbc);
2213
2214 mpd->lbh.b_size = 0;
2215 mpd->lbh.b_state = 0;
2216 mpd->lbh.b_blocknr = 0;
2217 mpd->first_page = 0;
2218 mpd->next_page = 0;
2219 mpd->io_done = 0;
2220 mpd->pages_written = 0;
2221 mpd->retval = 0;
2222
2223 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2224 /*
2225 * Handle last extent of pages
2226 */
2227 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2228 if (mpage_da_map_blocks(mpd) == 0)
2229 mpage_da_submit_io(mpd);
2230
2231 mpd->io_done = 1;
2232 ret = MPAGE_DA_EXTENT_TAIL;
2233 }
2234 wbc->nr_to_write -= mpd->pages_written;
2235 return ret;
2236}
2237
2238/*
2239 * this is a special callback for ->write_begin() only 2293 * this is a special callback for ->write_begin() only
2240 * its intention is to return a mapped block or reserve space 2294 * its intention is to return a mapped block or reserve space
2241 */ 2295 */
@@ -2274,51 +2328,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2274 2328
2275 return ret; 2329 return ret;
2276} 2330}
2277#define EXT4_DELALLOC_RSVED 1
2278static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
2279 struct buffer_head *bh_result, int create)
2280{
2281 int ret;
2282 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
2283 loff_t disksize = EXT4_I(inode)->i_disksize;
2284 handle_t *handle = NULL;
2285
2286 handle = ext4_journal_current_handle();
2287 BUG_ON(!handle);
2288 ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
2289 bh_result, create, 0, EXT4_DELALLOC_RSVED);
2290 if (ret > 0) {
2291
2292 bh_result->b_size = (ret << inode->i_blkbits);
2293
2294 if (ext4_should_order_data(inode)) {
2295 int retval;
2296 retval = ext4_jbd2_file_inode(handle, inode);
2297 if (retval)
2298 /*
2299 * Failed to add inode for ordered
2300 * mode. Don't update file size
2301 */
2302 return retval;
2303 }
2304
2305 /*
2306 * Update on-disk size along with block allocation
2307 * we don't use 'extend_disksize' as size may change
2308 * within already allocated block -bzzz
2309 */
2310 disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
2311 if (disksize > i_size_read(inode))
2312 disksize = i_size_read(inode);
2313 if (disksize > EXT4_I(inode)->i_disksize) {
2314 ext4_update_i_disksize(inode, disksize);
2315 ret = ext4_mark_inode_dirty(handle, inode);
2316 return ret;
2317 }
2318 ret = 0;
2319 }
2320 return ret;
2321}
2322 2331
2323static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2332static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2324{ 2333{
@@ -2569,8 +2578,38 @@ retry:
2569 dump_stack(); 2578 dump_stack();
2570 goto out_writepages; 2579 goto out_writepages;
2571 } 2580 }
2572 mpd.get_block = ext4_da_get_block_write; 2581
2573 ret = mpage_da_writepages(mapping, wbc, &mpd); 2582 /*
2583 * Now call __mpage_da_writepage to find the next
2584 * contiguous region of logical blocks that need
2585 * blocks to be allocated by ext4. We don't actually
2586 * submit the blocks for I/O here, even though
2587 * write_cache_pages thinks it will, and will set the
2588 * pages as clean for write before calling
2589 * __mpage_da_writepage().
2590 */
2591 mpd.b_size = 0;
2592 mpd.b_state = 0;
2593 mpd.b_blocknr = 0;
2594 mpd.first_page = 0;
2595 mpd.next_page = 0;
2596 mpd.io_done = 0;
2597 mpd.pages_written = 0;
2598 mpd.retval = 0;
2599 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
2600 &mpd);
2601 /*
2602 * If we have a contiguous extent of pages and we
2603 * haven't done the I/O yet, map the blocks and submit
2604 * them for I/O.
2605 */
2606 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
2607 if (mpage_da_map_blocks(&mpd) == 0)
2608 mpage_da_submit_io(&mpd);
2609 mpd.io_done = 1;
2610 ret = MPAGE_DA_EXTENT_TAIL;
2611 }
2612 wbc->nr_to_write -= mpd.pages_written;
2574 2613
2575 ext4_journal_stop(handle); 2614 ext4_journal_stop(handle);
2576 2615
@@ -2846,6 +2885,48 @@ out:
2846 return; 2885 return;
2847} 2886}
2848 2887
2888/*
2889 * Force all delayed allocation blocks to be allocated for a given inode.
2890 */
2891int ext4_alloc_da_blocks(struct inode *inode)
2892{
2893 if (!EXT4_I(inode)->i_reserved_data_blocks &&
2894 !EXT4_I(inode)->i_reserved_meta_blocks)
2895 return 0;
2896
2897 /*
2898 * We do something simple for now. The filemap_flush() will
2899 * also start triggering a write of the data blocks, which is
2900 * not strictly speaking necessary (and for users of
2901 * laptop_mode, not even desirable). However, to do otherwise
2902 * would require replicating code paths in:
2903 *
2904 * ext4_da_writepages() ->
2905 * write_cache_pages() ---> (via passed in callback function)
2906 * __mpage_da_writepage() -->
2907 * mpage_add_bh_to_extent()
2908 * mpage_da_map_blocks()
2909 *
2910 * The problem is that write_cache_pages(), located in
2911 * mm/page-writeback.c, marks pages clean in preparation for
2912 * doing I/O, which is not desirable if we're not planning on
2913 * doing I/O at all.
2914 *
2915 * We could call write_cache_pages(), and then redirty all of
2916 * the pages by calling redirty_page_for_writepage() but that
2917 * would be ugly in the extreme. So instead we would need to
2918 * replicate parts of the code in the above functions,
2919 * simplifying them because we wouldn't actually intend to
2920 * write out the pages, but rather only collect contiguous
2921 * logical block extents, call the multi-block allocator, and
2922 * then update the buffer heads with the block allocations.
2923 *
2924 * For now, though, we'll cheat by calling filemap_flush(),
2925 * which will map the blocks, and start the I/O, but not
2926 * actually wait for the I/O to complete.
2927 */
2928 return filemap_flush(inode->i_mapping);
2929}
2849 2930
2850/* 2931/*
2851 * bmap() is special. It gets used by applications such as lilo and by 2932 * bmap() is special. It gets used by applications such as lilo and by
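
The long comment in ext4_alloc_da_blocks() boils down to "start the I/O, but don't wait for it". The nearest userspace analogue of that filemap_flush() behaviour is sync_file_range(2) with SYNC_FILE_RANGE_WRITE alone, shown here as a hedged illustration of the same semantics (Linux-specific, and not the ioctl this series adds):

/* Start writeback of a file's dirty pages without waiting for it to
 * finish, the userspace analogue of the filemap_flush() call above. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	/* offset 0, nbytes 0 => whole file; WRITE alone does not block
	 * on completion, matching "start the I/O, but not wait". */
	if (sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) < 0)
		perror("sync_file_range");
	close(fd);
	return 0;
}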
@@ -3868,6 +3949,9 @@ void ext4_truncate(struct inode *inode)
3868 if (!ext4_can_truncate(inode)) 3949 if (!ext4_can_truncate(inode))
3869 return; 3950 return;
3870 3951
3952 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3953 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3954
3871 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 3955 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
3872 ext4_ext_truncate(inode); 3956 ext4_ext_truncate(inode);
3873 return; 3957 return;
@@ -4110,12 +4194,7 @@ make_io:
4110 unsigned num; 4194 unsigned num;
4111 4195
4112 table = ext4_inode_table(sb, gdp); 4196 table = ext4_inode_table(sb, gdp);
4113 /* Make sure s_inode_readahead_blks is a power of 2 */ 4197 /* s_inode_readahead_blks is always a power of 2 */
4114 while (EXT4_SB(sb)->s_inode_readahead_blks &
4115 (EXT4_SB(sb)->s_inode_readahead_blks-1))
4116 EXT4_SB(sb)->s_inode_readahead_blks =
4117 (EXT4_SB(sb)->s_inode_readahead_blks &
4118 (EXT4_SB(sb)->s_inode_readahead_blks-1));
4119 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); 4198 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
4120 if (table > b) 4199 if (table > b)
4121 b = table; 4200 b = table;
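
The deleted loop only existed to force s_inode_readahead_blks down to a power of two so that the surviving line can round `block` down with a plain mask; per the new comment, the invariant is now guaranteed elsewhere. The masking idiom on its own, as a standalone check:

/* b = block & ~(blks - 1) rounds block down to a multiple of blks,
 * but only when blks is a power of two. */
#include <stdio.h>

int main(void)
{
	unsigned long blks = 32;		/* must be a power of two */
	unsigned long blocks[] = { 100, 127, 128, 4097 };

	for (int i = 0; i < 4; i++) {
		unsigned long b = blocks[i] & ~(blks - 1);
		printf("%lu -> %lu\n", blocks[i], b);
	}
	/* x & (x - 1) == 0 is the power-of-two test the old loop relied on */
	printf("power of two? %s\n", (blks & (blks - 1)) == 0 ? "yes" : "no");
	return 0;
}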
@@ -4287,6 +4366,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4287 ei->i_disksize = inode->i_size; 4366 ei->i_disksize = inode->i_size;
4288 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4367 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
4289 ei->i_block_group = iloc.block_group; 4368 ei->i_block_group = iloc.block_group;
4369 ei->i_last_alloc_group = ~0;
4290 /* 4370 /*
4291 * NOTE! The in-memory inode i_data array is in little-endian order 4371 * NOTE! The in-memory inode i_data array is in little-endian order
4292 * even on big-endian machines: we do NOT byteswap the block numbers! 4372 * even on big-endian machines: we do NOT byteswap the block numbers!
@@ -4329,6 +4409,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4329 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4409 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4330 } 4410 }
4331 4411
4412 if (ei->i_flags & EXT4_EXTENTS_FL) {
4413 /* Validate extent which is part of inode */
4414 ret = ext4_ext_check_inode(inode);
4415 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4416 (S_ISLNK(inode->i_mode) &&
4417 !ext4_inode_is_fast_symlink(inode))) {
4418 /* Validate block references which are part of inode */
4419 ret = ext4_check_inode_blockref(inode);
4420 }
4421 if (ret) {
4422 brelse(bh);
4423 goto bad_inode;
4424 }
4425
4332 if (S_ISREG(inode->i_mode)) { 4426 if (S_ISREG(inode->i_mode)) {
4333 inode->i_op = &ext4_file_inode_operations; 4427 inode->i_op = &ext4_file_inode_operations;
4334 inode->i_fop = &ext4_file_operations; 4428 inode->i_fop = &ext4_file_operations;
@@ -4345,7 +4439,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4345 inode->i_op = &ext4_symlink_inode_operations; 4439 inode->i_op = &ext4_symlink_inode_operations;
4346 ext4_set_aops(inode); 4440 ext4_set_aops(inode);
4347 } 4441 }
4348 } else { 4442 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
4443 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
4349 inode->i_op = &ext4_special_inode_operations; 4444 inode->i_op = &ext4_special_inode_operations;
4350 if (raw_inode->i_block[0]) 4445 if (raw_inode->i_block[0])
4351 init_special_inode(inode, inode->i_mode, 4446 init_special_inode(inode, inode->i_mode,
@@ -4353,6 +4448,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4353 else 4448 else
4354 init_special_inode(inode, inode->i_mode, 4449 init_special_inode(inode, inode->i_mode,
4355 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4450 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4451 } else {
4452 brelse(bh);
4453 ret = -EIO;
4454 ext4_error(inode->i_sb, __func__,
4455 "bogus i_mode (%o) for inode=%lu",
4456 inode->i_mode, inode->i_ino);
4457 goto bad_inode;
4356 } 4458 }
4357 brelse(iloc.bh); 4459 brelse(iloc.bh);
4358 ext4_set_inode_flags(inode); 4460 ext4_set_inode_flags(inode);
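
The previous hunk replaces a catch-all else with an explicit whitelist, so a corrupted i_mode is now rejected with -EIO instead of silently becoming a special file. The S_IS* predicates it relies on are the same ones <sys/stat.h> gives userspace; a small classifier in the same spirit:

/* Every valid mode matches exactly one predicate; anything else is
 * the "bogus i_mode" case the patch adds. */
#include <stdio.h>
#include <sys/stat.h>

static const char *mode_name(mode_t m)
{
	if (S_ISREG(m))  return "regular file";
	if (S_ISDIR(m))  return "directory";
	if (S_ISLNK(m))  return "symlink";
	if (S_ISCHR(m))  return "character device";
	if (S_ISBLK(m))  return "block device";
	if (S_ISFIFO(m)) return "fifo";
	if (S_ISSOCK(m)) return "socket";
	return "bogus i_mode";
}

int main(int argc, char **argv)
{
	struct stat st;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return 1;
	}
	if (lstat(argv[1], &st) < 0) {
		perror("lstat");
		return 1;
	}
	printf("%s: %s\n", argv[1], mode_name(st.st_mode));
	return 0;
}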
@@ -5146,8 +5248,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
5146 return !buffer_mapped(bh); 5248 return !buffer_mapped(bh);
5147} 5249}
5148 5250
5149int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) 5251int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5150{ 5252{
5253 struct page *page = vmf->page;
5151 loff_t size; 5254 loff_t size;
5152 unsigned long len; 5255 unsigned long len;
5153 int ret = -EINVAL; 5256 int ret = -EINVAL;
@@ -5199,6 +5302,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
5199 goto out_unlock; 5302 goto out_unlock;
5200 ret = 0; 5303 ret = 0;
5201out_unlock: 5304out_unlock:
5305 if (ret)
5306 ret = VM_FAULT_SIGBUS;
5202 up_read(&inode->i_alloc_sem); 5307 up_read(&inode->i_alloc_sem);
5203 return ret; 5308 return ret;
5204} 5309}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 42dc83fb247a..91e75f7a9e73 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
48 if (err) 48 if (err)
49 return err; 49 return err;
50 50
51 if (!S_ISDIR(inode->i_mode)) 51 flags = ext4_mask_flags(inode->i_mode, flags);
52 flags &= ~EXT4_DIRSYNC_FL;
53 52
54 err = -EPERM; 53 err = -EPERM;
55 mutex_lock(&inode->i_mutex); 54 mutex_lock(&inode->i_mutex);
@@ -263,6 +262,20 @@ setversion_out:
263 return err; 262 return err;
264 } 263 }
265 264
265 case EXT4_IOC_ALLOC_DA_BLKS:
266 {
267 int err;
268 if (!is_owner_or_cap(inode))
269 return -EACCES;
270
271 err = mnt_want_write(filp->f_path.mnt);
272 if (err)
273 return err;
274 err = ext4_alloc_da_blocks(inode);
275 mnt_drop_write(filp->f_path.mnt);
276 return err;
277 }
278
266 default: 279 default:
267 return -ENOTTY; 280 return -ENOTTY;
268 } 281 }
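
A hedged usage sketch for the new ioctl. The request number below mirrors what the kernel's fs/ext4/ext4.h defines for EXT4_IOC_ALLOC_DA_BLKS (_IO('f', 12)); treat that value as an assumption here, since the header is not part of this hunk:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#ifndef EXT4_IOC_ALLOC_DA_BLKS
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)	/* assumed, see lead-in */
#endif

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-ext4>\n", argv[0]);
		return 1;
	}
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	/* Forces delayed-allocation blocks to be allocated; the handler
	 * above requires the caller to be the owner or hold CAP_FOWNER. */
	if (ioctl(fd, EXT4_IOC_ALLOC_DA_BLKS) < 0)
		perror("ioctl(EXT4_IOC_ALLOC_DA_BLKS)");
	close(fd);
	return 0;
}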
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b038188bd039..f871677a7984 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -46,22 +46,23 @@
 46 * The allocation request involve request for multiple number of blocks 46 * An allocation request involves a request for multiple blocks
 47 * near to the goal(block) value specified. 47 * near the goal (block) value specified.
48 * 48 *
49 * During initialization phase of the allocator we decide to use the group 49 * During initialization phase of the allocator we decide to use the
50 * preallocation or inode preallocation depending on the size file. The 50 * group preallocation or inode preallocation depending on the size of
51 * size of the file could be the resulting file size we would have after 51 * the file. The size of the file could be the resulting file size we
 52 * allocation or the current file size which ever is larger. If the size is 52 * would have after allocation, or the current file size, whichever
53 * less that sbi->s_mb_stream_request we select the group 53 * is larger. If the size is less than sbi->s_mb_stream_request we
54 * preallocation. The default value of s_mb_stream_request is 16 54 * select to use the group preallocation. The default value of
55 * blocks. This can also be tuned via 55 * s_mb_stream_request is 16 blocks. This can also be tuned via
56 * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms 56 * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
57 * of number of blocks. 57 * terms of number of blocks.
58 * 58 *
 59 * The main motivation for having small file use group preallocation is to 59 * The main motivation for having small files use group preallocation is to
60 * ensure that we have small file closer in the disk. 60 * ensure that we have small files closer together on the disk.
61 * 61 *
62 * First stage the allocator looks at the inode prealloc list 62 * First stage the allocator looks at the inode prealloc list,
63 * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for 63 * ext4_inode_info->i_prealloc_list, which contains list of prealloc
64 * this particular inode. The inode prealloc space is represented as: 64 * spaces for this particular inode. The inode prealloc space is
65 * represented as:
65 * 66 *
66 * pa_lstart -> the logical start block for this prealloc space 67 * pa_lstart -> the logical start block for this prealloc space
67 * pa_pstart -> the physical start block for this prealloc space 68 * pa_pstart -> the physical start block for this prealloc space
@@ -121,29 +122,29 @@
121 * list. In case of inode preallocation we follow a list of heuristics 122 * list. In case of inode preallocation we follow a list of heuristics
122 * based on file size. This can be found in ext4_mb_normalize_request. If 123 * based on file size. This can be found in ext4_mb_normalize_request. If
123 * we are doing a group prealloc we try to normalize the request to 124 * we are doing a group prealloc we try to normalize the request to
124 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to 125 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
125 * 512 blocks. This can be tuned via 126 * 512 blocks. This can be tuned via
 126 * /proc/fs/ext4/<partition/group_prealloc. The value is represented in 127 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
127 * terms of number of blocks. If we have mounted the file system with -O 128 * terms of number of blocks. If we have mounted the file system with -O
128 * stripe=<value> option the group prealloc request is normalized to the 129 * stripe=<value> option the group prealloc request is normalized to the
129 * stripe value (sbi->s_stripe) 130 * stripe value (sbi->s_stripe)
130 * 131 *
 131 * The regular allocator(using the buddy cache) support few tunables. 132 * The regular allocator (using the buddy cache) supports a few tunables.
132 * 133 *
133 * /proc/fs/ext4/<partition>/min_to_scan 134 * /sys/fs/ext4/<partition>/mb_min_to_scan
134 * /proc/fs/ext4/<partition>/max_to_scan 135 * /sys/fs/ext4/<partition>/mb_max_to_scan
135 * /proc/fs/ext4/<partition>/order2_req 136 * /sys/fs/ext4/<partition>/mb_order2_req
136 * 137 *
137 * The regular allocator use buddy scan only if the request len is power of 138 * The regular allocator uses buddy scan only if the request len is power of
138 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 139 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
139 * value of s_mb_order2_reqs can be tuned via 140 * value of s_mb_order2_reqs can be tuned via
140 * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to 141 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
 141 * stripe size (sbi->s_stripe), we try to search for contigous block in 142 * stripe size (sbi->s_stripe), we try to search for contiguous block in
142 * stripe size. This should result in better allocation on RAID setup. If 143 * stripe size. This should result in better allocation on RAID setups. If
143 * not we search in the specific group using bitmap for best extents. The 144 * not, we search in the specific group using bitmap for best extents. The
144 * tunable min_to_scan and max_to_scan controll the behaviour here. 145 * tunable min_to_scan and max_to_scan control the behaviour here.
 145 * min_to_scan indicate how long the mballoc __must__ look for a best 146 * min_to_scan indicates how long the mballoc __must__ look for a best
146 * extent and max_to_scanindicate how long the mballoc __can__ look for a 147 * extent and max_to_scan indicates how long the mballoc __can__ look for a
147 * best extent in the found extents. Searching for the blocks starts with 148 * best extent in the found extents. Searching for the blocks starts with
148 * the group specified as the goal value in allocation context via 149 * the group specified as the goal value in allocation context via
149 * ac_g_ex. Each group is first checked based on the criteria whether it 150 * ac_g_ex. Each group is first checked based on the criteria whether it
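
Each of the knobs named above is an ordinary sysfs attribute, so tuning is a one-line write. A minimal sketch, assuming the filesystem sits on sda1 (the device name and the value are placeholders):

#include <stdio.h>

int main(void)
{
	/* path layout follows the comment above; "sda1" is a placeholder */
	const char *path = "/sys/fs/ext4/sda1/mb_stream_req";
	FILE *f = fopen(path, "w");

	if (!f) { perror(path); return 1; }
	/* request group preallocation for files up to 32 blocks */
	fprintf(f, "32\n");
	fclose(f);
	return 0;
}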
@@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
337 ext4_group_t group); 338 ext4_group_t group);
338static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 339static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
339 ext4_group_t group); 340 ext4_group_t group);
340static int ext4_mb_init_per_dev_proc(struct super_block *sb);
341static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
342static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
343 342
344 343
@@ -1726,6 +1725,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1726{ 1725{
1727 unsigned free, fragments; 1726 unsigned free, fragments;
1728 unsigned i, bits; 1727 unsigned i, bits;
1728 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1729 struct ext4_group_desc *desc; 1729 struct ext4_group_desc *desc;
1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); 1730 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1731 1731
@@ -1747,6 +1747,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1747 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1748 return 0; 1748 return 0;
1749 1749
1750 /* Avoid using the first bg of a flexgroup for data files */
1751 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1752 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1753 ((group % flex_size) == 0))
1754 return 0;
1755
1750 bits = ac->ac_sb->s_blocksize_bits + 1; 1756 bits = ac->ac_sb->s_blocksize_bits + 1;
1751 for (i = ac->ac_2order; i <= bits; i++) 1757 for (i = ac->ac_2order; i <= bits; i++)
1752 if (grp->bb_counters[i] > 0) 1758 if (grp->bb_counters[i] > 0)
@@ -1971,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1971 /* 1977 /*
1972 * We search using buddy data only if the order of the request 1978 * We search using buddy data only if the order of the request
 1973 * is greater than equal to the sbi_s_mb_order2_reqs 1979 * is greater than or equal to sbi->s_mb_order2_reqs
1974 * You can tune it via /proc/fs/ext4/<partition>/order2_req 1980 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
1975 */ 1981 */
1976 if (i >= sbi->s_mb_order2_reqs) { 1982 if (i >= sbi->s_mb_order2_reqs) {
1977 /* 1983 /*
@@ -2693,7 +2699,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2693 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); 2699 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
2694 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2700 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2695 if (sbi->s_mb_maxs == NULL) { 2701 if (sbi->s_mb_maxs == NULL) {
2696 kfree(sbi->s_mb_maxs); 2702 kfree(sbi->s_mb_offsets);
2697 return -ENOMEM; 2703 return -ENOMEM;
2698 } 2704 }
2699 2705
@@ -2746,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2746 spin_lock_init(&lg->lg_prealloc_lock); 2752 spin_lock_init(&lg->lg_prealloc_lock);
2747 } 2753 }
2748 2754
2749 ext4_mb_init_per_dev_proc(sb);
2750 ext4_mb_history_init(sb); 2755 ext4_mb_history_init(sb);
2751 2756
2752 if (sbi->s_journal) 2757 if (sbi->s_journal)
@@ -2829,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb)
2829 2834
2830 free_percpu(sbi->s_locality_groups); 2835 free_percpu(sbi->s_locality_groups);
2831 ext4_mb_history_release(sb); 2836 ext4_mb_history_release(sb);
2832 ext4_mb_destroy_per_dev_proc(sb);
2833 2837
2834 return 0; 2838 return 0;
2835} 2839}
@@ -2890,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2890 mb_debug("freed %u blocks in %u structures\n", count, count2); 2894 mb_debug("freed %u blocks in %u structures\n", count, count2);
2891} 2895}
2892 2896
2893#define EXT4_MB_STATS_NAME "stats"
2894#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2895#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
2896#define EXT4_MB_ORDER2_REQ "order2_req"
2897#define EXT4_MB_STREAM_REQ "stream_req"
2898#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
2899
2900static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2901{
2902#ifdef CONFIG_PROC_FS
2903 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2904 struct ext4_sb_info *sbi = EXT4_SB(sb);
2905 struct proc_dir_entry *proc;
2906
2907 if (sbi->s_proc == NULL)
2908 return -EINVAL;
2909
2910 EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
2911 EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
2912 EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
2913 EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
2914 EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
2915 EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
2916 return 0;
2917
2918err_out:
2919 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2920 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2921 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2922 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2923 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2924 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2925 return -ENOMEM;
2926#else
2927 return 0;
2928#endif
2929}
2930
2931static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2932{
2933#ifdef CONFIG_PROC_FS
2934 struct ext4_sb_info *sbi = EXT4_SB(sb);
2935
2936 if (sbi->s_proc == NULL)
2937 return -EINVAL;
2938
2939 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2940 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2941 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2942 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2943 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2944 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2945#endif
2946 return 0;
2947}
2948
2949int __init init_ext4_mballoc(void) 2897int __init init_ext4_mballoc(void)
2950{ 2898{
2951 ext4_pspace_cachep = 2899 ext4_pspace_cachep =
@@ -3096,9 +3044,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3096 if (sbi->s_log_groups_per_flex) { 3044 if (sbi->s_log_groups_per_flex) {
3097 ext4_group_t flex_group = ext4_flex_group(sbi, 3045 ext4_group_t flex_group = ext4_flex_group(sbi,
3098 ac->ac_b_ex.fe_group); 3046 ac->ac_b_ex.fe_group);
3099 spin_lock(sb_bgl_lock(sbi, flex_group)); 3047 atomic_sub(ac->ac_b_ex.fe_len,
3100 sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len; 3048 &sbi->s_flex_groups[flex_group].free_blocks);
3101 spin_unlock(sb_bgl_lock(sbi, flex_group));
3102 } 3049 }
3103 3050
3104 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 3051 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
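
Dropping sb_bgl_lock() is safe here because the critical section was a single add or sub; once free_blocks is an atomic counter, the read-modify-write happens atomically on its own. The same idea in portable C11 form (the kernel uses atomic_t with atomic_add/atomic_sub; this is shown only by analogy):

#include <stdatomic.h>
#include <stdio.h>

static atomic_long free_blocks = 4096;

/* was: lock; counter -= n; unlock */
static void alloc_blocks(long n)   { atomic_fetch_sub(&free_blocks, n); }
/* was: lock; counter += n; unlock */
static void release_blocks(long n) { atomic_fetch_add(&free_blocks, n); }

int main(void)
{
	alloc_blocks(128);
	release_blocks(32);
	printf("free_blocks = %ld\n", atomic_load(&free_blocks));
	return 0;
}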
@@ -3116,7 +3063,7 @@ out_err:
3116 * here we normalize request for locality group 3063 * here we normalize request for locality group
 3117 * Group request are normalized to s_strip size if we set the same via mount 3064 * Group requests are normalized to s_stripe size if we set the same via mount
3118 * option. If not we set it to s_mb_group_prealloc which can be configured via 3065 * option. If not we set it to s_mb_group_prealloc which can be configured via
3119 * /proc/fs/ext4/<partition>/group_prealloc 3066 * /sys/fs/ext4/<partition>/mb_group_prealloc
3120 * 3067 *
3121 * XXX: should we try to preallocate more than the group has now? 3068 * XXX: should we try to preallocate more than the group has now?
3122 */ 3069 */
@@ -3608,8 +3555,11 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3608 spin_unlock(&pa->pa_lock); 3555 spin_unlock(&pa->pa_lock);
3609 3556
3610 grp_blk = pa->pa_pstart; 3557 grp_blk = pa->pa_pstart;
3611 /* If linear, pa_pstart may be in the next group when pa is used up */ 3558 /*
3612 if (pa->pa_linear) 3559 * If doing group-based preallocation, pa_pstart may be in the
3560 * next group when pa is used up
3561 */
3562 if (pa->pa_type == MB_GROUP_PA)
3613 grp_blk--; 3563 grp_blk--;
3614 3564
3615 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL); 3565 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
@@ -3704,7 +3654,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3704 INIT_LIST_HEAD(&pa->pa_inode_list); 3654 INIT_LIST_HEAD(&pa->pa_inode_list);
3705 INIT_LIST_HEAD(&pa->pa_group_list); 3655 INIT_LIST_HEAD(&pa->pa_group_list);
3706 pa->pa_deleted = 0; 3656 pa->pa_deleted = 0;
3707 pa->pa_linear = 0; 3657 pa->pa_type = MB_INODE_PA;
3708 3658
3709 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3659 mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
3710 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3660 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
@@ -3767,7 +3717,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3767 INIT_LIST_HEAD(&pa->pa_inode_list); 3717 INIT_LIST_HEAD(&pa->pa_inode_list);
3768 INIT_LIST_HEAD(&pa->pa_group_list); 3718 INIT_LIST_HEAD(&pa->pa_group_list);
3769 pa->pa_deleted = 0; 3719 pa->pa_deleted = 0;
3770 pa->pa_linear = 1; 3720 pa->pa_type = MB_GROUP_PA;
3771 3721
3772 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3722 mb_debug("new group pa %p: %llu/%u for %u\n", pa,
3773 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3723 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
@@ -4021,7 +3971,7 @@ repeat:
4021 list_del_rcu(&pa->pa_inode_list); 3971 list_del_rcu(&pa->pa_inode_list);
4022 spin_unlock(pa->pa_obj_lock); 3972 spin_unlock(pa->pa_obj_lock);
4023 3973
4024 if (pa->pa_linear) 3974 if (pa->pa_type == MB_GROUP_PA)
4025 ext4_mb_release_group_pa(&e4b, pa, ac); 3975 ext4_mb_release_group_pa(&e4b, pa, ac);
4026 else 3976 else
4027 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); 3977 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
@@ -4121,7 +4071,7 @@ repeat:
4121 spin_unlock(&ei->i_prealloc_lock); 4071 spin_unlock(&ei->i_prealloc_lock);
4122 4072
4123 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) { 4073 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4124 BUG_ON(pa->pa_linear != 0); 4074 BUG_ON(pa->pa_type != MB_INODE_PA);
4125 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 4075 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4126 4076
4127 err = ext4_mb_load_buddy(sb, group, &e4b); 4077 err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -4232,7 +4182,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4232 * file is determined by the current size or the resulting size after 4182 * file is determined by the current size or the resulting size after
 4233 * allocation which ever is larger 4183 * allocation, whichever is larger
4234 * 4184 *
4235 * One can tune this size via /proc/fs/ext4/<partition>/stream_req 4185 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
4236 */ 4186 */
4237static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) 4187static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4238{ 4188{
@@ -4373,7 +4323,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4373 continue; 4323 continue;
4374 } 4324 }
4375 /* only lg prealloc space */ 4325 /* only lg prealloc space */
4376 BUG_ON(!pa->pa_linear); 4326 BUG_ON(pa->pa_type != MB_GROUP_PA);
4377 4327
4378 /* seems this one can be freed ... */ 4328 /* seems this one can be freed ... */
4379 pa->pa_deleted = 1; 4329 pa->pa_deleted = 1;
@@ -4442,7 +4392,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4442 pa_inode_list) { 4392 pa_inode_list) {
4443 spin_lock(&tmp_pa->pa_lock); 4393 spin_lock(&tmp_pa->pa_lock);
4444 if (tmp_pa->pa_deleted) { 4394 if (tmp_pa->pa_deleted) {
4445 spin_unlock(&pa->pa_lock); 4395 spin_unlock(&tmp_pa->pa_lock);
4446 continue; 4396 continue;
4447 } 4397 }
4448 if (!added && pa->pa_free < tmp_pa->pa_free) { 4398 if (!added && pa->pa_free < tmp_pa->pa_free) {
@@ -4479,7 +4429,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4479{ 4429{
4480 struct ext4_prealloc_space *pa = ac->ac_pa; 4430 struct ext4_prealloc_space *pa = ac->ac_pa;
4481 if (pa) { 4431 if (pa) {
4482 if (pa->pa_linear) { 4432 if (pa->pa_type == MB_GROUP_PA) {
4483 /* see comment in ext4_mb_use_group_pa() */ 4433 /* see comment in ext4_mb_use_group_pa() */
4484 spin_lock(&pa->pa_lock); 4434 spin_lock(&pa->pa_lock);
4485 pa->pa_pstart += ac->ac_b_ex.fe_len; 4435 pa->pa_pstart += ac->ac_b_ex.fe_len;
@@ -4499,7 +4449,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4499 * doesn't grow big. We need to release 4449 * doesn't grow big. We need to release
4500 * alloc_semp before calling ext4_mb_add_n_trim() 4450 * alloc_semp before calling ext4_mb_add_n_trim()
4501 */ 4451 */
4502 if (pa->pa_linear && likely(pa->pa_free)) { 4452 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4503 spin_lock(pa->pa_obj_lock); 4453 spin_lock(pa->pa_obj_lock);
4504 list_del_rcu(&pa->pa_inode_list); 4454 list_del_rcu(&pa->pa_inode_list);
4505 spin_unlock(pa->pa_obj_lock); 4455 spin_unlock(pa->pa_obj_lock);
@@ -4936,9 +4886,7 @@ do_more:
4936 4886
4937 if (sbi->s_log_groups_per_flex) { 4887 if (sbi->s_log_groups_per_flex) {
4938 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4888 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4939 spin_lock(sb_bgl_lock(sbi, flex_group)); 4889 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
4940 sbi->s_flex_groups[flex_group].free_blocks += count;
4941 spin_unlock(sb_bgl_lock(sbi, flex_group));
4942 } 4890 }
4943 4891
4944 ext4_mb_release_desc(&e4b); 4892 ext4_mb_release_desc(&e4b);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 10a2921baf14..dd9e6cd5f6cf 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -132,12 +132,15 @@ struct ext4_prealloc_space {
132 ext4_lblk_t pa_lstart; /* log. block */ 132 ext4_lblk_t pa_lstart; /* log. block */
133 unsigned short pa_len; /* len of preallocated chunk */ 133 unsigned short pa_len; /* len of preallocated chunk */
134 unsigned short pa_free; /* how many blocks are free */ 134 unsigned short pa_free; /* how many blocks are free */
135 unsigned short pa_linear; /* consumed in one direction 135 unsigned short pa_type; /* pa type. inode or group */
136 * strictly, for grp prealloc */
137 spinlock_t *pa_obj_lock; 136 spinlock_t *pa_obj_lock;
138 struct inode *pa_inode; /* hack, for history only */ 137 struct inode *pa_inode; /* hack, for history only */
139}; 138};
140 139
140enum {
141 MB_INODE_PA = 0,
142 MB_GROUP_PA = 1
143};
141 144
142struct ext4_free_extent { 145struct ext4_free_extent {
143 ext4_lblk_t fe_logical; 146 ext4_lblk_t fe_logical;
@@ -247,7 +250,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
247 250
248#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 251#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
249 252
250struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
251static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 253static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
252 struct ext4_free_extent *fex) 254 struct ext4_free_extent *fex)
253{ 255{
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 83410244d3ee..22098e1cd085 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -161,12 +161,12 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
161 struct dx_frame *frame, 161 struct dx_frame *frame,
162 int *err); 162 int *err);
163static void dx_release(struct dx_frame *frames); 163static void dx_release(struct dx_frame *frames);
164static int dx_make_map(struct ext4_dir_entry_2 *de, int size, 164static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
165 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 165 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
166static void dx_sort_map(struct dx_map_entry *map, unsigned count); 166static void dx_sort_map(struct dx_map_entry *map, unsigned count);
167static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, 167static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
168 struct dx_map_entry *offsets, int count); 168 struct dx_map_entry *offsets, int count, unsigned blocksize);
169static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); 169static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
170static void dx_insert_block(struct dx_frame *frame, 170static void dx_insert_block(struct dx_frame *frame,
171 u32 hash, ext4_lblk_t block); 171 u32 hash, ext4_lblk_t block);
172static int ext4_htree_next_block(struct inode *dir, __u32 hash, 172static int ext4_htree_next_block(struct inode *dir, __u32 hash,
@@ -180,14 +180,38 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
180static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 180static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
181 struct inode *inode); 181 struct inode *inode);
182 182
183unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
184{
185 unsigned len = le16_to_cpu(dlen);
186
187 if (len == EXT4_MAX_REC_LEN || len == 0)
188 return blocksize;
189 return (len & 65532) | ((len & 3) << 16);
190}
191
192__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
193{
194 if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
195 BUG();
196 if (len < 65536)
197 return cpu_to_le16(len);
198 if (len == blocksize) {
199 if (blocksize == 65536)
200 return cpu_to_le16(EXT4_MAX_REC_LEN);
201 else
202 return cpu_to_le16(0);
203 }
204 return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
205}
206
183/* 207/*
184 * p is at least 6 bytes before the end of page 208 * p is at least 6 bytes before the end of page
185 */ 209 */
186static inline struct ext4_dir_entry_2 * 210static inline struct ext4_dir_entry_2 *
187ext4_next_entry(struct ext4_dir_entry_2 *p) 211ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
188{ 212{
189 return (struct ext4_dir_entry_2 *)((char *)p + 213 return (struct ext4_dir_entry_2 *)((char *)p +
190 ext4_rec_len_from_disk(p->rec_len)); 214 ext4_rec_len_from_disk(p->rec_len, blocksize));
191} 215}
192 216
193/* 217/*
@@ -294,7 +318,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
294 space += EXT4_DIR_REC_LEN(de->name_len); 318 space += EXT4_DIR_REC_LEN(de->name_len);
295 names++; 319 names++;
296 } 320 }
297 de = ext4_next_entry(de); 321 de = ext4_next_entry(de, size);
298 } 322 }
299 printk("(%i)\n", names); 323 printk("(%i)\n", names);
300 return (struct stats) { names, space, 1 }; 324 return (struct stats) { names, space, 1 };
@@ -585,7 +609,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
585 top = (struct ext4_dir_entry_2 *) ((char *) de + 609 top = (struct ext4_dir_entry_2 *) ((char *) de +
586 dir->i_sb->s_blocksize - 610 dir->i_sb->s_blocksize -
587 EXT4_DIR_REC_LEN(0)); 611 EXT4_DIR_REC_LEN(0));
588 for (; de < top; de = ext4_next_entry(de)) { 612 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
589 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, 613 if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
590 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) 614 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
591 +((char *)de - bh->b_data))) { 615 +((char *)de - bh->b_data))) {
@@ -663,7 +687,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
663 } 687 }
664 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { 688 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
665 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; 689 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
666 de = ext4_next_entry(de); 690 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
667 if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) 691 if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
668 goto errout; 692 goto errout;
669 count++; 693 count++;
@@ -713,15 +737,15 @@ errout:
713 * Create map of hash values, offsets, and sizes, stored at end of block. 737 * Create map of hash values, offsets, and sizes, stored at end of block.
714 * Returns number of entries mapped. 738 * Returns number of entries mapped.
715 */ 739 */
716static int dx_make_map (struct ext4_dir_entry_2 *de, int size, 740static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
717 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) 741 struct dx_hash_info *hinfo,
742 struct dx_map_entry *map_tail)
718{ 743{
719 int count = 0; 744 int count = 0;
720 char *base = (char *) de; 745 char *base = (char *) de;
721 struct dx_hash_info h = *hinfo; 746 struct dx_hash_info h = *hinfo;
722 747
723 while ((char *) de < base + size) 748 while ((char *) de < base + blocksize) {
724 {
725 if (de->name_len && de->inode) { 749 if (de->name_len && de->inode) {
726 ext4fs_dirhash(de->name, de->name_len, &h); 750 ext4fs_dirhash(de->name, de->name_len, &h);
727 map_tail--; 751 map_tail--;
@@ -732,7 +756,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
732 cond_resched(); 756 cond_resched();
733 } 757 }
734 /* XXX: do we need to check rec_len == 0 case? -Chris */ 758 /* XXX: do we need to check rec_len == 0 case? -Chris */
735 de = ext4_next_entry(de); 759 de = ext4_next_entry(de, blocksize);
736 } 760 }
737 return count; 761 return count;
738} 762}
@@ -832,7 +856,8 @@ static inline int search_dirblock(struct buffer_head *bh,
832 return 1; 856 return 1;
833 } 857 }
834 /* prevent looping on a bad block */ 858 /* prevent looping on a bad block */
835 de_len = ext4_rec_len_from_disk(de->rec_len); 859 de_len = ext4_rec_len_from_disk(de->rec_len,
860 dir->i_sb->s_blocksize);
836 if (de_len <= 0) 861 if (de_len <= 0)
837 return -1; 862 return -1;
838 offset += de_len; 863 offset += de_len;
@@ -996,7 +1021,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
996 de = (struct ext4_dir_entry_2 *) bh->b_data; 1021 de = (struct ext4_dir_entry_2 *) bh->b_data;
997 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - 1022 top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
998 EXT4_DIR_REC_LEN(0)); 1023 EXT4_DIR_REC_LEN(0));
999 for (; de < top; de = ext4_next_entry(de)) { 1024 for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
1000 int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) 1025 int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
1001 + ((char *) de - bh->b_data); 1026 + ((char *) de - bh->b_data);
1002 1027
@@ -1052,8 +1077,16 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1052 return ERR_PTR(-EIO); 1077 return ERR_PTR(-EIO);
1053 } 1078 }
1054 inode = ext4_iget(dir->i_sb, ino); 1079 inode = ext4_iget(dir->i_sb, ino);
1055 if (IS_ERR(inode)) 1080 if (unlikely(IS_ERR(inode))) {
1056 return ERR_CAST(inode); 1081 if (PTR_ERR(inode) == -ESTALE) {
1082 ext4_error(dir->i_sb, __func__,
1083 "deleted inode referenced: %u",
1084 ino);
1085 return ERR_PTR(-EIO);
1086 } else {
1087 return ERR_CAST(inode);
1088 }
1089 }
1057 } 1090 }
1058 return d_splice_alias(inode, dentry); 1091 return d_splice_alias(inode, dentry);
1059} 1092}
@@ -1109,7 +1142,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
1109 * Returns pointer to last entry moved. 1142 * Returns pointer to last entry moved.
1110 */ 1143 */
1111static struct ext4_dir_entry_2 * 1144static struct ext4_dir_entry_2 *
1112dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1145dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1146 unsigned blocksize)
1113{ 1147{
1114 unsigned rec_len = 0; 1148 unsigned rec_len = 0;
1115 1149
@@ -1118,7 +1152,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1118 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1152 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1119 memcpy (to, de, rec_len); 1153 memcpy (to, de, rec_len);
1120 ((struct ext4_dir_entry_2 *) to)->rec_len = 1154 ((struct ext4_dir_entry_2 *) to)->rec_len =
1121 ext4_rec_len_to_disk(rec_len); 1155 ext4_rec_len_to_disk(rec_len, blocksize);
1122 de->inode = 0; 1156 de->inode = 0;
1123 map++; 1157 map++;
1124 to += rec_len; 1158 to += rec_len;
@@ -1130,19 +1164,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1130 * Compact each dir entry in the range to the minimal rec_len. 1164 * Compact each dir entry in the range to the minimal rec_len.
1131 * Returns pointer to last entry in range. 1165 * Returns pointer to last entry in range.
1132 */ 1166 */
1133static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) 1167static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1134{ 1168{
1135 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; 1169 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1136 unsigned rec_len = 0; 1170 unsigned rec_len = 0;
1137 1171
1138 prev = to = de; 1172 prev = to = de;
1139 while ((char*)de < base + size) { 1173 while ((char*)de < base + blocksize) {
1140 next = ext4_next_entry(de); 1174 next = ext4_next_entry(de, blocksize);
1141 if (de->inode && de->name_len) { 1175 if (de->inode && de->name_len) {
1142 rec_len = EXT4_DIR_REC_LEN(de->name_len); 1176 rec_len = EXT4_DIR_REC_LEN(de->name_len);
1143 if (de > to) 1177 if (de > to)
1144 memmove(to, de, rec_len); 1178 memmove(to, de, rec_len);
1145 to->rec_len = ext4_rec_len_to_disk(rec_len); 1179 to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1146 prev = to; 1180 prev = to;
1147 to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); 1181 to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1148 } 1182 }
@@ -1215,10 +1249,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1215 hash2, split, count-split)); 1249 hash2, split, count-split));
1216 1250
1217 /* Fancy dance to stay within two buffers */ 1251 /* Fancy dance to stay within two buffers */
1218 de2 = dx_move_dirents(data1, data2, map + split, count - split); 1252 de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
1219 de = dx_pack_dirents(data1, blocksize); 1253 de = dx_pack_dirents(data1, blocksize);
1220 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); 1254 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
1221 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); 1255 blocksize);
1256 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
1257 blocksize);
1222 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1258 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
1223 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1259 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1224 1260
@@ -1268,6 +1304,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1268 const char *name = dentry->d_name.name; 1304 const char *name = dentry->d_name.name;
1269 int namelen = dentry->d_name.len; 1305 int namelen = dentry->d_name.len;
1270 unsigned int offset = 0; 1306 unsigned int offset = 0;
1307 unsigned int blocksize = dir->i_sb->s_blocksize;
1271 unsigned short reclen; 1308 unsigned short reclen;
1272 int nlen, rlen, err; 1309 int nlen, rlen, err;
1273 char *top; 1310 char *top;
@@ -1275,7 +1312,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1275 reclen = EXT4_DIR_REC_LEN(namelen); 1312 reclen = EXT4_DIR_REC_LEN(namelen);
1276 if (!de) { 1313 if (!de) {
1277 de = (struct ext4_dir_entry_2 *)bh->b_data; 1314 de = (struct ext4_dir_entry_2 *)bh->b_data;
1278 top = bh->b_data + dir->i_sb->s_blocksize - reclen; 1315 top = bh->b_data + blocksize - reclen;
1279 while ((char *) de <= top) { 1316 while ((char *) de <= top) {
1280 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1317 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1281 bh, offset)) { 1318 bh, offset)) {
@@ -1287,7 +1324,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1287 return -EEXIST; 1324 return -EEXIST;
1288 } 1325 }
1289 nlen = EXT4_DIR_REC_LEN(de->name_len); 1326 nlen = EXT4_DIR_REC_LEN(de->name_len);
1290 rlen = ext4_rec_len_from_disk(de->rec_len); 1327 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1291 if ((de->inode? rlen - nlen: rlen) >= reclen) 1328 if ((de->inode? rlen - nlen: rlen) >= reclen)
1292 break; 1329 break;
1293 de = (struct ext4_dir_entry_2 *)((char *)de + rlen); 1330 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
@@ -1306,11 +1343,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1306 1343
1307 /* By now the buffer is marked for journaling */ 1344 /* By now the buffer is marked for journaling */
1308 nlen = EXT4_DIR_REC_LEN(de->name_len); 1345 nlen = EXT4_DIR_REC_LEN(de->name_len);
1309 rlen = ext4_rec_len_from_disk(de->rec_len); 1346 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1310 if (de->inode) { 1347 if (de->inode) {
1311 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); 1348 struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
1312 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen); 1349 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
1313 de->rec_len = ext4_rec_len_to_disk(nlen); 1350 de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
1314 de = de1; 1351 de = de1;
1315 } 1352 }
1316 de->file_type = EXT4_FT_UNKNOWN; 1353 de->file_type = EXT4_FT_UNKNOWN;
@@ -1380,7 +1417,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1380 /* The 0th block becomes the root, move the dirents out */ 1417 /* The 0th block becomes the root, move the dirents out */
1381 fde = &root->dotdot; 1418 fde = &root->dotdot;
1382 de = (struct ext4_dir_entry_2 *)((char *)fde + 1419 de = (struct ext4_dir_entry_2 *)((char *)fde +
1383 ext4_rec_len_from_disk(fde->rec_len)); 1420 ext4_rec_len_from_disk(fde->rec_len, blocksize));
1384 if ((char *) de >= (((char *) root) + blocksize)) { 1421 if ((char *) de >= (((char *) root) + blocksize)) {
1385 ext4_error(dir->i_sb, __func__, 1422 ext4_error(dir->i_sb, __func__,
1386 "invalid rec_len for '..' in inode %lu", 1423 "invalid rec_len for '..' in inode %lu",
@@ -1402,12 +1439,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1402 memcpy (data1, de, len); 1439 memcpy (data1, de, len);
1403 de = (struct ext4_dir_entry_2 *) data1; 1440 de = (struct ext4_dir_entry_2 *) data1;
1404 top = data1 + len; 1441 top = data1 + len;
1405 while ((char *)(de2 = ext4_next_entry(de)) < top) 1442 while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
1406 de = de2; 1443 de = de2;
1407 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); 1444 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
1445 blocksize);
1408 /* Initialize the root; the dot dirents already exist */ 1446 /* Initialize the root; the dot dirents already exist */
1409 de = (struct ext4_dir_entry_2 *) (&root->dotdot); 1447 de = (struct ext4_dir_entry_2 *) (&root->dotdot);
1410 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2)); 1448 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
1449 blocksize);
1411 memset (&root->info, 0, sizeof(root->info)); 1450 memset (&root->info, 0, sizeof(root->info));
1412 root->info.info_length = sizeof(root->info); 1451 root->info.info_length = sizeof(root->info);
1413 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 1452 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@@ -1488,7 +1527,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1488 return retval; 1527 return retval;
1489 de = (struct ext4_dir_entry_2 *) bh->b_data; 1528 de = (struct ext4_dir_entry_2 *) bh->b_data;
1490 de->inode = 0; 1529 de->inode = 0;
1491 de->rec_len = ext4_rec_len_to_disk(blocksize); 1530 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1492 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1531 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1493} 1532}
1494 1533
@@ -1551,7 +1590,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1551 goto cleanup; 1590 goto cleanup;
1552 node2 = (struct dx_node *)(bh2->b_data); 1591 node2 = (struct dx_node *)(bh2->b_data);
1553 entries2 = node2->entries; 1592 entries2 = node2->entries;
1554 node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize); 1593 node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
1594 sb->s_blocksize);
1555 node2->fake.inode = 0; 1595 node2->fake.inode = 0;
1556 BUFFER_TRACE(frame->bh, "get_write_access"); 1596 BUFFER_TRACE(frame->bh, "get_write_access");
1557 err = ext4_journal_get_write_access(handle, frame->bh); 1597 err = ext4_journal_get_write_access(handle, frame->bh);
@@ -1639,6 +1679,7 @@ static int ext4_delete_entry(handle_t *handle,
1639 struct buffer_head *bh) 1679 struct buffer_head *bh)
1640{ 1680{
1641 struct ext4_dir_entry_2 *de, *pde; 1681 struct ext4_dir_entry_2 *de, *pde;
1682 unsigned int blocksize = dir->i_sb->s_blocksize;
1642 int i; 1683 int i;
1643 1684
1644 i = 0; 1685 i = 0;
@@ -1652,8 +1693,11 @@ static int ext4_delete_entry(handle_t *handle,
1652 ext4_journal_get_write_access(handle, bh); 1693 ext4_journal_get_write_access(handle, bh);
1653 if (pde) 1694 if (pde)
1654 pde->rec_len = ext4_rec_len_to_disk( 1695 pde->rec_len = ext4_rec_len_to_disk(
1655 ext4_rec_len_from_disk(pde->rec_len) + 1696 ext4_rec_len_from_disk(pde->rec_len,
1656 ext4_rec_len_from_disk(de->rec_len)); 1697 blocksize) +
1698 ext4_rec_len_from_disk(de->rec_len,
1699 blocksize),
1700 blocksize);
1657 else 1701 else
1658 de->inode = 0; 1702 de->inode = 0;
1659 dir->i_version++; 1703 dir->i_version++;
@@ -1661,9 +1705,9 @@ static int ext4_delete_entry(handle_t *handle,
1661 ext4_handle_dirty_metadata(handle, dir, bh); 1705 ext4_handle_dirty_metadata(handle, dir, bh);
1662 return 0; 1706 return 0;
1663 } 1707 }
1664 i += ext4_rec_len_from_disk(de->rec_len); 1708 i += ext4_rec_len_from_disk(de->rec_len, blocksize);
1665 pde = de; 1709 pde = de;
1666 de = ext4_next_entry(de); 1710 de = ext4_next_entry(de, blocksize);
1667 } 1711 }
1668 return -ENOENT; 1712 return -ENOENT;
1669} 1713}
@@ -1793,6 +1837,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1793 struct inode *inode; 1837 struct inode *inode;
1794 struct buffer_head *dir_block; 1838 struct buffer_head *dir_block;
1795 struct ext4_dir_entry_2 *de; 1839 struct ext4_dir_entry_2 *de;
1840 unsigned int blocksize = dir->i_sb->s_blocksize;
1796 int err, retries = 0; 1841 int err, retries = 0;
1797 1842
1798 if (EXT4_DIR_LINK_MAX(dir)) 1843 if (EXT4_DIR_LINK_MAX(dir))
@@ -1824,13 +1869,14 @@ retry:
1824 de = (struct ext4_dir_entry_2 *) dir_block->b_data; 1869 de = (struct ext4_dir_entry_2 *) dir_block->b_data;
1825 de->inode = cpu_to_le32(inode->i_ino); 1870 de->inode = cpu_to_le32(inode->i_ino);
1826 de->name_len = 1; 1871 de->name_len = 1;
1827 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); 1872 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
1873 blocksize);
1828 strcpy(de->name, "."); 1874 strcpy(de->name, ".");
1829 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1875 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1830 de = ext4_next_entry(de); 1876 de = ext4_next_entry(de, blocksize);
1831 de->inode = cpu_to_le32(dir->i_ino); 1877 de->inode = cpu_to_le32(dir->i_ino);
1832 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - 1878 de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
1833 EXT4_DIR_REC_LEN(1)); 1879 blocksize);
1834 de->name_len = 2; 1880 de->name_len = 2;
1835 strcpy(de->name, ".."); 1881 strcpy(de->name, "..");
1836 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1882 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
@@ -1885,7 +1931,7 @@ static int empty_dir(struct inode *inode)
1885 return 1; 1931 return 1;
1886 } 1932 }
1887 de = (struct ext4_dir_entry_2 *) bh->b_data; 1933 de = (struct ext4_dir_entry_2 *) bh->b_data;
1888 de1 = ext4_next_entry(de); 1934 de1 = ext4_next_entry(de, sb->s_blocksize);
1889 if (le32_to_cpu(de->inode) != inode->i_ino || 1935 if (le32_to_cpu(de->inode) != inode->i_ino ||
1890 !le32_to_cpu(de1->inode) || 1936 !le32_to_cpu(de1->inode) ||
1891 strcmp(".", de->name) || 1937 strcmp(".", de->name) ||
@@ -1896,9 +1942,9 @@ static int empty_dir(struct inode *inode)
1896 brelse(bh); 1942 brelse(bh);
1897 return 1; 1943 return 1;
1898 } 1944 }
1899 offset = ext4_rec_len_from_disk(de->rec_len) + 1945 offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
1900 ext4_rec_len_from_disk(de1->rec_len); 1946 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
1901 de = ext4_next_entry(de1); 1947 de = ext4_next_entry(de1, sb->s_blocksize);
1902 while (offset < inode->i_size) { 1948 while (offset < inode->i_size) {
1903 if (!bh || 1949 if (!bh ||
1904 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1950 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@@ -1927,8 +1973,8 @@ static int empty_dir(struct inode *inode)
1927 brelse(bh); 1973 brelse(bh);
1928 return 0; 1974 return 0;
1929 } 1975 }
1930 offset += ext4_rec_len_from_disk(de->rec_len); 1976 offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
1931 de = ext4_next_entry(de); 1977 de = ext4_next_entry(de, sb->s_blocksize);
1932 } 1978 }
1933 brelse(bh); 1979 brelse(bh);
1934 return 1; 1980 return 1;
@@ -2297,8 +2343,8 @@ retry:
2297 return err; 2343 return err;
2298} 2344}
2299 2345
2300#define PARENT_INO(buffer) \ 2346#define PARENT_INO(buffer, size) \
2301 (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode) 2347 (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode)
2302 2348
2303/* 2349/*
2304 * Anybody can rename anything with this: the permission checks are left to the 2350 * Anybody can rename anything with this: the permission checks are left to the
@@ -2311,7 +2357,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2311 struct inode *old_inode, *new_inode; 2357 struct inode *old_inode, *new_inode;
2312 struct buffer_head *old_bh, *new_bh, *dir_bh; 2358 struct buffer_head *old_bh, *new_bh, *dir_bh;
2313 struct ext4_dir_entry_2 *old_de, *new_de; 2359 struct ext4_dir_entry_2 *old_de, *new_de;
2314 int retval; 2360 int retval, force_da_alloc = 0;
2315 2361
2316 old_bh = new_bh = dir_bh = NULL; 2362 old_bh = new_bh = dir_bh = NULL;
2317 2363
@@ -2358,7 +2404,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2358 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); 2404 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
2359 if (!dir_bh) 2405 if (!dir_bh)
2360 goto end_rename; 2406 goto end_rename;
2361 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2407 if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
2408 old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
2362 goto end_rename; 2409 goto end_rename;
2363 retval = -EMLINK; 2410 retval = -EMLINK;
2364 if (!new_inode && new_dir != old_dir && 2411 if (!new_inode && new_dir != old_dir &&
@@ -2430,7 +2477,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2430 if (dir_bh) { 2477 if (dir_bh) {
2431 BUFFER_TRACE(dir_bh, "get_write_access"); 2478 BUFFER_TRACE(dir_bh, "get_write_access");
2432 ext4_journal_get_write_access(handle, dir_bh); 2479 ext4_journal_get_write_access(handle, dir_bh);
2433 PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); 2480 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2481 cpu_to_le32(new_dir->i_ino);
2434 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2482 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2435 ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2483 ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
2436 ext4_dec_count(handle, old_dir); 2484 ext4_dec_count(handle, old_dir);
@@ -2449,6 +2497,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2449 ext4_mark_inode_dirty(handle, new_inode); 2497 ext4_mark_inode_dirty(handle, new_inode);
2450 if (!new_inode->i_nlink) 2498 if (!new_inode->i_nlink)
2451 ext4_orphan_add(handle, new_inode); 2499 ext4_orphan_add(handle, new_inode);
2500 if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
2501 force_da_alloc = 1;
2452 } 2502 }
2453 retval = 0; 2503 retval = 0;
2454 2504
@@ -2457,6 +2507,8 @@ end_rename:
2457 brelse(old_bh); 2507 brelse(old_bh);
2458 brelse(new_bh); 2508 brelse(new_bh);
2459 ext4_journal_stop(handle); 2509 ext4_journal_stop(handle);
2510 if (retval == 0 && force_da_alloc)
2511 ext4_alloc_da_blocks(old_inode);
2460 return retval; 2512 return retval;
2461} 2513}
2462 2514
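
The force_da_alloc path fires when a rename replaces an existing file, i.e. the classic update-by-rename idiom. Without it, delayed allocation could leave a zero-length file behind a crash when the application skips fsync(); the pattern being protected looks like this (file names are illustrative):

/* Replace a config file by writing a temp file and renaming it into
 * place. With auto_da_alloc the replaced file's blocks get flushed at
 * rename time even if the fsync() below is forgotten. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *tmp = "config.tmp", *dst = "config";
	const char *data = "setting=1\n";
	int fd = open(tmp, O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (write(fd, data, strlen(data)) < 0) { perror("write"); return 1; }
	fsync(fd);		/* the step applications tend to skip */
	close(fd);
	if (rename(tmp, dst) < 0) { perror("rename"); return 1; }
	return 0;
}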
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c06886abd658..546c7dd869e1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -938,10 +938,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
938 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 938 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
939 ext4_group_t flex_group; 939 ext4_group_t flex_group;
940 flex_group = ext4_flex_group(sbi, input->group); 940 flex_group = ext4_flex_group(sbi, input->group);
941 sbi->s_flex_groups[flex_group].free_blocks += 941 atomic_add(input->free_blocks_count,
942 input->free_blocks_count; 942 &sbi->s_flex_groups[flex_group].free_blocks);
943 sbi->s_flex_groups[flex_group].free_inodes += 943 atomic_add(EXT4_INODES_PER_GROUP(sb),
944 EXT4_INODES_PER_GROUP(sb); 944 &sbi->s_flex_groups[flex_group].free_inodes);
945 } 945 }
946 946
947 ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); 947 ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f7371a6a923d..9987bba99db3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -35,6 +35,7 @@
35#include <linux/quotaops.h> 35#include <linux/quotaops.h>
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/ctype.h>
38#include <linux/marker.h> 39#include <linux/marker.h>
39#include <linux/log2.h> 40#include <linux/log2.h>
40#include <linux/crc16.h> 41#include <linux/crc16.h>
@@ -48,6 +49,7 @@
48#include "group.h" 49#include "group.h"
49 50
50struct proc_dir_entry *ext4_proc_root; 51struct proc_dir_entry *ext4_proc_root;
52static struct kset *ext4_kset;
51 53
52static int ext4_load_journal(struct super_block *, struct ext4_super_block *, 54static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
53 unsigned long journal_devnum); 55 unsigned long journal_devnum);
@@ -577,9 +579,9 @@ static void ext4_put_super(struct super_block *sb)
577 ext4_commit_super(sb, es, 1); 579 ext4_commit_super(sb, es, 1);
578 } 580 }
579 if (sbi->s_proc) { 581 if (sbi->s_proc) {
580 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
581 remove_proc_entry(sb->s_id, ext4_proc_root); 582 remove_proc_entry(sb->s_id, ext4_proc_root);
582 } 583 }
584 kobject_del(&sbi->s_kobj);
583 585
584 for (i = 0; i < sbi->s_gdb_count; i++) 586 for (i = 0; i < sbi->s_gdb_count; i++)
585 brelse(sbi->s_group_desc[i]); 587 brelse(sbi->s_group_desc[i]);
@@ -615,6 +617,17 @@ static void ext4_put_super(struct super_block *sb)
615 ext4_blkdev_remove(sbi); 617 ext4_blkdev_remove(sbi);
616 } 618 }
617 sb->s_fs_info = NULL; 619 sb->s_fs_info = NULL;
620 /*
621 * Now that we are completely done shutting down the
622 * superblock, we need to actually destroy the kobject.
623 */
624 unlock_kernel();
625 unlock_super(sb);
626 kobject_put(&sbi->s_kobj);
627 wait_for_completion(&sbi->s_kobj_unregister);
628 lock_super(sb);
629 lock_kernel();
630 kfree(sbi->s_blockgroup_lock);
618 kfree(sbi); 631 kfree(sbi);
619 return; 632 return;
620} 633}
@@ -803,8 +816,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
803 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) 816 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
804 seq_puts(seq, ",noacl"); 817 seq_puts(seq, ",noacl");
805#endif 818#endif
806 if (!test_opt(sb, RESERVATION))
807 seq_puts(seq, ",noreservation");
808 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { 819 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
809 seq_printf(seq, ",commit=%u", 820 seq_printf(seq, ",commit=%u",
810 (unsigned) (sbi->s_commit_interval / HZ)); 821 (unsigned) (sbi->s_commit_interval / HZ));
@@ -855,6 +866,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
855 if (test_opt(sb, DATA_ERR_ABORT)) 866 if (test_opt(sb, DATA_ERR_ABORT))
856 seq_puts(seq, ",data_err=abort"); 867 seq_puts(seq, ",data_err=abort");
857 868
869 if (test_opt(sb, NO_AUTO_DA_ALLOC))
870 seq_puts(seq, ",noauto_da_alloc");
871
858 ext4_show_quota_options(seq, sb); 872 ext4_show_quota_options(seq, sb);
859 return 0; 873 return 0;
860} 874}
@@ -1004,7 +1018,7 @@ enum {
1004 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 1018 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1005 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, 1019 Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
1006 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 1020 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1007 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, 1021 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
1008 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 1022 Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
1009 Opt_journal_update, Opt_journal_dev, 1023 Opt_journal_update, Opt_journal_dev,
1010 Opt_journal_checksum, Opt_journal_async_commit, 1024 Opt_journal_checksum, Opt_journal_async_commit,
@@ -1012,8 +1026,8 @@ enum {
1012 Opt_data_err_abort, Opt_data_err_ignore, 1026 Opt_data_err_abort, Opt_data_err_ignore,
1013 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1027 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1014 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1028 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1015 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 1029 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1016 Opt_grpquota, Opt_i_version, 1030 Opt_usrquota, Opt_grpquota, Opt_i_version,
1017 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1031 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1018 Opt_inode_readahead_blks, Opt_journal_ioprio 1032 Opt_inode_readahead_blks, Opt_journal_ioprio
1019}; 1033};
@@ -1039,8 +1053,6 @@ static const match_table_t tokens = {
1039 {Opt_nouser_xattr, "nouser_xattr"}, 1053 {Opt_nouser_xattr, "nouser_xattr"},
1040 {Opt_acl, "acl"}, 1054 {Opt_acl, "acl"},
1041 {Opt_noacl, "noacl"}, 1055 {Opt_noacl, "noacl"},
1042 {Opt_reservation, "reservation"},
1043 {Opt_noreservation, "noreservation"},
1044 {Opt_noload, "noload"}, 1056 {Opt_noload, "noload"},
1045 {Opt_nobh, "nobh"}, 1057 {Opt_nobh, "nobh"},
1046 {Opt_bh, "bh"}, 1058 {Opt_bh, "bh"},
@@ -1068,6 +1080,8 @@ static const match_table_t tokens = {
1068 {Opt_quota, "quota"}, 1080 {Opt_quota, "quota"},
1069 {Opt_usrquota, "usrquota"}, 1081 {Opt_usrquota, "usrquota"},
1070 {Opt_barrier, "barrier=%u"}, 1082 {Opt_barrier, "barrier=%u"},
1083 {Opt_barrier, "barrier"},
1084 {Opt_nobarrier, "nobarrier"},
1071 {Opt_i_version, "i_version"}, 1085 {Opt_i_version, "i_version"},
1072 {Opt_stripe, "stripe=%u"}, 1086 {Opt_stripe, "stripe=%u"},
1073 {Opt_resize, "resize"}, 1087 {Opt_resize, "resize"},
@@ -1075,6 +1089,9 @@ static const match_table_t tokens = {
1075 {Opt_nodelalloc, "nodelalloc"}, 1089 {Opt_nodelalloc, "nodelalloc"},
1076 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1090 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1077 {Opt_journal_ioprio, "journal_ioprio=%u"}, 1091 {Opt_journal_ioprio, "journal_ioprio=%u"},
1092 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1093 {Opt_auto_da_alloc, "auto_da_alloc"},
1094 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1078 {Opt_err, NULL}, 1095 {Opt_err, NULL},
1079}; 1096};
1080 1097
@@ -1207,12 +1224,6 @@ static int parse_options(char *options, struct super_block *sb,
1207 "not supported\n"); 1224 "not supported\n");
1208 break; 1225 break;
1209#endif 1226#endif
1210 case Opt_reservation:
1211 set_opt(sbi->s_mount_opt, RESERVATION);
1212 break;
1213 case Opt_noreservation:
1214 clear_opt(sbi->s_mount_opt, RESERVATION);
1215 break;
1216 case Opt_journal_update: 1227 case Opt_journal_update:
1217 /* @@@ FIXME */ 1228 /* @@@ FIXME */
1218 /* Eventually we will want to be able to create 1229 /* Eventually we will want to be able to create
@@ -1415,9 +1426,14 @@ set_qf_format:
1415 case Opt_abort: 1426 case Opt_abort:
1416 set_opt(sbi->s_mount_opt, ABORT); 1427 set_opt(sbi->s_mount_opt, ABORT);
1417 break; 1428 break;
1429 case Opt_nobarrier:
1430 clear_opt(sbi->s_mount_opt, BARRIER);
1431 break;
1418 case Opt_barrier: 1432 case Opt_barrier:
1419 if (match_int(&args[0], &option)) 1433 if (match_int(&args[0], &option)) {
1420 return 0; 1434 set_opt(sbi->s_mount_opt, BARRIER);
1435 break;
1436 }
1421 if (option) 1437 if (option)
1422 set_opt(sbi->s_mount_opt, BARRIER); 1438 set_opt(sbi->s_mount_opt, BARRIER);
1423 else 1439 else
@@ -1463,6 +1479,11 @@ set_qf_format:
1463 return 0; 1479 return 0;
1464 if (option < 0 || option > (1 << 30)) 1480 if (option < 0 || option > (1 << 30))
1465 return 0; 1481 return 0;
1482 if (option & (option - 1)) {
1483 printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
1484 " must be a power of 2\n");
1485 return 0;
1486 }
1466 sbi->s_inode_readahead_blks = option; 1487 sbi->s_inode_readahead_blks = option;
1467 break; 1488 break;
1468 case Opt_journal_ioprio: 1489 case Opt_journal_ioprio:
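
The new rejection test above relies on the identity that x & (x - 1) clears the lowest set bit of x, so the expression is zero exactly when at most one bit is set. A standalone check of that identity (0 also passes, just as it does in the mount-option code, which has already range-checked the value):

#include <stdio.h>
#include <stdbool.h>

static bool pow2_or_zero(unsigned int x)
{
        /* clearing the lowest set bit leaves 0 iff at most 1 bit was set */
        return (x & (x - 1)) == 0;
}

int main(void)
{
        unsigned int v[] = { 0, 1, 12, 16, 4096 };

        for (int i = 0; i < 5; i++)
                printf("%4u -> %s\n", v[i],
                       pow2_or_zero(v[i]) ? "accepted" : "rejected");
        return 0;
}
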
@@ -1473,6 +1494,19 @@ set_qf_format:
1473 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 1494 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1474 option); 1495 option);
1475 break; 1496 break;
1497 case Opt_noauto_da_alloc:
1498 set_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1499 break;
1500 case Opt_auto_da_alloc:
1501 if (match_int(&args[0], &option)) {
1502 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1503 break;
1504 }
1505 if (option)
1506 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1507 else
1508 set_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1509 break;
1476 default: 1510 default:
1477 printk(KERN_ERR 1511 printk(KERN_ERR
1478 "EXT4-fs: Unrecognized mount option \"%s\" " 1512 "EXT4-fs: Unrecognized mount option \"%s\" "
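
The token-table and parser hunks above let barrier/nobarrier and auto_da_alloc/noauto_da_alloc be given with or without an =%u argument; when match_int() finds no integer, the bare spelling now means "enable" rather than a parse error. A simplified userspace model of that dual-form handling (the kernel goes through match_token()/match_int(); only the outcome is mirrored here):

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

static int parse_barrier(const char *opt, int *barrier)
{
        if (!strcmp(opt, "nobarrier")) {
                *barrier = 0;
        } else if (!strncmp(opt, "barrier", 7)) {
                if (opt[7] == '\0')
                        *barrier = 1;            /* bare form: enable */
                else if (opt[7] == '=')
                        *barrier = atoi(opt + 8) ? 1 : 0;
                else
                        return -1;               /* e.g. "barrierx" */
        } else {
                return -1;
        }
        return 0;
}

int main(void)
{
        const char *tests[] = { "barrier", "barrier=0", "barrier=1", "nobarrier" };

        for (int i = 0; i < 4; i++) {
                int b = -1;
                if (parse_barrier(tests[i], &b) == 0)
                        printf("%-10s -> barrier=%d\n", tests[i], b);
        }
        return 0;
}
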
@@ -1612,10 +1646,12 @@ static int ext4_fill_flex_info(struct super_block *sb)
1612 gdp = ext4_get_group_desc(sb, i, &bh); 1646 gdp = ext4_get_group_desc(sb, i, &bh);
1613 1647
1614 flex_group = ext4_flex_group(sbi, i); 1648 flex_group = ext4_flex_group(sbi, i);
1615 sbi->s_flex_groups[flex_group].free_inodes += 1649 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1616 ext4_free_inodes_count(sb, gdp); 1650 ext4_free_inodes_count(sb, gdp));
1617 sbi->s_flex_groups[flex_group].free_blocks += 1651 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1618 ext4_free_blks_count(sb, gdp); 1652 ext4_free_blks_count(sb, gdp));
1653 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1654 ext4_used_dirs_count(sb, gdp));
1619 } 1655 }
1620 1656
1621 return 1; 1657 return 1;
@@ -1991,6 +2027,181 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1991 return 0; 2027 return 0;
1992} 2028}
1993 2029
2030/* sysfs support */
2031
2032struct ext4_attr {
2033 struct attribute attr;
2034 ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2035 ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2036 const char *, size_t);
2037 int offset;
2038};
2039
2040static int parse_strtoul(const char *buf,
2041 unsigned long max, unsigned long *value)
2042{
2043 char *endp;
2044
2045 while (*buf && isspace(*buf))
2046 buf++;
2047 *value = simple_strtoul(buf, &endp, 0);
2048 while (*endp && isspace(*endp))
2049 endp++;
2050 if (*endp || *value > max)
2051 return -EINVAL;
2052
2053 return 0;
2054}
2055
2056static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2057 struct ext4_sb_info *sbi,
2058 char *buf)
2059{
2060 return snprintf(buf, PAGE_SIZE, "%llu\n",
2061 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2062}
2063
2064static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2065 struct ext4_sb_info *sbi, char *buf)
2066{
2067 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2068
2069 return snprintf(buf, PAGE_SIZE, "%lu\n",
2070 (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2071 sbi->s_sectors_written_start) >> 1);
2072}
2073
2074static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2075 struct ext4_sb_info *sbi, char *buf)
2076{
2077 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2078
2079 return snprintf(buf, PAGE_SIZE, "%llu\n",
2080 sbi->s_kbytes_written +
2081 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2082 EXT4_SB(sb)->s_sectors_written_start) >> 1));
2083}
2084
2085static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2086 struct ext4_sb_info *sbi,
2087 const char *buf, size_t count)
2088{
2089 unsigned long t;
2090
2091 if (parse_strtoul(buf, 0x40000000, &t))
2092 return -EINVAL;
2093
2094 /* inode_readahead_blks must be a power of 2 */
2095 if (t & (t-1))
2096 return -EINVAL;
2097
2098 sbi->s_inode_readahead_blks = t;
2099 return count;
2100}
2101
2102static ssize_t sbi_ui_show(struct ext4_attr *a,
2103 struct ext4_sb_info *sbi, char *buf)
2104{
2105 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2106
2107 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2108}
2109
2110static ssize_t sbi_ui_store(struct ext4_attr *a,
2111 struct ext4_sb_info *sbi,
2112 const char *buf, size_t count)
2113{
2114 unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2115 unsigned long t;
2116
2117 if (parse_strtoul(buf, 0xffffffff, &t))
2118 return -EINVAL;
2119 *ui = t;
2120 return count;
2121}
2122
2123#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
2124static struct ext4_attr ext4_attr_##_name = { \
2125 .attr = {.name = __stringify(_name), .mode = _mode }, \
2126 .show = _show, \
2127 .store = _store, \
2128 .offset = offsetof(struct ext4_sb_info, _elname), \
2129}
2130#define EXT4_ATTR(name, mode, show, store) \
2131static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2132
2133#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2134#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2135#define EXT4_RW_ATTR_SBI_UI(name, elname) \
2136 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
2137#define ATTR_LIST(name) &ext4_attr_##name.attr
2138
2139EXT4_RO_ATTR(delayed_allocation_blocks);
2140EXT4_RO_ATTR(session_write_kbytes);
2141EXT4_RO_ATTR(lifetime_write_kbytes);
2142EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2143 inode_readahead_blks_store, s_inode_readahead_blks);
2144EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2145EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2146EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2147EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2148EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2149EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2150
2151static struct attribute *ext4_attrs[] = {
2152 ATTR_LIST(delayed_allocation_blocks),
2153 ATTR_LIST(session_write_kbytes),
2154 ATTR_LIST(lifetime_write_kbytes),
2155 ATTR_LIST(inode_readahead_blks),
2156 ATTR_LIST(mb_stats),
2157 ATTR_LIST(mb_max_to_scan),
2158 ATTR_LIST(mb_min_to_scan),
2159 ATTR_LIST(mb_order2_req),
2160 ATTR_LIST(mb_stream_req),
2161 ATTR_LIST(mb_group_prealloc),
2162 NULL,
2163};
2164
2165static ssize_t ext4_attr_show(struct kobject *kobj,
2166 struct attribute *attr, char *buf)
2167{
2168 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2169 s_kobj);
2170 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2171
2172 return a->show ? a->show(a, sbi, buf) : 0;
2173}
2174
2175static ssize_t ext4_attr_store(struct kobject *kobj,
2176 struct attribute *attr,
2177 const char *buf, size_t len)
2178{
2179 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2180 s_kobj);
2181 struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2182
2183 return a->store ? a->store(a, sbi, buf, len) : 0;
2184}
2185
2186static void ext4_sb_release(struct kobject *kobj)
2187{
2188 struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2189 s_kobj);
2190 complete(&sbi->s_kobj_unregister);
2191}
2192
2193
2194static struct sysfs_ops ext4_attr_ops = {
2195 .show = ext4_attr_show,
2196 .store = ext4_attr_store,
2197};
2198
2199static struct kobj_type ext4_ktype = {
2200 .default_attrs = ext4_attrs,
2201 .sysfs_ops = &ext4_attr_ops,
2202 .release = ext4_sb_release,
2203};
2204
1994static int ext4_fill_super(struct super_block *sb, void *data, int silent) 2205static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1995 __releases(kernel_lock) 2206 __releases(kernel_lock)
1996 __acquires(kernel_lock) 2207 __acquires(kernel_lock)
@@ -2021,12 +2232,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2021 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 2232 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2022 if (!sbi) 2233 if (!sbi)
2023 return -ENOMEM; 2234 return -ENOMEM;
2235
2236 sbi->s_blockgroup_lock =
2237 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2238 if (!sbi->s_blockgroup_lock) {
2239 kfree(sbi);
2240 return -ENOMEM;
2241 }
2024 sb->s_fs_info = sbi; 2242 sb->s_fs_info = sbi;
2025 sbi->s_mount_opt = 0; 2243 sbi->s_mount_opt = 0;
2026 sbi->s_resuid = EXT4_DEF_RESUID; 2244 sbi->s_resuid = EXT4_DEF_RESUID;
2027 sbi->s_resgid = EXT4_DEF_RESGID; 2245 sbi->s_resgid = EXT4_DEF_RESGID;
2028 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 2246 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2029 sbi->s_sb_block = sb_block; 2247 sbi->s_sb_block = sb_block;
2248 sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2249 sectors[1]);
2030 2250
2031 unlock_kernel(); 2251 unlock_kernel();
2032 2252
@@ -2064,6 +2284,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2064 sb->s_magic = le16_to_cpu(es->s_magic); 2284 sb->s_magic = le16_to_cpu(es->s_magic);
2065 if (sb->s_magic != EXT4_SUPER_MAGIC) 2285 if (sb->s_magic != EXT4_SUPER_MAGIC)
2066 goto cantfind_ext4; 2286 goto cantfind_ext4;
2287 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2067 2288
2068 /* Set defaults before we parse the mount options */ 2289 /* Set defaults before we parse the mount options */
2069 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 2290 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -2101,7 +2322,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2101 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2322 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2102 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2323 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2103 2324
2104 set_opt(sbi->s_mount_opt, RESERVATION);
2105 set_opt(sbi->s_mount_opt, BARRIER); 2325 set_opt(sbi->s_mount_opt, BARRIER);
2106 2326
2107 /* 2327 /*
@@ -2325,14 +2545,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2325#ifdef CONFIG_PROC_FS 2545#ifdef CONFIG_PROC_FS
2326 if (ext4_proc_root) 2546 if (ext4_proc_root)
2327 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2547 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2328
2329 if (sbi->s_proc)
2330 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2331 &ext4_ui_proc_fops,
2332 &sbi->s_inode_readahead_blks);
2333#endif 2548#endif
2334 2549
2335 bgl_lock_init(&sbi->s_blockgroup_lock); 2550 bgl_lock_init(sbi->s_blockgroup_lock);
2336 2551
2337 for (i = 0; i < db_count; i++) { 2552 for (i = 0; i < db_count; i++) {
2338 block = descriptor_loc(sb, logical_sb_block, i); 2553 block = descriptor_loc(sb, logical_sb_block, i);
@@ -2564,6 +2779,16 @@ no_journal:
2564 goto failed_mount4; 2779 goto failed_mount4;
2565 } 2780 }
2566 2781
2782 sbi->s_kobj.kset = ext4_kset;
2783 init_completion(&sbi->s_kobj_unregister);
2784 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2785 "%s", sb->s_id);
2786 if (err) {
2787 ext4_mb_release(sb);
2788 ext4_ext_release(sb);
2789 goto failed_mount4;
2790 }
2791
2567 /* 2792 /*
2568 * akpm: core read_super() calls in here with the superblock locked. 2793 * akpm: core read_super() calls in here with the superblock locked.
2569 * That deadlocks, because orphan cleanup needs to lock the superblock 2794 * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2618,7 +2843,6 @@ failed_mount2:
2618 kfree(sbi->s_group_desc); 2843 kfree(sbi->s_group_desc);
2619failed_mount: 2844failed_mount:
2620 if (sbi->s_proc) { 2845 if (sbi->s_proc) {
2621 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2622 remove_proc_entry(sb->s_id, ext4_proc_root); 2846 remove_proc_entry(sb->s_id, ext4_proc_root);
2623 } 2847 }
2624#ifdef CONFIG_QUOTA 2848#ifdef CONFIG_QUOTA
@@ -2913,6 +3137,10 @@ static int ext4_commit_super(struct super_block *sb,
2913 set_buffer_uptodate(sbh); 3137 set_buffer_uptodate(sbh);
2914 } 3138 }
2915 es->s_wtime = cpu_to_le32(get_seconds()); 3139 es->s_wtime = cpu_to_le32(get_seconds());
3140 es->s_kbytes_written =
3141 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
3142 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3143 EXT4_SB(sb)->s_sectors_written_start) >> 1));
2916 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3144 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
2917 &EXT4_SB(sb)->s_freeblocks_counter)); 3145 &EXT4_SB(sb)->s_freeblocks_counter));
2918 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( 3146 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
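
The s_kbytes_written arithmetic above shifts the sector delta right by one because block-layer part_stat counters are kept in 512-byte sectors, so two sectors make one KiB. A quick sanity check of the conversion with made-up figures:

#include <stdio.h>

int main(void)
{
        /* sectors[1] snapshots at mount and at commit time, invented */
        unsigned long long start = 1000, now = 5096;
        unsigned long long kb = (now - start) >> 1;   /* 2 sectors = 1 KiB */

        printf("%llu sectors written since mount = %llu KiB\n",
               now - start, kb);
        return 0;
}
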
@@ -3647,45 +3875,6 @@ static int ext4_get_sb(struct file_system_type *fs_type,
3647 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3875 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3648} 3876}
3649 3877
3650#ifdef CONFIG_PROC_FS
3651static int ext4_ui_proc_show(struct seq_file *m, void *v)
3652{
3653 unsigned int *p = m->private;
3654
3655 seq_printf(m, "%u\n", *p);
3656 return 0;
3657}
3658
3659static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3660{
3661 return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3662}
3663
3664static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3665 size_t cnt, loff_t *ppos)
3666{
3667 unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
3668 char str[32];
3669
3670 if (cnt >= sizeof(str))
3671 return -EINVAL;
3672 if (copy_from_user(str, buf, cnt))
3673 return -EFAULT;
3674
3675 *p = simple_strtoul(str, NULL, 0);
3676 return cnt;
3677}
3678
3679const struct file_operations ext4_ui_proc_fops = {
3680 .owner = THIS_MODULE,
3681 .open = ext4_ui_proc_open,
3682 .read = seq_read,
3683 .llseek = seq_lseek,
3684 .release = single_release,
3685 .write = ext4_ui_proc_write,
3686};
3687#endif
3688
3689static struct file_system_type ext4_fs_type = { 3878static struct file_system_type ext4_fs_type = {
3690 .owner = THIS_MODULE, 3879 .owner = THIS_MODULE,
3691 .name = "ext4", 3880 .name = "ext4",
@@ -3719,6 +3908,9 @@ static int __init init_ext4_fs(void)
3719{ 3908{
3720 int err; 3909 int err;
3721 3910
3911 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3912 if (!ext4_kset)
3913 return -ENOMEM;
3722 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 3914 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3723 err = init_ext4_mballoc(); 3915 err = init_ext4_mballoc();
3724 if (err) 3916 if (err)
@@ -3760,6 +3952,7 @@ static void __exit exit_ext4_fs(void)
3760 exit_ext4_xattr(); 3952 exit_ext4_xattr();
3761 exit_ext4_mballoc(); 3953 exit_ext4_mballoc();
3762 remove_proc_entry("fs/ext4", NULL); 3954 remove_proc_entry("fs/ext4", NULL);
3955 kset_unregister(ext4_kset);
3763} 3956}
3764 3957
3765MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 3958MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
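
The sysfs block added to super.c drives many unsigned-int tunables through a single show/store pair by recording each field's offsetof() in the attribute and recomputing the field pointer at run time; that is all EXT4_RW_ATTR_SBI_UI expands to. A userspace model of the offset-table technique, with illustrative names:

#include <stdio.h>
#include <stddef.h>

struct sb_info {
        unsigned int inode_readahead_blks;
        unsigned int mb_stats;
};

struct ui_attr {
        const char *name;
        size_t offset;
};

#define UI_ATTR(field) { #field, offsetof(struct sb_info, field) }

static unsigned int ui_show(const struct ui_attr *a, const struct sb_info *sbi)
{
        /* recover the field from its recorded offset, as sbi_ui_show() does */
        return *(const unsigned int *)((const char *)sbi + a->offset);
}

int main(void)
{
        struct sb_info sbi = { .inode_readahead_blks = 32, .mb_stats = 1 };
        struct ui_attr attrs[] = { UI_ATTR(inode_readahead_blks), UI_ATTR(mb_stats) };

        for (size_t i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
                printf("%s = %u\n", attrs[i].name, ui_show(&attrs[i], &sbi));
        return 0;
}

Attributes whose values do not map onto a plain field (delayed_allocation_blocks, the write-kbytes counters) instead get dedicated show routines, which is why the table mixes EXT4_RO_ATTR and EXT4_ATTR_OFFSET entries.
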
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d865ca66ccba..cc8e4de2fee5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -531,6 +531,12 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
531 if (!new) 531 if (!new)
532 return -ENOMEM; 532 return -ENOMEM;
533 } 533 }
534
535 /*
536 * We need to take f_lock first since it's not an IRQ-safe
537 * lock.
538 */
539 spin_lock(&filp->f_lock);
534 write_lock_irq(&fasync_lock); 540 write_lock_irq(&fasync_lock);
535 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 541 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
536 if (fa->fa_file == filp) { 542 if (fa->fa_file == filp) {
@@ -555,14 +561,12 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
555 result = 1; 561 result = 1;
556 } 562 }
557out: 563out:
558 /* Fix up FASYNC bit while still holding fasync_lock */
559 spin_lock(&filp->f_lock);
560 if (on) 564 if (on)
561 filp->f_flags |= FASYNC; 565 filp->f_flags |= FASYNC;
562 else 566 else
563 filp->f_flags &= ~FASYNC; 567 filp->f_flags &= ~FASYNC;
564 spin_unlock(&filp->f_lock);
565 write_unlock_irq(&fasync_lock); 568 write_unlock_irq(&fasync_lock);
569 spin_unlock(&filp->f_lock);
566 return result; 570 return result;
567} 571}
568 572
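
The fasync_helper() hunks move the spin_lock(&filp->f_lock) in front of write_lock_irq(&fasync_lock), fixing a single f_lock-then-fasync_lock ordering for the whole function. A userspace sketch of the resulting discipline; the lock names and the flag bit are stand-ins (taking both locks in the same order on every path is what rules out ABBA deadlock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t f_lock = PTHREAD_MUTEX_INITIALIZER;      /* outer */
static pthread_mutex_t fasync_lock = PTHREAD_MUTEX_INITIALIZER; /* inner */

static void set_fasync_flag(int on, int *f_flags)
{
        pthread_mutex_lock(&f_lock);       /* always first, as in the diff */
        pthread_mutex_lock(&fasync_lock);

        if (on)
                *f_flags |= 0x2000;        /* FASYNC-like bit, illustrative */
        else
                *f_flags &= ~0x2000;

        pthread_mutex_unlock(&fasync_lock);
        pthread_mutex_unlock(&f_lock);     /* release in reverse order */
}

int main(void)
{
        int flags = 0;

        set_fasync_flag(1, &flags);
        printf("flags=%#x\n", flags);
        return 0;
}
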
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d9fdb7cec538..4e340fedf768 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma)
1234 * - sync(2) 1234 * - sync(2)
1235 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER 1235 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1236 */ 1236 */
1237static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) 1237static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1238{ 1238{
1239 struct page *page = vmf->page;
1239 /* 1240 /*
1240 * Don't use page->mapping as it may become NULL from a 1241 * Don't use page->mapping as it may become NULL from a
1241 * concurrent truncate. 1242 * concurrent truncate.
@@ -1465,7 +1466,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1465 case SEEK_END: 1466 case SEEK_END:
1466 retval = fuse_update_attributes(inode, NULL, file, NULL); 1467 retval = fuse_update_attributes(inode, NULL, file, NULL);
1467 if (retval) 1468 if (retval)
1468 return retval; 1469 goto exit;
1469 offset += i_size_read(inode); 1470 offset += i_size_read(inode);
1470 break; 1471 break;
1471 case SEEK_CUR: 1472 case SEEK_CUR:
@@ -1479,6 +1480,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1479 } 1480 }
1480 retval = offset; 1481 retval = offset;
1481 } 1482 }
1483exit:
1482 mutex_unlock(&inode->i_mutex); 1484 mutex_unlock(&inode->i_mutex);
1483 return retval; 1485 return retval;
1484} 1486}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3b9e8de3500b..70b9b8548945 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -337,8 +337,9 @@ static int gfs2_allocate_page_backing(struct page *page)
337 * blocks allocated on disk to back that page. 337 * blocks allocated on disk to back that page.
338 */ 338 */
339 339
340static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) 340static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
341{ 341{
342 struct page *page = vmf->page;
342 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 343 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
343 struct gfs2_inode *ip = GFS2_I(inode); 344 struct gfs2_inode *ip = GFS2_I(inode);
344 struct gfs2_sbd *sdp = GFS2_SB(inode); 345 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -412,6 +413,8 @@ out_unlock:
412 gfs2_glock_dq(&gh); 413 gfs2_glock_dq(&gh);
413out: 414out:
414 gfs2_holder_uninit(&gh); 415 gfs2_holder_uninit(&gh);
416 if (ret)
417 ret = VM_FAULT_SIGBUS;
415 return ret; 418 return ret;
416} 419}
417 420
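
The fuse and gfs2 hunks above track the same VM interface change: ->page_mkwrite() now receives a struct vm_fault instead of a bare page and is expected to return VM_FAULT_* codes, which is why gfs2 converts a nonzero errno into VM_FAULT_SIGBUS on the way out. A userspace model of the shape of that change; the struct definitions and the constant are stand-ins, not the kernel's:

#include <stdio.h>

struct page { int id; };
struct vm_fault { struct page *page; };   /* mirrors the new argument */

#define VM_FAULT_SIGBUS 0x2

static int page_mkwrite(struct vm_fault *vmf)
{
        struct page *page = vmf->page;     /* page now arrives via vm_fault */

        if (!page)
                return VM_FAULT_SIGBUS;    /* fault codes, not -errno */
        printf("making page %d writable\n", page->id);
        return 0;
}

int main(void)
{
        struct page p = { 42 };
        struct vm_fault vmf = { &p };

        return page_mkwrite(&vmf) ? 1 : 0;
}
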
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9b800d97a687..23a3c76711e0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -943,14 +943,13 @@ static struct vfsmount *hugetlbfs_vfsmount;
943 943
944static int can_do_hugetlb_shm(void) 944static int can_do_hugetlb_shm(void)
945{ 945{
946 return likely(capable(CAP_IPC_LOCK) || 946 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
947 in_group_p(sysctl_hugetlb_shm_group) ||
948 can_do_mlock());
949} 947}
950 948
951struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) 949struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
952{ 950{
953 int error = -ENOMEM; 951 int error = -ENOMEM;
952 int unlock_shm = 0;
954 struct file *file; 953 struct file *file;
955 struct inode *inode; 954 struct inode *inode;
956 struct dentry *dentry, *root; 955 struct dentry *dentry, *root;
@@ -960,11 +959,14 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
960 if (!hugetlbfs_vfsmount) 959 if (!hugetlbfs_vfsmount)
961 return ERR_PTR(-ENOENT); 960 return ERR_PTR(-ENOENT);
962 961
963 if (!can_do_hugetlb_shm()) 962 if (!can_do_hugetlb_shm()) {
964 return ERR_PTR(-EPERM); 963 if (user_shm_lock(size, user)) {
965 964 unlock_shm = 1;
966 if (!user_shm_lock(size, user)) 965 WARN_ONCE(1,
967 return ERR_PTR(-ENOMEM); 966 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
967 } else
968 return ERR_PTR(-EPERM);
969 }
968 970
969 root = hugetlbfs_vfsmount->mnt_root; 971 root = hugetlbfs_vfsmount->mnt_root;
970 quick_string.name = name; 972 quick_string.name = name;
@@ -1004,7 +1006,8 @@ out_inode:
1004out_dentry: 1006out_dentry:
1005 dput(dentry); 1007 dput(dentry);
1006out_shm_unlock: 1008out_shm_unlock:
1007 user_shm_unlock(size, user); 1009 if (unlock_shm)
1010 user_shm_unlock(size, user);
1008 return ERR_PTR(error); 1011 return ERR_PTR(error);
1009} 1012}
1010 1013
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 62804e57a44c..4ea72377c7a2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
367 int tag_bytes = journal_tag_bytes(journal); 367 int tag_bytes = journal_tag_bytes(journal);
368 struct buffer_head *cbh = NULL; /* For transactional checksums */ 368 struct buffer_head *cbh = NULL; /* For transactional checksums */
369 __u32 crc32_sum = ~0; 369 __u32 crc32_sum = ~0;
370 int write_op = WRITE;
370 371
371 /* 372 /*
372 * First job: lock down the current transaction and wait for 373 * First job: lock down the current transaction and wait for
@@ -401,6 +402,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
401 spin_lock(&journal->j_state_lock); 402 spin_lock(&journal->j_state_lock);
402 commit_transaction->t_state = T_LOCKED; 403 commit_transaction->t_state = T_LOCKED;
403 404
405 if (commit_transaction->t_synchronous_commit)
406 write_op = WRITE_SYNC;
404 stats.u.run.rs_wait = commit_transaction->t_max_wait; 407 stats.u.run.rs_wait = commit_transaction->t_max_wait;
405 stats.u.run.rs_locked = jiffies; 408 stats.u.run.rs_locked = jiffies;
406 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 409 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
@@ -680,7 +683,7 @@ start_journal_io:
680 clear_buffer_dirty(bh); 683 clear_buffer_dirty(bh);
681 set_buffer_uptodate(bh); 684 set_buffer_uptodate(bh);
682 bh->b_end_io = journal_end_buffer_io_sync; 685 bh->b_end_io = journal_end_buffer_io_sync;
683 submit_bh(WRITE, bh); 686 submit_bh(write_op, bh);
684 } 687 }
685 cond_resched(); 688 cond_resched();
686 stats.u.run.rs_blocks_logged += bufs; 689 stats.u.run.rs_blocks_logged += bufs;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 257ff2625765..bbe6d592d8b3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -55,6 +55,25 @@
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 *
59 * Locking rules:
60 * We keep two hash tables of revoke records. One hash table belongs to the
61 * running transaction (pointed to by journal->j_revoke), the other one
62 * belongs to the committing transaction. Accesses to the second hash table
63 * happen only from kjournald and no other thread touches this table. Also
64 * journal_switch_revoke_table(), which switches which hash table belongs to
65 * the running and which to the committing transaction, is called only from
66 * kjournald. Therefore we need no locks when accessing the hash table
67 * belonging to the committing transaction.
68 *
69 * All users operating on the hash table belonging to the running transaction
70 * have a handle to the transaction. Therefore they are safe from kjournald
71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table, j_revoke_lock is used.
73 *
74 * Finally, the replay code also uses the hash tables, but at that moment no
75 * one else can touch them (the filesystem isn't mounted yet) and hence no
76 * locking is needed.
58 */ 77 */
59 78
60#ifndef __KERNEL__ 79#ifndef __KERNEL__
@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
401 * the second time we would still have a pending revoke to cancel. So, 420 * the second time we would still have a pending revoke to cancel. So,
402 * do not trust the Revoked bit on buffers unless RevokeValid is also 421 * do not trust the Revoked bit on buffers unless RevokeValid is also
403 * set. 422 * set.
404 *
405 * The caller must have the journal locked.
406 */ 423 */
407int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) 424int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
408{ 425{
@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
480/* 497/*
481 * Write revoke records to the journal for all entries in the current 498 * Write revoke records to the journal for all entries in the current
482 * revoke hash, deleting the entries as we go. 499 * revoke hash, deleting the entries as we go.
483 *
484 * Called with the journal lock held.
485 */ 500 */
486
487void jbd2_journal_write_revoke_records(journal_t *journal, 501void jbd2_journal_write_revoke_records(journal_t *journal,
488 transaction_t *transaction) 502 transaction_t *transaction)
489{ 503{
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 28ce21d8598e..996ffda06bf3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle)
1315 } 1315 }
1316 } 1316 }
1317 1317
1318 if (handle->h_sync)
1319 transaction->t_synchronous_commit = 1;
1318 current->journal_info = NULL; 1320 current->journal_info = NULL;
1319 spin_lock(&journal->j_state_lock); 1321 spin_lock(&journal->j_state_lock);
1320 spin_lock(&transaction->t_handle_lock); 1322 spin_lock(&transaction->t_handle_lock);
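
Together the jbd2 hunks plumb a per-transaction flag: any handle stopped with h_sync set marks the running transaction, and the commit path then submits every journal buffer with WRITE_SYNC instead of WRITE. A userspace model of that sticky aggregation; types and names are illustrative:

#include <stdio.h>

struct transaction { int synchronous_commit; };
struct handle { int sync; struct transaction *t; };

enum write_op { OP_WRITE, OP_WRITE_SYNC };

static void journal_stop(struct handle *h)
{
        if (h->sync)
                h->t->synchronous_commit = 1;   /* sticky, never cleared */
}

static enum write_op commit_write_op(const struct transaction *t)
{
        /* chosen once, up front, for the whole commit */
        return t->synchronous_commit ? OP_WRITE_SYNC : OP_WRITE;
}

int main(void)
{
        struct transaction t = { 0 };
        struct handle h1 = { 0, &t };   /* ordinary handle */
        struct handle h2 = { 1, &t };   /* handle closed by fsync() */

        journal_stop(&h1);
        journal_stop(&h2);
        printf("commit uses %s\n",
               commit_write_op(&t) == OP_WRITE_SYNC ? "WRITE_SYNC" : "WRITE");
        return 0;
}
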
diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig
index 9ff619a6f9cc..57cef19951db 100644
--- a/fs/jfs/Kconfig
+++ b/fs/jfs/Kconfig
@@ -1,6 +1,7 @@
1config JFS_FS 1config JFS_FS
2 tristate "JFS filesystem support" 2 tristate "JFS filesystem support"
3 select NLS 3 select NLS
4 select CRC32
4 help 5 help
5 This is a port of IBM's Journaled Filesystem. More information is 6 This is a port of IBM's Journaled Filesystem. More information is
6 available in the file <file:Documentation/filesystems/jfs.txt>. 7 available in the file <file:Documentation/filesystems/jfs.txt>.
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 6a73de84bcef..dd824d9b0b1a 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -90,7 +90,6 @@ void jfs_proc_init(void)
90 90
91 if (!(base = proc_mkdir("fs/jfs", NULL))) 91 if (!(base = proc_mkdir("fs/jfs", NULL)))
92 return; 92 return;
93 base->owner = THIS_MODULE;
94 93
95 for (i = 0; i < NPROCENT; i++) 94 for (i = 0; i < NPROCENT; i++)
96 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); 95 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 169802ea07f9..bbbd5f202e37 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -362,11 +362,12 @@ exit:
362int extHint(struct inode *ip, s64 offset, xad_t * xp) 362int extHint(struct inode *ip, s64 offset, xad_t * xp)
363{ 363{
364 struct super_block *sb = ip->i_sb; 364 struct super_block *sb = ip->i_sb;
365 struct xadlist xadl; 365 int nbperpage = JFS_SBI(sb)->nbperpage;
366 struct lxdlist lxdl;
367 lxd_t lxd;
368 s64 prev; 366 s64 prev;
369 int rc, nbperpage = JFS_SBI(sb)->nbperpage; 367 int rc = 0;
368 s64 xaddr;
369 int xlen;
370 int xflag;
370 371
371 /* init the hint as "no hint provided" */ 372 /* init the hint as "no hint provided" */
372 XADaddress(xp, 0); 373 XADaddress(xp, 0);
@@ -376,46 +377,30 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
376 */ 377 */
377 prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage; 378 prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage;
378 379
379 /* if the offsets in the first page of the file, 380 /* if the offset is in the first page of the file, no hint provided.
380 * no hint provided.
381 */ 381 */
382 if (prev < 0) 382 if (prev < 0)
383 return (0); 383 goto out;
384
385 /* prepare to lookup the previous page's extent info */
386 lxdl.maxnlxd = 1;
387 lxdl.nlxd = 1;
388 lxdl.lxd = &lxd;
389 LXDoffset(&lxd, prev)
390 LXDlength(&lxd, nbperpage);
391
392 xadl.maxnxad = 1;
393 xadl.nxad = 0;
394 xadl.xad = xp;
395
396 /* perform the lookup */
397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398 return (rc);
399
400 /* check if no extent exists for the previous page.
401 * this is possible for sparse files.
402 */
403 if (xadl.nxad == 0) {
404// assert(ISSPARSE(ip));
405 return (0);
406 }
407 384
408 /* only preserve the abnr flag within the xad flags 385 rc = xtLookup(ip, prev, nbperpage, &xflag, &xaddr, &xlen, 0);
409 * of the returned hint.
410 */
411 xp->flag &= XAD_NOTRECORDED;
412 386
413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { 387 if ((rc == 0) && xlen) {
414 jfs_error(ip->i_sb, "extHint: corrupt xtree"); 388 if (xlen != nbperpage) {
415 return -EIO; 389 jfs_error(ip->i_sb, "extHint: corrupt xtree");
416 } 390 rc = -EIO;
391 }
392 XADaddress(xp, xaddr);
393 XADlength(xp, xlen);
394 /*
395 * only preserve the abnr flag within the xad flags
396 * of the returned hint.
397 */
398 xp->flag = xflag & XAD_NOTRECORDED;
399 } else
400 rc = 0;
417 401
418 return (0); 402out:
403 return (rc);
419} 404}
420 405
421 406
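
The rewritten extHint() above drops the lxd/xad list plumbing and asks xtLookup() directly for the extent backing the page before the one being allocated. The page arithmetic it keeps is small enough to check standalone; the constants below (4 KiB pages, 1 KiB blocks) are illustrative, not JFS's only configuration:

#include <stdio.h>

#define POFFSET   4095ULL   /* offset-within-page mask, 4 KiB pages */
#define L2BSIZE   10        /* log2 of a 1 KiB file-system block */
#define NBPERPAGE 4         /* fs blocks per page */

static long long prev_page_block(unsigned long long offset)
{
        /* round down to the page, then step back one page, in blocks */
        return (long long)((offset & ~POFFSET) >> L2BSIZE) - NBPERPAGE;
}

int main(void)
{
        printf("%lld\n", prev_page_block(100));    /* -4: first page, no hint */
        printf("%lld\n", prev_page_block(8192));   /*  4: look up page 1 */
        return 0;
}
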
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 0f94381ca6d0..346057218edc 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -57,12 +57,6 @@
57#include "jfs_debug.h" 57#include "jfs_debug.h"
58 58
59/* 59/*
60 * __mark_inode_dirty expects inodes to be hashed. Since we don't want
61 * special inodes in the fileset inode space, we make them appear hashed,
62 * but do not put on any lists.
63 */
64
65/*
66 * imap locks 60 * imap locks
67 */ 61 */
68/* iag free list lock */ 62/* iag free list lock */
@@ -497,7 +491,9 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
497 release_metapage(mp); 491 release_metapage(mp);
498 492
499 /* 493 /*
500 * that will look hashed, but won't be on any list; hlist_del() 494 * __mark_inode_dirty expects inodes to be hashed. Since we don't
495 * want special inodes in the fileset inode space, we make them
496 * appear hashed, but do not put on any lists. hlist_del()
501 * will work fine and require no locking. 497 * will work fine and require no locking.
502 */ 498 */
503 ip->i_hash.pprev = &ip->i_hash.next; 499 ip->i_hash.pprev = &ip->i_hash.next;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index c350057087dd..07b6c5dfb4b6 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -369,6 +369,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
369 unsigned long bio_bytes = 0; 369 unsigned long bio_bytes = 0;
370 unsigned long bio_offset = 0; 370 unsigned long bio_offset = 0;
371 int offset; 371 int offset;
372 int bad_blocks = 0;
372 373
373 page_start = (sector_t)page->index << 374 page_start = (sector_t)page->index <<
374 (PAGE_CACHE_SHIFT - inode->i_blkbits); 375 (PAGE_CACHE_SHIFT - inode->i_blkbits);
@@ -394,6 +395,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
394 } 395 }
395 396
396 clear_bit(META_dirty, &mp->flag); 397 clear_bit(META_dirty, &mp->flag);
398 set_bit(META_io, &mp->flag);
397 block_offset = offset >> inode->i_blkbits; 399 block_offset = offset >> inode->i_blkbits;
398 lblock = page_start + block_offset; 400 lblock = page_start + block_offset;
399 if (bio) { 401 if (bio) {
@@ -402,7 +404,6 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
402 len = min(xlen, blocks_per_mp); 404 len = min(xlen, blocks_per_mp);
403 xlen -= len; 405 xlen -= len;
404 bio_bytes += len << inode->i_blkbits; 406 bio_bytes += len << inode->i_blkbits;
405 set_bit(META_io, &mp->flag);
406 continue; 407 continue;
407 } 408 }
408 /* Not contiguous */ 409 /* Not contiguous */
@@ -424,12 +425,14 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
424 xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; 425 xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
425 pblock = metapage_get_blocks(inode, lblock, &xlen); 426 pblock = metapage_get_blocks(inode, lblock, &xlen);
426 if (!pblock) { 427 if (!pblock) {
427 /* Need better error handling */
428 printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); 428 printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
429 dec_io(page, last_write_complete); 429 /*
430 * We already called inc_io(), but can't cancel it
431 * with dec_io() until we're done with the page
432 */
433 bad_blocks++;
430 continue; 434 continue;
431 } 435 }
432 set_bit(META_io, &mp->flag);
433 len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); 436 len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
434 437
435 bio = bio_alloc(GFP_NOFS, 1); 438 bio = bio_alloc(GFP_NOFS, 1);
@@ -459,6 +462,9 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
459 462
460 unlock_page(page); 463 unlock_page(page);
461 464
465 if (bad_blocks)
466 goto err_out;
467
462 if (nr_underway == 0) 468 if (nr_underway == 0)
463 end_page_writeback(page); 469 end_page_writeback(page);
464 470
@@ -474,7 +480,9 @@ skip:
474 bio_put(bio); 480 bio_put(bio);
475 unlock_page(page); 481 unlock_page(page);
476 dec_io(page, last_write_complete); 482 dec_io(page, last_write_complete);
477 483err_out:
484 while (bad_blocks--)
485 dec_io(page, last_write_complete);
478 return -EIO; 486 return -EIO;
479} 487}
480 488
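
The bad_blocks counter above exists because dec_io() can trigger last_write_complete() and end writeback on the page, which must not happen while the loop is still walking that page; failed blocks are therefore only counted, and their I/O references are dropped together at err_out. A small model of that deferred-release pattern with invented counts:

#include <stdio.h>

static int io_count;                      /* outstanding I/O references */

static void inc_io(void) { io_count++; }
static void dec_io(void) { io_count--; }  /* may complete the page at 0 */

int main(void)
{
        int bad_blocks = 0;

        for (int blk = 0; blk < 4; blk++) {
                inc_io();
                if (blk == 2) {           /* pretend this block won't map */
                        bad_blocks++;     /* defer the dec_io() */
                        continue;
                }
        }
        /* page fully walked; now it is safe to drop the failed refs */
        while (bad_blocks--)
                dec_io();
        printf("io_count=%d\n", io_count);  /* refs held by the 3 good blocks */
        return 0;
}
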
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 649f9817accd..43ea3713c083 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -58,35 +58,6 @@ struct timestruc_t {
58#define ONES 0xffffffffu /* all bit on */ 58#define ONES 0xffffffffu /* all bit on */
59 59
60/* 60/*
61 * logical xd (lxd)
62 */
63typedef struct {
64 unsigned len:24;
65 unsigned off1:8;
66 u32 off2;
67} lxd_t;
68
69/* lxd_t field construction */
70#define LXDlength(lxd, length32) ( (lxd)->len = length32 )
71#define LXDoffset(lxd, offset64)\
72{\
73 (lxd)->off1 = ((s64)offset64) >> 32;\
74 (lxd)->off2 = (offset64) & 0xffffffff;\
75}
76
77/* lxd_t field extraction */
78#define lengthLXD(lxd) ( (lxd)->len )
79#define offsetLXD(lxd)\
80 ( ((s64)((lxd)->off1)) << 32 | (lxd)->off2 )
81
82/* lxd list */
83struct lxdlist {
84 s16 maxnlxd;
85 s16 nlxd;
86 lxd_t *lxd;
87};
88
89/*
90 * physical xd (pxd) 61 * physical xd (pxd)
91 */ 62 */
92typedef struct { 63typedef struct {
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index a27e26c90568..d654a6458648 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -164,11 +164,8 @@ int xtLookup(struct inode *ip, s64 lstart,
164 /* is lookup offset beyond eof ? */ 164 /* is lookup offset beyond eof ? */
165 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 165 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
166 JFS_SBI(ip->i_sb)->l2bsize; 166 JFS_SBI(ip->i_sb)->l2bsize;
167 if (lstart >= size) { 167 if (lstart >= size)
168 jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
169 (ulong) lstart, (ulong) size);
170 return 0; 168 return 0;
171 }
172 } 169 }
173 170
174 /* 171 /*
@@ -220,264 +217,6 @@ int xtLookup(struct inode *ip, s64 lstart,
220 return rc; 217 return rc;
221} 218}
222 219
223
224/*
225 * xtLookupList()
226 *
227 * function: map a single logical extent into a list of physical extent;
228 *
229 * parameter:
230 * struct inode *ip,
231 * struct lxdlist *lxdlist, lxd list (in)
232 * struct xadlist *xadlist, xad list (in/out)
233 * int flag)
234 *
235 * coverage of lxd by xad under assumption of
236 * . lxd's are ordered and disjoint.
237 * . xad's are ordered and disjoint.
238 *
239 * return:
240 * 0: success
241 *
242 * note: a page being written (even a single byte) is backed fully,
243 * except the last page which is only backed with blocks
244 * required to cover the last byte;
245 * the extent backing a page is fully contained within an xad;
246 */
247int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
248 struct xadlist * xadlist, int flag)
249{
250 int rc = 0;
251 struct btstack btstack;
252 int cmp;
253 s64 bn;
254 struct metapage *mp;
255 xtpage_t *p;
256 int index;
257 lxd_t *lxd;
258 xad_t *xad, *pxd;
259 s64 size, lstart, lend, xstart, xend, pstart;
260 s64 llen, xlen, plen;
261 s64 xaddr, paddr;
262 int nlxd, npxd, maxnpxd;
263
264 npxd = xadlist->nxad = 0;
265 maxnpxd = xadlist->maxnxad;
266 pxd = xadlist->xad;
267
268 nlxd = lxdlist->nlxd;
269 lxd = lxdlist->lxd;
270
271 lstart = offsetLXD(lxd);
272 llen = lengthLXD(lxd);
273 lend = lstart + llen;
274
275 size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
276 JFS_SBI(ip->i_sb)->l2bsize;
277
278 /*
279 * search for the xad entry covering the logical extent
280 */
281 search:
282 if (lstart >= size)
283 return 0;
284
285 if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
286 return rc;
287
288 /*
289 * compute the physical extent covering logical extent
290 *
291 * N.B. search may have failed (e.g., hole in sparse file),
292 * and returned the index of the next entry.
293 */
294//map:
295 /* retrieve search result */
296 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
297
298 /* is xad on the next sibling page ? */
299 if (index == le16_to_cpu(p->header.nextindex)) {
300 if (p->header.flag & BT_ROOT)
301 goto mapend;
302
303 if ((bn = le64_to_cpu(p->header.next)) == 0)
304 goto mapend;
305
306 XT_PUTPAGE(mp);
307
308 /* get next sibling page */
309 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
310 if (rc)
311 return rc;
312
313 index = XTENTRYSTART;
314 }
315
316 xad = &p->xad[index];
317
318 /*
319 * is lxd covered by xad ?
320 */
321 compare:
322 xstart = offsetXAD(xad);
323 xlen = lengthXAD(xad);
324 xend = xstart + xlen;
325 xaddr = addressXAD(xad);
326
327 compare1:
328 if (xstart < lstart)
329 goto compare2;
330
331 /* (lstart <= xstart) */
332
333 /* lxd is NOT covered by xad */
334 if (lend <= xstart) {
335 /*
336 * get next lxd
337 */
338 if (--nlxd == 0)
339 goto mapend;
340 lxd++;
341
342 lstart = offsetLXD(lxd);
343 llen = lengthLXD(lxd);
344 lend = lstart + llen;
345 if (lstart >= size)
346 goto mapend;
347
348 /* compare with the current xad */
349 goto compare1;
350 }
351 /* lxd is covered by xad */
352 else { /* (xstart < lend) */
353
354 /* initialize new pxd */
355 pstart = xstart;
356 plen = min(lend - xstart, xlen);
357 paddr = xaddr;
358
359 goto cover;
360 }
361
362 /* (xstart < lstart) */
363 compare2:
364 /* lxd is covered by xad */
365 if (lstart < xend) {
366 /* initialize new pxd */
367 pstart = lstart;
368 plen = min(xend - lstart, llen);
369 paddr = xaddr + (lstart - xstart);
370
371 goto cover;
372 }
373 /* lxd is NOT covered by xad */
374 else { /* (xend <= lstart) */
375
376 /*
377 * get next xad
378 *
379 * linear search next xad covering lxd on
380 * the current xad page, and then tree search
381 */
382 if (index == le16_to_cpu(p->header.nextindex) - 1) {
383 if (p->header.flag & BT_ROOT)
384 goto mapend;
385
386 XT_PUTPAGE(mp);
387 goto search;
388 } else {
389 index++;
390 xad++;
391
392 /* compare with new xad */
393 goto compare;
394 }
395 }
396
397 /*
398 * lxd is covered by xad and a new pxd has been initialized
399 * (lstart <= xstart < lend) or (xstart < lstart < xend)
400 */
401 cover:
402 /* finalize pxd corresponding to current xad */
403 XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
404
405 if (++npxd >= maxnpxd)
406 goto mapend;
407 pxd++;
408
409 /*
410 * lxd is fully covered by xad
411 */
412 if (lend <= xend) {
413 /*
414 * get next lxd
415 */
416 if (--nlxd == 0)
417 goto mapend;
418 lxd++;
419
420 lstart = offsetLXD(lxd);
421 llen = lengthLXD(lxd);
422 lend = lstart + llen;
423 if (lstart >= size)
424 goto mapend;
425
426 /*
427 * test for old xad covering new lxd
428 * (old xstart < new lstart)
429 */
430 goto compare2;
431 }
432 /*
433 * lxd is partially covered by xad
434 */
435 else { /* (xend < lend) */
436
437 /*
438 * get next xad
439 *
440 * linear search next xad covering lxd on
441 * the current xad page, and then next xad page search
442 */
443 if (index == le16_to_cpu(p->header.nextindex) - 1) {
444 if (p->header.flag & BT_ROOT)
445 goto mapend;
446
447 if ((bn = le64_to_cpu(p->header.next)) == 0)
448 goto mapend;
449
450 XT_PUTPAGE(mp);
451
452 /* get next sibling page */
453 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
454 if (rc)
455 return rc;
456
457 index = XTENTRYSTART;
458 xad = &p->xad[index];
459 } else {
460 index++;
461 xad++;
462 }
463
464 /*
465 * test for new xad covering old lxd
466 * (old lstart < new xstart)
467 */
468 goto compare;
469 }
470
471 mapend:
472 xadlist->nxad = npxd;
473
474//out:
475 XT_PUTPAGE(mp);
476
477 return rc;
478}
479
480
481/* 220/*
482 * xtSearch() 221 * xtSearch()
483 * 222 *
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 70815c8a3d6a..08c0c749b986 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -110,8 +110,6 @@ typedef union {
110 */ 110 */
111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, 111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
112 int *pflag, s64 * paddr, int *plen, int flag); 112 int *pflag, s64 * paddr, int *plen, int flag);
113extern int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
114 struct xadlist * xadlist, int flag);
115extern void xtInitRoot(tid_t tid, struct inode *ip); 113extern void xtInitRoot(tid_t tid, struct inode *ip);
116extern int xtInsert(tid_t tid, struct inode *ip, 114extern int xtInsert(tid_t tid, struct inode *ip,
117 int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag); 115 int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b37d1f78b854..6f21adf9479a 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -29,6 +29,7 @@
29#include <linux/posix_acl.h> 29#include <linux/posix_acl.h>
30#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
31#include <linux/exportfs.h> 31#include <linux/exportfs.h>
32#include <linux/crc32.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
33#include <linux/seq_file.h> 34#include <linux/seq_file.h>
34 35
@@ -168,6 +169,9 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
168 buf->f_files = maxinodes; 169 buf->f_files = maxinodes;
169 buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - 170 buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
170 atomic_read(&imap->im_numfree)); 171 atomic_read(&imap->im_numfree));
172 buf->f_fsid.val[0] = (u32)crc32_le(0, sbi->uuid, sizeof(sbi->uuid)/2);
173 buf->f_fsid.val[1] = (u32)crc32_le(0, sbi->uuid + sizeof(sbi->uuid)/2,
174 sizeof(sbi->uuid)/2);
171 175
172 buf->f_namelen = JFS_NAME_MAX; 176 buf->f_namelen = JFS_NAME_MAX;
173 return 0; 177 return 0;
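
The statfs hunk above fills f_fsid by CRC-ing each half of the 16-byte volume uuid into its own 32-bit word, giving userspace a stable identifier without exposing the raw uuid. A userspace approximation of the split, using zlib's crc32() as a stand-in for the kernel's crc32_le() (build with -lz; the uuid bytes are invented):

#include <stdio.h>
#include <zlib.h>

int main(void)
{
        unsigned char uuid[16] = {
                0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                0x0f, 0xed, 0xcb, 0xa9, 0x87, 0x65, 0x43, 0x21,
        };
        unsigned int val0 = crc32(0L, uuid, sizeof(uuid) / 2);
        unsigned int val1 = crc32(0L, uuid + sizeof(uuid) / 2, sizeof(uuid) / 2);

        printf("f_fsid = %08x:%08x\n", val0, val1);
        return 0;
}
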
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index aedc47a264c1..1f3b0fc0d351 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -139,55 +139,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
139 return 0; 139 return 0;
140} 140}
141 141
142#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
143static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
144 struct in6_addr *addr_mapped)
145{
146 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
147
148 switch (sap->sa_family) {
149 case AF_INET6:
150 return &((const struct sockaddr_in6 *)sap)->sin6_addr;
151 case AF_INET:
152 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped);
153 return addr_mapped;
154 }
155
156 return NULL;
157}
158
159/*
160 * If lockd is using a PF_INET6 listener, all incoming requests appear
161 * to come from AF_INET6 remotes. The address of AF_INET remotes are
162 * mapped to AF_INET6 automatically by the network layer. In case the
163 * user passed an AF_INET server address at mount time, ensure both
164 * addresses are AF_INET6 before comparing them.
165 */
166static int nlmclnt_cmp_addr(const struct nlm_host *host,
167 const struct sockaddr *sap)
168{
169 const struct in6_addr *addr1;
170 const struct in6_addr *addr2;
171 struct in6_addr addr1_mapped;
172 struct in6_addr addr2_mapped;
173
174 addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
175 if (likely(addr1 != NULL)) {
176 addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
177 if (likely(addr2 != NULL))
178 return ipv6_addr_equal(addr1, addr2);
179 }
180
181 return 0;
182}
183#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
184static int nlmclnt_cmp_addr(const struct nlm_host *host,
185 const struct sockaddr *sap)
186{
187 return nlm_cmp_addr(nlm_addr(host), sap);
188}
189#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
190
191/* 142/*
192 * The server lockd has called us back to tell us the lock was granted 143 * The server lockd has called us back to tell us the lock was granted
193 */ 144 */
@@ -215,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
215 */ 166 */
216 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
217 continue; 168 continue;
218 if (!nlmclnt_cmp_addr(block->b_host, addr)) 169 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
219 continue; 170 continue;
220 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
221 continue; 172 continue;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 5e2c4d5ac827..6d5d4a4169e5 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -16,6 +16,8 @@
16#include <linux/sunrpc/svc.h> 16#include <linux/sunrpc/svc.h>
17#include <linux/lockd/lockd.h> 17#include <linux/lockd/lockd.h>
18 18
19#include <asm/unaligned.h>
20
19#define NLMDBG_FACILITY NLMDBG_MONITOR 21#define NLMDBG_FACILITY NLMDBG_MONITOR
20#define NSM_PROGRAM 100024 22#define NSM_PROGRAM 100024
21#define NSM_VERSION 1 23#define NSM_VERSION 1
@@ -274,10 +276,12 @@ static void nsm_init_private(struct nsm_handle *nsm)
274{ 276{
275 u64 *p = (u64 *)&nsm->sm_priv.data; 277 u64 *p = (u64 *)&nsm->sm_priv.data;
276 struct timespec ts; 278 struct timespec ts;
279 s64 ns;
277 280
278 ktime_get_ts(&ts); 281 ktime_get_ts(&ts);
279 *p++ = timespec_to_ns(&ts); 282 ns = timespec_to_ns(&ts);
280 *p = (unsigned long)nsm; 283 put_unaligned(ns, p);
284 put_unaligned((unsigned long)nsm, p + 1);
281} 285}
282 286
283static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, 287static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
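
nsm_init_private() above switches to put_unaligned() because sm_priv.data is a bare byte array with no natural 64-bit alignment, and a plain store through a u64 pointer can fault on strict-alignment architectures. A userspace model of what the helper amounts to:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct nsm_private { unsigned char data[16]; };   /* no alignment promise */

static void put_unaligned_u64(uint64_t v, void *p)
{
        memcpy(p, &v, sizeof(v));   /* byte-wise store, alignment-safe */
}

int main(void)
{
        struct nsm_private priv;
        uint64_t ns = 123456789ULL;        /* stand-in timestamp */
        uint64_t cookie = 0xdeadbeefULL;   /* stand-in handle address */

        put_unaligned_u64(ns, priv.data);
        put_unaligned_u64(cookie, priv.data + 8);
        printf("first byte: %02x\n", priv.data[0]);
        return 0;
}
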
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 64f1c31b5853..abf83881f68a 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -53,17 +53,6 @@ static struct svc_rqst *nlmsvc_rqst;
53unsigned long nlmsvc_timeout; 53unsigned long nlmsvc_timeout;
54 54
55/* 55/*
56 * If the kernel has IPv6 support available, always listen for
57 * both AF_INET and AF_INET6 requests.
58 */
59#if (defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) && \
60 defined(CONFIG_SUNRPC_REGISTER_V4)
61static const sa_family_t nlmsvc_family = AF_INET6;
62#else /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
63static const sa_family_t nlmsvc_family = AF_INET;
64#endif /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
65
66/*
67 * These can be set at insmod time (useful for NFS as root filesystem), 56 * These can be set at insmod time (useful for NFS as root filesystem),
68 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 57 * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
69 */ 58 */
@@ -204,19 +193,30 @@ lockd(void *vrqstp)
204 return 0; 193 return 0;
205} 194}
206 195
207static int create_lockd_listener(struct svc_serv *serv, char *name, 196static int create_lockd_listener(struct svc_serv *serv, const char *name,
208 unsigned short port) 197 const int family, const unsigned short port)
209{ 198{
210 struct svc_xprt *xprt; 199 struct svc_xprt *xprt;
211 200
212 xprt = svc_find_xprt(serv, name, 0, 0); 201 xprt = svc_find_xprt(serv, name, family, 0);
213 if (xprt == NULL) 202 if (xprt == NULL)
214 return svc_create_xprt(serv, name, port, SVC_SOCK_DEFAULTS); 203 return svc_create_xprt(serv, name, family, port,
215 204 SVC_SOCK_DEFAULTS);
216 svc_xprt_put(xprt); 205 svc_xprt_put(xprt);
217 return 0; 206 return 0;
218} 207}
219 208
209static int create_lockd_family(struct svc_serv *serv, const int family)
210{
211 int err;
212
213 err = create_lockd_listener(serv, "udp", family, nlm_udpport);
214 if (err < 0)
215 return err;
216
217 return create_lockd_listener(serv, "tcp", family, nlm_tcpport);
218}
219
220/* 220/*
221 * Ensure there are active UDP and TCP listeners for lockd. 221 * Ensure there are active UDP and TCP listeners for lockd.
222 * 222 *
@@ -232,13 +232,15 @@ static int make_socks(struct svc_serv *serv)
232 static int warned; 232 static int warned;
233 int err; 233 int err;
234 234
235 err = create_lockd_listener(serv, "udp", nlm_udpport); 235 err = create_lockd_family(serv, PF_INET);
236 if (err < 0) 236 if (err < 0)
237 goto out_err; 237 goto out_err;
238 238
239 err = create_lockd_listener(serv, "tcp", nlm_tcpport); 239#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
240 if (err < 0) 240 err = create_lockd_family(serv, PF_INET6);
241 if (err < 0 && err != -EAFNOSUPPORT)
241 goto out_err; 242 goto out_err;
243#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
242 244
243 warned = 0; 245 warned = 0;
244 return 0; 246 return 0;
@@ -274,7 +276,7 @@ int lockd_up(void)
274 "lockd_up: no pid, %d users??\n", nlmsvc_users); 276 "lockd_up: no pid, %d users??\n", nlmsvc_users);
275 277
276 error = -ENOMEM; 278 error = -ENOMEM;
277 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, nlmsvc_family, NULL); 279 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
278 if (!serv) { 280 if (!serv) {
279 printk(KERN_WARNING "lockd_up: create service failed\n"); 281 printk(KERN_WARNING "lockd_up: create service failed\n");
280 goto out; 282 goto out;
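
make_socks() above now always creates the PF_INET listeners and then attempts PF_INET6, treating -EAFNOSUPPORT as non-fatal so lockd still starts on kernels without IPv6. A userspace model of that tolerate-missing-family pattern:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <unistd.h>

static int create_family(int family)
{
        int fd = socket(family, SOCK_STREAM, 0);

        if (fd < 0)
                return -errno;
        close(fd);   /* a real listener would bind and listen here */
        return 0;
}

int main(void)
{
        int err = create_family(AF_INET);

        if (err < 0) {
                fprintf(stderr, "IPv4 failed: %s\n", strerror(-err));
                return 1;
        }
        err = create_family(AF_INET6);
        if (err < 0 && err != -EAFNOSUPPORT) {
                fprintf(stderr, "IPv6 failed: %s\n", strerror(-err));
                return 1;
        }
        puts(err ? "IPv4 only" : "IPv4 and IPv6");
        return 0;
}

The nfs_callback_up() change below follows the same pattern, additionally remembering the IPv6 callback port in its own variable.
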
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 3e634f2a1083..a886e692ddd0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -38,19 +38,10 @@ static struct svc_program nfs4_callback_program;
38 38
39unsigned int nfs_callback_set_tcpport; 39unsigned int nfs_callback_set_tcpport;
40unsigned short nfs_callback_tcpport; 40unsigned short nfs_callback_tcpport;
41unsigned short nfs_callback_tcpport6;
41static const int nfs_set_port_min = 0; 42static const int nfs_set_port_min = 0;
42static const int nfs_set_port_max = 65535; 43static const int nfs_set_port_max = 65535;
43 44
44/*
45 * If the kernel has IPv6 support available, always listen for
46 * both AF_INET and AF_INET6 requests.
47 */
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49static const sa_family_t nfs_callback_family = AF_INET6;
50#else
51static const sa_family_t nfs_callback_family = AF_INET;
52#endif
53
54static int param_set_port(const char *val, struct kernel_param *kp) 45static int param_set_port(const char *val, struct kernel_param *kp)
55{ 46{
56 char *endp; 47 char *endp;
@@ -116,19 +107,29 @@ int nfs_callback_up(void)
116 mutex_lock(&nfs_callback_mutex); 107 mutex_lock(&nfs_callback_mutex);
117 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) 108 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
118 goto out; 109 goto out;
119 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, 110 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
120 nfs_callback_family, NULL);
121 ret = -ENOMEM; 111 ret = -ENOMEM;
122 if (!serv) 112 if (!serv)
123 goto out_err; 113 goto out_err;
124 114
125 ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, 115 ret = svc_create_xprt(serv, "tcp", PF_INET,
126 SVC_SOCK_ANONYMOUS); 116 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
127 if (ret <= 0) 117 if (ret <= 0)
128 goto out_err; 118 goto out_err;
129 nfs_callback_tcpport = ret; 119 nfs_callback_tcpport = ret;
130 dprintk("NFS: Callback listener port = %u (af %u)\n", 120 dprintk("NFS: Callback listener port = %u (af %u)\n",
131 nfs_callback_tcpport, nfs_callback_family); 121 nfs_callback_tcpport, PF_INET);
122
123#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
124 ret = svc_create_xprt(serv, "tcp", PF_INET6,
125 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
126 if (ret > 0) {
127 nfs_callback_tcpport6 = ret;
128 dprintk("NFS: Callback listener port = %u (af %u)\n",
129 nfs_callback_tcpport6, PF_INET6);
130 } else if (ret != -EAFNOSUPPORT)
131 goto out_err;
132#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
132 133
133 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); 134 nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
134 if (IS_ERR(nfs_callback_info.rqst)) { 135 if (IS_ERR(nfs_callback_info.rqst)) {
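
On success svc_create_xprt() returns the port that was actually bound, which the hunk above records separately as nfs_callback_tcpport and nfs_callback_tcpport6 for the two listeners. A user-space analogue of recovering an ephemerally bound port (demo only, not the kernel API):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Bind port 0 and ask the kernel which port it picked, the way the
 * callback code consumes svc_create_xprt()'s positive return value. */
static int bound_port(void)
{
	struct sockaddr_in sin;
	socklen_t len = sizeof(sin);
	int port, fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;	/* sin_port stays 0: kernel chooses */
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    getsockname(fd, (struct sockaddr *)&sin, &len) < 0) {
		close(fd);
		return -1;
	}
	port = ntohs(sin.sin_port);
	close(fd);
	return port;
}

int main(void)
{
	printf("callback listener port = %d\n", bound_port());
	return 0;
}
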
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index bb25d2135ff1..e110e286a262 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -72,5 +72,6 @@ extern void nfs_callback_down(void);
72 72
73extern unsigned int nfs_callback_set_tcpport; 73extern unsigned int nfs_callback_set_tcpport;
74extern unsigned short nfs_callback_tcpport; 74extern unsigned short nfs_callback_tcpport;
75extern unsigned short nfs_callback_tcpport6;
75 76
76#endif /* __LINUX_FS_NFS_CALLBACK_H */ 77#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 574158ae2398..aba38017bdef 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -224,38 +224,6 @@ void nfs_put_client(struct nfs_client *clp)
224} 224}
225 225
226#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 226#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
227static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
228{
229 switch (sa->sa_family) {
230 default:
231 return NULL;
232 case AF_INET6:
233 return &((const struct sockaddr_in6 *)sa)->sin6_addr;
234 break;
235 case AF_INET:
236 ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
237 addr_mapped);
238 return addr_mapped;
239 }
240}
241
242static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
243 const struct sockaddr *sa2)
244{
245 const struct in6_addr *addr1;
246 const struct in6_addr *addr2;
247 struct in6_addr addr1_mapped;
248 struct in6_addr addr2_mapped;
249
250 addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
251 if (likely(addr1 != NULL)) {
252 addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
253 if (likely(addr2 != NULL))
254 return ipv6_addr_equal(addr1, addr2);
255 }
256 return 0;
257}
258
259/* 227/*
260 * Test if two ip6 socket addresses refer to the same socket by 228 * Test if two ip6 socket addresses refer to the same socket by
261 * comparing relevant fields. The padding bytes specifically, are not 229 * comparing relevant fields. The padding bytes specifically, are not
@@ -267,38 +235,21 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
267 * 235 *
268 * The caller should ensure both socket addresses are AF_INET6. 236 * The caller should ensure both socket addresses are AF_INET6.
269 */ 237 */
270static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1, 238static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
271 const struct sockaddr *sa2) 239 const struct sockaddr *sa2)
272{ 240{
273 const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1; 241 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
274 const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2; 242 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
275 243
276 if (!ipv6_addr_equal(&saddr1->sin6_addr, 244 if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
277 &saddr1->sin6_addr)) 245 sin1->sin6_scope_id != sin2->sin6_scope_id)
278 return 0; 246 return 0;
279 if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
280 saddr1->sin6_scope_id != saddr2->sin6_scope_id)
281 return 0;
282 return saddr1->sin6_port == saddr2->sin6_port;
283}
284#else
285static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
286 const struct sockaddr_in *sa2)
287{
288 return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
289}
290 247
291static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, 248 return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
292 const struct sockaddr *sa2)
293{
294 if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
295 return 0;
296 return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
297 (const struct sockaddr_in *)sa2);
298} 249}
299 250#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
300static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1, 251static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
301 const struct sockaddr * sa2) 252 const struct sockaddr *sa2)
302{ 253{
303 return 0; 254 return 0;
304} 255}
@@ -311,20 +262,57 @@ static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
311 * 262 *
312 * The caller should ensure both socket addresses are AF_INET. 263 * The caller should ensure both socket addresses are AF_INET.
313 */ 264 */
265static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
266 const struct sockaddr *sa2)
267{
268 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
269 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
270
271 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
272}
273
274static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
275 const struct sockaddr *sa2)
276{
277 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
278 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
279
280 return nfs_sockaddr_match_ipaddr6(sa1, sa2) &&
281 (sin1->sin6_port == sin2->sin6_port);
282}
283
314static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, 284static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
315 const struct sockaddr *sa2) 285 const struct sockaddr *sa2)
316{ 286{
317 const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1; 287 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
318 const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2; 288 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
289
290 return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
291 (sin1->sin_port == sin2->sin_port);
292}
319 293
320 if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr) 294/*
295 * Test if two socket addresses represent the same actual socket,
296 * by comparing (only) relevant fields, excluding the port number.
297 */
298static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
299 const struct sockaddr *sa2)
300{
301 if (sa1->sa_family != sa2->sa_family)
321 return 0; 302 return 0;
322 return saddr1->sin_port == saddr2->sin_port; 303
304 switch (sa1->sa_family) {
305 case AF_INET:
306 return nfs_sockaddr_match_ipaddr4(sa1, sa2);
307 case AF_INET6:
308 return nfs_sockaddr_match_ipaddr6(sa1, sa2);
309 }
310 return 0;
323} 311}
324 312
325/* 313/*
326 * Test if two socket addresses represent the same actual socket, 314 * Test if two socket addresses represent the same actual socket,
327 * by comparing (only) relevant fields. 315 * by comparing (only) relevant fields, including the port number.
328 */ 316 */
329static int nfs_sockaddr_cmp(const struct sockaddr *sa1, 317static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
330 const struct sockaddr *sa2) 318 const struct sockaddr *sa2)
@@ -1606,8 +1594,6 @@ int __init nfs_fs_proc_init(void)
1606 if (!proc_fs_nfs) 1594 if (!proc_fs_nfs)
1607 goto error_0; 1595 goto error_0;
1608 1596
1609 proc_fs_nfs->owner = THIS_MODULE;
1610
1611 /* a file of servers with which we're dealing */ 1597 /* a file of servers with which we're dealing */
1612 p = proc_create("servers", S_IFREG|S_IRUGO, 1598 p = proc_create("servers", S_IFREG|S_IRUGO,
1613 proc_fs_nfs, &nfs_server_list_fops); 1599 proc_fs_nfs, &nfs_server_list_fops);
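
The client.c hunks above drop the v4-mapped-into-v6 comparison in favour of per-family helpers plus a dispatcher that first insists the families match; address-only matching and address-plus-port matching become separate layers. A self-contained sketch of that shape, where IN6_IS_ADDR_LINKLOCAL stands in for the kernel's ipv6_addr_scope() test and both operands' address bytes are compared:

#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static bool match_ipaddr4(const struct sockaddr_in *a, const struct sockaddr_in *b)
{
	return a->sin_addr.s_addr == b->sin_addr.s_addr;
}

static bool match_ipaddr6(const struct sockaddr_in6 *a, const struct sockaddr_in6 *b)
{
	/* link-local addresses only match within one scope (interface) */
	if (IN6_IS_ADDR_LINKLOCAL(&a->sin6_addr) &&
	    a->sin6_scope_id != b->sin6_scope_id)
		return false;
	return memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(a->sin6_addr)) == 0;
}

/* address-only comparison: families must agree, then dispatch per family */
static bool match_ipaddr(const struct sockaddr *a, const struct sockaddr *b)
{
	if (a->sa_family != b->sa_family)
		return false;
	switch (a->sa_family) {
	case AF_INET:
		return match_ipaddr4((const struct sockaddr_in *)a,
				     (const struct sockaddr_in *)b);
	case AF_INET6:
		return match_ipaddr6((const struct sockaddr_in6 *)a,
				     (const struct sockaddr_in6 *)b);
	}
	return false;
}

int main(void)
{
	struct sockaddr_in a = { .sin_family = AF_INET };
	struct sockaddr_in b = { .sin_family = AF_INET };

	a.sin_addr.s_addr = htonl(0x7f000001);	/* 127.0.0.1 */
	b.sin_addr.s_addr = htonl(0x7f000001);
	printf("match: %d\n", match_ipaddr((const struct sockaddr *)&a,
					   (const struct sockaddr *)&b));
	return 0;
}

The port-sensitive nfs_sockaddr_cmp() variants in the diff layer a port check on top of exactly this address-only dispatch.
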
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 78bf72fc1db3..370b190a09d1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1624,8 +1624,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1624 } else if (atomic_read(&new_dentry->d_count) > 1) 1624 } else if (atomic_read(&new_dentry->d_count) > 1)
1625 /* dentry still busy? */ 1625 /* dentry still busy? */
1626 goto out; 1626 goto out;
1627 } else 1627 }
1628 nfs_drop_nlink(new_inode);
1629 1628
1630go_ahead: 1629go_ahead:
1631 /* 1630 /*
@@ -1638,10 +1637,8 @@ go_ahead:
1638 } 1637 }
1639 nfs_inode_return_delegation(old_inode); 1638 nfs_inode_return_delegation(old_inode);
1640 1639
1641 if (new_inode != NULL) { 1640 if (new_inode != NULL)
1642 nfs_inode_return_delegation(new_inode); 1641 nfs_inode_return_delegation(new_inode);
1643 d_delete(new_dentry);
1644 }
1645 1642
1646 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, 1643 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
1647 new_dir, &new_dentry->d_name); 1644 new_dir, &new_dentry->d_name);
@@ -1650,6 +1647,8 @@ out:
1650 if (rehash) 1647 if (rehash)
1651 d_rehash(rehash); 1648 d_rehash(rehash);
1652 if (!error) { 1649 if (!error) {
1650 if (new_inode != NULL)
1651 nfs_drop_nlink(new_inode);
1653 d_move(old_dentry, new_dentry); 1652 d_move(old_dentry, new_dentry);
1654 nfs_set_verifier(new_dentry, 1653 nfs_set_verifier(new_dentry,
1655 nfs_save_change_attribute(new_dir)); 1654 nfs_save_change_attribute(new_dir));
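
The dir.c hunks defer nfs_drop_nlink() on a displaced rename target until the server-side rename has actually succeeded, instead of decrementing the cached link count up front. A tiny sketch of the resulting ordering; every helper here is a stand-in, not a kernel function:

#include <stdio.h>

struct target { int nlink; };

/* pretend RPC to the server; 0 means the rename succeeded */
static int server_rename(void) { return 0; }

static void drop_cached_nlink(struct target *t) { t->nlink--; }

/* post-patch ordering: local bookkeeping only after server success */
static int rename_over(struct target *victim)
{
	int error = server_rename();

	if (error == 0 && victim != NULL)
		drop_cached_nlink(victim);
	return error;
}

int main(void)
{
	struct target t = { .nlink = 1 };

	printf("rename -> %d, victim nlink now %d\n", rename_over(&t), t.nlink);
	return 0;
}
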
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 90f292b520d2..0abf3f331f56 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -64,11 +64,7 @@ const struct file_operations nfs_file_operations = {
64 .write = do_sync_write, 64 .write = do_sync_write,
65 .aio_read = nfs_file_read, 65 .aio_read = nfs_file_read,
66 .aio_write = nfs_file_write, 66 .aio_write = nfs_file_write,
67#ifdef CONFIG_MMU
68 .mmap = nfs_file_mmap, 67 .mmap = nfs_file_mmap,
69#else
70 .mmap = generic_file_mmap,
71#endif
72 .open = nfs_file_open, 68 .open = nfs_file_open,
73 .flush = nfs_file_flush, 69 .flush = nfs_file_flush,
74 .release = nfs_file_release, 70 .release = nfs_file_release,
@@ -141,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp)
141 dentry->d_parent->d_name.name, 137 dentry->d_parent->d_name.name,
142 dentry->d_name.name); 138 dentry->d_name.name);
143 139
144 /* Ensure that dirty pages are flushed out with the right creds */
145 if (filp->f_mode & FMODE_WRITE)
146 nfs_wb_all(dentry->d_inode);
147 nfs_inc_stats(inode, NFSIOS_VFSRELEASE); 140 nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
148 return nfs_release(inode, filp); 141 return nfs_release(inode, filp);
149} 142}
@@ -235,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id)
235 struct nfs_open_context *ctx = nfs_file_open_context(file); 228 struct nfs_open_context *ctx = nfs_file_open_context(file);
236 struct dentry *dentry = file->f_path.dentry; 229 struct dentry *dentry = file->f_path.dentry;
237 struct inode *inode = dentry->d_inode; 230 struct inode *inode = dentry->d_inode;
238 int status;
239 231
240 dprintk("NFS: flush(%s/%s)\n", 232 dprintk("NFS: flush(%s/%s)\n",
241 dentry->d_parent->d_name.name, 233 dentry->d_parent->d_name.name,
@@ -245,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id)
245 return 0; 237 return 0;
246 nfs_inc_stats(inode, NFSIOS_VFSFLUSH); 238 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
247 239
248 /* Ensure that data+attribute caches are up to date after close() */ 240 /* Flush writes to the server and return any errors */
249 status = nfs_do_fsync(ctx, inode); 241 return nfs_do_fsync(ctx, inode);
250 if (!status)
251 nfs_revalidate_inode(NFS_SERVER(inode), inode);
252 return status;
253} 242}
254 243
255static ssize_t 244static ssize_t
@@ -304,11 +293,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
304 dprintk("NFS: mmap(%s/%s)\n", 293 dprintk("NFS: mmap(%s/%s)\n",
305 dentry->d_parent->d_name.name, dentry->d_name.name); 294 dentry->d_parent->d_name.name, dentry->d_name.name);
306 295
307 status = nfs_revalidate_mapping(inode, file->f_mapping); 296 /* Note: generic_file_mmap() returns ENOSYS on nommu systems
297 * so we call that before revalidating the mapping
298 */
299 status = generic_file_mmap(file, vma);
308 if (!status) { 300 if (!status) {
309 vma->vm_ops = &nfs_file_vm_ops; 301 vma->vm_ops = &nfs_file_vm_ops;
310 vma->vm_flags |= VM_CAN_NONLINEAR; 302 status = nfs_revalidate_mapping(inode, file->f_mapping);
311 file_accessed(file);
312 } 303 }
313 return status; 304 return status;
314} 305}
@@ -354,6 +345,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
354 file->f_path.dentry->d_name.name, 345 file->f_path.dentry->d_name.name,
355 mapping->host->i_ino, len, (long long) pos); 346 mapping->host->i_ino, len, (long long) pos);
356 347
348 /*
349 * Prevent starvation issues if someone is doing a consistency
350 * sync-to-disk
351 */
352 ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
353 nfs_wait_bit_killable, TASK_KILLABLE);
354 if (ret)
355 return ret;
356
357 page = grab_cache_page_write_begin(mapping, index, flags); 357 page = grab_cache_page_write_begin(mapping, index, flags);
358 if (!page) 358 if (!page)
359 return -ENOMEM; 359 return -ENOMEM;
@@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = {
451 .launder_page = nfs_launder_page, 451 .launder_page = nfs_launder_page,
452}; 452};
453 453
454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) 454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
455{ 455{
456 struct page *page = vmf->page;
456 struct file *filp = vma->vm_file; 457 struct file *filp = vma->vm_file;
457 struct dentry *dentry = filp->f_path.dentry; 458 struct dentry *dentry = filp->f_path.dentry;
458 unsigned pagelen; 459 unsigned pagelen;
@@ -483,6 +484,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
483 ret = pagelen; 484 ret = pagelen;
484out_unlock: 485out_unlock:
485 unlock_page(page); 486 unlock_page(page);
487 if (ret)
488 ret = VM_FAULT_SIGBUS;
486 return ret; 489 return ret;
487} 490}
488 491
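
Among the file.c changes, ->page_mkwrite() now takes a struct vm_fault and must return a VM_FAULT_* code rather than an errno, so the hunk folds any nonzero status into VM_FAULT_SIGBUS. A simplified sketch with stand-in types and constants (not the kernel headers):

#include <stdio.h>

#define VM_FAULT_SIGBUS 0x0002		/* stand-in for the kernel constant */

struct vm_fault { void *page; };	/* reduced stand-in */

/* pretend write-preparation step; nonzero means an errno-style failure */
static int prepare_write(void *page)
{
	(void)page;
	return -5;			/* demo: act as if -EIO happened */
}

static int page_mkwrite(struct vm_fault *vmf)
{
	int ret = prepare_write(vmf->page);

	if (ret)
		ret = VM_FAULT_SIGBUS;	/* errno becomes a fault code */
	return ret;
}

int main(void)
{
	struct vm_fault vmf = { .page = NULL };

	printf("page_mkwrite -> %#x\n", (unsigned)page_mkwrite(&vmf));
	return 0;
}
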
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b7c9b2df1f29..46177cb87064 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -156,7 +156,7 @@ int nfs4_path_walk(struct nfs_server *server,
156 return ret; 156 return ret;
157 } 157 }
158 158
159 if (fattr.type != NFDIR) { 159 if (!S_ISDIR(fattr.mode)) {
160 printk(KERN_ERR "nfs4_get_root:" 160 printk(KERN_ERR "nfs4_get_root:"
161 " getroot encountered non-directory\n"); 161 " getroot encountered non-directory\n");
162 return -ENOTDIR; 162 return -ENOTDIR;
@@ -213,7 +213,7 @@ eat_dot_dir:
213 return ret; 213 return ret;
214 } 214 }
215 215
216 if (fattr.type != NFDIR) { 216 if (!S_ISDIR(fattr.mode)) {
217 printk(KERN_ERR "nfs4_get_root:" 217 printk(KERN_ERR "nfs4_get_root:"
218 " lookupfh encountered non-directory\n"); 218 " lookupfh encountered non-directory\n");
219 return -ENOTDIR; 219 return -ENOTDIR;
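
The getroot.c checks now classify the root object from its mode bits instead of a separate fattr->type enum; S_ISDIR() on fattr.mode carries the same information. The same test in plain C:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	mode_t mode = S_IFDIR | 0755;	/* what a healthy root decodes to */

	if (!S_ISDIR(mode))
		puts("getroot encountered non-directory");	/* -ENOTDIR path */
	else
		puts("root is a directory");
	return 0;
}
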
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0c381686171e..a834d1d850b7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -66,6 +66,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
66} 66}
67 67
68/** 68/**
69 * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
70 * @word: long word containing the bit lock
71 */
72int nfs_wait_bit_killable(void *word)
73{
74 if (fatal_signal_pending(current))
75 return -ERESTARTSYS;
76 schedule();
77 return 0;
78}
79
80/**
69 * nfs_compat_user_ino64 - returns the user-visible inode number 81 * nfs_compat_user_ino64 - returns the user-visible inode number
70 * @fileid: 64-bit fileid 82 * @fileid: 64-bit fileid
71 * 83 *
@@ -249,13 +261,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
249 struct inode *inode = ERR_PTR(-ENOENT); 261 struct inode *inode = ERR_PTR(-ENOENT);
250 unsigned long hash; 262 unsigned long hash;
251 263
252 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 264 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
253 goto out_no_inode; 265 goto out_no_inode;
254 266 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
255 if (!fattr->nlink) {
256 printk("NFS: Buggy server - nlink == 0!\n");
257 goto out_no_inode; 267 goto out_no_inode;
258 }
259 268
260 hash = nfs_fattr_to_ino_t(fattr); 269 hash = nfs_fattr_to_ino_t(fattr);
261 270
@@ -291,7 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 && fattr->size <= NFS_LIMIT_READDIRPLUS) 300 && fattr->size <= NFS_LIMIT_READDIRPLUS)
292 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 301 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
293 /* Deal with crossing mountpoints */ 302 /* Deal with crossing mountpoints */
294 if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { 303 if ((fattr->valid & NFS_ATTR_FATTR_FSID)
304 && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
295 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 305 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
296 inode->i_op = &nfs_referral_inode_operations; 306 inode->i_op = &nfs_referral_inode_operations;
297 else 307 else
@@ -304,28 +314,45 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
304 else 314 else
305 init_special_inode(inode, inode->i_mode, fattr->rdev); 315 init_special_inode(inode, inode->i_mode, fattr->rdev);
306 316
317 memset(&inode->i_atime, 0, sizeof(inode->i_atime));
318 memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
319 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
320 nfsi->change_attr = 0;
321 inode->i_size = 0;
322 inode->i_nlink = 0;
323 inode->i_uid = -2;
324 inode->i_gid = -2;
325 inode->i_blocks = 0;
326 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
327
307 nfsi->read_cache_jiffies = fattr->time_start; 328 nfsi->read_cache_jiffies = fattr->time_start;
308 nfsi->attr_gencount = fattr->gencount; 329 nfsi->attr_gencount = fattr->gencount;
309 inode->i_atime = fattr->atime; 330 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
310 inode->i_mtime = fattr->mtime; 331 inode->i_atime = fattr->atime;
311 inode->i_ctime = fattr->ctime; 332 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
312 if (fattr->valid & NFS_ATTR_FATTR_V4) 333 inode->i_mtime = fattr->mtime;
334 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
335 inode->i_ctime = fattr->ctime;
336 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
313 nfsi->change_attr = fattr->change_attr; 337 nfsi->change_attr = fattr->change_attr;
314 inode->i_size = nfs_size_to_loff_t(fattr->size); 338 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
315 inode->i_nlink = fattr->nlink; 339 inode->i_size = nfs_size_to_loff_t(fattr->size);
316 inode->i_uid = fattr->uid; 340 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
317 inode->i_gid = fattr->gid; 341 inode->i_nlink = fattr->nlink;
318 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { 342 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
343 inode->i_uid = fattr->uid;
344 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
345 inode->i_gid = fattr->gid;
346 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
347 inode->i_blocks = fattr->du.nfs2.blocks;
348 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
319 /* 349 /*
 320 * report the blocks in 512-byte units 350 * report the blocks in 512-byte units
321 */ 351 */
322 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 352 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
323 } else {
324 inode->i_blocks = fattr->du.nfs2.blocks;
325 } 353 }
326 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 354 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
327 nfsi->attrtimeo_timestamp = now; 355 nfsi->attrtimeo_timestamp = now;
328 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
329 nfsi->access_cache = RB_ROOT; 356 nfsi->access_cache = RB_ROOT;
330 357
331 unlock_new_inode(inode); 358 unlock_new_inode(inode);
@@ -514,6 +541,32 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
514 return err; 541 return err;
515} 542}
516 543
544/**
 545 * nfs_close_context - Common close_context() routine for NFSv2/v3
546 * @ctx: pointer to context
547 * @is_sync: is this a synchronous close
548 *
 549 * Always ensure that the attributes are up to date if we're mounted
 550 * with close-to-open semantics.
551 */
552void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
553{
554 struct inode *inode;
555 struct nfs_server *server;
556
557 if (!(ctx->mode & FMODE_WRITE))
558 return;
559 if (!is_sync)
560 return;
561 inode = ctx->path.dentry->d_inode;
562 if (!list_empty(&NFS_I(inode)->open_files))
563 return;
564 server = NFS_SERVER(inode);
565 if (server->flags & NFS_MOUNT_NOCTO)
566 return;
567 nfs_revalidate_inode(server, inode);
568}
569
517static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred) 570static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
518{ 571{
519 struct nfs_open_context *ctx; 572 struct nfs_open_context *ctx;
@@ -540,24 +593,15 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
540 return ctx; 593 return ctx;
541} 594}
542 595
543static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) 596static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
544{ 597{
545 struct inode *inode; 598 struct inode *inode = ctx->path.dentry->d_inode;
546
547 if (ctx == NULL)
548 return;
549 599
550 inode = ctx->path.dentry->d_inode;
551 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) 600 if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
552 return; 601 return;
553 list_del(&ctx->list); 602 list_del(&ctx->list);
554 spin_unlock(&inode->i_lock); 603 spin_unlock(&inode->i_lock);
555 if (ctx->state != NULL) { 604 NFS_PROTO(inode)->close_context(ctx, is_sync);
556 if (wait)
557 nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
558 else
559 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
560 }
561 if (ctx->cred != NULL) 605 if (ctx->cred != NULL)
562 put_rpccred(ctx->cred); 606 put_rpccred(ctx->cred);
563 path_put(&ctx->path); 607 path_put(&ctx->path);
@@ -670,9 +714,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
670 if (NFS_STALE(inode)) 714 if (NFS_STALE(inode))
671 goto out; 715 goto out;
672 716
673 if (NFS_STALE(inode))
674 goto out;
675
676 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); 717 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
677 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); 718 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
678 if (status != 0) { 719 if (status != 0) {
@@ -815,25 +856,31 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
815{ 856{
816 struct nfs_inode *nfsi = NFS_I(inode); 857 struct nfs_inode *nfsi = NFS_I(inode);
817 858
818 if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && 859 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
819 nfsi->change_attr == fattr->pre_change_attr) { 860 && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
861 && nfsi->change_attr == fattr->pre_change_attr) {
820 nfsi->change_attr = fattr->change_attr; 862 nfsi->change_attr = fattr->change_attr;
821 if (S_ISDIR(inode->i_mode)) 863 if (S_ISDIR(inode->i_mode))
822 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 864 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
823 } 865 }
824 /* If we have atomic WCC data, we may update some attributes */ 866 /* If we have atomic WCC data, we may update some attributes */
825 if ((fattr->valid & NFS_ATTR_WCC) != 0) { 867 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
826 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) 868 && (fattr->valid & NFS_ATTR_FATTR_CTIME)
869 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
827 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 870 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
828 if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 871
872 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
873 && (fattr->valid & NFS_ATTR_FATTR_MTIME)
874 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
829 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 875 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
830 if (S_ISDIR(inode->i_mode)) 876 if (S_ISDIR(inode->i_mode))
831 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 877 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
832 }
833 if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
834 nfsi->npages == 0)
835 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
836 } 878 }
879 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
880 && (fattr->valid & NFS_ATTR_FATTR_SIZE)
881 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
882 && nfsi->npages == 0)
883 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
837} 884}
838 885
839/** 886/**
@@ -853,35 +900,39 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
853 900
854 901
855 /* Has the inode gone and changed behind our back? */ 902 /* Has the inode gone and changed behind our back? */
856 if (nfsi->fileid != fattr->fileid 903 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
857 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { 904 return -EIO;
905 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
858 return -EIO; 906 return -EIO;
859 }
860 907
861 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 908 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
862 nfsi->change_attr != fattr->change_attr) 909 nfsi->change_attr != fattr->change_attr)
863 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 910 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
864 911
865 /* Verify a few of the more important attributes */ 912 /* Verify a few of the more important attributes */
866 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) 913 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
867 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 914 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
868 915
869 cur_size = i_size_read(inode); 916 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
870 new_isize = nfs_size_to_loff_t(fattr->size); 917 cur_size = i_size_read(inode);
871 if (cur_size != new_isize && nfsi->npages == 0) 918 new_isize = nfs_size_to_loff_t(fattr->size);
872 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 919 if (cur_size != new_isize && nfsi->npages == 0)
920 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
921 }
873 922
874 /* Have any file permissions changed? */ 923 /* Have any file permissions changed? */
875 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) 924 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
876 || inode->i_uid != fattr->uid 925 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
877 || inode->i_gid != fattr->gid) 926 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
927 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
928 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
878 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 929 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
879 930
880 /* Has the link count changed? */ 931 /* Has the link count changed? */
881 if (inode->i_nlink != fattr->nlink) 932 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
882 invalid |= NFS_INO_INVALID_ATTR; 933 invalid |= NFS_INO_INVALID_ATTR;
883 934
884 if (!timespec_equal(&inode->i_atime, &fattr->atime)) 935 if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime))
885 invalid |= NFS_INO_INVALID_ATIME; 936 invalid |= NFS_INO_INVALID_ATIME;
886 937
887 if (invalid != 0) 938 if (invalid != 0)
@@ -893,11 +944,15 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
893 944
894static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) 945static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
895{ 946{
947 if (!(fattr->valid & NFS_ATTR_FATTR_CTIME))
948 return 0;
896 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; 949 return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
897} 950}
898 951
899static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) 952static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
900{ 953{
954 if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
955 return 0;
901 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); 956 return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
902} 957}
903 958
@@ -1033,20 +1088,31 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
1033 /* Don't do a WCC update if these attributes are already stale */ 1088 /* Don't do a WCC update if these attributes are already stale */
1034 if ((fattr->valid & NFS_ATTR_FATTR) == 0 || 1089 if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
1035 !nfs_inode_attrs_need_update(inode, fattr)) { 1090 !nfs_inode_attrs_need_update(inode, fattr)) {
1036 fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); 1091 fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
1092 | NFS_ATTR_FATTR_PRESIZE
1093 | NFS_ATTR_FATTR_PREMTIME
1094 | NFS_ATTR_FATTR_PRECTIME);
1037 goto out_noforce; 1095 goto out_noforce;
1038 } 1096 }
1039 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 1097 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
1040 (fattr->valid & NFS_ATTR_WCC_V4) == 0) { 1098 (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
1041 fattr->pre_change_attr = NFS_I(inode)->change_attr; 1099 fattr->pre_change_attr = NFS_I(inode)->change_attr;
1042 fattr->valid |= NFS_ATTR_WCC_V4; 1100 fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
1043 } 1101 }
1044 if ((fattr->valid & NFS_ATTR_FATTR) != 0 && 1102 if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
1045 (fattr->valid & NFS_ATTR_WCC) == 0) { 1103 (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
1046 memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); 1104 memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
1105 fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
1106 }
1107 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
1108 (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
1047 memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); 1109 memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
1110 fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
1111 }
1112 if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
1113 (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) {
1048 fattr->pre_size = i_size_read(inode); 1114 fattr->pre_size = i_size_read(inode);
1049 fattr->valid |= NFS_ATTR_WCC; 1115 fattr->valid |= NFS_ATTR_FATTR_PRESIZE;
1050 } 1116 }
1051out_noforce: 1117out_noforce:
1052 status = nfs_post_op_update_inode_locked(inode, fattr); 1118 status = nfs_post_op_update_inode_locked(inode, fattr);
@@ -1078,18 +1144,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1078 __func__, inode->i_sb->s_id, inode->i_ino, 1144 __func__, inode->i_sb->s_id, inode->i_ino,
1079 atomic_read(&inode->i_count), fattr->valid); 1145 atomic_read(&inode->i_count), fattr->valid);
1080 1146
1081 if (nfsi->fileid != fattr->fileid) 1147 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
1082 goto out_fileid; 1148 goto out_fileid;
1083 1149
1084 /* 1150 /*
1085 * Make sure the inode's type hasn't changed. 1151 * Make sure the inode's type hasn't changed.
1086 */ 1152 */
1087 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1153 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1088 goto out_changed; 1154 goto out_changed;
1089 1155
1090 server = NFS_SERVER(inode); 1156 server = NFS_SERVER(inode);
1091 /* Update the fsid? */ 1157 /* Update the fsid? */
1092 if (S_ISDIR(inode->i_mode) && 1158 if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
1093 !nfs_fsid_equal(&server->fsid, &fattr->fsid) && 1159 !nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
1094 !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) 1160 !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
1095 server->fsid = fattr->fsid; 1161 server->fsid = fattr->fsid;
@@ -1099,14 +1165,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1099 */ 1165 */
1100 nfsi->read_cache_jiffies = fattr->time_start; 1166 nfsi->read_cache_jiffies = fattr->time_start;
1101 1167
1102 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME 1168 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME)))
1103 | NFS_INO_REVAL_PAGECACHE); 1169 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
1170 | NFS_INO_INVALID_ATIME
1171 | NFS_INO_REVAL_PAGECACHE);
1104 1172
1105 /* Do atomic weak cache consistency updates */ 1173 /* Do atomic weak cache consistency updates */
1106 nfs_wcc_update_inode(inode, fattr); 1174 nfs_wcc_update_inode(inode, fattr);
1107 1175
1108 /* More cache consistency checks */ 1176 /* More cache consistency checks */
1109 if (!(fattr->valid & NFS_ATTR_FATTR_V4)) { 1177 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
1178 if (nfsi->change_attr != fattr->change_attr) {
1179 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1180 inode->i_sb->s_id, inode->i_ino);
1181 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1182 if (S_ISDIR(inode->i_mode))
1183 nfs_force_lookup_revalidate(inode);
1184 nfsi->change_attr = fattr->change_attr;
1185 }
1186 }
1187
1188 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1110 /* NFSv2/v3: Check if the mtime agrees */ 1189 /* NFSv2/v3: Check if the mtime agrees */
1111 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { 1190 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1112 dprintk("NFS: mtime change on server for file %s/%ld\n", 1191 dprintk("NFS: mtime change on server for file %s/%ld\n",
@@ -1114,59 +1193,80 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1114 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1193 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1115 if (S_ISDIR(inode->i_mode)) 1194 if (S_ISDIR(inode->i_mode))
1116 nfs_force_lookup_revalidate(inode); 1195 nfs_force_lookup_revalidate(inode);
1196 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1117 } 1197 }
1198 }
1199 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1118 /* If ctime has changed we should definitely clear access+acl caches */ 1200 /* If ctime has changed we should definitely clear access+acl caches */
1119 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) 1201 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1120 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1202 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1121 } else if (nfsi->change_attr != fattr->change_attr) { 1203 /* and probably clear data for a directory too as utimes can cause
1122 dprintk("NFS: change_attr change on server for file %s/%ld\n", 1204 * havoc with our cache.
1123 inode->i_sb->s_id, inode->i_ino); 1205 */
1124 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1206 if (S_ISDIR(inode->i_mode)) {
1125 if (S_ISDIR(inode->i_mode)) 1207 invalid |= NFS_INO_INVALID_DATA;
1126 nfs_force_lookup_revalidate(inode); 1208 nfs_force_lookup_revalidate(inode);
1209 }
1210 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1211 }
1127 } 1212 }
1128 1213
1129 /* Check if our cached file size is stale */ 1214 /* Check if our cached file size is stale */
1130 new_isize = nfs_size_to_loff_t(fattr->size); 1215 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
1131 cur_isize = i_size_read(inode); 1216 new_isize = nfs_size_to_loff_t(fattr->size);
1132 if (new_isize != cur_isize) { 1217 cur_isize = i_size_read(inode);
1133 /* Do we perhaps have any outstanding writes, or has 1218 if (new_isize != cur_isize) {
1134 * the file grown beyond our last write? */ 1219 /* Do we perhaps have any outstanding writes, or has
1135 if (nfsi->npages == 0 || new_isize > cur_isize) { 1220 * the file grown beyond our last write? */
1136 i_size_write(inode, new_isize); 1221 if (nfsi->npages == 0 || new_isize > cur_isize) {
1137 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1222 i_size_write(inode, new_isize);
1223 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1224 }
1225 dprintk("NFS: isize change on server for file %s/%ld\n",
1226 inode->i_sb->s_id, inode->i_ino);
1138 } 1227 }
1139 dprintk("NFS: isize change on server for file %s/%ld\n",
1140 inode->i_sb->s_id, inode->i_ino);
1141 } 1228 }
1142 1229
1143 1230
1144 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1231 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1145 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1232 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1146 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1147 nfsi->change_attr = fattr->change_attr;
1148
1149 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
1150 inode->i_uid != fattr->uid ||
1151 inode->i_gid != fattr->gid)
1152 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1153 1233
1154 if (inode->i_nlink != fattr->nlink) 1234 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1155 invalid |= NFS_INO_INVALID_ATTR; 1235 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1236 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1237 inode->i_mode = fattr->mode;
1238 }
1239 }
1240 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1241 if (inode->i_uid != fattr->uid) {
1242 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1243 inode->i_uid = fattr->uid;
1244 }
1245 }
1246 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1247 if (inode->i_gid != fattr->gid) {
1248 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1249 inode->i_gid = fattr->gid;
1250 }
1251 }
1156 1252
1157 inode->i_mode = fattr->mode; 1253 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
1158 inode->i_nlink = fattr->nlink; 1254 if (inode->i_nlink != fattr->nlink) {
1159 inode->i_uid = fattr->uid; 1255 invalid |= NFS_INO_INVALID_ATTR;
1160 inode->i_gid = fattr->gid; 1256 if (S_ISDIR(inode->i_mode))
1257 invalid |= NFS_INO_INVALID_DATA;
1258 inode->i_nlink = fattr->nlink;
1259 }
1260 }
1161 1261
1162 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { 1262 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
1163 /* 1263 /*
 1164 * report the blocks in 512-byte units 1264 * report the blocks in 512-byte units
1165 */ 1265 */
1166 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); 1266 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
1167 } else {
1168 inode->i_blocks = fattr->du.nfs2.blocks;
1169 } 1267 }
1268 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
1269 inode->i_blocks = fattr->du.nfs2.blocks;
1170 1270
1171 /* Update attrtimeo value if we're out of the unstable period */ 1271 /* Update attrtimeo value if we're out of the unstable period */
1172 if (invalid & NFS_INO_INVALID_ATTR) { 1272 if (invalid & NFS_INO_INVALID_ATTR) {
@@ -1274,7 +1374,6 @@ static void init_once(void *foo)
1274 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1374 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1275 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1375 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1276 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1376 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1277 nfsi->ncommit = 0;
1278 nfsi->npages = 0; 1377 nfsi->npages = 0;
1279 atomic_set(&nfsi->silly_count, 1); 1378 atomic_set(&nfsi->silly_count, 1);
1280 INIT_HLIST_HEAD(&nfsi->silly_list); 1379 INIT_HLIST_HEAD(&nfsi->silly_list);
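
The inode.c rework above is the core of the series: each fattr field gains its own NFS_ATTR_FATTR_* validity bit, and an inode field is overwritten only when the server actually supplied that attribute, so partial replies no longer smear zeroes or stale values into the cache. A self-contained sketch of the guarded-update idea (bit names and struct layout invented for the demo):

#include <stdio.h>

#define ATTR_SIZE  (1u << 0)	/* demo stand-ins for NFS_ATTR_FATTR_* */
#define ATTR_NLINK (1u << 1)
#define ATTR_MTIME (1u << 2)

struct fattr { unsigned valid; long long size; int nlink; long mtime; };
struct inode { long long size; int nlink; long mtime; };

static void update_inode(struct inode *ino, const struct fattr *f)
{
	if (f->valid & ATTR_SIZE)
		ino->size = f->size;
	if (f->valid & ATTR_NLINK)
		ino->nlink = f->nlink;
	if (f->valid & ATTR_MTIME)
		ino->mtime = f->mtime;
	/* attributes the server did not report keep their cached values */
}

int main(void)
{
	struct inode ino = { .size = 4096, .nlink = 1, .mtime = 100 };
	struct fattr f = { .valid = ATTR_SIZE | ATTR_MTIME,
			   .size = 8192, .mtime = 200 };

	update_inode(&ino, &f);
	printf("size=%lld nlink=%d mtime=%ld\n", ino.size, ino.nlink, ino.mtime);
	return 0;
}

The same pairing discipline shows up in the WCC hunks, where a pre-op value is only trusted when both its PRE* bit and the matching post-op bit are set.
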
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 340ede8f608f..2041f68ff1cc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -152,6 +152,9 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
152extern struct rpc_procinfo nfs4_procedures[]; 152extern struct rpc_procinfo nfs4_procedures[];
153#endif 153#endif
154 154
155/* proc.c */
156void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
157
155/* dir.c */ 158/* dir.c */
156extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); 159extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
157 160
@@ -165,6 +168,7 @@ extern void nfs_clear_inode(struct inode *);
165extern void nfs4_clear_inode(struct inode *); 168extern void nfs4_clear_inode(struct inode *);
166#endif 169#endif
167void nfs_zap_acl_cache(struct inode *inode); 170void nfs_zap_acl_cache(struct inode *inode);
171extern int nfs_wait_bit_killable(void *word);
168 172
169/* super.c */ 173/* super.c */
170void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); 174void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 28bab67d1519..c862c9340f9a 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -120,8 +120,8 @@ xdr_decode_time(__be32 *p, struct timespec *timep)
120static __be32 * 120static __be32 *
121xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) 121xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
122{ 122{
123 u32 rdev; 123 u32 rdev, type;
124 fattr->type = (enum nfs_ftype) ntohl(*p++); 124 type = ntohl(*p++);
125 fattr->mode = ntohl(*p++); 125 fattr->mode = ntohl(*p++);
126 fattr->nlink = ntohl(*p++); 126 fattr->nlink = ntohl(*p++);
127 fattr->uid = ntohl(*p++); 127 fattr->uid = ntohl(*p++);
@@ -136,10 +136,9 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
136 p = xdr_decode_time(p, &fattr->atime); 136 p = xdr_decode_time(p, &fattr->atime);
137 p = xdr_decode_time(p, &fattr->mtime); 137 p = xdr_decode_time(p, &fattr->mtime);
138 p = xdr_decode_time(p, &fattr->ctime); 138 p = xdr_decode_time(p, &fattr->ctime);
139 fattr->valid |= NFS_ATTR_FATTR; 139 fattr->valid |= NFS_ATTR_FATTR_V2;
140 fattr->rdev = new_decode_dev(rdev); 140 fattr->rdev = new_decode_dev(rdev);
141 if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) { 141 if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
142 fattr->type = NFFIFO;
143 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO; 142 fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
144 fattr->rdev = 0; 143 fattr->rdev = 0;
145 } 144 }
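
The v2 decoder above stops storing an nfs_ftype and keeps only the mode, applying the classic NFSv2 quirk inline: a "character device" whose rdev is NFS2_FIFO_DEV is really a FIFO. A compact demo of that rewrite (the NFCHR wire value of 4 and the (u32)-1 device number follow the Linux nfs2 headers, but treat them as assumptions here):

#include <stdio.h>
#include <sys/stat.h>

#define NFS2_FIFO_DEV	0xffffffffU	/* (u32)-1 */
#define NFCHR		4		/* NFSv2 wire value: character device */

int main(void)
{
	unsigned int type = NFCHR, rdev = NFS2_FIFO_DEV;
	mode_t mode = S_IFCHR | 0644;	/* what the fattr first decodes to */

	if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
		mode = (mode & ~S_IFMT) | S_IFIFO;	/* really a FIFO */
		rdev = 0;
	}
	printf("fifo? %s (rdev=%u)\n", S_ISFIFO(mode) ? "yes" : "no", rdev);
	return 0;
}
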
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c55be7a7679e..b82fe6847f14 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -834,4 +834,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
834 .commit_done = nfs3_commit_done, 834 .commit_done = nfs3_commit_done,
835 .lock = nfs3_proc_lock, 835 .lock = nfs3_proc_lock,
836 .clear_acl_cache = nfs3_forget_cached_acls, 836 .clear_acl_cache = nfs3_forget_cached_acls,
837 .close_context = nfs_close_context,
837}; 838};
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 6cdeacffde46..e6a1932c7110 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -91,19 +91,15 @@
91/* 91/*
92 * Map file type to S_IFMT bits 92 * Map file type to S_IFMT bits
93 */ 93 */
94static struct { 94static const umode_t nfs_type2fmt[] = {
95 unsigned int mode; 95 [NF3BAD] = 0,
96 unsigned int nfs2type; 96 [NF3REG] = S_IFREG,
97} nfs_type2fmt[] = { 97 [NF3DIR] = S_IFDIR,
98 { 0, NFNON }, 98 [NF3BLK] = S_IFBLK,
99 { S_IFREG, NFREG }, 99 [NF3CHR] = S_IFCHR,
100 { S_IFDIR, NFDIR }, 100 [NF3LNK] = S_IFLNK,
101 { S_IFBLK, NFBLK }, 101 [NF3SOCK] = S_IFSOCK,
102 { S_IFCHR, NFCHR }, 102 [NF3FIFO] = S_IFIFO,
103 { S_IFLNK, NFLNK },
104 { S_IFSOCK, NFSOCK },
105 { S_IFIFO, NFFIFO },
106 { 0, NFBAD }
107}; 103};
108 104
109/* 105/*
@@ -148,13 +144,12 @@ static __be32 *
148xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr) 144xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
149{ 145{
150 unsigned int type, major, minor; 146 unsigned int type, major, minor;
151 int fmode; 147 umode_t fmode;
152 148
153 type = ntohl(*p++); 149 type = ntohl(*p++);
154 if (type >= NF3BAD) 150 if (type > NF3FIFO)
155 type = NF3BAD; 151 type = NF3NON;
156 fmode = nfs_type2fmt[type].mode; 152 fmode = nfs_type2fmt[type];
157 fattr->type = nfs_type2fmt[type].nfs2type;
158 fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode; 153 fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
159 fattr->nlink = ntohl(*p++); 154 fattr->nlink = ntohl(*p++);
160 fattr->uid = ntohl(*p++); 155 fattr->uid = ntohl(*p++);
@@ -177,7 +172,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
177 p = xdr_decode_time3(p, &fattr->ctime); 172 p = xdr_decode_time3(p, &fattr->ctime);
178 173
179 /* Update the mode bits */ 174 /* Update the mode bits */
180 fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3); 175 fattr->valid |= NFS_ATTR_FATTR_V3;
181 return p; 176 return p;
182} 177}
183 178
@@ -233,7 +228,9 @@ xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
233 p = xdr_decode_hyper(p, &fattr->pre_size); 228 p = xdr_decode_hyper(p, &fattr->pre_size);
234 p = xdr_decode_time3(p, &fattr->pre_mtime); 229 p = xdr_decode_time3(p, &fattr->pre_mtime);
235 p = xdr_decode_time3(p, &fattr->pre_ctime); 230 p = xdr_decode_time3(p, &fattr->pre_ctime);
236 fattr->valid |= NFS_ATTR_WCC; 231 fattr->valid |= NFS_ATTR_FATTR_PRESIZE
232 | NFS_ATTR_FATTR_PREMTIME
233 | NFS_ATTR_FATTR_PRECTIME;
237 return p; 234 return p;
238} 235}
239 236
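
The v3 type map above collapses into a flat umode_t array indexed by the wire type, with out-of-range values clamped before the lookup so a bogus server reply cannot index past the table. A standalone demo (enum values follow the Linux nfs3 headers / RFC 1813 ftype3):

#include <stdio.h>
#include <sys/stat.h>

enum { NF3NON, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3SOCK, NF3FIFO };

static const mode_t nfs_type2fmt[] = {
	[NF3NON]  = 0,
	[NF3REG]  = S_IFREG,
	[NF3DIR]  = S_IFDIR,
	[NF3BLK]  = S_IFBLK,
	[NF3CHR]  = S_IFCHR,
	[NF3LNK]  = S_IFLNK,
	[NF3SOCK] = S_IFSOCK,
	[NF3FIFO] = S_IFIFO,
};

int main(void)
{
	unsigned int type = 42;		/* bogus wire value from the server */

	if (type > NF3FIFO)		/* clamp before indexing */
		type = NF3NON;
	printf("fmt bits: %o\n", (unsigned int)nfs_type2fmt[type]);
	return 0;
}

The nfs4xdr.c hunks later in this diff apply the same designated-initializer treatment to the NF4* types.
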
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8dde84b988d9..97bacccff579 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
193 kunmap_atomic(start, KM_USER0); 193 kunmap_atomic(start, KM_USER0);
194} 194}
195 195
196static int nfs4_wait_bit_killable(void *word)
197{
198 if (fatal_signal_pending(current))
199 return -ERESTARTSYS;
200 schedule();
201 return 0;
202}
203
204static int nfs4_wait_clnt_recover(struct nfs_client *clp) 196static int nfs4_wait_clnt_recover(struct nfs_client *clp)
205{ 197{
206 int res; 198 int res;
@@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
208 might_sleep(); 200 might_sleep();
209 201
210 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 202 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
211 nfs4_wait_bit_killable, TASK_KILLABLE); 203 nfs_wait_bit_killable, TASK_KILLABLE);
212 return res; 204 return res;
213} 205}
214 206
@@ -1439,7 +1431,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1439 if (calldata->arg.seqid == NULL) 1431 if (calldata->arg.seqid == NULL)
1440 goto out_free_calldata; 1432 goto out_free_calldata;
1441 calldata->arg.fmode = 0; 1433 calldata->arg.fmode = 0;
1442 calldata->arg.bitmask = server->attr_bitmask; 1434 calldata->arg.bitmask = server->cache_consistency_bitmask;
1443 calldata->res.fattr = &calldata->fattr; 1435 calldata->res.fattr = &calldata->fattr;
1444 calldata->res.seqid = calldata->arg.seqid; 1436 calldata->res.seqid = calldata->arg.seqid;
1445 calldata->res.server = server; 1437 calldata->res.server = server;
@@ -1580,6 +1572,15 @@ out_drop:
1580 return 0; 1572 return 0;
1581} 1573}
1582 1574
1575void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
1576{
1577 if (ctx->state == NULL)
1578 return;
1579 if (is_sync)
1580 nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
1581 else
1582 nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
1583}
1583 1584
1584static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 1585static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
1585{ 1586{
@@ -1600,6 +1601,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1600 server->caps |= NFS_CAP_HARDLINKS; 1601 server->caps |= NFS_CAP_HARDLINKS;
1601 if (res.has_symlinks != 0) 1602 if (res.has_symlinks != 0)
1602 server->caps |= NFS_CAP_SYMLINKS; 1603 server->caps |= NFS_CAP_SYMLINKS;
1604 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
1605 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
1606 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1603 server->acl_bitmask = res.acl_bitmask; 1607 server->acl_bitmask = res.acl_bitmask;
1604 } 1608 }
1605 return status; 1609 return status;
@@ -2079,7 +2083,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2079 struct nfs_removeargs *args = msg->rpc_argp; 2083 struct nfs_removeargs *args = msg->rpc_argp;
2080 struct nfs_removeres *res = msg->rpc_resp; 2084 struct nfs_removeres *res = msg->rpc_resp;
2081 2085
2082 args->bitmask = server->attr_bitmask; 2086 args->bitmask = server->cache_consistency_bitmask;
2083 res->server = server; 2087 res->server = server;
2084 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2085} 2089}
@@ -2323,7 +2327,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2323 .pages = &page, 2327 .pages = &page,
2324 .pgbase = 0, 2328 .pgbase = 0,
2325 .count = count, 2329 .count = count,
2326 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, 2330 .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask,
2327 }; 2331 };
2328 struct nfs4_readdir_res res; 2332 struct nfs4_readdir_res res;
2329 struct rpc_message msg = { 2333 struct rpc_message msg = {
@@ -2552,7 +2556,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
2552{ 2556{
2553 struct nfs_server *server = NFS_SERVER(data->inode); 2557 struct nfs_server *server = NFS_SERVER(data->inode);
2554 2558
2555 data->args.bitmask = server->attr_bitmask; 2559 data->args.bitmask = server->cache_consistency_bitmask;
2556 data->res.server = server; 2560 data->res.server = server;
2557 data->timestamp = jiffies; 2561 data->timestamp = jiffies;
2558 2562
@@ -2575,7 +2579,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
2575{ 2579{
2576 struct nfs_server *server = NFS_SERVER(data->inode); 2580 struct nfs_server *server = NFS_SERVER(data->inode);
2577 2581
2578 data->args.bitmask = server->attr_bitmask; 2582 data->args.bitmask = server->cache_consistency_bitmask;
2579 data->res.server = server; 2583 data->res.server = server;
2580 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 2584 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
2581} 2585}
@@ -3678,6 +3682,19 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
3678 return len; 3682 return len;
3679} 3683}
3680 3684
3685static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
3686{
3687 if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) &&
3688 (fattr->valid & NFS_ATTR_FATTR_FSID) &&
3689 (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
3690 return;
3691
3692 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
3693 NFS_ATTR_FATTR_NLINK;
3694 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
3695 fattr->nlink = 2;
3696}
3697
3681int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 3698int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3682 struct nfs4_fs_locations *fs_locations, struct page *page) 3699 struct nfs4_fs_locations *fs_locations, struct page *page)
3683{ 3700{
@@ -3704,6 +3721,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3704 fs_locations->server = server; 3721 fs_locations->server = server;
3705 fs_locations->nlocations = 0; 3722 fs_locations->nlocations = 0;
3706 status = rpc_call_sync(server->client, &msg, 0); 3723 status = rpc_call_sync(server->client, &msg, 0);
3724 nfs_fixup_referral_attributes(&fs_locations->fattr);
3707 dprintk("%s: returned status = %d\n", __func__, status); 3725 dprintk("%s: returned status = %d\n", __func__, status);
3708 return status; 3726 return status;
3709} 3727}
@@ -3767,6 +3785,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
3767 .commit_done = nfs4_commit_done, 3785 .commit_done = nfs4_commit_done,
3768 .lock = nfs4_proc_lock, 3786 .lock = nfs4_proc_lock,
3769 .clear_acl_cache = nfs4_zap_acl_attr, 3787 .clear_acl_cache = nfs4_zap_acl_attr,
3788 .close_context = nfs4_close_context,
3770}; 3789};
3771 3790
3772/* 3791/*
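
The new cache_consistency_bitmask in the nfs4proc.c hunks starts from the server's supported-attribute mask and is ANDed down to change/size plus the two time attributes, so the GETATTRs piggybacked on CLOSE, REMOVE, WRITE and COMMIT fetch only what cache revalidation needs. The masking step in isolation (bit positions here are illustrative, not the real FATTR4_WORD* layout):

#include <stdio.h>

#define WORD0_CHANGE		(1u << 0)	/* illustrative bit values */
#define WORD0_SIZE		(1u << 1)
#define WORD1_TIME_METADATA	(1u << 0)
#define WORD1_TIME_MODIFY	(1u << 1)

int main(void)
{
	unsigned attr_bitmask[2] = { 0xffffffffu, 0xffffffffu }; /* server caps */
	unsigned cc_bitmask[2];

	cc_bitmask[0] = attr_bitmask[0] & (WORD0_CHANGE | WORD0_SIZE);
	cc_bitmask[1] = attr_bitmask[1] &
			(WORD1_TIME_METADATA | WORD1_TIME_MODIFY);
	printf("word0=%#x word1=%#x\n", cc_bitmask[0], cc_bitmask[1]);
	return 0;
}
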
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2022fe47966f..0298e909559f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -62,8 +62,14 @@ static LIST_HEAD(nfs4_clientid_list);
62 62
63static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) 63static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
64{ 64{
65 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, 65 unsigned short port;
66 nfs_callback_tcpport, cred); 66 int status;
67
68 port = nfs_callback_tcpport;
69 if (clp->cl_addr.ss_family == AF_INET6)
70 port = nfs_callback_tcpport6;
71
72 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
67 if (status == 0) 73 if (status == 0)
68 status = nfs4_proc_setclientid_confirm(clp, cred); 74 status = nfs4_proc_setclientid_confirm(clp, cred);
69 if (status == 0) 75 if (status == 0)
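
nfs4_init_client() now advertises whichever callback port matches the address family of the client's own transport. The selection in miniature (the port numbers are placeholders, not NFS defaults):

#include <stdio.h>
#include <sys/socket.h>

static unsigned short tcpport = 40001, tcpport6 = 40002;	/* placeholders */

static unsigned short callback_port(int family)
{
	return family == AF_INET6 ? tcpport6 : tcpport;
}

int main(void)
{
	printf("v4 callback: %u, v6 callback: %u\n",
	       callback_port(AF_INET), callback_port(AF_INET6));
	return 0;
}
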
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d1e4c8f8a0a9..1690f0e44b91 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -522,20 +522,17 @@ static int nfs4_stat_to_errno(int);
522 decode_lookup_maxsz + \ 522 decode_lookup_maxsz + \
523 decode_fs_locations_maxsz) 523 decode_fs_locations_maxsz)
524 524
525static struct { 525static const umode_t nfs_type2fmt[] = {
526 unsigned int mode; 526 [NF4BAD] = 0,
527 unsigned int nfs2type; 527 [NF4REG] = S_IFREG,
528} nfs_type2fmt[] = { 528 [NF4DIR] = S_IFDIR,
529 { 0, NFNON }, 529 [NF4BLK] = S_IFBLK,
530 { S_IFREG, NFREG }, 530 [NF4CHR] = S_IFCHR,
531 { S_IFDIR, NFDIR }, 531 [NF4LNK] = S_IFLNK,
532 { S_IFBLK, NFBLK }, 532 [NF4SOCK] = S_IFSOCK,
533 { S_IFCHR, NFCHR }, 533 [NF4FIFO] = S_IFIFO,
534 { S_IFLNK, NFLNK }, 534 [NF4ATTRDIR] = 0,
535 { S_IFSOCK, NFSOCK }, 535 [NF4NAMEDATTR] = 0,
536 { S_IFIFO, NFFIFO },
537 { 0, NFNON },
538 { 0, NFNON },
539}; 536};
540 537
541struct compound_hdr { 538struct compound_hdr {
@@ -2160,6 +2157,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
2160static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type) 2157static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
2161{ 2158{
2162 __be32 *p; 2159 __be32 *p;
2160 int ret = 0;
2163 2161
2164 *type = 0; 2162 *type = 0;
2165 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) 2163 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
@@ -2172,14 +2170,16 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2172 return -EIO; 2170 return -EIO;
2173 } 2171 }
2174 bitmap[0] &= ~FATTR4_WORD0_TYPE; 2172 bitmap[0] &= ~FATTR4_WORD0_TYPE;
2173 ret = NFS_ATTR_FATTR_TYPE;
2175 } 2174 }
2176 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type); 2175 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
2177 return 0; 2176 return ret;
2178} 2177}
2179 2178
2180static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 2179static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
2181{ 2180{
2182 __be32 *p; 2181 __be32 *p;
2182 int ret = 0;
2183 2183
2184 *change = 0; 2184 *change = 0;
2185 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) 2185 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
@@ -2188,15 +2188,17 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2188 READ_BUF(8); 2188 READ_BUF(8);
2189 READ64(*change); 2189 READ64(*change);
2190 bitmap[0] &= ~FATTR4_WORD0_CHANGE; 2190 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
2191 ret = NFS_ATTR_FATTR_CHANGE;
2191 } 2192 }
2192 dprintk("%s: change attribute=%Lu\n", __func__, 2193 dprintk("%s: change attribute=%Lu\n", __func__,
2193 (unsigned long long)*change); 2194 (unsigned long long)*change);
2194 return 0; 2195 return ret;
2195} 2196}
2196 2197
2197static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) 2198static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
2198{ 2199{
2199 __be32 *p; 2200 __be32 *p;
2201 int ret = 0;
2200 2202
2201 *size = 0; 2203 *size = 0;
2202 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) 2204 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
@@ -2205,9 +2207,10 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
2205 READ_BUF(8); 2207 READ_BUF(8);
2206 READ64(*size); 2208 READ64(*size);
2207 bitmap[0] &= ~FATTR4_WORD0_SIZE; 2209 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2210 ret = NFS_ATTR_FATTR_SIZE;
2208 } 2211 }
2209 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); 2212 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
2210 return 0; 2213 return ret;
2211} 2214}
2212 2215
2213static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2216static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
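Note: the hunks above establish the convention every decode_attr_*() helper follows from here on: return a negative errno on XDR failure, the attribute's NFS_ATTR_FATTR_* bit when the attribute was present on the wire, and 0 when the server simply omitted it, so the caller can OR the result straight into a validity mask. A compact runnable model of the pattern (the bit values and bitmap layout are stand-ins):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FATTR_SIZE      0x0001  /* stand-in for NFS_ATTR_FATTR_SIZE */
    #define WORD0_SIZE      0x0010  /* stand-in for FATTR4_WORD0_SIZE */

    /* Model decoder: negative errno on XDR trouble, FATTR_SIZE when the
     * attribute was on the wire, 0 when the server omitted it. */
    static int decode_attr_size(uint32_t bitmap0, uint64_t wire, uint64_t *size)
    {
            *size = 0;
            if (bitmap0 & (WORD0_SIZE - 1U))
                    return -EIO;    /* lower-numbered attrs should be gone */
            if (!(bitmap0 & WORD0_SIZE))
                    return 0;
            *size = wire;
            return FATTR_SIZE;
    }

    int main(void)
    {
            uint64_t size;
            unsigned int valid = 0;
            int status = decode_attr_size(WORD0_SIZE, 4096, &size);

            if (status < 0)
                    return 1;       /* hard error: abort the whole reply */
            valid |= status;        /* an omitted attribute adds nothing */
            printf("size=%llu valid=%#x\n", (unsigned long long)size, valid);
            return 0;
    }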
@@ -2245,6 +2248,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2245static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) 2248static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
2246{ 2249{
2247 __be32 *p; 2250 __be32 *p;
2251 int ret = 0;
2248 2252
2249 fsid->major = 0; 2253 fsid->major = 0;
2250 fsid->minor = 0; 2254 fsid->minor = 0;
@@ -2255,11 +2259,12 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2255 READ64(fsid->major); 2259 READ64(fsid->major);
2256 READ64(fsid->minor); 2260 READ64(fsid->minor);
2257 bitmap[0] &= ~FATTR4_WORD0_FSID; 2261 bitmap[0] &= ~FATTR4_WORD0_FSID;
2262 ret = NFS_ATTR_FATTR_FSID;
2258 } 2263 }
2259 dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__, 2264 dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__,
2260 (unsigned long long)fsid->major, 2265 (unsigned long long)fsid->major,
2261 (unsigned long long)fsid->minor); 2266 (unsigned long long)fsid->minor);
2262 return 0; 2267 return ret;
2263} 2268}
2264 2269
2265static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2270static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2297,6 +2302,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
2297static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2302static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
2298{ 2303{
2299 __be32 *p; 2304 __be32 *p;
2305 int ret = 0;
2300 2306
2301 *fileid = 0; 2307 *fileid = 0;
2302 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) 2308 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
@@ -2305,14 +2311,16 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2305 READ_BUF(8); 2311 READ_BUF(8);
2306 READ64(*fileid); 2312 READ64(*fileid);
2307 bitmap[0] &= ~FATTR4_WORD0_FILEID; 2313 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2314 ret = NFS_ATTR_FATTR_FILEID;
2308 } 2315 }
2309 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2316 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2310 return 0; 2317 return ret;
2311} 2318}
2312 2319
2313static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2320static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
2314{ 2321{
2315 __be32 *p; 2322 __be32 *p;
2323 int ret = 0;
2316 2324
2317 *fileid = 0; 2325 *fileid = 0;
2318 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) 2326 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
@@ -2321,9 +2329,10 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
2321 READ_BUF(8); 2329 READ_BUF(8);
2322 READ64(*fileid); 2330 READ64(*fileid);
2323 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 2331 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2332 ret = NFS_ATTR_FATTR_FILEID;
2324 } 2333 }
2325 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2334 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2326 return 0; 2335 return ret;
2327} 2336}
2328 2337
2329static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2338static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2479,6 +2488,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2479 if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES) 2488 if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
2480 res->nlocations++; 2489 res->nlocations++;
2481 } 2490 }
2491 if (res->nlocations != 0)
2492 status = NFS_ATTR_FATTR_V4_REFERRAL;
2482out: 2493out:
2483 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 2494 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
2484 return status; 2495 return status;
@@ -2580,26 +2591,30 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
2580 return status; 2591 return status;
2581} 2592}
2582 2593
2583static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode) 2594static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
2584{ 2595{
2596 uint32_t tmp;
2585 __be32 *p; 2597 __be32 *p;
2598 int ret = 0;
2586 2599
2587 *mode = 0; 2600 *mode = 0;
2588 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) 2601 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
2589 return -EIO; 2602 return -EIO;
2590 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { 2603 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
2591 READ_BUF(4); 2604 READ_BUF(4);
2592 READ32(*mode); 2605 READ32(tmp);
2593 *mode &= ~S_IFMT; 2606 *mode = tmp & ~S_IFMT;
2594 bitmap[1] &= ~FATTR4_WORD1_MODE; 2607 bitmap[1] &= ~FATTR4_WORD1_MODE;
2608 ret = NFS_ATTR_FATTR_MODE;
2595 } 2609 }
2596 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); 2610 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
2597 return 0; 2611 return ret;
2598} 2612}
2599 2613
2600static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) 2614static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
2601{ 2615{
2602 __be32 *p; 2616 __be32 *p;
2617 int ret = 0;
2603 2618
2604 *nlink = 1; 2619 *nlink = 1;
2605 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) 2620 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
@@ -2608,15 +2623,17 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
2608 READ_BUF(4); 2623 READ_BUF(4);
2609 READ32(*nlink); 2624 READ32(*nlink);
2610 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; 2625 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
2626 ret = NFS_ATTR_FATTR_NLINK;
2611 } 2627 }
2612 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); 2628 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
2613 return 0; 2629 return ret;
2614} 2630}
2615 2631
2616static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) 2632static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid)
2617{ 2633{
2618 uint32_t len; 2634 uint32_t len;
2619 __be32 *p; 2635 __be32 *p;
2636 int ret = 0;
2620 2637
2621 *uid = -2; 2638 *uid = -2;
2622 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 2639 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
@@ -2626,7 +2643,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2626 READ32(len); 2643 READ32(len);
2627 READ_BUF(len); 2644 READ_BUF(len);
2628 if (len < XDR_MAX_NETOBJ) { 2645 if (len < XDR_MAX_NETOBJ) {
2629 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0) 2646 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
2647 ret = NFS_ATTR_FATTR_OWNER;
2648 else
2630 dprintk("%s: nfs_map_name_to_uid failed!\n", 2649 dprintk("%s: nfs_map_name_to_uid failed!\n",
2631 __func__); 2650 __func__);
2632 } else 2651 } else
@@ -2635,13 +2654,14 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2635 bitmap[1] &= ~FATTR4_WORD1_OWNER; 2654 bitmap[1] &= ~FATTR4_WORD1_OWNER;
2636 } 2655 }
2637 dprintk("%s: uid=%d\n", __func__, (int)*uid); 2656 dprintk("%s: uid=%d\n", __func__, (int)*uid);
2638 return 0; 2657 return ret;
2639} 2658}
2640 2659
2641static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) 2660static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid)
2642{ 2661{
2643 uint32_t len; 2662 uint32_t len;
2644 __be32 *p; 2663 __be32 *p;
2664 int ret = 0;
2645 2665
2646 *gid = -2; 2666 *gid = -2;
2647 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 2667 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
@@ -2651,7 +2671,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2651 READ32(len); 2671 READ32(len);
2652 READ_BUF(len); 2672 READ_BUF(len);
2653 if (len < XDR_MAX_NETOBJ) { 2673 if (len < XDR_MAX_NETOBJ) {
2654 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0) 2674 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
2675 ret = NFS_ATTR_FATTR_GROUP;
2676 else
2655 dprintk("%s: nfs_map_group_to_gid failed!\n", 2677 dprintk("%s: nfs_map_group_to_gid failed!\n",
2656 __func__); 2678 __func__);
2657 } else 2679 } else
@@ -2660,13 +2682,14 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2660 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 2682 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
2661 } 2683 }
2662 dprintk("%s: gid=%d\n", __func__, (int)*gid); 2684 dprintk("%s: gid=%d\n", __func__, (int)*gid);
2663 return 0; 2685 return ret;
2664} 2686}
2665 2687
2666static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) 2688static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
2667{ 2689{
2668 uint32_t major = 0, minor = 0; 2690 uint32_t major = 0, minor = 0;
2669 __be32 *p; 2691 __be32 *p;
2692 int ret = 0;
2670 2693
2671 *rdev = MKDEV(0,0); 2694 *rdev = MKDEV(0,0);
2672 if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U))) 2695 if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
@@ -2681,9 +2704,10 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
2681 if (MAJOR(tmp) == major && MINOR(tmp) == minor) 2704 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
2682 *rdev = tmp; 2705 *rdev = tmp;
2683 bitmap[1] &= ~ FATTR4_WORD1_RAWDEV; 2706 bitmap[1] &= ~ FATTR4_WORD1_RAWDEV;
2707 ret = NFS_ATTR_FATTR_RDEV;
2684 } 2708 }
2685 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); 2709 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
2686 return 0; 2710 return ret;
2687} 2711}
2688 2712
2689static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2713static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2740,6 +2764,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2740static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) 2764static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
2741{ 2765{
2742 __be32 *p; 2766 __be32 *p;
2767 int ret = 0;
2743 2768
2744 *used = 0; 2769 *used = 0;
2745 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) 2770 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
@@ -2748,10 +2773,11 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
2748 READ_BUF(8); 2773 READ_BUF(8);
2749 READ64(*used); 2774 READ64(*used);
2750 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; 2775 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
2776 ret = NFS_ATTR_FATTR_SPACE_USED;
2751 } 2777 }
2752 dprintk("%s: space used=%Lu\n", __func__, 2778 dprintk("%s: space used=%Lu\n", __func__,
2753 (unsigned long long)*used); 2779 (unsigned long long)*used);
2754 return 0; 2780 return ret;
2755} 2781}
2756 2782
2757static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 2783static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -2778,6 +2804,8 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
2778 return -EIO; 2804 return -EIO;
2779 if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) { 2805 if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) {
2780 status = decode_attr_time(xdr, time); 2806 status = decode_attr_time(xdr, time);
2807 if (status == 0)
2808 status = NFS_ATTR_FATTR_ATIME;
2781 bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; 2809 bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
2782 } 2810 }
2783 dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); 2811 dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec);
@@ -2794,6 +2822,8 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
2794 return -EIO; 2822 return -EIO;
2795 if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) { 2823 if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) {
2796 status = decode_attr_time(xdr, time); 2824 status = decode_attr_time(xdr, time);
2825 if (status == 0)
2826 status = NFS_ATTR_FATTR_CTIME;
2797 bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; 2827 bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
2798 } 2828 }
2799 dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); 2829 dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec);
@@ -2810,6 +2840,8 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
2810 return -EIO; 2840 return -EIO;
2811 if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) { 2841 if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) {
2812 status = decode_attr_time(xdr, time); 2842 status = decode_attr_time(xdr, time);
2843 if (status == 0)
2844 status = NFS_ATTR_FATTR_MTIME;
2813 bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; 2845 bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
2814 } 2846 }
2815 dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); 2847 dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec);
@@ -2994,63 +3026,116 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
2994 uint32_t attrlen, 3026 uint32_t attrlen,
2995 bitmap[2] = {0}, 3027 bitmap[2] = {0},
2996 type; 3028 type;
2997 int status, fmode = 0; 3029 int status;
3030 umode_t fmode = 0;
2998 uint64_t fileid; 3031 uint64_t fileid;
2999 3032
3000 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 3033 status = decode_op_hdr(xdr, OP_GETATTR);
3001 goto xdr_error; 3034 if (status < 0)
3002 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
3003 goto xdr_error; 3035 goto xdr_error;
3004 3036
3005 fattr->bitmap[0] = bitmap[0]; 3037 status = decode_attr_bitmap(xdr, bitmap);
3006 fattr->bitmap[1] = bitmap[1]; 3038 if (status < 0)
3039 goto xdr_error;
3007 3040
3008 if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) 3041 status = decode_attr_length(xdr, &attrlen, &savep);
3042 if (status < 0)
3009 goto xdr_error; 3043 goto xdr_error;
3010 3044
3011 3045
3012 if ((status = decode_attr_type(xdr, bitmap, &type)) != 0) 3046 status = decode_attr_type(xdr, bitmap, &type);
3047 if (status < 0)
3013 goto xdr_error; 3048 goto xdr_error;
3014 fattr->type = nfs_type2fmt[type].nfs2type; 3049 fattr->mode = 0;
3015 fmode = nfs_type2fmt[type].mode; 3050 if (status != 0) {
3051 fattr->mode |= nfs_type2fmt[type];
3052 fattr->valid |= status;
3053 }
3016 3054
3017 if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0) 3055 status = decode_attr_change(xdr, bitmap, &fattr->change_attr);
3056 if (status < 0)
3018 goto xdr_error; 3057 goto xdr_error;
3019 if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) 3058 fattr->valid |= status;
3059
3060 status = decode_attr_size(xdr, bitmap, &fattr->size);
3061 if (status < 0)
3020 goto xdr_error; 3062 goto xdr_error;
3021 if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) 3063 fattr->valid |= status;
3064
3065 status = decode_attr_fsid(xdr, bitmap, &fattr->fsid);
3066 if (status < 0)
3022 goto xdr_error; 3067 goto xdr_error;
3023 if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) 3068 fattr->valid |= status;
3069
3070 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
3071 if (status < 0)
3024 goto xdr_error; 3072 goto xdr_error;
3025 if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, 3073 fattr->valid |= status;
3074
3075 status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
3026 struct nfs4_fs_locations, 3076 struct nfs4_fs_locations,
3027 fattr))) != 0) 3077 fattr));
3078 if (status < 0)
3028 goto xdr_error; 3079 goto xdr_error;
3029 if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) 3080 fattr->valid |= status;
3081
3082 status = decode_attr_mode(xdr, bitmap, &fmode);
3083 if (status < 0)
3030 goto xdr_error; 3084 goto xdr_error;
3031 fattr->mode |= fmode; 3085 if (status != 0) {
3032 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) 3086 fattr->mode |= fmode;
3087 fattr->valid |= status;
3088 }
3089
3090 status = decode_attr_nlink(xdr, bitmap, &fattr->nlink);
3091 if (status < 0)
3033 goto xdr_error; 3092 goto xdr_error;
3034 if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0) 3093 fattr->valid |= status;
3094
3095 status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid);
3096 if (status < 0)
3035 goto xdr_error; 3097 goto xdr_error;
3036 if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0) 3098 fattr->valid |= status;
3099
3100 status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid);
3101 if (status < 0)
3037 goto xdr_error; 3102 goto xdr_error;
3038 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) 3103 fattr->valid |= status;
3104
3105 status = decode_attr_rdev(xdr, bitmap, &fattr->rdev);
3106 if (status < 0)
3039 goto xdr_error; 3107 goto xdr_error;
3040 if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0) 3108 fattr->valid |= status;
3109
3110 status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used);
3111 if (status < 0)
3041 goto xdr_error; 3112 goto xdr_error;
3042 if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0) 3113 fattr->valid |= status;
3114
3115 status = decode_attr_time_access(xdr, bitmap, &fattr->atime);
3116 if (status < 0)
3043 goto xdr_error; 3117 goto xdr_error;
3044 if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0) 3118 fattr->valid |= status;
3119
3120 status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
3121 if (status < 0)
3045 goto xdr_error; 3122 goto xdr_error;
3046 if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0) 3123 fattr->valid |= status;
3124
3125 status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime);
3126 if (status < 0)
3047 goto xdr_error; 3127 goto xdr_error;
3048 if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0) 3128 fattr->valid |= status;
3129
3130 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid);
3131 if (status < 0)
3049 goto xdr_error; 3132 goto xdr_error;
3050 if (fattr->fileid == 0 && fileid != 0) 3133 if (status != 0 && !(fattr->valid & status)) {
3051 fattr->fileid = fileid; 3134 fattr->fileid = fileid;
3052 if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) 3135 fattr->valid |= status;
3053 fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; 3136 }
3137
3138 status = verify_attr_len(xdr, savep, attrlen);
3054xdr_error: 3139xdr_error:
3055 dprintk("%s: xdr returned %d\n", __func__, -status); 3140 dprintk("%s: xdr returned %d\n", __func__, -status);
3056 return status; 3141 return status;
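Note: decode_getfattr() is restructured above to consume those per-attribute return values: each positive status is OR'ed into fattr->valid as the reply is parsed, replacing the old all-or-nothing `fattr->valid = NFS_ATTR_FATTR | ...` assignment after verify_attr_len(). The one subtle case is mounted_on_fileid, which reports the same NFS_ATTR_FATTR_FILEID bit as fileid itself, so the mask doubles as an "already set" test. A runnable sketch of that fallback (the bit value is a stand-in):

    #include <stdint.h>
    #include <stdio.h>

    #define FATTR_FILEID 0x0040     /* stand-in for NFS_ATTR_FATTR_FILEID */

    int main(void)
    {
            unsigned int valid = 0;         /* bits accumulated so far */
            uint64_t fileid = 0;
            uint64_t mounted_on_fileid = 1234;
            int status = FATTR_FILEID;      /* mounted_on_fileid was present */

            /* Both decoders report the same bit, so the fallback only
             * fires when the real fileid attribute was absent. */
            if (status != 0 && !(valid & status)) {
                    fileid = mounted_on_fileid;
                    valid |= status;
            }
            printf("fileid=%llu valid=%#x\n", (unsigned long long)fileid, valid);
            return 0;
    }

The file's final hunk applies the same philosophy to SETATTR replies: nfs4_xdr_dec_setattr() now treats the trailing GETATTR as advisory and ignores its status, instead of special-casing NFS4ERR_DELAY.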
@@ -4078,9 +4163,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
4078 status = decode_setattr(&xdr, res); 4163 status = decode_setattr(&xdr, res);
4079 if (status) 4164 if (status)
4080 goto out; 4165 goto out;
4081 status = decode_getfattr(&xdr, res->fattr, res->server); 4166 decode_getfattr(&xdr, res->fattr, res->server);
4082 if (status == NFS4ERR_DELAY)
4083 status = 0;
4084out: 4167out:
4085 return status; 4168 return status;
4086} 4169}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7f079209d70a..e2975939126a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req)
176 kref_put(&req->wb_kref, nfs_free_request); 176 kref_put(&req->wb_kref, nfs_free_request);
177} 177}
178 178
179static int nfs_wait_bit_killable(void *word)
180{
181 int ret = 0;
182
183 if (fatal_signal_pending(current))
184 ret = -ERESTARTSYS;
185 else
186 schedule();
187 return ret;
188}
189
190/** 179/**
191 * nfs_wait_on_request - Wait for a request to complete. 180 * nfs_wait_on_request - Wait for a request to complete.
192 * @req: request to wait upon. 181 * @req: request to wait upon.
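Note: nfs_wait_bit_killable() is removed from pagelist.c not because it is dead but, presumably, because it is promoted to shared NFS code: the fs/nfs/write.c hunk further down passes it to wait_on_bit_lock() when taking the new NFS_INO_FLUSHING bit. For reference, the helper as deleted here, annotated (kernel context, not standalone):

    /* wait_on_bit_lock() calls this each time it finds the bit set;
     * returning non-zero aborts the wait. */
    static int nfs_wait_bit_killable(void *word)
    {
            int ret = 0;

            if (fatal_signal_pending(current))
                    ret = -ERESTARTSYS;     /* wait was killed; caller unwinds */
            else
                    schedule();             /* give up the CPU until woken */
            return ret;
    }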
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 193465210d7c..7be72d90d49d 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -663,4 +663,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
663 .commit_setup = nfs_proc_commit_setup, 663 .commit_setup = nfs_proc_commit_setup,
664 .lock = nfs_proc_lock, 664 .lock = nfs_proc_lock,
665 .lock_check_bounds = nfs_lock_check_bounds, 665 .lock_check_bounds = nfs_lock_check_bounds,
666 .close_context = nfs_close_context,
666}; 667};
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d6686f4786dc..0942fcbbad3c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1018,6 +1018,7 @@ static int nfs_parse_mount_options(char *raw,
1018 case Opt_rdma: 1018 case Opt_rdma:
1019 mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ 1019 mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
1020 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1020 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1021 xprt_load_transport(p);
1021 break; 1022 break;
1022 case Opt_acl: 1023 case Opt_acl:
1023 mnt->flags &= ~NFS_MOUNT_NOACL; 1024 mnt->flags &= ~NFS_MOUNT_NOACL;
@@ -1205,12 +1206,14 @@ static int nfs_parse_mount_options(char *raw,
1205 /* vector side protocols to TCP */ 1206 /* vector side protocols to TCP */
1206 mnt->flags |= NFS_MOUNT_TCP; 1207 mnt->flags |= NFS_MOUNT_TCP;
1207 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1208 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1209 xprt_load_transport(string);
1208 break; 1210 break;
1209 default: 1211 default:
1210 errors++; 1212 errors++;
1211 dfprintk(MOUNT, "NFS: unrecognized " 1213 dfprintk(MOUNT, "NFS: unrecognized "
1212 "transport protocol\n"); 1214 "transport protocol\n");
1213 } 1215 }
1216 kfree(string);
1214 break; 1217 break;
1215 case Opt_mountproto: 1218 case Opt_mountproto:
1216 string = match_strdup(args); 1219 string = match_strdup(args);
@@ -1218,7 +1221,6 @@ static int nfs_parse_mount_options(char *raw,
1218 goto out_nomem; 1221 goto out_nomem;
1219 token = match_token(string, 1222 token = match_token(string,
1220 nfs_xprt_protocol_tokens, args); 1223 nfs_xprt_protocol_tokens, args);
1221 kfree(string);
1222 1224
1223 switch (token) { 1225 switch (token) {
1224 case Opt_xprt_udp: 1226 case Opt_xprt_udp:
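Note: in the mount-option parser above, the rdma cases now call xprt_load_transport() so the RPC/RDMA transport module is demand-loaded when an rdma mount is requested. Because that call reads the match_strdup()'d option string, the corresponding kfree(string) moves from immediately after match_token() to after the transport switch; freeing at the old spot would hand xprt_load_transport() freed memory. A runnable userspace model of the ownership change (load_transport() is a stand-in):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Stand-in for xprt_load_transport(): note that it reads the string. */
    static void load_transport(const char *name)
    {
            printf("request_module for transport \"%s\"\n", name);
    }

    int main(void)
    {
            char *string = strdup("rdma"); /* plays the match_strdup() result */

            if (string == NULL)
                    return 1;
            /* The string must survive the whole switch: one branch
             * consumes it. Freeing right after parsing (the old
             * placement) would pass freed memory below. */
            load_transport(string);
            free(string);   /* new placement: after the last user */
            return 0;
    }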
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9f9845859fc1..e560a78995a3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 313int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
314{ 314{
315 struct inode *inode = mapping->host; 315 struct inode *inode = mapping->host;
316 unsigned long *bitlock = &NFS_I(inode)->flags;
316 struct nfs_pageio_descriptor pgio; 317 struct nfs_pageio_descriptor pgio;
317 int err; 318 int err;
318 319
320 /* Stop dirtying of new pages while we sync */
321 err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
322 nfs_wait_bit_killable, TASK_KILLABLE);
323 if (err)
324 goto out_err;
325
319 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 326 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
320 327
321 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 328 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
322 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); 329 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
323 nfs_pageio_complete(&pgio); 330 nfs_pageio_complete(&pgio);
331
332 clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
333 smp_mb__after_clear_bit();
334 wake_up_bit(bitlock, NFS_INO_FLUSHING);
335
324 if (err < 0) 336 if (err < 0)
325 return err; 337 goto out_err;
326 if (pgio.pg_error < 0) 338 err = pgio.pg_error;
327 return pgio.pg_error; 339 if (err < 0)
340 goto out_err;
328 return 0; 341 return 0;
342out_err:
343 return err;
329} 344}
330 345
331/* 346/*
@@ -404,7 +419,6 @@ nfs_mark_request_commit(struct nfs_page *req)
404 struct nfs_inode *nfsi = NFS_I(inode); 419 struct nfs_inode *nfsi = NFS_I(inode);
405 420
406 spin_lock(&inode->i_lock); 421 spin_lock(&inode->i_lock);
407 nfsi->ncommit++;
408 set_bit(PG_CLEAN, &(req)->wb_flags); 422 set_bit(PG_CLEAN, &(req)->wb_flags);
409 radix_tree_tag_set(&nfsi->nfs_page_tree, 423 radix_tree_tag_set(&nfsi->nfs_page_tree,
410 req->wb_index, 424 req->wb_index,
@@ -524,6 +538,12 @@ static void nfs_cancel_commit_list(struct list_head *head)
524} 538}
525 539
526#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 540#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
541static int
542nfs_need_commit(struct nfs_inode *nfsi)
543{
544 return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
545}
546
527/* 547/*
528 * nfs_scan_commit - Scan an inode for commit requests 548 * nfs_scan_commit - Scan an inode for commit requests
529 * @inode: NFS inode to scan 549 * @inode: NFS inode to scan
@@ -538,16 +558,18 @@ static int
538nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 558nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
539{ 559{
540 struct nfs_inode *nfsi = NFS_I(inode); 560 struct nfs_inode *nfsi = NFS_I(inode);
541 int res = 0;
542 561
543 if (nfsi->ncommit != 0) { 562 if (!nfs_need_commit(nfsi))
544 res = nfs_scan_list(nfsi, dst, idx_start, npages, 563 return 0;
545 NFS_PAGE_TAG_COMMIT); 564
546 nfsi->ncommit -= res; 565 return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
547 }
548 return res;
549} 566}
550#else 567#else
568static inline int nfs_need_commit(struct nfs_inode *nfsi)
569{
570 return 0;
571}
572
551static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 573static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
552{ 574{
553 return 0; 575 return 0;
@@ -820,7 +842,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
820 data->args.stable = NFS_UNSTABLE; 842 data->args.stable = NFS_UNSTABLE;
821 if (how & FLUSH_STABLE) { 843 if (how & FLUSH_STABLE) {
822 data->args.stable = NFS_DATA_SYNC; 844 data->args.stable = NFS_DATA_SYNC;
823 if (!NFS_I(inode)->ncommit) 845 if (!nfs_need_commit(NFS_I(inode)))
824 data->args.stable = NFS_FILE_SYNC; 846 data->args.stable = NFS_FILE_SYNC;
825 } 847 }
826 848
@@ -1425,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
1425{ 1447{
1426 struct writeback_control wbc = { 1448 struct writeback_control wbc = {
1427 .bdi = mapping->backing_dev_info, 1449 .bdi = mapping->backing_dev_info,
1428 .sync_mode = WB_SYNC_NONE, 1450 .sync_mode = WB_SYNC_ALL,
1429 .nr_to_write = LONG_MAX, 1451 .nr_to_write = LONG_MAX,
1430 .range_start = 0, 1452 .range_start = 0,
1431 .range_end = LLONG_MAX, 1453 .range_end = LLONG_MAX,
1432 .for_writepages = 1, 1454 .for_writepages = 1,
1433 }; 1455 };
1434 int ret;
1435 1456
1436 ret = __nfs_write_mapping(mapping, &wbc, how);
1437 if (ret < 0)
1438 return ret;
1439 wbc.sync_mode = WB_SYNC_ALL;
1440 return __nfs_write_mapping(mapping, &wbc, how); 1457 return __nfs_write_mapping(mapping, &wbc, how);
1441} 1458}
1442 1459
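Note: the write.c changes above serialize writeback per inode: nfs_writepages() takes the new NFS_INO_FLUSHING bit with wait_on_bit_lock() (killably, via the relocated nfs_wait_bit_killable()), flushes, then releases it with clear_bit_unlock() followed by a barrier and wake_up_bit(). The per-inode ncommit counter also gives way to nfs_need_commit(), which simply asks the radix tree whether any page still carries the COMMIT tag. A userspace C11 analogue of the bit-lock shape (it spins where the kernel sleeps, purely to show the acquire/release structure):

    #include <stdatomic.h>
    #include <stdio.h>

    /* One flusher at a time per inode, so new dirtying can be
     * throttled while a sync is in flight. */
    static atomic_flag flushing = ATOMIC_FLAG_INIT;

    static int writepages(void)
    {
            while (atomic_flag_test_and_set_explicit(&flushing,
                                                     memory_order_acquire))
                    ;       /* the kernel would sleep killably here */

            /* ... write back dirty pages ... */

            atomic_flag_clear_explicit(&flushing, memory_order_release);
            /* the kernel follows the clear with a barrier + wake_up_bit() */
            return 0;
    }

    int main(void)
    {
            printf("writepages -> %d\n", writepages());
            return 0;
    }

nfs_write_mapping() is likewise simplified from a WB_SYNC_NONE pass followed by a WB_SYNC_ALL pass down to a single WB_SYNC_ALL pass.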
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3d93b2064ce5..a4ed8644d69c 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -938,10 +938,12 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
938 char transport[16]; 938 char transport[16];
939 int port; 939 int port;
940 if (sscanf(buf, "%15s %4d", transport, &port) == 2) { 940 if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
941 if (port < 1 || port > 65535)
942 return -EINVAL;
941 err = nfsd_create_serv(); 943 err = nfsd_create_serv();
942 if (!err) { 944 if (!err) {
943 err = svc_create_xprt(nfsd_serv, 945 err = svc_create_xprt(nfsd_serv,
944 transport, port, 946 transport, PF_INET, port,
945 SVC_SOCK_ANONYMOUS); 947 SVC_SOCK_ANONYMOUS);
946 if (err == -ENOENT) 948 if (err == -ENOENT)
947 /* Give a reasonable perror msg for 949 /* Give a reasonable perror msg for
@@ -960,7 +962,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
960 char transport[16]; 962 char transport[16];
961 int port; 963 int port;
962 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { 964 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
963 if (port == 0) 965 if (port < 1 || port > 65535)
964 return -EINVAL; 966 return -EINVAL;
965 if (nfsd_serv) { 967 if (nfsd_serv) {
966 xprt = svc_find_xprt(nfsd_serv, transport, 968 xprt = svc_find_xprt(nfsd_serv, transport,
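Note: __write_ports() above gains symmetric validation: both the add and remove paths now reject ports outside 1..65535 (previously only removal was checked, and only against 0), and the add path passes an explicit PF_INET family to svc_create_xprt(). A runnable sketch of the parse-and-validate step:

    #include <stdio.h>

    /* Returns the port on success, -1 on a malformed or out-of-range
     * request line such as "tcp 0". */
    static int parse_port(const char *buf)
    {
            char transport[16];
            int port;

            if (sscanf(buf, "%15s %4d", transport, &port) != 2)
                    return -1;
            if (port < 1 || port > 65535)
                    return -1;
            return port;
    }

    int main(void)
    {
            printf("%d\n", parse_port("tcp 2049"));  /* 2049 */
            printf("%d\n", parse_port("tcp 0"));     /* -1 */
            return 0;
    }

nfssvc.c below makes the matching API change: svc_create_pooled() loses its blanket AF_INET argument and each svc_create_xprt() call now names the listener's address family itself.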
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 07e4f5d7baa8..bc3567bab8c4 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -229,7 +229,6 @@ int nfsd_create_serv(void)
229 229
230 atomic_set(&nfsd_busy, 0); 230 atomic_set(&nfsd_busy, 0);
231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
232 AF_INET,
233 nfsd_last_thread, nfsd, THIS_MODULE); 232 nfsd_last_thread, nfsd, THIS_MODULE);
234 if (nfsd_serv == NULL) 233 if (nfsd_serv == NULL)
235 err = -ENOMEM; 234 err = -ENOMEM;
@@ -244,7 +243,7 @@ static int nfsd_init_socks(int port)
244 if (!list_empty(&nfsd_serv->sv_permsocks)) 243 if (!list_empty(&nfsd_serv->sv_permsocks))
245 return 0; 244 return 0;
246 245
247 error = svc_create_xprt(nfsd_serv, "udp", port, 246 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
248 SVC_SOCK_DEFAULTS); 247 SVC_SOCK_DEFAULTS);
249 if (error < 0) 248 if (error < 0)
250 return error; 249 return error;
@@ -253,7 +252,7 @@ static int nfsd_init_socks(int port)
253 if (error < 0) 252 if (error < 0)
254 return error; 253 return error;
255 254
256 error = svc_create_xprt(nfsd_serv, "tcp", port, 255 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
257 SVC_SOCK_DEFAULTS); 256 SVC_SOCK_DEFAULTS);
258 if (error < 0) 257 if (error < 0)
259 return error; 258 return error;
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 34314b33dbd4..5a9e34475e37 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -32,8 +32,8 @@
32/** 32/**
33 * The little endian Unicode string $I30 as a global constant. 33 * The little endian Unicode string $I30 as a global constant.
34 */ 34 */
35ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), 35ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
36 const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 }; 36 cpu_to_le16('3'), cpu_to_le16('0'), 0 };
37 37
38/** 38/**
39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
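Note: from here through the rest of the NTFS changes the pattern is mechanical: the driver-private const_cpu_to_le{16,32,64}() wrappers are dropped in favour of the generic cpu_to_le*() macros, which by this point are themselves constant expressions and therefore legal in enum and static initializers (the layout.h hunk below deletes the wrapper definitions; everything after it is a rename). A simplified little-endian userspace model of why the wrappers became redundant:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified model: on little-endian, cpu_to_le32() is the identity,
     * and crucially it is a constant expression, so it can seed enum
     * initializers directly. (Real kernels swap bytes on big-endian.) */
    #define cpu_to_le32(x) ((uint32_t)(x))

    enum {
            magic_FILE = cpu_to_le32(0x454c4946),   /* "FILE" */
            magic_INDX = cpu_to_le32(0x58444e49),   /* "INDX" */
    };

    int main(void)
    {
            printf("magic_FILE=%#x magic_INDX=%#x\n",
                   (unsigned int)magic_FILE, (unsigned int)magic_INDX);
            return 0;
    }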
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 86bef156cf0a..82c5085559c6 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1975,8 +1975,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1975 goto em_put_err_out; 1975 goto em_put_err_out;
1976 next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + 1976 next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
1977 le16_to_cpu(al_entry->length)); 1977 le16_to_cpu(al_entry->length));
1978 if (le32_to_cpu(al_entry->type) > 1978 if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
1979 const_le32_to_cpu(AT_DATA))
1980 goto em_put_err_out; 1979 goto em_put_err_out;
1981 if (AT_DATA != al_entry->type) 1980 if (AT_DATA != al_entry->type)
1982 continue; 1981 continue;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 1e383328eceb..50931b1ce4b9 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -31,19 +31,8 @@
31 31
32#include "types.h" 32#include "types.h"
33 33
34/*
35 * Constant endianness conversion defines.
36 */
37#define const_le16_to_cpu(x) __constant_le16_to_cpu(x)
38#define const_le32_to_cpu(x) __constant_le32_to_cpu(x)
39#define const_le64_to_cpu(x) __constant_le64_to_cpu(x)
40
41#define const_cpu_to_le16(x) __constant_cpu_to_le16(x)
42#define const_cpu_to_le32(x) __constant_cpu_to_le32(x)
43#define const_cpu_to_le64(x) __constant_cpu_to_le64(x)
44
45/* The NTFS oem_id "NTFS " */ 34/* The NTFS oem_id "NTFS " */
46#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) 35#define magicNTFS cpu_to_le64(0x202020205346544eULL)
47 36
48/* 37/*
49 * Location of bootsector on partition: 38 * Location of bootsector on partition:
@@ -114,25 +103,25 @@ typedef struct {
114 */ 103 */
115enum { 104enum {
116 /* Found in $MFT/$DATA. */ 105 /* Found in $MFT/$DATA. */
117 magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */ 106 magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */
118 magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */ 107 magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */
119 magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ 108 magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */
120 109
121 /* Found in $LogFile/$DATA. */ 110 /* Found in $LogFile/$DATA. */
122 magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */ 111 magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */
123 magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ 112 magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */
124 113
125 /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ 114 /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */
126 magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ 115 magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
127 116
128 /* Found in all ntfs-record-containing records. */ 117 /* Found in all ntfs-record-containing records. */
129 magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector 118 magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector
130 transfer was detected. */ 119 transfer was detected. */
131 /* 120 /*
132 * Found in $LogFile/$DATA when a page is full of 0xff bytes and is 121 * Found in $LogFile/$DATA when a page is full of 0xff bytes and is
133 * thus not initialized. Page must be initialized before using it. 122 * thus not initialized. Page must be initialized before using it.
134 */ 123 */
135 magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */ 124 magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */
136}; 125};
137 126
138typedef le32 NTFS_RECORD_TYPE; 127typedef le32 NTFS_RECORD_TYPE;
@@ -258,8 +247,8 @@ typedef enum {
258 * information about the mft record in which they are present. 247 * information about the mft record in which they are present.
259 */ 248 */
260enum { 249enum {
261 MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), 250 MFT_RECORD_IN_USE = cpu_to_le16(0x0001),
262 MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), 251 MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002),
263} __attribute__ ((__packed__)); 252} __attribute__ ((__packed__));
264 253
265typedef le16 MFT_RECORD_FLAGS; 254typedef le16 MFT_RECORD_FLAGS;
@@ -309,7 +298,7 @@ typedef le16 MFT_RECORD_FLAGS;
309 * Note: The _LE versions will return a CPU endian formatted value! 298 * Note: The _LE versions will return a CPU endian formatted value!
310 */ 299 */
311#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL 300#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
312#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) 301#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU)
313 302
314typedef u64 MFT_REF; 303typedef u64 MFT_REF;
315typedef le64 leMFT_REF; 304typedef le64 leMFT_REF;
@@ -477,25 +466,25 @@ typedef struct {
477 * a revealing choice of symbol I do not know what is... (-; 466 * a revealing choice of symbol I do not know what is... (-;
478 */ 467 */
479enum { 468enum {
480 AT_UNUSED = const_cpu_to_le32( 0), 469 AT_UNUSED = cpu_to_le32( 0),
481 AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), 470 AT_STANDARD_INFORMATION = cpu_to_le32( 0x10),
482 AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), 471 AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20),
483 AT_FILE_NAME = const_cpu_to_le32( 0x30), 472 AT_FILE_NAME = cpu_to_le32( 0x30),
484 AT_OBJECT_ID = const_cpu_to_le32( 0x40), 473 AT_OBJECT_ID = cpu_to_le32( 0x40),
485 AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), 474 AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50),
486 AT_VOLUME_NAME = const_cpu_to_le32( 0x60), 475 AT_VOLUME_NAME = cpu_to_le32( 0x60),
487 AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), 476 AT_VOLUME_INFORMATION = cpu_to_le32( 0x70),
488 AT_DATA = const_cpu_to_le32( 0x80), 477 AT_DATA = cpu_to_le32( 0x80),
489 AT_INDEX_ROOT = const_cpu_to_le32( 0x90), 478 AT_INDEX_ROOT = cpu_to_le32( 0x90),
490 AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), 479 AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0),
491 AT_BITMAP = const_cpu_to_le32( 0xb0), 480 AT_BITMAP = cpu_to_le32( 0xb0),
492 AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), 481 AT_REPARSE_POINT = cpu_to_le32( 0xc0),
493 AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), 482 AT_EA_INFORMATION = cpu_to_le32( 0xd0),
494 AT_EA = const_cpu_to_le32( 0xe0), 483 AT_EA = cpu_to_le32( 0xe0),
495 AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), 484 AT_PROPERTY_SET = cpu_to_le32( 0xf0),
496 AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), 485 AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100),
497 AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), 486 AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000),
498 AT_END = const_cpu_to_le32(0xffffffff) 487 AT_END = cpu_to_le32(0xffffffff)
499}; 488};
500 489
501typedef le32 ATTR_TYPE; 490typedef le32 ATTR_TYPE;
@@ -539,13 +528,13 @@ typedef le32 ATTR_TYPE;
539 * equal then the second le32 values would be compared, etc. 528 * equal then the second le32 values would be compared, etc.
540 */ 529 */
541enum { 530enum {
542 COLLATION_BINARY = const_cpu_to_le32(0x00), 531 COLLATION_BINARY = cpu_to_le32(0x00),
543 COLLATION_FILE_NAME = const_cpu_to_le32(0x01), 532 COLLATION_FILE_NAME = cpu_to_le32(0x01),
544 COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02), 533 COLLATION_UNICODE_STRING = cpu_to_le32(0x02),
545 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), 534 COLLATION_NTOFS_ULONG = cpu_to_le32(0x10),
546 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), 535 COLLATION_NTOFS_SID = cpu_to_le32(0x11),
547 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), 536 COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12),
548 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13), 537 COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13),
549}; 538};
550 539
551typedef le32 COLLATION_RULE; 540typedef le32 COLLATION_RULE;
@@ -559,25 +548,25 @@ typedef le32 COLLATION_RULE;
559 * NT4. 548 * NT4.
560 */ 549 */
561enum { 550enum {
562 ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be 551 ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be
563 indexed. */ 552 indexed. */
564 ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type 553 ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type
565 can be present multiple times in the 554 can be present multiple times in the
566 mft records of an inode. */ 555 mft records of an inode. */
567 ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value 556 ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value
568 must contain at least one non-zero 557 must contain at least one non-zero
569 byte. */ 558 byte. */
570 ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be 559 ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be
571 indexed and the attribute value must be 560 indexed and the attribute value must be
572 unique for the attribute type in all of 561 unique for the attribute type in all of
573 the mft records of an inode. */ 562 the mft records of an inode. */
574 ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be 563 ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be
575 named and the name must be unique for 564 named and the name must be unique for
576 the attribute type in all of the mft 565 the attribute type in all of the mft
577 records of an inode. */ 566 records of an inode. */
578 ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be 567 ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be
579 resident. */ 568 resident. */
580 ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log 569 ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log
581 modifications to this attribute, 570 modifications to this attribute,
582 regardless of whether it is resident or 571 regardless of whether it is resident or
583 non-resident. Without this, only log 572 non-resident. Without this, only log
@@ -614,12 +603,12 @@ typedef struct {
614 * Attribute flags (16-bit). 603 * Attribute flags (16-bit).
615 */ 604 */
616enum { 605enum {
617 ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), 606 ATTR_IS_COMPRESSED = cpu_to_le16(0x0001),
618 ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method 607 ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method
619 mask. Also, first 608 mask. Also, first
620 illegal value. */ 609 illegal value. */
621 ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), 610 ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000),
622 ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), 611 ATTR_IS_SPARSE = cpu_to_le16(0x8000),
623} __attribute__ ((__packed__)); 612} __attribute__ ((__packed__));
624 613
625typedef le16 ATTR_FLAGS; 614typedef le16 ATTR_FLAGS;
@@ -811,32 +800,32 @@ typedef ATTR_RECORD ATTR_REC;
811 * flags appear in all of the above. 800 * flags appear in all of the above.
812 */ 801 */
813enum { 802enum {
814 FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), 803 FILE_ATTR_READONLY = cpu_to_le32(0x00000001),
815 FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), 804 FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002),
816 FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), 805 FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004),
817 /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */ 806 /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */
818 807
819 FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), 808 FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010),
820 /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is 809 /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is
821 reserved for the DOS SUBDIRECTORY flag. */ 810 reserved for the DOS SUBDIRECTORY flag. */
822 FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), 811 FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020),
823 FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), 812 FILE_ATTR_DEVICE = cpu_to_le32(0x00000040),
824 FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), 813 FILE_ATTR_NORMAL = cpu_to_le32(0x00000080),
825 814
826 FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), 815 FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100),
827 FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), 816 FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200),
828 FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), 817 FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400),
829 FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), 818 FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800),
830 819
831 FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), 820 FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000),
832 FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), 821 FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000),
833 FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), 822 FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000),
834 823
835 FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), 824 FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7),
836 /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the 825 /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the
837 FILE_ATTR_DEVICE and preserves everything else. This mask is used 826 FILE_ATTR_DEVICE and preserves everything else. This mask is used
838 to obtain all flags that are valid for reading. */ 827 to obtain all flags that are valid for reading. */
839 FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), 828 FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7),
840 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the 829 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
841 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, 830 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
842 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask 831 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
@@ -846,11 +835,11 @@ enum {
846 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION 835 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
847 * attribute of an mft record. 836 * attribute of an mft record.
848 */ 837 */
849 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), 838 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000),
850 /* Note, this is a copy of the corresponding bit from the mft record, 839 /* Note, this is a copy of the corresponding bit from the mft record,
851 telling us whether this is a directory or not, i.e. whether it has 840 telling us whether this is a directory or not, i.e. whether it has
852 an index root attribute or not. */ 841 an index root attribute or not. */
853 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), 842 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000),
854 /* Note, this is a copy of the corresponding bit from the mft record, 843 /* Note, this is a copy of the corresponding bit from the mft record,
855 telling us whether this file has a view index present (eg. object id 844 telling us whether this file has a view index present (eg. object id
856 index, quota index, one of the security indexes or the encrypting 845 index, quota index, one of the security indexes or the encrypting
@@ -1446,42 +1435,42 @@ enum {
1446 /* Specific rights for files and directories are as follows: */ 1435 /* Specific rights for files and directories are as follows: */
1447 1436
1448 /* Right to read data from the file. (FILE) */ 1437 /* Right to read data from the file. (FILE) */
1449 FILE_READ_DATA = const_cpu_to_le32(0x00000001), 1438 FILE_READ_DATA = cpu_to_le32(0x00000001),
1450 /* Right to list contents of a directory. (DIRECTORY) */ 1439 /* Right to list contents of a directory. (DIRECTORY) */
1451 FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), 1440 FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001),
1452 1441
1453 /* Right to write data to the file. (FILE) */ 1442 /* Right to write data to the file. (FILE) */
1454 FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), 1443 FILE_WRITE_DATA = cpu_to_le32(0x00000002),
1455 /* Right to create a file in the directory. (DIRECTORY) */ 1444 /* Right to create a file in the directory. (DIRECTORY) */
1456 FILE_ADD_FILE = const_cpu_to_le32(0x00000002), 1445 FILE_ADD_FILE = cpu_to_le32(0x00000002),
1457 1446
1458 /* Right to append data to the file. (FILE) */ 1447 /* Right to append data to the file. (FILE) */
1459 FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), 1448 FILE_APPEND_DATA = cpu_to_le32(0x00000004),
1460 /* Right to create a subdirectory. (DIRECTORY) */ 1449 /* Right to create a subdirectory. (DIRECTORY) */
1461 FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), 1450 FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004),
1462 1451
1463 /* Right to read extended attributes. (FILE/DIRECTORY) */ 1452 /* Right to read extended attributes. (FILE/DIRECTORY) */
1464 FILE_READ_EA = const_cpu_to_le32(0x00000008), 1453 FILE_READ_EA = cpu_to_le32(0x00000008),
1465 1454
1466 /* Right to write extended attributes. (FILE/DIRECTORY) */ 1455 /* Right to write extended attributes. (FILE/DIRECTORY) */
1467 FILE_WRITE_EA = const_cpu_to_le32(0x00000010), 1456 FILE_WRITE_EA = cpu_to_le32(0x00000010),
1468 1457
1469 /* Right to execute a file. (FILE) */ 1458 /* Right to execute a file. (FILE) */
1470 FILE_EXECUTE = const_cpu_to_le32(0x00000020), 1459 FILE_EXECUTE = cpu_to_le32(0x00000020),
1471 /* Right to traverse the directory. (DIRECTORY) */ 1460 /* Right to traverse the directory. (DIRECTORY) */
1472 FILE_TRAVERSE = const_cpu_to_le32(0x00000020), 1461 FILE_TRAVERSE = cpu_to_le32(0x00000020),
1473 1462
1474 /* 1463 /*
1475 * Right to delete a directory and all the files it contains (its 1464 * Right to delete a directory and all the files it contains (its
1476 * children), even if the files are read-only. (DIRECTORY) 1465 * children), even if the files are read-only. (DIRECTORY)
1477 */ 1466 */
1478 FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), 1467 FILE_DELETE_CHILD = cpu_to_le32(0x00000040),
1479 1468
1480 /* Right to read file attributes. (FILE/DIRECTORY) */ 1469 /* Right to read file attributes. (FILE/DIRECTORY) */
1481 FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), 1470 FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080),
1482 1471
1483 /* Right to change file attributes. (FILE/DIRECTORY) */ 1472 /* Right to change file attributes. (FILE/DIRECTORY) */
1484 FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), 1473 FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100),
1485 1474
1486 /* 1475 /*
1487 * The standard rights (bits 16 to 23). These are independent of the 1476 * The standard rights (bits 16 to 23). These are independent of the
@@ -1489,27 +1478,27 @@ enum {
1489 */ 1478 */
1490 1479
1491 /* Right to delete the object. */ 1480 /* Right to delete the object. */
1492 DELETE = const_cpu_to_le32(0x00010000), 1481 DELETE = cpu_to_le32(0x00010000),
1493 1482
1494 /* 1483 /*
1495 * Right to read the information in the object's security descriptor, 1484 * Right to read the information in the object's security descriptor,
1496 * not including the information in the SACL, i.e. right to read the 1485 * not including the information in the SACL, i.e. right to read the
1497 * security descriptor and owner. 1486 * security descriptor and owner.
1498 */ 1487 */
1499 READ_CONTROL = const_cpu_to_le32(0x00020000), 1488 READ_CONTROL = cpu_to_le32(0x00020000),
1500 1489
1501 /* Right to modify the DACL in the object's security descriptor. */ 1490 /* Right to modify the DACL in the object's security descriptor. */
1502 WRITE_DAC = const_cpu_to_le32(0x00040000), 1491 WRITE_DAC = cpu_to_le32(0x00040000),
1503 1492
1504 /* Right to change the owner in the object's security descriptor. */ 1493 /* Right to change the owner in the object's security descriptor. */
1505 WRITE_OWNER = const_cpu_to_le32(0x00080000), 1494 WRITE_OWNER = cpu_to_le32(0x00080000),
1506 1495
1507 /* 1496 /*
1508 * Right to use the object for synchronization. Enables a process to 1497 * Right to use the object for synchronization. Enables a process to
1509 * wait until the object is in the signalled state. Some object types 1498 * wait until the object is in the signalled state. Some object types
1510 * do not support this access right. 1499 * do not support this access right.
1511 */ 1500 */
1512 SYNCHRONIZE = const_cpu_to_le32(0x00100000), 1501 SYNCHRONIZE = cpu_to_le32(0x00100000),
1513 1502
1514 /* 1503 /*
1515 * The following STANDARD_RIGHTS_* are combinations of the above for 1504 * The following STANDARD_RIGHTS_* are combinations of the above for
@@ -1517,25 +1506,25 @@ enum {
1517 */ 1506 */
1518 1507
1519 /* These are currently defined to READ_CONTROL. */ 1508 /* These are currently defined to READ_CONTROL. */
1520 STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), 1509 STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000),
1521 STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), 1510 STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000),
1522 STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), 1511 STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000),
1523 1512
1524 /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ 1513 /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */
1525 STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), 1514 STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000),
1526 1515
1527 /* 1516 /*
1528 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and 1517 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and
1529 * SYNCHRONIZE access. 1518 * SYNCHRONIZE access.
1530 */ 1519 */
1531 STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), 1520 STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000),
1532 1521
1533 /* 1522 /*
1534 * The access system ACL and maximum allowed access types (bits 24 to 1523 * The access system ACL and maximum allowed access types (bits 24 to
1535 * 25, bits 26 to 27 are reserved). 1524 * 25, bits 26 to 27 are reserved).
1536 */ 1525 */
1537 ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), 1526 ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000),
1538 MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), 1527 MAXIMUM_ALLOWED = cpu_to_le32(0x02000000),
1539 1528
1540 /* 1529 /*
1541 * The generic rights (bits 28 to 31). These map onto the standard and 1530 * The generic rights (bits 28 to 31). These map onto the standard and
@@ -1543,10 +1532,10 @@ enum {
1543 */ 1532 */
1544 1533
1545 /* Read, write, and execute access. */ 1534 /* Read, write, and execute access. */
1546 GENERIC_ALL = const_cpu_to_le32(0x10000000), 1535 GENERIC_ALL = cpu_to_le32(0x10000000),
1547 1536
1548 /* Execute access. */ 1537 /* Execute access. */
1549 GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), 1538 GENERIC_EXECUTE = cpu_to_le32(0x20000000),
1550 1539
1551 /* 1540 /*
1552 * Write access. For files, this maps onto: 1541 * Write access. For files, this maps onto:
@@ -1555,7 +1544,7 @@ enum {
1555 * For directories, the mapping has the same numerical value. See 1544 * For directories, the mapping has the same numerical value. See
1556 * above for the descriptions of the rights granted. 1545 * above for the descriptions of the rights granted.
1557 */ 1546 */
1558 GENERIC_WRITE = const_cpu_to_le32(0x40000000), 1547 GENERIC_WRITE = cpu_to_le32(0x40000000),
1559 1548
1560 /* 1549 /*
1561 * Read access. For files, this maps onto: 1550 * Read access. For files, this maps onto:
@@ -1564,7 +1553,7 @@ enum {
1564 * For directories, the mapping has the same numerical value. See 1553 * For directories, the mapping has the same numerical value. See
1565 * above for the descriptions of the rights granted. 1554 * above for the descriptions of the rights granted.
1566 */ 1555 */
1567 GENERIC_READ = const_cpu_to_le32(0x80000000), 1556 GENERIC_READ = cpu_to_le32(0x80000000),
1568}; 1557};
1569 1558
1570typedef le32 ACCESS_MASK; 1559typedef le32 ACCESS_MASK;
@@ -1604,8 +1593,8 @@ typedef struct {
1604 * The object ACE flags (32-bit). 1593 * The object ACE flags (32-bit).
1605 */ 1594 */
1606enum { 1595enum {
1607 ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), 1596 ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1),
1608 ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), 1597 ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2),
1609}; 1598};
1610 1599
1611typedef le32 OBJECT_ACE_FLAGS; 1600typedef le32 OBJECT_ACE_FLAGS;
@@ -1706,23 +1695,23 @@ typedef enum {
1706 * expressed as offsets from the beginning of the security descriptor. 1695 * expressed as offsets from the beginning of the security descriptor.
1707 */ 1696 */
1708enum { 1697enum {
1709 SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), 1698 SE_OWNER_DEFAULTED = cpu_to_le16(0x0001),
1710 SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), 1699 SE_GROUP_DEFAULTED = cpu_to_le16(0x0002),
1711 SE_DACL_PRESENT = const_cpu_to_le16(0x0004), 1700 SE_DACL_PRESENT = cpu_to_le16(0x0004),
1712 SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), 1701 SE_DACL_DEFAULTED = cpu_to_le16(0x0008),
1713 1702
1714 SE_SACL_PRESENT = const_cpu_to_le16(0x0010), 1703 SE_SACL_PRESENT = cpu_to_le16(0x0010),
1715 SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), 1704 SE_SACL_DEFAULTED = cpu_to_le16(0x0020),
1716 1705
1717 SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), 1706 SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100),
1718 SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), 1707 SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200),
1719 SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), 1708 SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400),
1720 SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), 1709 SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800),
1721 1710
1722 SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), 1711 SE_DACL_PROTECTED = cpu_to_le16(0x1000),
1723 SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), 1712 SE_SACL_PROTECTED = cpu_to_le16(0x2000),
1724 SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), 1713 SE_RM_CONTROL_VALID = cpu_to_le16(0x4000),
1725 SE_SELF_RELATIVE = const_cpu_to_le16(0x8000) 1714 SE_SELF_RELATIVE = cpu_to_le16(0x8000)
1726} __attribute__ ((__packed__)); 1715} __attribute__ ((__packed__));
1727 1716
1728typedef le16 SECURITY_DESCRIPTOR_CONTROL; 1717typedef le16 SECURITY_DESCRIPTOR_CONTROL;
@@ -1910,21 +1899,21 @@ typedef struct {
1910 * Possible flags for the volume (16-bit). 1899 * Possible flags for the volume (16-bit).
1911 */ 1900 */
1912enum { 1901enum {
1913 VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), 1902 VOLUME_IS_DIRTY = cpu_to_le16(0x0001),
1914 VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), 1903 VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002),
1915 VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), 1904 VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004),
1916 VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), 1905 VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008),
1917 1906
1918 VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), 1907 VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010),
1919 VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), 1908 VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020),
1920 1909
1921 VOLUME_CHKDSK_UNDERWAY = const_cpu_to_le16(0x4000), 1910 VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000),
1922 VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), 1911 VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000),
1923 1912
1924 VOLUME_FLAGS_MASK = const_cpu_to_le16(0xc03f), 1913 VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f),
1925 1914
1926 /* To make our life easier when checking if we must mount read-only. */ 1915 /* To make our life easier when checking if we must mount read-only. */
1927 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0xc027), 1916 VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027),
1928} __attribute__ ((__packed__)); 1917} __attribute__ ((__packed__));
1929 1918
1930typedef le16 VOLUME_FLAGS; 1919typedef le16 VOLUME_FLAGS;
@@ -2109,26 +2098,26 @@ typedef struct {
2109 * The user quota flags. Names explain meaning. 2098 * The user quota flags. Names explain meaning.
2110 */ 2099 */
2111enum { 2100enum {
2112 QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), 2101 QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001),
2113 QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), 2102 QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002),
2114 QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), 2103 QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004),
2115 2104
2116 QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), 2105 QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007),
2117 /* This is a bit mask for the user quota flags. */ 2106 /* This is a bit mask for the user quota flags. */
2118 2107
2119 /* 2108 /*
2120 * These flags are only present in the quota defaults index entry, i.e. 2109 * These flags are only present in the quota defaults index entry, i.e.
2121 * in the entry where owner_id = QUOTA_DEFAULTS_ID. 2110 * in the entry where owner_id = QUOTA_DEFAULTS_ID.
2122 */ 2111 */
2123 QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), 2112 QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010),
2124 QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), 2113 QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020),
2125 QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), 2114 QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040),
2126 QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), 2115 QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080),
2127 2116
2128 QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), 2117 QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100),
2129 QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), 2118 QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200),
2130 QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), 2119 QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400),
2131 QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), 2120 QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800),
2132}; 2121};
2133 2122
2134typedef le32 QUOTA_FLAGS; 2123typedef le32 QUOTA_FLAGS;
@@ -2172,9 +2161,9 @@ typedef struct {
2172 * Predefined owner_id values (32-bit). 2161 * Predefined owner_id values (32-bit).
2173 */ 2162 */
2174enum { 2163enum {
2175 QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), 2164 QUOTA_INVALID_ID = cpu_to_le32(0x00000000),
2176 QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), 2165 QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001),
2177 QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), 2166 QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100),
2178}; 2167};
2179 2168
2180/* 2169/*
@@ -2189,14 +2178,14 @@ typedef enum {
2189 * Index entry flags (16-bit). 2178 * Index entry flags (16-bit).
2190 */ 2179 */
2191enum { 2180enum {
2192 INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a 2181 INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a
2193 sub-node, i.e. a reference to an index block in form of 2182 sub-node, i.e. a reference to an index block in form of
2194 a virtual cluster number (see below). */ 2183 a virtual cluster number (see below). */
2195 INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last 2184 INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last
2196 entry in an index block. The index entry does not 2185 entry in an index block. The index entry does not
2197 represent a file but it can point to a sub-node. */ 2186 represent a file but it can point to a sub-node. */
2198 2187
2199 INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force 2188 INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force
2200 enum bit width to 16-bit. */ 2189 enum bit width to 16-bit. */
2201} __attribute__ ((__packed__)); 2190} __attribute__ ((__packed__));
2202 2191
@@ -2334,26 +2323,26 @@ typedef struct {
2334 * These are the predefined reparse point tags: 2323 * These are the predefined reparse point tags:
2335 */ 2324 */
2336enum { 2325enum {
2337 IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), 2326 IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000),
2338 IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), 2327 IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000),
2339 IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), 2328 IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000),
2340 2329
2341 IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), 2330 IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000),
2342 IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), 2331 IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001),
2343 IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), 2332 IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001),
2344 2333
2345 IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), 2334 IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005),
2346 IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), 2335 IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006),
2347 IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), 2336 IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007),
2348 IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), 2337 IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008),
2349 2338
2350 IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), 2339 IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003),
2351 2340
2352 IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), 2341 IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004),
2353 2342
2354 IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), 2343 IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000),
2355 2344
2356 IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), 2345 IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff),
2357}; 2346};
2358 2347
2359/* 2348/*
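
The pattern above repeats across all of these NTFS enums: cpu_to_le32()/cpu_to_le16() now fold to compile-time constants when given constant arguments, so the const_cpu_to_le*() wrappers are redundant in initializers. A minimal sketch of the idiom (the EXAMPLE_* names are hypothetical, not from this patch):

    #include <linux/types.h>
    #include <asm/byteorder.h>

    /* Valid because cpu_to_le32(constant) is itself an integer
     * constant expression, usable in enum initializers. */
    enum {
            EXAMPLE_FLAG_A = cpu_to_le32(0x00000001),
            EXAMPLE_FLAG_B = cpu_to_le32(0x00000002),
    };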
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 9468e1c45ae3..b5a6f08bd35c 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -104,7 +104,7 @@ typedef struct {
104 * in this particular client array. Also inside the client records themselves, 104 * in this particular client array. Also inside the client records themselves,
105 * this means that there are no client records preceding or following this one. 105 * this means that there are no client records preceding or following this one.
106 */ 106 */
107#define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff) 107#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff)
108#define LOGFILE_NO_CLIENT_CPU 0xffff 108#define LOGFILE_NO_CLIENT_CPU 0xffff
109 109
110/* 110/*
@@ -112,8 +112,8 @@ typedef struct {
112 * information about the log file in which they are present. 112 * information about the log file in which they are present.
113 */ 113 */
114enum { 114enum {
115 RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), 115 RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002),
116 RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ 116 RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
117} __attribute__ ((__packed__)); 117} __attribute__ ((__packed__));
118 118
119typedef le16 RESTART_AREA_FLAGS; 119typedef le16 RESTART_AREA_FLAGS;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 17d32ca6bc35..23bf68453d7d 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2839,7 +2839,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
2839 */ 2839 */
2840 2840
2841 /* Mark the mft record as not in use. */ 2841 /* Mark the mft record as not in use. */
2842 m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE)); 2842 m->flags &= ~MFT_RECORD_IN_USE;
2843 2843
2844 /* Increment the sequence number, skipping zero, if it is not zero. */ 2844 /* Increment the sequence number, skipping zero, if it is not zero. */
2845 old_seq_no = m->sequence_number; 2845 old_seq_no = m->sequence_number;
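
The mft.c hunk above is the one change in this sweep that looks semantic, yet it preserves the bit pattern exactly: byte swapping commutes with bitwise NOT and AND, so clearing the flag directly on the little-endian value is equivalent to the old convert/complement/convert dance. A sketch of the identity (an illustration, not code from the patch):

    /* cpu_to_le16(~le16_to_cpu(x)) has the same bit pattern as ~x,
     * so the mask can be applied in le16 space directly. */
    m->flags &= ~MFT_RECORD_IN_USE;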
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 4a46743b5077..f76951dcd4a6 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -618,7 +618,7 @@ static bool is_boot_sector_ntfs(const struct super_block *sb,
618 * many BIOSes will refuse to boot from a bootsector if the magic is 618 * many BIOSes will refuse to boot from a bootsector if the magic is
619 * incorrect, so we emit a warning. 619 * incorrect, so we emit a warning.
620 */ 620 */
621 if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55)) 621 if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
622 ntfs_warning(sb, "Invalid end of sector marker."); 622 ntfs_warning(sb, "Invalid end of sector marker.");
623 return true; 623 return true;
624not_ntfs: 624not_ntfs:
@@ -1242,13 +1242,13 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1242 u32 *kaddr, *kend; 1242 u32 *kaddr, *kend;
1243 ntfs_name *name = NULL; 1243 ntfs_name *name = NULL;
1244 int ret = 1; 1244 int ret = 1;
1245 static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'), 1245 static const ntfschar hiberfil[13] = { cpu_to_le16('h'),
1246 const_cpu_to_le16('i'), const_cpu_to_le16('b'), 1246 cpu_to_le16('i'), cpu_to_le16('b'),
1247 const_cpu_to_le16('e'), const_cpu_to_le16('r'), 1247 cpu_to_le16('e'), cpu_to_le16('r'),
1248 const_cpu_to_le16('f'), const_cpu_to_le16('i'), 1248 cpu_to_le16('f'), cpu_to_le16('i'),
1249 const_cpu_to_le16('l'), const_cpu_to_le16('.'), 1249 cpu_to_le16('l'), cpu_to_le16('.'),
1250 const_cpu_to_le16('s'), const_cpu_to_le16('y'), 1250 cpu_to_le16('s'), cpu_to_le16('y'),
1251 const_cpu_to_le16('s'), 0 }; 1251 cpu_to_le16('s'), 0 };
1252 1252
1253 ntfs_debug("Entering."); 1253 ntfs_debug("Entering.");
1254 /* 1254 /*
@@ -1296,7 +1296,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1296 goto iput_out; 1296 goto iput_out;
1297 } 1297 }
1298 kaddr = (u32*)page_address(page); 1298 kaddr = (u32*)page_address(page);
1299 if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) { 1299 if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) {
1300 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is " 1300 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
1301 "hibernated on the volume. This is the " 1301 "hibernated on the volume. This is the "
1302 "system volume."); 1302 "system volume.");
@@ -1337,12 +1337,12 @@ static bool load_and_init_quota(ntfs_volume *vol)
1337 MFT_REF mref; 1337 MFT_REF mref;
1338 struct inode *tmp_ino; 1338 struct inode *tmp_ino;
1339 ntfs_name *name = NULL; 1339 ntfs_name *name = NULL;
1340 static const ntfschar Quota[7] = { const_cpu_to_le16('$'), 1340 static const ntfschar Quota[7] = { cpu_to_le16('$'),
1341 const_cpu_to_le16('Q'), const_cpu_to_le16('u'), 1341 cpu_to_le16('Q'), cpu_to_le16('u'),
1342 const_cpu_to_le16('o'), const_cpu_to_le16('t'), 1342 cpu_to_le16('o'), cpu_to_le16('t'),
1343 const_cpu_to_le16('a'), 0 }; 1343 cpu_to_le16('a'), 0 };
1344 static ntfschar Q[3] = { const_cpu_to_le16('$'), 1344 static ntfschar Q[3] = { cpu_to_le16('$'),
1345 const_cpu_to_le16('Q'), 0 }; 1345 cpu_to_le16('Q'), 0 };
1346 1346
1347 ntfs_debug("Entering."); 1347 ntfs_debug("Entering.");
1348 /* 1348 /*
@@ -1416,16 +1416,16 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
1416 struct page *page; 1416 struct page *page;
1417 ntfs_name *name = NULL; 1417 ntfs_name *name = NULL;
1418 USN_HEADER *uh; 1418 USN_HEADER *uh;
1419 static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'), 1419 static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'),
1420 const_cpu_to_le16('U'), const_cpu_to_le16('s'), 1420 cpu_to_le16('U'), cpu_to_le16('s'),
1421 const_cpu_to_le16('n'), const_cpu_to_le16('J'), 1421 cpu_to_le16('n'), cpu_to_le16('J'),
1422 const_cpu_to_le16('r'), const_cpu_to_le16('n'), 1422 cpu_to_le16('r'), cpu_to_le16('n'),
1423 const_cpu_to_le16('l'), 0 }; 1423 cpu_to_le16('l'), 0 };
1424 static ntfschar Max[5] = { const_cpu_to_le16('$'), 1424 static ntfschar Max[5] = { cpu_to_le16('$'),
1425 const_cpu_to_le16('M'), const_cpu_to_le16('a'), 1425 cpu_to_le16('M'), cpu_to_le16('a'),
1426 const_cpu_to_le16('x'), 0 }; 1426 cpu_to_le16('x'), 0 };
1427 static ntfschar J[3] = { const_cpu_to_le16('$'), 1427 static ntfschar J[3] = { cpu_to_le16('$'),
1428 const_cpu_to_le16('J'), 0 }; 1428 cpu_to_le16('J'), 0 };
1429 1429
1430 ntfs_debug("Entering."); 1430 ntfs_debug("Entering.");
1431 /* 1431 /*
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 4087fbdac327..00d8e6bd7c36 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -116,27 +116,27 @@ typedef struct {
116 * documentation: http://www.linux-ntfs.org/ 116 * documentation: http://www.linux-ntfs.org/
117 */ 117 */
118enum { 118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), 119 USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001),
120 USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002), 120 USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002),
121 USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004), 121 USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004),
122 USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010), 122 USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010),
123 USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020), 123 USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020),
124 USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040), 124 USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040),
125 USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100), 125 USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100),
126 USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200), 126 USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200),
127 USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400), 127 USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400),
128 USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800), 128 USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800),
129 USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000), 129 USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000),
130 USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000), 130 USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000),
131 USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000), 131 USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000),
132 USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000), 132 USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000),
133 USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000), 133 USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000),
134 USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000), 134 USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000),
135 USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000), 135 USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000),
136 USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000), 136 USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000),
137 USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000), 137 USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000),
138 USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000), 138 USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000),
139 USN_REASON_CLOSE = const_cpu_to_le32(0x80000000), 139 USN_REASON_CLOSE = cpu_to_le32(0x80000000),
140}; 140};
141 141
142typedef le32 USN_REASON_FLAGS; 142typedef le32 USN_REASON_FLAGS;
@@ -148,9 +148,9 @@ typedef le32 USN_REASON_FLAGS;
148 * http://www.linux-ntfs.org/ 148 * http://www.linux-ntfs.org/
149 */ 149 */
150enum { 150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), 151 USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001),
152 USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002), 152 USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002),
153 USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004), 153 USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004),
154}; 154};
155 155
156typedef le32 USN_SOURCE_INFO_FLAGS; 156typedef le32 USN_SOURCE_INFO_FLAGS;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index eea1d24713ea..b606496b72ec 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -154,8 +154,9 @@ out:
154 return ret; 154 return ret;
155} 155}
156 156
157static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) 157static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
158{ 158{
159 struct page *page = vmf->page;
159 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 160 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
160 struct buffer_head *di_bh = NULL; 161 struct buffer_head *di_bh = NULL;
161 sigset_t blocked, oldset; 162 sigset_t blocked, oldset;
@@ -196,7 +197,8 @@ out:
196 ret2 = ocfs2_vm_op_unblock_sigs(&oldset); 197 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
197 if (ret2 < 0) 198 if (ret2 < 0)
198 mlog_errno(ret2); 199 mlog_errno(ret2);
199 200 if (ret)
201 ret = VM_FAULT_SIGBUS;
200 return ret; 202 return ret;
201} 203}
202 204
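
The ocfs2 hunks track a VM API change: ->page_mkwrite() now receives a struct vm_fault instead of the bare page, and is expected to return VM_FAULT_* codes rather than -errno values, which is why a nonzero ret is translated to VM_FAULT_SIGBUS before returning. A minimal sketch of the new callback shape (example_page_mkwrite is a hypothetical name):

    static int example_page_mkwrite(struct vm_area_struct *vma,
                                    struct vm_fault *vmf)
    {
            struct page *page = vmf->page;  /* page about to become writable */

            lock_page(page);
            /* ... allocate blocks / mark the page writable ... */
            unlock_page(page);
            return 0;                       /* or VM_FAULT_SIGBUS on error */
    }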
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5d2989e9dcc1..fa678abc9db1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -37,7 +37,7 @@ static int proc_match(int len, const char *name, struct proc_dir_entry *de)
37#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 37#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
38 38
39static ssize_t 39static ssize_t
40proc_file_read(struct file *file, char __user *buf, size_t nbytes, 40__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
41 loff_t *ppos) 41 loff_t *ppos)
42{ 42{
43 struct inode * inode = file->f_path.dentry->d_inode; 43 struct inode * inode = file->f_path.dentry->d_inode;
@@ -183,19 +183,47 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
183} 183}
184 184
185static ssize_t 185static ssize_t
186proc_file_read(struct file *file, char __user *buf, size_t nbytes,
187 loff_t *ppos)
188{
189 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
190 ssize_t rv = -EIO;
191
192 spin_lock(&pde->pde_unload_lock);
193 if (!pde->proc_fops) {
194 spin_unlock(&pde->pde_unload_lock);
195 return rv;
196 }
197 pde->pde_users++;
198 spin_unlock(&pde->pde_unload_lock);
199
200 rv = __proc_file_read(file, buf, nbytes, ppos);
201
202 pde_users_dec(pde);
203 return rv;
204}
205
206static ssize_t
186proc_file_write(struct file *file, const char __user *buffer, 207proc_file_write(struct file *file, const char __user *buffer,
187 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
188{ 209{
189 struct inode *inode = file->f_path.dentry->d_inode; 210 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
190 struct proc_dir_entry * dp; 211 ssize_t rv = -EIO;
191 212
192 dp = PDE(inode); 213 if (pde->write_proc) {
193 214 spin_lock(&pde->pde_unload_lock);
194 if (!dp->write_proc) 215 if (!pde->proc_fops) {
195 return -EIO; 216 spin_unlock(&pde->pde_unload_lock);
217 return rv;
218 }
219 pde->pde_users++;
220 spin_unlock(&pde->pde_unload_lock);
196 221
197 /* FIXME: does this routine need ppos? probably... */ 222 /* FIXME: does this routine need ppos? probably... */
198 return dp->write_proc(file, buffer, count, dp->data); 223 rv = pde->write_proc(file, buffer, count, pde->data);
224 pde_users_dec(pde);
225 }
226 return rv;
199} 227}
200 228
201 229
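
Both wrappers above follow the same pinning protocol: under pde_unload_lock, check that the entry still has its proc_fops (i.e. is not mid-removal), bump pde_users, drop the lock, call the handler, then let pde_users_dec() unpin and wake any remove_proc_entry() waiting for the count to reach zero. Condensed, the read-side guard is:

    spin_lock(&pde->pde_unload_lock);
    if (!pde->proc_fops) {                  /* entry is being removed */
            spin_unlock(&pde->pde_unload_lock);
            return -EIO;
    }
    pde->pde_users++;                       /* pin the entry */
    spin_unlock(&pde->pde_unload_lock);

    rv = __proc_file_read(file, buf, nbytes, ppos);  /* cannot go away */

    pde_users_dec(pde);                     /* unpin, maybe complete removal */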
@@ -307,6 +335,21 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
307/* 335/*
308 * Return an inode number between PROC_DYNAMIC_FIRST and 336 * Return an inode number between PROC_DYNAMIC_FIRST and
309 * 0xffffffff, or zero on failure. 337 * 0xffffffff, or zero on failure.
338 *
339 * Current inode allocations in the proc-fs (hex-numbers):
340 *
341 * 00000000 reserved
342 * 00000001-00000fff static entries (goners)
343 * 001 root-ino
344 *
345 * 00001000-00001fff unused
346 * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
347 * 80000000-efffffff unused
348 * f0000000-ffffffff dynamic entries
349 *
350 * Goal:
351 * Once we split the thing into several virtual filesystems,
352 * we will get rid of magical ranges (and this comment, BTW).
310 */ 353 */
311static unsigned int get_inode_number(void) 354static unsigned int get_inode_number(void)
312{ 355{
diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt
deleted file mode 100644
index 77212f938c2c..000000000000
--- a/fs/proc/inode-alloc.txt
+++ /dev/null
@@ -1,14 +0,0 @@
1Current inode allocations in the proc-fs (hex-numbers):
2
3 00000000 reserved
4 00000001-00000fff static entries (goners)
5 001 root-ino
6
7 00001000-00001fff unused
8 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
9 80000000-efffffff unused
10 f0000000-ffffffff dynamic entries
11
12Goal:
13 a) once we'll split the thing into several virtual filesystems we
14 will get rid of magical ranges (and this file, BTW).
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d8bb5c671f42..d78ade305541 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,11 +58,8 @@ static void proc_delete_inode(struct inode *inode)
58 58
59 /* Let go of any associated proc directory entry */ 59 /* Let go of any associated proc directory entry */
60 de = PROC_I(inode)->pde; 60 de = PROC_I(inode)->pde;
61 if (de) { 61 if (de)
62 if (de->owner)
63 module_put(de->owner);
64 de_put(de); 62 de_put(de);
65 }
66 if (PROC_I(inode)->sysctl) 63 if (PROC_I(inode)->sysctl)
67 sysctl_head_put(PROC_I(inode)->sysctl); 64 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 65 clear_inode(inode);
@@ -127,7 +124,7 @@ static void __pde_users_dec(struct proc_dir_entry *pde)
127 complete(pde->pde_unload_completion); 124 complete(pde->pde_unload_completion);
128} 125}
129 126
130static void pde_users_dec(struct proc_dir_entry *pde) 127void pde_users_dec(struct proc_dir_entry *pde)
131{ 128{
132 spin_lock(&pde->pde_unload_lock); 129 spin_lock(&pde->pde_unload_lock);
133 __pde_users_dec(pde); 130 __pde_users_dec(pde);
@@ -449,12 +446,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
449{ 446{
450 struct inode * inode; 447 struct inode * inode;
451 448
452 if (!try_module_get(de->owner))
453 goto out_mod;
454
455 inode = iget_locked(sb, ino); 449 inode = iget_locked(sb, ino);
456 if (!inode) 450 if (!inode)
457 goto out_ino; 451 return NULL;
458 if (inode->i_state & I_NEW) { 452 if (inode->i_state & I_NEW) {
459 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 453 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
460 PROC_I(inode)->fd = 0; 454 PROC_I(inode)->fd = 0;
@@ -485,16 +479,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
485 } 479 }
486 } 480 }
487 unlock_new_inode(inode); 481 unlock_new_inode(inode);
488 } else { 482 } else
489 module_put(de->owner);
490 de_put(de); 483 de_put(de);
491 }
492 return inode; 484 return inode;
493
494out_ino:
495 module_put(de->owner);
496out_mod:
497 return NULL;
498} 485}
499 486
500int proc_fill_super(struct super_block *s) 487int proc_fill_super(struct super_block *s)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cd53ff838498..f6db9618a888 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -91,3 +91,4 @@ struct pde_opener {
91 int (*release)(struct inode *, struct file *); 91 int (*release)(struct inode *, struct file *);
92 struct list_head lh; 92 struct list_head lh;
93}; 93};
94void pde_users_dec(struct proc_dir_entry *pde);
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d153946d6d15..83adcc869437 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -144,17 +144,12 @@ void proc_tty_register_driver(struct tty_driver *driver)
144{ 144{
145 struct proc_dir_entry *ent; 145 struct proc_dir_entry *ent;
146 146
147 if (!driver->ops->read_proc || !driver->driver_name || 147 if (!driver->driver_name || driver->proc_entry ||
148 driver->proc_entry) 148 !driver->ops->proc_fops)
149 return; 149 return;
150 150
151 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 151 ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
152 if (!ent) 152 driver->ops->proc_fops, driver);
153 return;
154 ent->read_proc = driver->ops->read_proc;
155 ent->owner = driver->owner;
156 ent->data = driver;
157
158 driver->proc_entry = ent; 153 driver->proc_entry = ent;
159} 154}
160 155
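
proc_create_data() registers the entry, its file_operations and its private data in a single call, so there is no window where the entry is visible without handlers, and the ->owner/read_proc bookkeeping disappears. A usage sketch under the same assumptions as the hunk above (example_proc_fops is an illustrative name):

    ent = proc_create_data("example", 0, proc_tty_driver,
                           &example_proc_fops, driver);
    if (!ent)
            return;         /* registration failed; nothing to undo */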
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 94063840832a..b0ae0be4801f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -693,8 +693,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
693 goto out_pages; 693 goto out_pages;
694 } 694 }
695 695
696 pm.out = (u64 *)buf; 696 pm.out = (u64 __user *)buf;
697 pm.end = (u64 *)(buf + count); 697 pm.end = (u64 __user *)(buf + count);
698 698
699 pagemap_walk.pmd_entry = pagemap_pte_range; 699 pagemap_walk.pmd_entry = pagemap_pte_range;
700 pagemap_walk.pte_hole = pagemap_pte_hole; 700 pagemap_walk.pte_hole = pagemap_pte_hole;
@@ -720,9 +720,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
720 if (ret == PM_END_OF_BUFFER) 720 if (ret == PM_END_OF_BUFFER)
721 ret = 0; 721 ret = 0;
722 /* don't need mmap_sem for these, but this looks cleaner */ 722 /* don't need mmap_sem for these, but this looks cleaner */
723 *ppos += (char *)pm.out - buf; 723 *ppos += (char __user *)pm.out - buf;
724 if (!ret) 724 if (!ret)
725 ret = (char *)pm.out - buf; 725 ret = (char __user *)pm.out - buf;
726 726
727out_pages: 727out_pages:
728 for (; pagecount; pagecount--) { 728 for (; pagecount; pagecount--) {
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index df26aa88fa47..0c10a0b3f146 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -1,45 +1,43 @@
1#include <linux/fs.h>
1#include <linux/init.h> 2#include <linux/init.h>
2#include <linux/proc_fs.h> 3#include <linux/proc_fs.h>
3#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/seq_file.h>
4#include <linux/time.h> 6#include <linux/time.h>
5#include <asm/cputime.h> 7#include <asm/cputime.h>
6 8
7static int proc_calc_metrics(char *page, char **start, off_t off, 9static int uptime_proc_show(struct seq_file *m, void *v)
8 int count, int *eof, int len)
9{
10 if (len <= off + count)
11 *eof = 1;
12 *start = page + off;
13 len -= off;
14 if (len > count)
15 len = count;
16 if (len < 0)
17 len = 0;
18 return len;
19}
20
21static int uptime_read_proc(char *page, char **start, off_t off, int count,
22 int *eof, void *data)
23{ 10{
24 struct timespec uptime; 11 struct timespec uptime;
25 struct timespec idle; 12 struct timespec idle;
26 int len;
27 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 13 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
28 14
29 do_posix_clock_monotonic_gettime(&uptime); 15 do_posix_clock_monotonic_gettime(&uptime);
30 monotonic_to_bootbased(&uptime); 16 monotonic_to_bootbased(&uptime);
31 cputime_to_timespec(idletime, &idle); 17 cputime_to_timespec(idletime, &idle);
32 len = sprintf(page, "%lu.%02lu %lu.%02lu\n", 18 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
33 (unsigned long) uptime.tv_sec, 19 (unsigned long) uptime.tv_sec,
34 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 20 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
35 (unsigned long) idle.tv_sec, 21 (unsigned long) idle.tv_sec,
36 (idle.tv_nsec / (NSEC_PER_SEC / 100))); 22 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
37 return proc_calc_metrics(page, start, off, count, eof, len); 23 return 0;
38} 24}
39 25
26static int uptime_proc_open(struct inode *inode, struct file *file)
27{
28 return single_open(file, uptime_proc_show, NULL);
29}
30
31static const struct file_operations uptime_proc_fops = {
32 .open = uptime_proc_open,
33 .read = seq_read,
34 .llseek = seq_lseek,
35 .release = single_release,
36};
37
40static int __init proc_uptime_init(void) 38static int __init proc_uptime_init(void)
41{ 39{
42 create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL); 40 proc_create("uptime", 0, NULL, &uptime_proc_fops);
43 return 0; 41 return 0;
44} 42}
45module_init(proc_uptime_init); 43module_init(proc_uptime_init);
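
The uptime conversion is the canonical seq_file recipe: a show() function that only emits output, single_open() to bind it, and the stock seq_read/seq_lseek/single_release for everything else, which eliminates the fragile offset/eof arithmetic of proc_calc_metrics(). Any single-record /proc file reduces to the same skeleton (example_proc_show is hypothetical):

    static int example_proc_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "answer: %d\n", 42);
            return 0;
    }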
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 995ef1d6686c..ebb2c417912c 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -59,7 +59,6 @@ const struct inode_operations ramfs_file_inode_operations = {
59 */ 59 */
60int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 60int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
61{ 61{
62 struct pagevec lru_pvec;
63 unsigned long npages, xpages, loop, limit; 62 unsigned long npages, xpages, loop, limit;
64 struct page *pages; 63 struct page *pages;
65 unsigned order; 64 unsigned order;
@@ -102,24 +101,20 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
102 memset(data, 0, newsize); 101 memset(data, 0, newsize);
103 102
104 /* attach all the pages to the inode's address space */ 103 /* attach all the pages to the inode's address space */
105 pagevec_init(&lru_pvec, 0);
106 for (loop = 0; loop < npages; loop++) { 104 for (loop = 0; loop < npages; loop++) {
107 struct page *page = pages + loop; 105 struct page *page = pages + loop;
108 106
109 ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); 107 ret = add_to_page_cache_lru(page, inode->i_mapping, loop,
108 GFP_KERNEL);
110 if (ret < 0) 109 if (ret < 0)
111 goto add_error; 110 goto add_error;
112 111
113 if (!pagevec_add(&lru_pvec, page))
114 __pagevec_lru_add_file(&lru_pvec);
115
116 /* prevent the page from being discarded on memory pressure */ 112 /* prevent the page from being discarded on memory pressure */
117 SetPageDirty(page); 113 SetPageDirty(page);
118 114
119 unlock_page(page); 115 unlock_page(page);
120 } 116 }
121 117
122 pagevec_lru_add_file(&lru_pvec);
123 return 0; 118 return 0;
124 119
125 fsize_exceeded: 120 fsize_exceeded:
@@ -128,10 +123,8 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
128 return -EFBIG; 123 return -EFBIG;
129 124
130 add_error: 125 add_error:
131 pagevec_lru_add_file(&lru_pvec); 126 while (loop < npages)
132 page_cache_release(pages + loop); 127 __free_page(pages + loop++);
133 for (loop++; loop < npages; loop++)
134 __free_page(pages + loop);
135 return ret; 128 return ret;
136} 129}
137 130
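
add_to_page_cache_lru() combines page-cache insertion with LRU placement, which is what lets the local pagevec batching go away; note the error path also changes shape, freeing the current page together with the not-yet-attached remainder instead of releasing it separately. The replacement call is one line:

    /* inserts into the page cache and the file LRU in one step */
    ret = add_to_page_cache_lru(page, inode->i_mapping, loop, GFP_KERNEL);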
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b7e6ac706b87..a404fb88e456 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -33,12 +33,15 @@
33#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
34#include <linux/ramfs.h> 34#include <linux/ramfs.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/parser.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include "internal.h" 38#include "internal.h"
38 39
39/* some random number */ 40/* some random number */
40#define RAMFS_MAGIC 0x858458f6 41#define RAMFS_MAGIC 0x858458f6
41 42
43#define RAMFS_DEFAULT_MODE 0755
44
42static const struct super_operations ramfs_ops; 45static const struct super_operations ramfs_ops;
43static const struct inode_operations ramfs_dir_inode_operations; 46static const struct inode_operations ramfs_dir_inode_operations;
44 47
@@ -158,12 +161,75 @@ static const struct inode_operations ramfs_dir_inode_operations = {
158static const struct super_operations ramfs_ops = { 161static const struct super_operations ramfs_ops = {
159 .statfs = simple_statfs, 162 .statfs = simple_statfs,
160 .drop_inode = generic_delete_inode, 163 .drop_inode = generic_delete_inode,
164 .show_options = generic_show_options,
165};
166
167struct ramfs_mount_opts {
168 umode_t mode;
169};
170
171enum {
172 Opt_mode,
173 Opt_err
174};
175
176static const match_table_t tokens = {
177 {Opt_mode, "mode=%o"},
178 {Opt_err, NULL}
179};
180
181struct ramfs_fs_info {
182 struct ramfs_mount_opts mount_opts;
161}; 183};
162 184
185static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
186{
187 substring_t args[MAX_OPT_ARGS];
188 int option;
189 int token;
190 char *p;
191
192 opts->mode = RAMFS_DEFAULT_MODE;
193
194 while ((p = strsep(&data, ",")) != NULL) {
195 if (!*p)
196 continue;
197
198 token = match_token(p, tokens, args);
199 switch (token) {
200 case Opt_mode:
201 if (match_octal(&args[0], &option))
202 return -EINVAL;
203 opts->mode = option & S_IALLUGO;
204 break;
205 default:
206 printk(KERN_ERR "ramfs: bad mount option: %s\n", p);
207 return -EINVAL;
208 }
209 }
210
211 return 0;
212}
213
163static int ramfs_fill_super(struct super_block * sb, void * data, int silent) 214static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
164{ 215{
165 struct inode * inode; 216 struct ramfs_fs_info *fsi;
166 struct dentry * root; 217 struct inode *inode = NULL;
218 struct dentry *root;
219 int err;
220
221 save_mount_options(sb, data);
222
223 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
224 if (!fsi) {
225 err = -ENOMEM;
226 goto fail;
227 }
228 sb->s_fs_info = fsi;
229
230 err = ramfs_parse_options(data, &fsi->mount_opts);
231 if (err)
232 goto fail;
167 233
168 sb->s_maxbytes = MAX_LFS_FILESIZE; 234 sb->s_maxbytes = MAX_LFS_FILESIZE;
169 sb->s_blocksize = PAGE_CACHE_SIZE; 235 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -171,17 +237,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
171 sb->s_magic = RAMFS_MAGIC; 237 sb->s_magic = RAMFS_MAGIC;
172 sb->s_op = &ramfs_ops; 238 sb->s_op = &ramfs_ops;
173 sb->s_time_gran = 1; 239 sb->s_time_gran = 1;
174 inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0); 240 inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0);
175 if (!inode) 241 if (!inode) {
176 return -ENOMEM; 242 err = -ENOMEM;
243 goto fail;
244 }
177 245
178 root = d_alloc_root(inode); 246 root = d_alloc_root(inode);
179 if (!root) { 247 if (!root) {
180 iput(inode); 248 err = -ENOMEM;
181 return -ENOMEM; 249 goto fail;
182 } 250 }
183 sb->s_root = root; 251 sb->s_root = root;
184 return 0; 252 return 0;
253fail:
254 kfree(fsi);
255 iput(inode);
256 return err;
185} 257}
186 258
187int ramfs_get_sb(struct file_system_type *fs_type, 259int ramfs_get_sb(struct file_system_type *fs_type,
@@ -197,10 +269,16 @@ static int rootfs_get_sb(struct file_system_type *fs_type,
197 mnt); 269 mnt);
198} 270}
199 271
272static void ramfs_kill_sb(struct super_block *sb)
273{
274 kfree(sb->s_fs_info);
275 kill_litter_super(sb);
276}
277
200static struct file_system_type ramfs_fs_type = { 278static struct file_system_type ramfs_fs_type = {
201 .name = "ramfs", 279 .name = "ramfs",
202 .get_sb = ramfs_get_sb, 280 .get_sb = ramfs_get_sb,
203 .kill_sb = kill_litter_super, 281 .kill_sb = ramfs_kill_sb,
204}; 282};
205static struct file_system_type rootfs_fs_type = { 283static struct file_system_type rootfs_fs_type = {
206 .name = "rootfs", 284 .name = "rootfs",
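
With the parser in place, ramfs honors a mount-time root mode instead of the hardwired 0755; the value is masked with S_IALLUGO, and the per-superblock ramfs_fs_info that carries it is released by the new ramfs_kill_sb(). A usage sketch:

    mount -t ramfs -o mode=0700 ramfs /mnt/scratch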
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 0eb7ac080484..7c5ab6330dd6 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,10 +7,10 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o 10 item_ops.o ioctl.o procfs.o xattr.o
11 11
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 12ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr.o xattr_user.o xattr_trusted.o 13reiserfs-objs += xattr_user.o xattr_trusted.o
14endif 14endif
15 15
16ifeq ($(CONFIG_REISERFS_FS_SECURITY),y) 16ifeq ($(CONFIG_REISERFS_FS_SECURITY),y)
diff --git a/fs/reiserfs/README b/fs/reiserfs/README
index 90e1670e4e6f..14e8c9d460e5 100644
--- a/fs/reiserfs/README
+++ b/fs/reiserfs/README
@@ -1,4 +1,4 @@
1[LICENSING] 1[LICENSING]
2 2
3ReiserFS is hereby licensed under the GNU General 3ReiserFS is hereby licensed under the GNU General
4Public License version 2. 4Public License version 2.
@@ -31,7 +31,7 @@ the GPL as not allowing those additional licensing options, you read
31it wrongly, and Richard Stallman agrees with me, when carefully read 31it wrongly, and Richard Stallman agrees with me, when carefully read
32you can see that those restrictions on additional terms do not apply 32you can see that those restrictions on additional terms do not apply
33to the owner of the copyright, and my interpretation of this shall 33to the owner of the copyright, and my interpretation of this shall
34govern for this license. 34govern for this license.
35 35
36Finally, nothing in this license shall be interpreted to allow you to 36Finally, nothing in this license shall be interpreted to allow you to
37fail to fairly credit me, or to remove my credits, without my 37fail to fairly credit me, or to remove my credits, without my
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index f32d1425cc9f..e716161ab325 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -40,8 +40,8 @@
40 40
41#define SET_OPTION(optname) \ 41#define SET_OPTION(optname) \
42 do { \ 42 do { \
43 reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \ 43 reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
44 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \ 44 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
45 } while(0) 45 } while(0)
46#define TEST_OPTION(optname, s) \ 46#define TEST_OPTION(optname, s) \
47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) 47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
@@ -64,9 +64,9 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
64 unsigned int bmap_count = reiserfs_bmap_count(s); 64 unsigned int bmap_count = reiserfs_bmap_count(s);
65 65
66 if (block == 0 || block >= SB_BLOCK_COUNT(s)) { 66 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
67 reiserfs_warning(s, 67 reiserfs_error(s, "vs-4010",
68 "vs-4010: is_reusable: block number is out of range %lu (%u)", 68 "block number is out of range %lu (%u)",
69 block, SB_BLOCK_COUNT(s)); 69 block, SB_BLOCK_COUNT(s));
70 return 0; 70 return 0;
71 } 71 }
72 72
@@ -79,31 +79,30 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
79 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; 79 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
80 if (block >= bmap1 && 80 if (block >= bmap1 &&
81 block <= bmap1 + bmap_count) { 81 block <= bmap1 + bmap_count) {
82 reiserfs_warning(s, "vs: 4019: is_reusable: " 82 reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
83 "bitmap block %lu(%u) can't be freed or reused", 83 "can't be freed or reused",
84 block, bmap_count); 84 block, bmap_count);
85 return 0; 85 return 0;
86 } 86 }
87 } else { 87 } else {
88 if (offset == 0) { 88 if (offset == 0) {
89 reiserfs_warning(s, "vs: 4020: is_reusable: " 89 reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
90 "bitmap block %lu(%u) can't be freed or reused", 90 "can't be freed or reused",
91 block, bmap_count); 91 block, bmap_count);
92 return 0; 92 return 0;
93 } 93 }
94 } 94 }
95 95
96 if (bmap >= bmap_count) { 96 if (bmap >= bmap_count) {
97 reiserfs_warning(s, 97 reiserfs_error(s, "vs-4030", "bitmap for requested block "
98 "vs-4030: is_reusable: there is no so many bitmap blocks: " 98 "is out of range: block=%lu, bitmap_nr=%u",
99 "block=%lu, bitmap_nr=%u", block, bmap); 99 block, bmap);
100 return 0; 100 return 0;
101 } 101 }
102 102
103 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) { 103 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
104 reiserfs_warning(s, 104 reiserfs_error(s, "vs-4050", "this is root block (%u), "
105 "vs-4050: is_reusable: this is root block (%u), " 105 "it must be busy", SB_ROOT_BLOCK(s));
106 "it must be busy", SB_ROOT_BLOCK(s));
107 return 0; 106 return 0;
108 } 107 }
109 108
@@ -154,8 +153,8 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
154/* - I mean `a window of zero bits' as in description of this function - Zam. */ 153/* - I mean `a window of zero bits' as in description of this function - Zam. */
155 154
156 if (!bi) { 155 if (!bi) {
157 reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d", 156 reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
158 bmap_n); 157 "for bitmap %d", bmap_n);
159 return 0; 158 return 0;
160 } 159 }
161 160
@@ -400,11 +399,8 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
400 get_bit_address(s, block, &nr, &offset); 399 get_bit_address(s, block, &nr, &offset);
401 400
402 if (nr >= reiserfs_bmap_count(s)) { 401 if (nr >= reiserfs_bmap_count(s)) {
403 reiserfs_warning(s, "vs-4075: reiserfs_free_block: " 402 reiserfs_error(s, "vs-4075", "block %lu is out of range",
404 "block %lu is out of range on %s " 403 block);
405 "(nr=%u,max=%u)", block,
406 reiserfs_bdevname(s), nr,
407 reiserfs_bmap_count(s));
408 return; 404 return;
409 } 405 }
410 406
@@ -416,9 +412,8 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
416 412
417 /* clear bit for the given block in bit map */ 413 /* clear bit for the given block in bit map */
418 if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) { 414 if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
419 reiserfs_warning(s, "vs-4080: reiserfs_free_block: " 415 reiserfs_error(s, "vs-4080",
420 "free_block (%s:%lu)[dev:blocknr]: bit already cleared", 416 "block %lu: bit already cleared", block);
421 reiserfs_bdevname(s), block);
422 } 417 }
423 apbi[nr].free_count++; 418 apbi[nr].free_count++;
424 journal_mark_dirty(th, s, bmbh); 419 journal_mark_dirty(th, s, bmbh);
@@ -445,7 +440,7 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
445 return; 440 return;
446 441
447 if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { 442 if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
448 reiserfs_panic(th->t_super, "bitmap-4072", 443 reiserfs_error(th->t_super, "bitmap-4072",
449 "Trying to free block outside file system " 444 "Trying to free block outside file system "
450 "boundaries (%lu > %lu)", 445 "boundaries (%lu > %lu)",
451 block, sb_block_count(REISERFS_SB(s)->s_rs)); 446 block, sb_block_count(REISERFS_SB(s)->s_rs));
@@ -477,9 +472,8 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
477 BUG_ON(!th->t_trans_id); 472 BUG_ON(!th->t_trans_id);
478#ifdef CONFIG_REISERFS_CHECK 473#ifdef CONFIG_REISERFS_CHECK
479 if (ei->i_prealloc_count < 0) 474 if (ei->i_prealloc_count < 0)
480 reiserfs_warning(th->t_super, 475 reiserfs_error(th->t_super, "zam-4001",
481 "zam-4001:%s: inode has negative prealloc blocks count.", 476 "inode has negative prealloc blocks count.");
482 __func__);
483#endif 477#endif
484 while (ei->i_prealloc_count > 0) { 478 while (ei->i_prealloc_count > 0) {
485 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); 479 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
@@ -515,9 +509,9 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
515 i_prealloc_list); 509 i_prealloc_list);
516#ifdef CONFIG_REISERFS_CHECK 510#ifdef CONFIG_REISERFS_CHECK
517 if (!ei->i_prealloc_count) { 511 if (!ei->i_prealloc_count) {
518 reiserfs_warning(th->t_super, 512 reiserfs_error(th->t_super, "zam-4001",
519 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", 513 "inode is in prealloc list but has "
520 __func__); 514 "no preallocated blocks.");
521 } 515 }
522#endif 516#endif
523 __discard_prealloc(th, ei); 517 __discard_prealloc(th, ei);
@@ -631,12 +625,12 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
631 continue; 625 continue;
632 } 626 }
633 627
634 reiserfs_warning(s, "zam-4001: %s : unknown option - %s", 628 reiserfs_warning(s, "zam-4001", "unknown option - %s",
635 __func__, this_char); 629 this_char);
636 return 1; 630 return 1;
637 } 631 }
638 632
639 reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s)); 633 reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
640 return 0; 634 return 0;
641} 635}
642 636
@@ -1221,7 +1215,9 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb,
1221 unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); 1215 unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
1222 1216
1223 /* The first bit must ALWAYS be 1 */ 1217 /* The first bit must ALWAYS be 1 */
1224 BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)); 1218 if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
1219 reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
1220 "corrupted: first bit must be 1", bh->b_blocknr);
1225 1221
1226 info->free_count = 0; 1222 info->free_count = 0;
1227 1223
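
The bitmap.c conversions all follow the new reiserfs message convention: the unique identifier ("vs-4010", "jdm-4055", ...) becomes its own argument instead of being embedded in the format string, and genuine corruption cases are upgraded from reiserfs_warning() to reiserfs_error(). A hypothetical call in the new style ("vs-9999" is an invented id):

    reiserfs_error(sb, "vs-9999", "block %lu out of range (max %u)",
                   block, SB_BLOCK_COUNT(sb));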
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index e6b03d2020c1..67a80d7e59e2 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -41,10 +41,10 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
41 41
42#define store_ih(where,what) copy_item_head (where, what) 42#define store_ih(where,what) copy_item_head (where, what)
43 43
44// 44int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
45static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 45 filldir_t filldir, loff_t *pos)
46{ 46{
47 struct inode *inode = filp->f_path.dentry->d_inode; 47 struct inode *inode = dentry->d_inode;
48 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 48 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
49 INITIALIZE_PATH(path_to_entry); 49 INITIALIZE_PATH(path_to_entry);
50 struct buffer_head *bh; 50 struct buffer_head *bh;
@@ -64,13 +64,9 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
64 64
65 /* form key for search the next directory entry using f_pos field of 65 /* form key for search the next directory entry using f_pos field of
66 file structure */ 66 file structure */
67 make_cpu_key(&pos_key, inode, 67 make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
68 (filp->f_pos) ? (filp->f_pos) : DOT_OFFSET, TYPE_DIRENTRY,
69 3);
70 next_pos = cpu_key_k_offset(&pos_key); 68 next_pos = cpu_key_k_offset(&pos_key);
71 69
72 /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos); */
73
74 path_to_entry.reada = PATH_READA; 70 path_to_entry.reada = PATH_READA;
75 while (1) { 71 while (1) {
76 research: 72 research:
@@ -144,7 +140,7 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
144 /* Ignore the .reiserfs_priv entry */ 140 /* Ignore the .reiserfs_priv entry */
145 if (reiserfs_xattrs(inode->i_sb) && 141 if (reiserfs_xattrs(inode->i_sb) &&
146 !old_format_only(inode->i_sb) && 142 !old_format_only(inode->i_sb) &&
147 filp->f_path.dentry == inode->i_sb->s_root && 143 dentry == inode->i_sb->s_root &&
148 REISERFS_SB(inode->i_sb)->priv_root && 144 REISERFS_SB(inode->i_sb)->priv_root &&
149 REISERFS_SB(inode->i_sb)->priv_root->d_inode 145 REISERFS_SB(inode->i_sb)->priv_root->d_inode
150 && deh_objectid(deh) == 146 && deh_objectid(deh) ==
@@ -156,7 +152,7 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
156 } 152 }
157 153
158 d_off = deh_offset(deh); 154 d_off = deh_offset(deh);
159 filp->f_pos = d_off; 155 *pos = d_off;
160 d_ino = deh_objectid(deh); 156 d_ino = deh_objectid(deh);
161 if (d_reclen <= 32) { 157 if (d_reclen <= 32) {
162 local_buf = small_buf; 158 local_buf = small_buf;
@@ -223,15 +219,21 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
223 219
224 } /* while */ 220 } /* while */
225 221
226 end: 222end:
227 filp->f_pos = next_pos; 223 *pos = next_pos;
228 pathrelse(&path_to_entry); 224 pathrelse(&path_to_entry);
229 reiserfs_check_path(&path_to_entry); 225 reiserfs_check_path(&path_to_entry);
230 out: 226out:
231 reiserfs_write_unlock(inode->i_sb); 227 reiserfs_write_unlock(inode->i_sb);
232 return ret; 228 return ret;
233} 229}
234 230
231static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir)
232{
233 struct dentry *dentry = file->f_path.dentry;
234 return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos);
235}
236
235/* compose directory item containing "." and ".." entries (entries are 237/* compose directory item containing "." and ".." entries (entries are
236 not aligned to 4 byte boundary) */ 238 not aligned to 4 byte boundary) */
237/* the last four params are LE */ 239/* the last four params are LE */
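
Splitting reiserfs_readdir_dentry() out of the file_operations entry point lets internal code walk a directory with nothing but a dentry and its own position cookie, presumably for the xattr code this series now builds unconditionally (see the Makefile hunk above). A hypothetical internal caller (fill_one_entry stands in for a filldir_t callback):

    loff_t pos = 0;         /* directory position cookie, caller-owned */
    err = reiserfs_readdir_dentry(dentry, buf, fill_one_entry, &pos);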
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 2f87f5b14630..4beb964a2a3e 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -29,6 +29,43 @@ struct tree_balance *cur_tb = NULL; /* detects whether more than one
29 is interrupting do_balance */ 29 is interrupting do_balance */
30#endif 30#endif
31 31
32static inline void buffer_info_init_left(struct tree_balance *tb,
33 struct buffer_info *bi)
34{
35 bi->tb = tb;
36 bi->bi_bh = tb->L[0];
37 bi->bi_parent = tb->FL[0];
38 bi->bi_position = get_left_neighbor_position(tb, 0);
39}
40
41static inline void buffer_info_init_right(struct tree_balance *tb,
42 struct buffer_info *bi)
43{
44 bi->tb = tb;
45 bi->bi_bh = tb->R[0];
46 bi->bi_parent = tb->FR[0];
47 bi->bi_position = get_right_neighbor_position(tb, 0);
48}
49
50static inline void buffer_info_init_tbS0(struct tree_balance *tb,
51 struct buffer_info *bi)
52{
53 bi->tb = tb;
54 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
55 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
56 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
57}
58
59static inline void buffer_info_init_bh(struct tree_balance *tb,
60 struct buffer_info *bi,
61 struct buffer_head *bh)
62{
63 bi->tb = tb;
64 bi->bi_bh = bh;
65 bi->bi_parent = NULL;
66 bi->bi_position = 0;
67}
68
32inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 69inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
33 struct buffer_head *bh, int flag) 70 struct buffer_head *bh, int flag)
34{ 71{
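
The four buffer_info_init_*() helpers added above exist purely to collapse the repeated four-line buffer_info setup blocks that the following hunks delete; each call site shrinks to a single line, e.g. (trailing arguments abbreviated):

    struct buffer_info bi;

    buffer_info_init_left(tb, &bi);     /* was: four bi.* assignments */
    leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, /* ... */);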
@@ -39,21 +76,21 @@ inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
39#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 76#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
40#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 77#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
41 78
42/* summary: 79/* summary:
43 if deleting something ( tb->insert_size[0] < 0 ) 80 if deleting something ( tb->insert_size[0] < 0 )
44 return(balance_leaf_when_delete()); (flag d handled here) 81 return(balance_leaf_when_delete()); (flag d handled here)
45 else 82 else
46 if lnum is larger than 0 we put items into the left node 83 if lnum is larger than 0 we put items into the left node
47 if rnum is larger than 0 we put items into the right node 84 if rnum is larger than 0 we put items into the right node
48 if snum1 is larger than 0 we put items into the new node s1 85 if snum1 is larger than 0 we put items into the new node s1
49 if snum2 is larger than 0 we put items into the new node s2 86 if snum2 is larger than 0 we put items into the new node s2
50Note that all *num* count new items being created. 87Note that all *num* count new items being created.
51 88
52It would be easier to read balance_leaf() if each of these summary 89It would be easier to read balance_leaf() if each of these summary
53lines was a separate procedure rather than being inlined. I think 90lines was a separate procedure rather than being inlined. I think
54that there are many passages here and in balance_leaf_when_delete() in 91that there are many passages here and in balance_leaf_when_delete() in
55which two calls to one procedure can replace two passages, and it 92which two calls to one procedure can replace two passages, and it
56might save cache space and improve software maintenance costs to do so. 93might save cache space and improve software maintenance costs to do so.
57 94
58Vladimir made the perceptive comment that we should offload most of 95Vladimir made the perceptive comment that we should offload most of
59the decision making in this function into fix_nodes/check_balance, and 96the decision making in this function into fix_nodes/check_balance, and
@@ -86,6 +123,7 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
86 "PAP-12010: tree can not be empty"); 123 "PAP-12010: tree can not be empty");
87 124
88 ih = B_N_PITEM_HEAD(tbS0, item_pos); 125 ih = B_N_PITEM_HEAD(tbS0, item_pos);
126 buffer_info_init_tbS0(tb, &bi);
89 127
90 /* Delete or truncate the item */ 128 /* Delete or truncate the item */
91 129
@@ -96,10 +134,6 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
96 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 134 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
97 -tb->insert_size[0], ih); 135 -tb->insert_size[0], ih);
98 136
99 bi.tb = tb;
100 bi.bi_bh = tbS0;
101 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
102 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
103 leaf_delete_items(&bi, 0, item_pos, 1, -1); 137 leaf_delete_items(&bi, 0, item_pos, 1, -1);
104 138
105 if (!item_pos && tb->CFL[0]) { 139 if (!item_pos && tb->CFL[0]) {
@@ -121,10 +155,6 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
121 break; 155 break;
122 156
123 case M_CUT:{ /* cut item in S[0] */ 157 case M_CUT:{ /* cut item in S[0] */
124 bi.tb = tb;
125 bi.bi_bh = tbS0;
126 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
127 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
128 if (is_direntry_le_ih(ih)) { 158 if (is_direntry_le_ih(ih)) {
129 159
130 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ 160 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
@@ -153,8 +183,8 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
153 183
154 default: 184 default:
155 print_cur_tb("12040"); 185 print_cur_tb("12040");
156 reiserfs_panic(tb->tb_sb, 186 reiserfs_panic(tb->tb_sb, "PAP-12040",
157 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", 187 "unexpected mode: %s(%d)",
158 (flag == 188 (flag ==
159 M_PASTE) ? "PASTE" : ((flag == 189 M_PASTE) ? "PASTE" : ((flag ==
160 M_INSERT) ? "INSERT" : 190 M_INSERT) ? "INSERT" :
@@ -258,15 +288,15 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 )
 {
 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
 of the affected item */
 struct buffer_info bi;
 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
 int snum[2]; /* number of items that will be placed
 into S_new (includes partially shifted
 items) */
 int sbytes[2]; /* if an item is partially shifted into S_new then
 if it is a directory item
 it is the number of entries from the item that are shifted into S_new
 else
 it is the number of bytes from the item that are shifted into S_new
@@ -325,11 +355,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 ih_item_len(ih));
 
 /* Insert new item into L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent = tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_insert_into_buf(&bi,
 n + item_pos -
 ret_val, ih, body,
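
The left-neighbor variant, again a sketch reconstructed from the assignments removed above rather than from the patch's own definition:

static inline void buffer_info_init_left(struct tree_balance *tb,
                                         struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = tb->L[0];
	bi->bi_parent = tb->FL[0];
	bi->bi_position = get_left_neighbor_position(tb, 0);
}
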
@@ -369,11 +395,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 leaf_shift_left(tb, tb->lnum[0] - 1,
 tb->lbytes);
 /* Insert new item into L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent = tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_insert_into_buf(&bi,
 n + item_pos -
 ret_val, ih, body,
@@ -429,13 +451,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 
 /* Append given directory entry to directory item */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent =
-tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position
-(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_paste_in_buffer
 (&bi,
 n + item_pos -
@@ -449,8 +465,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 /* when we have merge directory item, pos_in_item has been changed too */
 
 /* paste new directory entry. 1 is entry number */
-leaf_paste_entries(bi.
-bi_bh,
+leaf_paste_entries(&bi,
 n +
 item_pos
 -
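
Note that leaf_paste_entries() now takes &bi rather than bi.bi_bh: the callee receives the whole buffer_info, so it can reach the tree_balance when dirtying the buffer. Judging only from the call sites in this diff, the prototype presumably changed along these lines (every parameter past the first is illustrative, not confirmed by this hunk):

/* Assumed new prototype; only the first-argument change is visible
 * in this diff. */
void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
                        int new_entry_count,
                        struct reiserfs_de_head *new_dehs,
                        const char *records, int paste_size);
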
@@ -524,13 +539,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 (tbS0,
 item_pos)));
 /* Append to body of item in L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent =
-tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position
-(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_paste_in_buffer
 (&bi,
 n + item_pos -
@@ -681,11 +690,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 leaf_shift_left(tb, tb->lnum[0],
 tb->lbytes);
 /* Append to body of item in L[0] */
-bi.tb = tb;
-bi.bi_bh = tb->L[0];
-bi.bi_parent = tb->FL[0];
-bi.bi_position =
-get_left_neighbor_position(tb, 0);
+buffer_info_init_left(tb, &bi);
 leaf_paste_in_buffer(&bi,
 n + item_pos -
 ret_val,
@@ -699,7 +704,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 n + item_pos -
 ret_val);
 if (is_direntry_le_ih(pasted))
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 n +
 item_pos -
 ret_val,
@@ -722,8 +727,9 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 break;
 default: /* cases d and t */
-reiserfs_panic(tb->tb_sb,
-"PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
+reiserfs_panic(tb->tb_sb, "PAP-12130",
+"lnum > 0: unexpected mode: "
+" %s(%d)",
 (flag ==
 M_DELETE) ? "DELETE" : ((flag ==
 M_CUT)
@@ -776,11 +782,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 set_le_ih_k_offset(ih, offset);
 put_ih_item_len(ih, tb->rbytes);
 /* Insert part of the item into R[0] */
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position(tb, 0);
+buffer_info_init_right(tb, &bi);
 if ((old_len - tb->rbytes) > zeros_num) {
 r_zeros_number = 0;
 r_body =
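
The right-neighbor variant, mirroring the removed assignments (a sketch, not the patch's own code):

static inline void buffer_info_init_right(struct tree_balance *tb,
                                          struct buffer_info *bi)
{
	bi->tb = tb;
	bi->bi_bh = tb->R[0];
	bi->bi_parent = tb->FR[0];
	bi->bi_position = get_right_neighbor_position(tb, 0);
}
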
@@ -817,11 +819,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->rnum[0] - 1,
 tb->rbytes);
 /* Insert new item into R[0] */
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position(tb, 0);
+buffer_info_init_right(tb, &bi);
 leaf_insert_into_buf(&bi,
 item_pos - n +
 tb->rnum[0] - 1,
@@ -881,21 +879,14 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 pos_in_item -
 entry_count +
 tb->rbytes - 1;
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent =
-tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position
-(tb, 0);
+buffer_info_init_right(tb, &bi);
 leaf_paste_in_buffer
 (&bi, 0,
 paste_entry_position,
 tb->insert_size[0],
 body, zeros_num);
 /* paste entry */
-leaf_paste_entries(bi.
-bi_bh,
+leaf_paste_entries(&bi,
 0,
 paste_entry_position,
 1,
@@ -1019,12 +1010,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 (tb, tb->CFR[0], 0);
 
 /* Append part of body into R[0] */
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position
-(tb, 0);
+buffer_info_init_right(tb, &bi);
 if (n_rem > zeros_num) {
 r_zeros_number = 0;
 r_body =
@@ -1071,12 +1057,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->rbytes);
 /* append item in R[0] */
 if (pos_in_item >= 0) {
-bi.tb = tb;
-bi.bi_bh = tb->R[0];
-bi.bi_parent = tb->FR[0];
-bi.bi_position =
-get_right_neighbor_position
-(tb, 0);
+buffer_info_init_right(tb, &bi);
 leaf_paste_in_buffer(&bi,
 item_pos -
 n +
@@ -1096,7 +1077,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->rnum[0]);
 if (is_direntry_le_ih(pasted)
 && pos_in_item >= 0) {
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 item_pos -
 n +
 tb->rnum[0],
@@ -1136,8 +1117,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 break;
 default: /* cases d and t */
-reiserfs_panic(tb->tb_sb,
-"PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
+reiserfs_panic(tb->tb_sb, "PAP-12175",
+"rnum > 0: unexpected mode: %s(%d)",
 (flag ==
 M_DELETE) ? "DELETE" : ((flag ==
 M_CUT) ? "CUT"
@@ -1167,8 +1148,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 not set correctly */
 if (tb->CFL[0]) {
 if (!tb->CFR[0])
-reiserfs_panic(tb->tb_sb,
-"vs-12195: balance_leaf: CFR not initialized");
+reiserfs_panic(tb->tb_sb, "vs-12195",
+"CFR not initialized");
 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
@@ -1232,10 +1213,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 put_ih_item_len(ih, sbytes[i]);
 
 /* Insert part of the item into S_new[i] before 0-th item */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 
 if ((old_len - sbytes[i]) > zeros_num) {
 r_zeros_number = 0;
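
For freshly allocated nodes such as S_new[i] there is no parent in the tree yet, which the fourth variant encodes (sketch inferred from the removed assignments above):

static inline void buffer_info_init_bh(struct tree_balance *tb,
                                       struct buffer_info *bi,
                                       struct buffer_head *bh)
{
	bi->tb = tb;
	bi->bi_bh = bh;
	bi->bi_parent = NULL;	/* new node, not yet linked into the tree */
	bi->bi_position = 0;
}
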
@@ -1267,10 +1245,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 S_new[i]);
 
 /* Insert new item into S_new[i] */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 leaf_insert_into_buf(&bi,
 item_pos - n +
 snum[i] - 1, ih,
@@ -1327,10 +1302,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 sbytes[i] - 1,
 S_new[i]);
 /* Paste given directory entry to directory item */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 leaf_paste_in_buffer
 (&bi, 0,
 pos_in_item -
@@ -1339,8 +1311,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 tb->insert_size[0],
 body, zeros_num);
 /* paste new directory entry */
-leaf_paste_entries(bi.
-bi_bh,
+leaf_paste_entries(&bi,
 0,
 pos_in_item
 -
@@ -1401,11 +1372,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 if (n_rem < 0)
 n_rem = 0;
 /* Append part of body into S_new[0] */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
-
+buffer_info_init_bh(tb, &bi, S_new[i]);
 if (n_rem > zeros_num) {
 r_zeros_number = 0;
 r_body =
@@ -1475,7 +1442,10 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 && (pos_in_item != ih_item_len(ih_check)
 || tb->insert_size[0] <= 0))
 reiserfs_panic(tb->tb_sb,
-"PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
+"PAP-12235",
+"pos_in_item "
+"must be equal "
+"to ih_item_len");
 #endif /* CONFIG_REISERFS_CHECK */
 
 leaf_mi =
@@ -1489,10 +1459,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 leaf_mi);
 
 /* paste into item */
-bi.tb = tb;
-bi.bi_bh = S_new[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, S_new[i]);
 leaf_paste_in_buffer(&bi,
 item_pos - n +
 snum[i],
@@ -1505,7 +1472,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 item_pos - n +
 snum[i]);
 if (is_direntry_le_ih(pasted)) {
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 item_pos -
 n + snum[i],
 pos_in_item,
@@ -1535,8 +1502,8 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 }
 break;
 default: /* cases d and t */
-reiserfs_panic(tb->tb_sb,
-"PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
+reiserfs_panic(tb->tb_sb, "PAP-12245",
+"blknum > 2: unexpected mode: %s(%d)",
 (flag ==
 M_DELETE) ? "DELETE" : ((flag ==
 M_CUT) ? "CUT"
@@ -1559,10 +1526,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 
 switch (flag) {
 case M_INSERT: /* insert item into S[0] */
-bi.tb = tb;
-bi.bi_bh = tbS0;
-bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
-bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
+buffer_info_init_tbS0(tb, &bi);
 leaf_insert_into_buf(&bi, item_pos, ih, body,
 zeros_num);
 
@@ -1589,14 +1553,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 "PAP-12260: insert_size is 0 already");
 
 /* prepare space */
-bi.tb = tb;
-bi.bi_bh = tbS0;
-bi.bi_parent =
-PATH_H_PPARENT(tb->tb_path,
-0);
-bi.bi_position =
-PATH_H_POSITION(tb->tb_path,
-1);
+buffer_info_init_tbS0(tb, &bi);
 leaf_paste_in_buffer(&bi,
 item_pos,
 pos_in_item,
@@ -1606,7 +1563,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 zeros_num);
 
 /* paste entry */
-leaf_paste_entries(bi.bi_bh,
+leaf_paste_entries(&bi,
 item_pos,
 pos_in_item,
 1,
@@ -1644,14 +1601,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 RFALSE(tb->insert_size[0] <= 0,
 "PAP-12275: insert size must not be %d",
 tb->insert_size[0]);
-bi.tb = tb;
-bi.bi_bh = tbS0;
-bi.bi_parent =
-PATH_H_PPARENT(tb->tb_path,
-0);
-bi.bi_position =
-PATH_H_POSITION(tb->tb_path,
-1);
+buffer_info_init_tbS0(tb, &bi);
 leaf_paste_in_buffer(&bi,
 item_pos,
 pos_in_item,
@@ -1681,10 +1631,11 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 print_cur_tb("12285");
 reiserfs_panic(tb->
 tb_sb,
-"PAP-12285: balance_leaf: insert_size must be 0 (%d)",
-tb->
-insert_size
-[0]);
+"PAP-12285",
+"insert_size "
+"must be 0 "
+"(%d)",
+tb->insert_size[0]);
 }
 }
 #endif /* CONFIG_REISERFS_CHECK */
@@ -1697,11 +1648,10 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
 if (flag == M_PASTE && tb->insert_size[0]) {
 print_cur_tb("12290");
 reiserfs_panic(tb->tb_sb,
-"PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
+"PAP-12290", "insert_size is still not 0 (%d)",
 tb->insert_size[0]);
 }
 #endif /* CONFIG_REISERFS_CHECK */
-
 return 0;
 } /* Leaf level of the tree is balanced (end of balance_leaf) */
 
@@ -1724,7 +1674,6 @@ void make_empty_node(struct buffer_info *bi)
 struct buffer_head *get_FEB(struct tree_balance *tb)
 {
 int i;
-struct buffer_head *first_b;
 struct buffer_info bi;
 
 for (i = 0; i < MAX_FEB_SIZE; i++)
@@ -1732,19 +1681,15 @@ struct buffer_head *get_FEB(struct tree_balance *tb)
 break;
 
 if (i == MAX_FEB_SIZE)
-reiserfs_panic(tb->tb_sb,
-"vs-12300: get_FEB: FEB list is empty");
+reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
 
-bi.tb = tb;
-bi.bi_bh = first_b = tb->FEB[i];
-bi.bi_parent = NULL;
-bi.bi_position = 0;
+buffer_info_init_bh(tb, &bi, tb->FEB[i]);
 make_empty_node(&bi);
-set_buffer_uptodate(first_b);
+set_buffer_uptodate(tb->FEB[i]);
+tb->used[i] = tb->FEB[i];
 tb->FEB[i] = NULL;
-tb->used[i] = first_b;
 
-return (first_b);
+return tb->used[i];
 }
 
 /* This is now used because reiserfs_free_block has to be able to
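
Stitched back together from the surviving lines of the two hunks above, get_FEB() after the patch presumably reads as follows; the loop condition falls between the hunks and is an assumption:

struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_info bi;

	/* find the first slot that still holds a free empty buffer */
	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)	/* condition not visible in the diff */
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");

	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
	make_empty_node(&bi);
	set_buffer_uptodate(tb->FEB[i]);

	/* hand the buffer over from the FEB array to the used array;
	 * keeping it reachable there makes the old first_b local
	 * unnecessary */
	tb->used[i] = tb->FEB[i];
	tb->FEB[i] = NULL;

	return tb->used[i];
}
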
@@ -1755,15 +1700,16 @@ static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
 int i;
 
 if (buffer_dirty(bh))
-reiserfs_warning(tb->tb_sb,
-"store_thrown deals with dirty buffer");
+reiserfs_warning(tb->tb_sb, "reiserfs-12320",
+"called with dirty buffer");
 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
 if (!tb->thrown[i]) {
 tb->thrown[i] = bh;
 get_bh(bh); /* free_thrown puts this */
 return;
 }
-reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
+reiserfs_warning(tb->tb_sb, "reiserfs-12321",
+"too many thrown buffers");
 }
 
 static void free_thrown(struct tree_balance *tb)
@@ -1774,8 +1720,8 @@ static void free_thrown(struct tree_balance *tb)
 if (tb->thrown[i]) {
 blocknr = tb->thrown[i]->b_blocknr;
 if (buffer_dirty(tb->thrown[i]))
-reiserfs_warning(tb->tb_sb,
-"free_thrown deals with dirty buffer %d",
+reiserfs_warning(tb->tb_sb, "reiserfs-12322",
+"called with dirty buffer %d",
 blocknr);
 brelse(tb->thrown[i]); /* incremented in store_thrown */
 reiserfs_free_block(tb->transaction_handle, NULL,
@@ -1873,20 +1819,19 @@ static void check_internal_node(struct super_block *s, struct buffer_head *bh,
 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
 if (!is_reusable(s, dc_block_number(dc), 1)) {
 print_cur_tb(mes);
-reiserfs_panic(s,
-"PAP-12338: check_internal_node: invalid child pointer %y in %b",
+reiserfs_panic(s, "PAP-12338",
+"invalid child pointer %y in %b",
 dc, bh);
 }
 }
 }
 
-static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
+static int locked_or_not_in_tree(struct tree_balance *tb,
+struct buffer_head *bh, char *which)
 {
 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
 !B_IS_IN_TREE(bh)) {
-reiserfs_warning(NULL,
-"vs-12339: locked_or_not_in_tree: %s (%b)",
-which, bh);
+reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
 return 1;
 }
 return 0;
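
Passing the tree_balance down lets the warning name the affected filesystem (tb->tb_sb) instead of the old NULL superblock. Read straight down the new side of the hunk, the helper becomes:

static int locked_or_not_in_tree(struct tree_balance *tb,
                                 struct buffer_head *bh, char *which)
{
	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
	    !B_IS_IN_TREE(bh)) {
		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
		return 1;
	}
	return 0;
}
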
@@ -1897,26 +1842,28 @@ static int check_before_balancing(struct tree_balance *tb)
 int retval = 0;
 
 if (cur_tb) {
-reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
-"suspect that schedule occurred based on cur_tb not being null at this point in code. "
-"do_balance cannot properly handle schedule occurring while it runs.");
+reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
+"occurred based on cur_tb not being null at "
+"this point in code. do_balance cannot properly "
+"handle schedule occurring while it runs.");
 }
 
 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
 prepped all of these for us). */
 if (tb->lnum[0]) {
-retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
-retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
-retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
+retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
+retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
+retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
 check_leaf(tb->L[0]);
 }
 if (tb->rnum[0]) {
-retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
-retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
-retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
+retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
+retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
+retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
 check_leaf(tb->R[0]);
 }
-retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
+retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
+"S[0]");
 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
 
 return retval;
@@ -1930,8 +1877,8 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 dc_size(B_N_CHILD
 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
 print_cur_tb("12221");
-reiserfs_panic(tb->tb_sb,
-"PAP-12355: check_after_balance_leaf: shift to left was incorrect");
+reiserfs_panic(tb->tb_sb, "PAP-12355",
+"shift to left was incorrect");
 }
 }
 if (tb->rnum[0]) {
@@ -1940,8 +1887,8 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 dc_size(B_N_CHILD
 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
 print_cur_tb("12222");
-reiserfs_panic(tb->tb_sb,
-"PAP-12360: check_after_balance_leaf: shift to right was incorrect");
+reiserfs_panic(tb->tb_sb, "PAP-12360",
+"shift to right was incorrect");
 }
 }
 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
@@ -1955,7 +1902,7 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 PATH_H_POSITION(tb->tb_path,
 1))));
 print_cur_tb("12223");
-reiserfs_warning(tb->tb_sb,
+reiserfs_warning(tb->tb_sb, "reiserfs-12363",
 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
 left,
@@ -1966,8 +1913,7 @@ static void check_after_balance_leaf(struct tree_balance *tb)
 (PATH_H_PBUFFER(tb->tb_path, 1),
 PATH_H_POSITION(tb->tb_path, 1))),
 right);
-reiserfs_panic(tb->tb_sb,
-"PAP-12365: check_after_balance_leaf: S is incorrect");
+reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
 }
 }
 
@@ -2037,7 +1983,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
 /* store_print_tb (tb); */
 
 /* do not delete, just comment it out */
 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
 "check");*/
 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
 #ifdef CONFIG_REISERFS_CHECK
@@ -2102,14 +2048,13 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */
 tb->need_balance_dirty = 0;
 
 if (FILESYSTEM_CHANGED_TB(tb)) {
-reiserfs_panic(tb->tb_sb,
-"clm-6000: do_balance, fs generation has changed\n");
+reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
+"changed");
 }
 /* if we have no real work to do */
 if (!tb->insert_size[0]) {
-reiserfs_warning(tb->tb_sb,
-"PAP-12350: do_balance: insert_size == 0, mode == %c",
-flag);
+reiserfs_warning(tb->tb_sb, "PAP-12350",
+"insert_size == 0, mode == %c", flag);
 unfix_nodes(tb);
 return;
 }
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 33408417038c..9f436668b7f8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -20,14 +20,14 @@
 ** insertion/balancing, for files that are written in one write.
 ** It avoids unnecessary tail packings (balances) for files that are written in
 ** multiple writes and are small enough to have tails.
 **
 ** file_release is called by the VFS layer when the file is closed. If
 ** this is the last open file descriptor, and the file
 ** small enough to have a tail, and the tail is currently in an
 ** unformatted node, the tail is converted back into a direct item.
 **
 ** We use reiserfs_truncate_file to pack the tail, since it already has
 ** all the conditions coded.
 */
 static int reiserfs_file_release(struct inode *inode, struct file *filp)
 {
@@ -76,7 +76,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 * and let the admin know what is going on.
 */
 igrab(inode);
-reiserfs_warning(inode->i_sb,
+reiserfs_warning(inode->i_sb, "clm-9001",
 "pinning inode %lu because the "
 "preallocation can't be freed",
 inode->i_ino);
@@ -134,23 +134,23 @@ static void reiserfs_vfs_truncate_file(struct inode *inode)
 * be removed...
 */
 
-static int reiserfs_sync_file(struct file *p_s_filp,
-struct dentry *p_s_dentry, int datasync)
+static int reiserfs_sync_file(struct file *filp,
+struct dentry *dentry, int datasync)
 {
-struct inode *p_s_inode = p_s_dentry->d_inode;
-int n_err;
+struct inode *inode = dentry->d_inode;
+int err;
 int barrier_done;
 
-BUG_ON(!S_ISREG(p_s_inode->i_mode));
-n_err = sync_mapping_buffers(p_s_inode->i_mapping);
-reiserfs_write_lock(p_s_inode->i_sb);
-barrier_done = reiserfs_commit_for_inode(p_s_inode);
-reiserfs_write_unlock(p_s_inode->i_sb);
-if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb))
-blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
+BUG_ON(!S_ISREG(inode->i_mode));
+err = sync_mapping_buffers(inode->i_mapping);
+reiserfs_write_lock(inode->i_sb);
+barrier_done = reiserfs_commit_for_inode(inode);
+reiserfs_write_unlock(inode->i_sb);
+if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
+blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 if (barrier_done < 0)
 return barrier_done;
-return (n_err < 0) ? -EIO : 0;
+return (err < 0) ? -EIO : 0;
 }
 
 /* taken fs/buffer.c:__block_commit_write */
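
The reiserfs_sync_file() hunk is the first of many in this series that strip the old Hungarian-style prefixes (p_s_ for struct pointers, n_ for integers, a_ for arrays) without changing behavior; the fix_node.c hunks below continue the same cleanup. An illustrative pairing taken from the hunk above:

/* before */ n_err = sync_mapping_buffers(p_s_inode->i_mapping);
/* after  */ err = sync_mapping_buffers(inode->i_mapping);
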
@@ -223,7 +223,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 }
 
 /* Write @count bytes at position @ppos in a file indicated by @file
 from the buffer @buf.
 
 generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
 something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 07d05e0842b7..5e5a4e6fbaf8 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -30,8 +30,8 @@
 ** get_direct_parent
 ** get_neighbors
 ** fix_nodes
 **
 **
 **/
 
 #include <linux/time.h>
@@ -135,8 +135,7 @@ static void create_virtual_node(struct tree_balance *tb, int h)
 vn->vn_free_ptr +=
 op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
-reiserfs_panic(tb->tb_sb,
-"vs-8030: create_virtual_node: "
+reiserfs_panic(tb->tb_sb, "vs-8030",
 "virtual node space consumed");
 
 if (!is_affected)
@@ -186,8 +185,9 @@ static void create_virtual_node(struct tree_balance *tb, int h)
 && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
 print_block(Sh, 0, -1, -1);
-reiserfs_panic(tb->tb_sb,
-"vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
+reiserfs_panic(tb->tb_sb, "vs-8045",
+"rdkey %k, affected item==%d "
+"(mode==%c) Must be %c",
 key, vn->vn_affected_item_num,
 vn->vn_mode, M_DELETE);
 }
@@ -377,9 +377,9 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 int needed_nodes;
 int start_item, /* position of item we start filling node from */
 end_item, /* position of item we finish filling node by */
 start_bytes, /* number of first bytes (entries for directory) of start_item-th item
 we do not include into node that is being filled */
 end_bytes; /* number of last bytes (entries for directory) of end_item-th item
 we do node include into node that is being filled */
 int split_item_positions[2]; /* these are positions in virtual item of
 items, that are split between S[0] and
@@ -496,8 +496,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 snum012[needed_nodes - 1 + 3] = units;
 
 if (needed_nodes > 2)
-reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
-"split_item_position is out of boundary");
+reiserfs_warning(tb->tb_sb, "vs-8111",
+"split_item_position is out of range");
 snum012[needed_nodes - 1]++;
 split_item_positions[needed_nodes - 1] = i;
 needed_nodes++;
@@ -533,8 +533,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
 
 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
-reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
-"directory or indirect item");
+reiserfs_warning(tb->tb_sb, "vs-8115",
+"not directory or indirect item");
 }
 
 /* now we know S2bytes, calculate S1bytes */
@@ -569,7 +569,7 @@ extern struct tree_balance *cur_tb;
 
 /* Set parameters for balancing.
 * Performs write of results of analysis of balancing into structure tb,
 * where it will later be used by the functions that actually do the balancing.
 * Parameters:
 * tb tree_balance structure;
 * h current level of the node;
@@ -749,25 +749,26 @@
 -1, -1);\
 }
 
-static void free_buffers_in_tb(struct tree_balance *p_s_tb)
+static void free_buffers_in_tb(struct tree_balance *tb)
 {
-int n_counter;
+int i;
 
-decrement_counters_in_path(p_s_tb->tb_path);
+pathrelse(tb->tb_path);
 
-for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
-decrement_bcount(p_s_tb->L[n_counter]);
-p_s_tb->L[n_counter] = NULL;
-decrement_bcount(p_s_tb->R[n_counter]);
-p_s_tb->R[n_counter] = NULL;
-decrement_bcount(p_s_tb->FL[n_counter]);
-p_s_tb->FL[n_counter] = NULL;
-decrement_bcount(p_s_tb->FR[n_counter]);
-p_s_tb->FR[n_counter] = NULL;
-decrement_bcount(p_s_tb->CFL[n_counter]);
-p_s_tb->CFL[n_counter] = NULL;
-decrement_bcount(p_s_tb->CFR[n_counter]);
-p_s_tb->CFR[n_counter] = NULL;
+for (i = 0; i < MAX_HEIGHT; i++) {
+brelse(tb->L[i]);
+brelse(tb->R[i]);
+brelse(tb->FL[i]);
+brelse(tb->FR[i]);
+brelse(tb->CFL[i]);
+brelse(tb->CFR[i]);
+
+tb->L[i] = NULL;
+tb->R[i] = NULL;
+tb->FL[i] = NULL;
+tb->FR[i] = NULL;
+tb->CFL[i] = NULL;
+tb->CFR[i] = NULL;
 }
 }
 
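
free_buffers_in_tb() can call brelse() directly because brelse() already tolerates a NULL buffer head, which is apparently all the old decrement_bcount() wrapper added. From include/linux/buffer_head.h, quoted from memory, so treat it as a sketch:

static inline void brelse(struct buffer_head *bh)
{
	if (bh)
		__brelse(bh);	/* drop one reference on the buffer */
}
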
@@ -777,14 +778,14 @@ static void free_buffers_in_tb(struct tree_balance *p_s_tb)
 * NO_DISK_SPACE - no disk space.
 */
 /* The function is NOT SCHEDULE-SAFE! */
-static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
+static int get_empty_nodes(struct tree_balance *tb, int h)
 {
-struct buffer_head *p_s_new_bh,
-*p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
-b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
-int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */
-n_retval = CARRY_ON;
-struct super_block *p_s_sb = p_s_tb->tb_sb;
+struct buffer_head *new_bh,
+*Sh = PATH_H_PBUFFER(tb->tb_path, h);
+b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
+int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */
+retval = CARRY_ON;
+struct super_block *sb = tb->tb_sb;
 
 /* number_of_freeblk is the number of empty blocks which have been
 acquired for use by the balancing algorithm minus the number of
@@ -792,7 +793,7 @@ static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
 after empty blocks are acquired, and the balancing analysis is
 then restarted, amount_needed is the number needed by this level
-(n_h) of the balancing analysis.
+(h) of the balancing analysis.
 
 Note that for systems with many processes writing, it would be
 more layout optimal to calculate the total number needed by all
@@ -800,54 +801,54 @@ static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
 
 /* Initiate number_of_freeblk to the amount acquired prior to the restart of
 the analysis or 0 if not restarted, then subtract the amount needed
-by all of the levels of the tree below n_h. */
-/* blknum includes S[n_h], so we subtract 1 in this calculation */
-for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
-n_counter < n_h; n_counter++)
-n_number_of_freeblk -=
-(p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
+by all of the levels of the tree below h. */
+/* blknum includes S[h], so we subtract 1 in this calculation */
+for (counter = 0, number_of_freeblk = tb->cur_blknum;
+counter < h; counter++)
+number_of_freeblk -=
+(tb->blknum[counter]) ? (tb->blknum[counter] -
 1) : 0;
 
 /* Allocate missing empty blocks. */
-/* if p_s_Sh == 0 then we are getting a new root */
-n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
+/* if Sh == 0 then we are getting a new root */
+amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
 /* Amount_needed = the amount that we need more than the amount that we have. */
-if (n_amount_needed > n_number_of_freeblk)
-n_amount_needed -= n_number_of_freeblk;
+if (amount_needed > number_of_freeblk)
+amount_needed -= number_of_freeblk;
 else /* If we have enough already then there is nothing to do. */
 return CARRY_ON;
 
 /* No need to check quota - is not allocated for blocks used for formatted nodes */
-if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
-n_amount_needed) == NO_DISK_SPACE)
+if (reiserfs_new_form_blocknrs(tb, blocknrs,
+amount_needed) == NO_DISK_SPACE)
 return NO_DISK_SPACE;
 
 /* for each blocknumber we just got, get a buffer and stick it on FEB */
-for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
-n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {
+for (blocknr = blocknrs, counter = 0;
+counter < amount_needed; blocknr++, counter++) {
 
-RFALSE(!*p_n_blocknr,
+RFALSE(!*blocknr,
 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
 
-p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
-RFALSE(buffer_dirty(p_s_new_bh) ||
-buffer_journaled(p_s_new_bh) ||
-buffer_journal_dirty(p_s_new_bh),
+new_bh = sb_getblk(sb, *blocknr);
+RFALSE(buffer_dirty(new_bh) ||
+buffer_journaled(new_bh) ||
+buffer_journal_dirty(new_bh),
 "PAP-8140: journlaled or dirty buffer %b for the new block",
-p_s_new_bh);
+new_bh);
 
 /* Put empty buffers into the array. */
-RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
+RFALSE(tb->FEB[tb->cur_blknum],
 "PAP-8141: busy slot for new buffer");
 
-set_buffer_journal_new(p_s_new_bh);
-p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
+set_buffer_journal_new(new_bh);
+tb->FEB[tb->cur_blknum++] = new_bh;
 }
 
-if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
-n_retval = REPEAT_SEARCH;
+if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
+retval = REPEAT_SEARCH;
 
-return n_retval;
+return retval;
 }
 
 /* Get free space of the left neighbor, which is stored in the parent
@@ -895,35 +896,36 @@ static int get_rfree(struct tree_balance *tb, int h)
 }
 
 /* Check whether left neighbor is in memory. */
-static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
+static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
 {
-struct buffer_head *p_s_father, *left;
-struct super_block *p_s_sb = p_s_tb->tb_sb;
-b_blocknr_t n_left_neighbor_blocknr;
-int n_left_neighbor_position;
+struct buffer_head *father, *left;
+struct super_block *sb = tb->tb_sb;
+b_blocknr_t left_neighbor_blocknr;
+int left_neighbor_position;
 
-if (!p_s_tb->FL[n_h]) /* Father of the left neighbor does not exist. */
+/* Father of the left neighbor does not exist. */
+if (!tb->FL[h])
 return 0;
 
 /* Calculate father of the node to be balanced. */
-p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
+father = PATH_H_PBUFFER(tb->tb_path, h + 1);
 
-RFALSE(!p_s_father ||
-!B_IS_IN_TREE(p_s_father) ||
-!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
-!buffer_uptodate(p_s_father) ||
-!buffer_uptodate(p_s_tb->FL[n_h]),
+RFALSE(!father ||
+!B_IS_IN_TREE(father) ||
+!B_IS_IN_TREE(tb->FL[h]) ||
+!buffer_uptodate(father) ||
+!buffer_uptodate(tb->FL[h]),
 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
-p_s_father, p_s_tb->FL[n_h]);
+father, tb->FL[h]);
 
 /* Get position of the pointer to the left neighbor into the left father. */
-n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
-p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
+left_neighbor_position = (father == tb->FL[h]) ?
+tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
 /* Get left neighbor block number. */
-n_left_neighbor_blocknr =
-B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
+left_neighbor_blocknr =
+B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
 /* Look for the left neighbor in the cache. */
-if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {
+if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
 
 RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
 "vs-8170: left neighbor (%b %z) is not in the tree",
@@ -938,10 +940,10 @@ static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
 #define LEFT_PARENTS 'l'
 #define RIGHT_PARENTS 'r'
 
-static void decrement_key(struct cpu_key *p_s_key)
+static void decrement_key(struct cpu_key *key)
 {
 // call item specific function for this key
-item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
+item_ops[cpu_key_k_type(key)]->decrement_key(key);
 }
 
 /* Calculate far left/right parent of the left/right neighbor of the current node, that
@@ -952,77 +954,77 @@ static void decrement_key(struct cpu_key *p_s_key)
 SCHEDULE_OCCURRED - schedule occurred while the function worked;
 * CARRY_ON - schedule didn't occur while the function worked;
 */
-static int get_far_parent(struct tree_balance *p_s_tb,
-int n_h,
-struct buffer_head **pp_s_father,
-struct buffer_head **pp_s_com_father, char c_lr_par)
+static int get_far_parent(struct tree_balance *tb,
+int h,
+struct buffer_head **pfather,
+struct buffer_head **pcom_father, char c_lr_par)
 {
-struct buffer_head *p_s_parent;
+struct buffer_head *parent;
 INITIALIZE_PATH(s_path_to_neighbor_father);
-struct treepath *p_s_path = p_s_tb->tb_path;
+struct treepath *path = tb->tb_path;
 struct cpu_key s_lr_father_key;
-int n_counter,
-n_position = INT_MAX,
-n_first_last_position = 0,
-n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);
+int counter,
+position = INT_MAX,
+first_last_position = 0,
+path_offset = PATH_H_PATH_OFFSET(path, h);
 
-/* Starting from F[n_h] go upwards in the tree, and look for the common
-ancestor of F[n_h], and its neighbor l/r, that should be obtained. */
+/* Starting from F[h] go upwards in the tree, and look for the common
+ancestor of F[h], and its neighbor l/r, that should be obtained. */
 
-n_counter = n_path_offset;
+counter = path_offset;
 
-RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
+RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET,
 "PAP-8180: invalid path length");
 
-for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
+for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
 /* Check whether parent of the current buffer in the path is really parent in the tree. */
 if (!B_IS_IN_TREE
-(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
+(parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
 return REPEAT_SEARCH;
 /* Check whether position in the parent is correct. */
-if ((n_position =
-PATH_OFFSET_POSITION(p_s_path,
-n_counter - 1)) >
-B_NR_ITEMS(p_s_parent))
+if ((position =
+PATH_OFFSET_POSITION(path,
+counter - 1)) >
+B_NR_ITEMS(parent))
 return REPEAT_SEARCH;
 /* Check whether parent at the path really points to the child. */
-if (B_N_CHILD_NUM(p_s_parent, n_position) !=
-PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
+if (B_N_CHILD_NUM(parent, position) !=
+PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
 return REPEAT_SEARCH;
 /* Return delimiting key if position in the parent is not equal to first/last one. */
 if (c_lr_par == RIGHT_PARENTS)
-n_first_last_position = B_NR_ITEMS(p_s_parent);
-if (n_position != n_first_last_position) {
-*pp_s_com_father = p_s_parent;
-get_bh(*pp_s_com_father);
-/*(*pp_s_com_father = p_s_parent)->b_count++; */
+first_last_position = B_NR_ITEMS(parent);
+if (position != first_last_position) {
+*pcom_father = parent;
+get_bh(*pcom_father);
+/*(*pcom_father = parent)->b_count++; */
 break;
 }
 }
 
 /* if we are in the root of the tree, then there is no common father */
-if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
+if (counter == FIRST_PATH_ELEMENT_OFFSET) {
 /* Check whether first buffer in the path is the root of the tree. */
 if (PATH_OFFSET_PBUFFER
-(p_s_tb->tb_path,
+(tb->tb_path,
 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
-SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
-*pp_s_father = *pp_s_com_father = NULL;
+SB_ROOT_BLOCK(tb->tb_sb)) {
+*pfather = *pcom_father = NULL;
 return CARRY_ON;
 }
 return REPEAT_SEARCH;
 }
 
-RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
+RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
 "PAP-8185: (%b %z) level too small",
-*pp_s_com_father, *pp_s_com_father);
+*pcom_father, *pcom_father);
 
 /* Check whether the common parent is locked. */
 
-if (buffer_locked(*pp_s_com_father)) {
-__wait_on_buffer(*pp_s_com_father);
-if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
-decrement_bcount(*pp_s_com_father);
+if (buffer_locked(*pcom_father)) {
+__wait_on_buffer(*pcom_father);
+if (FILESYSTEM_CHANGED_TB(tb)) {
+brelse(*pcom_father);
 return REPEAT_SEARCH;
 }
 }
@@ -1032,128 +1034,131 @@ static int get_far_parent(struct tree_balance *p_s_tb,
 
 /* Form key to get parent of the left/right neighbor. */
 le_key2cpu_key(&s_lr_father_key,
-B_N_PDELIM_KEY(*pp_s_com_father,
+B_N_PDELIM_KEY(*pcom_father,
 (c_lr_par ==
-LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
-n_position -
-1) : (p_s_tb->rkey[n_h -
+LEFT_PARENTS) ? (tb->lkey[h - 1] =
+position -
+1) : (tb->rkey[h -
 1] =
-n_position)));
+position)));
 
 if (c_lr_par == LEFT_PARENTS)
 decrement_key(&s_lr_father_key);
 
 if (search_by_key
-(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
-n_h + 1) == IO_ERROR)
+(tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
+h + 1) == IO_ERROR)
 // path is released
 return IO_ERROR;
 
-if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
-decrement_counters_in_path(&s_path_to_neighbor_father);
-decrement_bcount(*pp_s_com_father);
+if (FILESYSTEM_CHANGED_TB(tb)) {
+pathrelse(&s_path_to_neighbor_father);
+brelse(*pcom_father);
 return REPEAT_SEARCH;
 }
 
-*pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
+*pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
 
-RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
-"PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
+RFALSE(B_LEVEL(*pfather) != h + 1,
+"PAP-8190: (%b %z) level too small", *pfather, *pfather);
 RFALSE(s_path_to_neighbor_father.path_length <
 FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
 
 s_path_to_neighbor_father.path_length--;
-decrement_counters_in_path(&s_path_to_neighbor_father);
+pathrelse(&s_path_to_neighbor_father);
 return CARRY_ON;
 }
 
-/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
-* S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
-* FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
-* Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
+/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of
+* S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset],
+* FR[path_offset], CFL[path_offset], CFR[path_offset].
+* Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset].
 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
 * CARRY_ON - schedule didn't occur while the function worked;
 */
-static int get_parents(struct tree_balance *p_s_tb, int n_h)
+static int get_parents(struct tree_balance *tb, int h)
 {
-struct treepath *p_s_path = p_s_tb->tb_path;
-int n_position,
-n_ret_value,
-n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
-struct buffer_head *p_s_curf, *p_s_curcf;
+struct treepath *path = tb->tb_path;
+int position,
+ret,
+path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
+struct buffer_head *curf, *curcf;
 
 /* Current node is the root of the tree or will be root of the tree */
-if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
+if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
 /* The root can not have parents.
 Release nodes which previously were obtained as parents of the current node neighbors. */
-decrement_bcount(p_s_tb->FL[n_h]);
-decrement_bcount(p_s_tb->CFL[n_h]);
-decrement_bcount(p_s_tb->FR[n_h]);
-decrement_bcount(p_s_tb->CFR[n_h]);
-p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
-p_s_tb->CFR[n_h] = NULL;
+brelse(tb->FL[h]);
+brelse(tb->CFL[h]);
+brelse(tb->FR[h]);
+brelse(tb->CFR[h]);
+tb->FL[h] = NULL;
+tb->CFL[h] = NULL;
+tb->FR[h] = NULL;
+tb->CFR[h] = NULL;
 return CARRY_ON;
 }
 
-/* Get parent FL[n_path_offset] of L[n_path_offset]. */
-if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
+/* Get parent FL[path_offset] of L[path_offset]. */
+position = PATH_OFFSET_POSITION(path, path_offset - 1);
+if (position) {
 /* Current node is not the first child of its parent. */
-/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
-p_s_curf = p_s_curcf =
-PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
-get_bh(p_s_curf);
-get_bh(p_s_curf);
-p_s_tb->lkey[n_h] = n_position - 1;
+curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
+get_bh(curf);
+get_bh(curf);
+tb->lkey[h] = position - 1;
 } else {
-/* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
-Calculate current common parent of L[n_path_offset] and the current node. Note that
-CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
+/* Calculate current parent of L[path_offset], which is the left neighbor of the current node.
+Calculate current common parent of L[path_offset] and the current node. Note that
+CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset].
1111 Calculate lkey[n_path_offset]. */ 1115 Calculate lkey[path_offset]. */
1112 if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, 1116 if ((ret = get_far_parent(tb, h + 1, &curf,
1113 &p_s_curcf, 1117 &curcf,
1114 LEFT_PARENTS)) != CARRY_ON) 1118 LEFT_PARENTS)) != CARRY_ON)
1115 return n_ret_value; 1119 return ret;
1116 } 1120 }
1117 1121
1118 decrement_bcount(p_s_tb->FL[n_h]); 1122 brelse(tb->FL[h]);
1119 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ 1123 tb->FL[h] = curf; /* New initialization of FL[h]. */
1120 decrement_bcount(p_s_tb->CFL[n_h]); 1124 brelse(tb->CFL[h]);
1121 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ 1125 tb->CFL[h] = curcf; /* New initialization of CFL[h]. */
1122 1126
1123 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || 1127 RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1124 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), 1128 (curcf && !B_IS_IN_TREE(curcf)),
1125 "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); 1129 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
1126 1130
1127/* Get parent FR[n_h] of R[n_h]. */ 1131/* Get parent FR[h] of R[h]. */
1128 1132
1129/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ 1133/* Current node is the last child of F[h]. FR[h] != F[h]. */
1130 if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) { 1134 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
1131/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. 1135/* Calculate current parent of R[h], which is the right neighbor of F[h].
1132 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] 1136 Calculate current common parent of R[h] and current node. Note that CFR[h]
1133 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ 1137 not equal FR[path_offset] and CFR[h] not equal F[h]. */
1134 if ((n_ret_value = 1138 if ((ret =
1135 get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, 1139 get_far_parent(tb, h + 1, &curf, &curcf,
1136 RIGHT_PARENTS)) != CARRY_ON) 1140 RIGHT_PARENTS)) != CARRY_ON)
1137 return n_ret_value; 1141 return ret;
1138 } else { 1142 } else {
1139/* Current node is not the last child of its parent F[n_h]. */ 1143/* Current node is not the last child of its parent F[h]. */
1140 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */ 1144 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1141 p_s_curf = p_s_curcf = 1145 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1142 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); 1146 get_bh(curf);
1143 get_bh(p_s_curf); 1147 get_bh(curf);
1144 get_bh(p_s_curf); 1148 tb->rkey[h] = position;
1145 p_s_tb->rkey[n_h] = n_position;
1146 } 1149 }
1147 1150
1148 decrement_bcount(p_s_tb->FR[n_h]); 1151 brelse(tb->FR[h]);
1149 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ 1152 /* New initialization of FR[path_offset]. */
1153 tb->FR[h] = curf;
1150 1154
1151 decrement_bcount(p_s_tb->CFR[n_h]); 1155 brelse(tb->CFR[h]);
1152 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */ 1156 /* New initialization of CFR[path_offset]. */
1157 tb->CFR[h] = curcf;
1153 1158
1154 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) || 1159 RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1155 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)), 1160 (curcf && !B_IS_IN_TREE(curcf)),
1156 "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf); 1161 "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);
1157 1162
1158 return CARRY_ON; 1163 return CARRY_ON;
1159} 1164}
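Beyond the renames, get_parents() also lifts an assignment out of an if condition (old line 1099 against new lines 1103-1104), per the kernel's CodingStyle. The shape of that transformation, isolated:

	/* before: assignment buried in the condition */
	if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
		/* ... */
	}

	/* after: assign first, then test */
	position = PATH_OFFSET_POSITION(path, path_offset - 1);
	if (position) {
		/* ... */
	}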
@@ -1203,7 +1208,7 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1203 * h current level of the node; 1208 * h current level of the node;
1204 * inum item number in S[h]; 1209 * inum item number in S[h];
1205 * mode i - insert, p - paste; 1210 * mode i - insert, p - paste;
1206 * Returns: 1 - schedule occurred; 1211 * Returns: 1 - schedule occurred;
1207 * 0 - balancing for higher levels needed; 1212 * 0 - balancing for higher levels needed;
1208 * -1 - no balancing for higher levels needed; 1213 * -1 - no balancing for higher levels needed;
1209 * -2 - no disk space. 1214 * -2 - no disk space.
@@ -1217,7 +1222,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1217 contains node being balanced. The mnemonic is 1222 contains node being balanced. The mnemonic is
1218 that the attempted change in node space used level 1223 that the attempted change in node space used level
1219 is levbytes bytes. */ 1224 is levbytes bytes. */
1220 n_ret_value; 1225 ret;
1221 1226
1222 int lfree, sfree, rfree /* free space in L, S and R */ ; 1227 int lfree, sfree, rfree /* free space in L, S and R */ ;
1223 1228
@@ -1238,7 +1243,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1238 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters, 1243 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters,
1239 where the 4th parameter is s1bytes and the 5th is s2bytes 1244 where the 4th parameter is s1bytes and the 5th is s2bytes
1240 */ 1245 */
1241 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases 1246 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases
1242 0,1 - do not shift and do not shift but bottle 1247 0,1 - do not shift and do not shift but bottle
1243 2 - shift only whole item to left 1248 2 - shift only whole item to left
1244 3 - shift to left and bottle as much as possible 1249 3 - shift to left and bottle as much as possible
@@ -1255,24 +1260,24 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1255 /* Calculate balance parameters for creating new root. */ 1260 /* Calculate balance parameters for creating new root. */
1256 if (!Sh) { 1261 if (!Sh) {
1257 if (!h) 1262 if (!h)
1258 reiserfs_panic(tb->tb_sb, 1263 reiserfs_panic(tb->tb_sb, "vs-8210",
1259 "vs-8210: ip_check_balance: S[0] can not be 0"); 1264 "S[0] can not be 0");
1260 switch (n_ret_value = get_empty_nodes(tb, h)) { 1265 switch (ret = get_empty_nodes(tb, h)) {
1261 case CARRY_ON: 1266 case CARRY_ON:
1262 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1267 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1263 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1268 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1264 1269
1265 case NO_DISK_SPACE: 1270 case NO_DISK_SPACE:
1266 case REPEAT_SEARCH: 1271 case REPEAT_SEARCH:
1267 return n_ret_value; 1272 return ret;
1268 default: 1273 default:
1269 reiserfs_panic(tb->tb_sb, 1274 reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
1270 "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); 1275 "return value of get_empty_nodes");
1271 } 1276 }
1272 } 1277 }
1273 1278
1274 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ 1279 if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */
1275 return n_ret_value; 1280 return ret;
1276 1281
1277 sfree = B_FREE_SPACE(Sh); 1282 sfree = B_FREE_SPACE(Sh);
1278 1283
@@ -1287,7 +1292,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1287 1292
1288 create_virtual_node(tb, h); 1293 create_virtual_node(tb, h);
1289 1294
1290 /* 1295 /*
1291 determine maximal number of items we can shift to the left neighbor (in tb structure) 1296 determine maximal number of items we can shift to the left neighbor (in tb structure)
1292 and the maximal number of bytes that can flow to the left neighbor 1297 and the maximal number of bytes that can flow to the left neighbor
1293 from the left most liquid item that cannot be shifted from S[0] entirely (returned value) 1298 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
@@ -1348,13 +1353,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1348 1353
1349 { 1354 {
1350 int lpar, rpar, nset, lset, rset, lrset; 1355 int lpar, rpar, nset, lset, rset, lrset;
1351 /* 1356 /*
1352 * regular overflowing of the node 1357 * regular overflowing of the node
1353 */ 1358 */
1354 1359
1355 /* get_num_ver works in 2 modes (FLOW & NO_FLOW) 1360 /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1356 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) 1361 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1357 nset, lset, rset, lrset - show whether flowing items give better packing 1362 nset, lset, rset, lrset - show whether flowing items give better packing
1358 */ 1363 */
1359#define FLOW 1 1364#define FLOW 1
1360#define NO_FLOW 0 /* do not do any splitting */ 1365#define NO_FLOW 0 /* do not do any splitting */
@@ -1544,7 +1549,7 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1544 * h current level of the node; 1549 * h current level of the node;
1545 * inum item number in S[h]; 1550 * inum item number in S[h];
1546 * mode i - insert, p - paste; 1551 * mode i - insert, p - paste;
1547 * Returns: 1 - schedule occurred; 1552 * Returns: 1 - schedule occurred;
1548 * 0 - balancing for higher levels needed; 1553 * 0 - balancing for higher levels needed;
1549 * -1 - no balancing for higher levels needed; 1554 * -1 - no balancing for higher levels needed;
1550 * -2 - no disk space. 1555 * -2 - no disk space.
@@ -1559,7 +1564,7 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1559 /* Sh is the node whose balance is currently being checked, 1564 /* Sh is the node whose balance is currently being checked,
1560 and Fh is its father. */ 1565 and Fh is its father. */
1561 struct buffer_head *Sh, *Fh; 1566 struct buffer_head *Sh, *Fh;
1562 int maxsize, n_ret_value; 1567 int maxsize, ret;
1563 int lfree, rfree /* free space in L and R */ ; 1568 int lfree, rfree /* free space in L and R */ ;
1564 1569
1565 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1570 Sh = PATH_H_PBUFFER(tb->tb_path, h);
@@ -1584,8 +1589,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1584 return CARRY_ON; 1589 return CARRY_ON;
1585 } 1590 }
1586 1591
1587 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) 1592 if ((ret = get_parents(tb, h)) != CARRY_ON)
1588 return n_ret_value; 1593 return ret;
1589 1594
1590 /* get free space of neighbors */ 1595 /* get free space of neighbors */
1591 rfree = get_rfree(tb, h); 1596 rfree = get_rfree(tb, h);
@@ -1727,7 +1732,7 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1727 * h current level of the node; 1732 * h current level of the node;
1728 * inum item number in S[h]; 1733 * inum item number in S[h];
1729 * mode i - insert, p - paste; 1734 * mode i - insert, p - paste;
1730 * Returns: 1 - schedule occurred; 1735 * Returns: 1 - schedule occurred;
1731 * 0 - balancing for higher levels needed; 1736 * 0 - balancing for higher levels needed;
1732 * -1 - no balancing for higher levels needed; 1737 * -1 - no balancing for higher levels needed;
1733 * -2 - no disk space. 1738 * -2 - no disk space.
@@ -1742,7 +1747,7 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1742 attempted change in node space used level is levbytes bytes. */ 1747 attempted change in node space used level is levbytes bytes. */
1743 int levbytes; 1748 int levbytes;
1744 /* the maximal item size */ 1749 /* the maximal item size */
1745 int maxsize, n_ret_value; 1750 int maxsize, ret;
1746 /* S0 is the node whose balance is currently being checked, 1751 /* S0 is the node whose balance is currently being checked,
1747 and F0 is its father. */ 1752 and F0 is its father. */
1748 struct buffer_head *S0, *F0; 1753 struct buffer_head *S0, *F0;
@@ -1764,8 +1769,8 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1764 return NO_BALANCING_NEEDED; 1769 return NO_BALANCING_NEEDED;
1765 } 1770 }
1766 1771
1767 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) 1772 if ((ret = get_parents(tb, h)) != CARRY_ON)
1768 return n_ret_value; 1773 return ret;
1769 1774
1770 /* get free space of neighbors */ 1775 /* get free space of neighbors */
1771 rfree = get_rfree(tb, h); 1776 rfree = get_rfree(tb, h);
@@ -1821,7 +1826,7 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1821 * h current level of the node; 1826 * h current level of the node;
1822 * inum item number in S[h]; 1827 * inum item number in S[h];
1823 * mode d - delete, c - cut. 1828 * mode d - delete, c - cut.
1824 * Returns: 1 - schedule occurred; 1829 * Returns: 1 - schedule occurred;
1825 * 0 - balancing for higher levels needed; 1830 * 0 - balancing for higher levels needed;
1826 * -1 - no balancing for higher levels needed; 1831 * -1 - no balancing for higher levels needed;
1827 * -2 - no disk space. 1832 * -2 - no disk space.
@@ -1850,7 +1855,7 @@ static int dc_check_balance(struct tree_balance *tb, int h)
1850 * h current level of the node; 1855 * h current level of the node;
1851 * inum item number in S[h]; 1856 * inum item number in S[h];
1852 * mode i - insert, p - paste, d - delete, c - cut. 1857 * mode i - insert, p - paste, d - delete, c - cut.
1853 * Returns: 1 - schedule occurred; 1858 * Returns: 1 - schedule occurred;
1854 * 0 - balancing for higher levels needed; 1859 * 0 - balancing for higher levels needed;
1855 * -1 - no balancing for higher levels needed; 1860 * -1 - no balancing for higher levels needed;
1856 * -2 - no disk space. 1861 * -2 - no disk space.
@@ -1884,137 +1889,138 @@ static int check_balance(int mode,
1884} 1889}
1885 1890
1886/* Check whether the parent at the path is really the parent of the current node. */ 1891/* Check whether the parent at the path is really the parent of the current node. */
1887static int get_direct_parent(struct tree_balance *p_s_tb, int n_h) 1892static int get_direct_parent(struct tree_balance *tb, int h)
1888{ 1893{
1889 struct buffer_head *p_s_bh; 1894 struct buffer_head *bh;
1890 struct treepath *p_s_path = p_s_tb->tb_path; 1895 struct treepath *path = tb->tb_path;
1891 int n_position, 1896 int position,
1892 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); 1897 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
1893 1898
1894 /* We are in the root or in the new root. */ 1899 /* We are in the root or in the new root. */
1895 if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 1900 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1896 1901
1897 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, 1902 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1898 "PAP-8260: invalid offset in the path"); 1903 "PAP-8260: invalid offset in the path");
1899 1904
1900 if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)-> 1905 if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
1901 b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) { 1906 b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
1902 /* Root is not changed. */ 1907 /* Root is not changed. */
1903 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; 1908 PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
1904 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; 1909 PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
1905 return CARRY_ON; 1910 return CARRY_ON;
1906 } 1911 }
1907 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ 1912 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1908 } 1913 }
1909 1914
1910 if (!B_IS_IN_TREE 1915 if (!B_IS_IN_TREE
1911 (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))) 1916 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
1912 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ 1917 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1913 1918
1914 if ((n_position = 1919 if ((position =
1915 PATH_OFFSET_POSITION(p_s_path, 1920 PATH_OFFSET_POSITION(path,
1916 n_path_offset - 1)) > B_NR_ITEMS(p_s_bh)) 1921 path_offset - 1)) > B_NR_ITEMS(bh))
1917 return REPEAT_SEARCH; 1922 return REPEAT_SEARCH;
1918 1923
1919 if (B_N_CHILD_NUM(p_s_bh, n_position) != 1924 if (B_N_CHILD_NUM(bh, position) !=
1920 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr) 1925 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
1921 /* Parent in the path is not parent of the current node in the tree. */ 1926 /* Parent in the path is not parent of the current node in the tree. */
1922 return REPEAT_SEARCH; 1927 return REPEAT_SEARCH;
1923 1928
1924 if (buffer_locked(p_s_bh)) { 1929 if (buffer_locked(bh)) {
1925 __wait_on_buffer(p_s_bh); 1930 __wait_on_buffer(bh);
1926 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 1931 if (FILESYSTEM_CHANGED_TB(tb))
1927 return REPEAT_SEARCH; 1932 return REPEAT_SEARCH;
1928 } 1933 }
1929 1934
1930 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ 1935 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1931} 1936}
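Condensed, get_direct_parent() trusts the cached parent only after three checks, all visible in new lines 1915-1927 (child_blocknr below is a stand-in for PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr):

	/* 1: the cached buffer must still be part of the tree */
	if (!B_IS_IN_TREE(bh))
		return REPEAT_SEARCH;
	/* 2: the remembered child position must still be in range */
	if (position > B_NR_ITEMS(bh))
		return REPEAT_SEARCH;
	/* 3: the pointer at that position must still name our block */
	if (B_N_CHILD_NUM(bh, position) != child_blocknr)
		return REPEAT_SEARCH;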
1932 1937
1933/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors 1938/* Using lnum[h] and rnum[h] we should determine what neighbors
1934 * of S[n_h] we 1939 * of S[h] we
1935 * need in order to balance S[n_h], and get them if necessary. 1940 * need in order to balance S[h], and get them if necessary.
1936 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1941 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1937 * CARRY_ON - schedule didn't occur while the function worked; 1942 * CARRY_ON - schedule didn't occur while the function worked;
1938 */ 1943 */
1939static int get_neighbors(struct tree_balance *p_s_tb, int n_h) 1944static int get_neighbors(struct tree_balance *tb, int h)
1940{ 1945{
1941 int n_child_position, 1946 int child_position,
1942 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); 1947 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
1943 unsigned long n_son_number; 1948 unsigned long son_number;
1944 struct super_block *p_s_sb = p_s_tb->tb_sb; 1949 struct super_block *sb = tb->tb_sb;
1945 struct buffer_head *p_s_bh; 1950 struct buffer_head *bh;
1946 1951
1947 PROC_INFO_INC(p_s_sb, get_neighbors[n_h]); 1952 PROC_INFO_INC(sb, get_neighbors[h]);
1948 1953
1949 if (p_s_tb->lnum[n_h]) { 1954 if (tb->lnum[h]) {
1950 /* We need left neighbor to balance S[n_h]. */ 1955 /* We need left neighbor to balance S[h]. */
1951 PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]); 1956 PROC_INFO_INC(sb, need_l_neighbor[h]);
1952 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1957 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1953 1958
1954 RFALSE(p_s_bh == p_s_tb->FL[n_h] && 1959 RFALSE(bh == tb->FL[h] &&
1955 !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), 1960 !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
1956 "PAP-8270: invalid position in the parent"); 1961 "PAP-8270: invalid position in the parent");
1957 1962
1958 n_child_position = 1963 child_position =
1959 (p_s_bh == 1964 (bh ==
1960 p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb-> 1965 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
1961 FL[n_h]); 1966 FL[h]);
1962 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); 1967 son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
1963 p_s_bh = sb_bread(p_s_sb, n_son_number); 1968 bh = sb_bread(sb, son_number);
1964 if (!p_s_bh) 1969 if (!bh)
1965 return IO_ERROR; 1970 return IO_ERROR;
1966 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 1971 if (FILESYSTEM_CHANGED_TB(tb)) {
1967 decrement_bcount(p_s_bh); 1972 brelse(bh);
1968 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); 1973 PROC_INFO_INC(sb, get_neighbors_restart[h]);
1969 return REPEAT_SEARCH; 1974 return REPEAT_SEARCH;
1970 } 1975 }
1971 1976
1972 RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) || 1977 RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
1973 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || 1978 child_position > B_NR_ITEMS(tb->FL[h]) ||
1974 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != 1979 B_N_CHILD_NUM(tb->FL[h], child_position) !=
1975 p_s_bh->b_blocknr, "PAP-8275: invalid parent"); 1980 bh->b_blocknr, "PAP-8275: invalid parent");
1976 RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); 1981 RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
1977 RFALSE(!n_h && 1982 RFALSE(!h &&
1978 B_FREE_SPACE(p_s_bh) != 1983 B_FREE_SPACE(bh) !=
1979 MAX_CHILD_SIZE(p_s_bh) - 1984 MAX_CHILD_SIZE(bh) -
1980 dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)), 1985 dc_size(B_N_CHILD(tb->FL[0], child_position)),
1981 "PAP-8290: invalid child size of left neighbor"); 1986 "PAP-8290: invalid child size of left neighbor");
1982 1987
1983 decrement_bcount(p_s_tb->L[n_h]); 1988 brelse(tb->L[h]);
1984 p_s_tb->L[n_h] = p_s_bh; 1989 tb->L[h] = bh;
1985 } 1990 }
1986 1991
1987 if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */ 1992 /* We need right neighbor to balance S[path_offset]. */
1988 PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]); 1993 if (tb->rnum[h]) {
1989 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1994 PROC_INFO_INC(sb, need_r_neighbor[h]);
1995 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1990 1996
1991 RFALSE(p_s_bh == p_s_tb->FR[n_h] && 1997 RFALSE(bh == tb->FR[h] &&
1992 PATH_OFFSET_POSITION(p_s_tb->tb_path, 1998 PATH_OFFSET_POSITION(tb->tb_path,
1993 n_path_offset) >= 1999 path_offset) >=
1994 B_NR_ITEMS(p_s_bh), 2000 B_NR_ITEMS(bh),
1995 "PAP-8295: invalid position in the parent"); 2001 "PAP-8295: invalid position in the parent");
1996 2002
1997 n_child_position = 2003 child_position =
1998 (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0; 2004 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
1999 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); 2005 son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
2000 p_s_bh = sb_bread(p_s_sb, n_son_number); 2006 bh = sb_bread(sb, son_number);
2001 if (!p_s_bh) 2007 if (!bh)
2002 return IO_ERROR; 2008 return IO_ERROR;
2003 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2009 if (FILESYSTEM_CHANGED_TB(tb)) {
2004 decrement_bcount(p_s_bh); 2010 brelse(bh);
2005 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]); 2011 PROC_INFO_INC(sb, get_neighbors_restart[h]);
2006 return REPEAT_SEARCH; 2012 return REPEAT_SEARCH;
2007 } 2013 }
2008 decrement_bcount(p_s_tb->R[n_h]); 2014 brelse(tb->R[h]);
2009 p_s_tb->R[n_h] = p_s_bh; 2015 tb->R[h] = bh;
2010 2016
2011 RFALSE(!n_h 2017 RFALSE(!h
2012 && B_FREE_SPACE(p_s_bh) != 2018 && B_FREE_SPACE(bh) !=
2013 MAX_CHILD_SIZE(p_s_bh) - 2019 MAX_CHILD_SIZE(bh) -
2014 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)), 2020 dc_size(B_N_CHILD(tb->FR[0], child_position)),
2015 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", 2021 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
2016 B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh), 2022 B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
2017 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position))); 2023 dc_size(B_N_CHILD(tb->FR[0], child_position)));
2018 2024
2019 } 2025 }
2020 return CARRY_ON; 2026 return CARRY_ON;
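Both the left- and right-neighbor halves of get_neighbors() repeat the same read-then-revalidate sequence; compressed from the left-hand case above:

	son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
	bh = sb_bread(sb, son_number);		/* may sleep on I/O */
	if (!bh)
		return IO_ERROR;
	if (FILESYSTEM_CHANGED_TB(tb)) {
		brelse(bh);			/* stale; retry from scratch */
		return REPEAT_SEARCH;
	}
	brelse(tb->L[h]);	/* release the previously cached neighbor */
	tb->L[h] = bh;		/* cache the fresh one */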
@@ -2088,52 +2094,46 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2088} 2094}
2089 2095
2090#ifdef CONFIG_REISERFS_CHECK 2096#ifdef CONFIG_REISERFS_CHECK
2091static void tb_buffer_sanity_check(struct super_block *p_s_sb, 2097static void tb_buffer_sanity_check(struct super_block *sb,
2092 struct buffer_head *p_s_bh, 2098 struct buffer_head *bh,
2093 const char *descr, int level) 2099 const char *descr, int level)
2094{ 2100{
2095 if (p_s_bh) { 2101 if (bh) {
2096 if (atomic_read(&(p_s_bh->b_count)) <= 0) { 2102 if (atomic_read(&(bh->b_count)) <= 0)
2097 2103
2098 reiserfs_panic(p_s_sb, 2104 reiserfs_panic(sb, "jmacd-1", "negative or zero "
2099 "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", 2105 "reference counter for buffer %s[%d] "
2100 descr, level, p_s_bh); 2106 "(%b)", descr, level, bh);
2101 } 2107
2102 2108 if (!buffer_uptodate(bh))
2103 if (!buffer_uptodate(p_s_bh)) { 2109 reiserfs_panic(sb, "jmacd-2", "buffer is not up "
2104 reiserfs_panic(p_s_sb, 2110 "to date %s[%d] (%b)",
2105 "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", 2111 descr, level, bh);
2106 descr, level, p_s_bh); 2112
2107 } 2113 if (!B_IS_IN_TREE(bh))
2108 2114 reiserfs_panic(sb, "jmacd-3", "buffer is not "
2109 if (!B_IS_IN_TREE(p_s_bh)) { 2115 "in tree %s[%d] (%b)",
2110 reiserfs_panic(p_s_sb, 2116 descr, level, bh);
2111 "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", 2117
2112 descr, level, p_s_bh); 2118 if (bh->b_bdev != sb->s_bdev)
2113 } 2119 reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
2114 2120 "device %s[%d] (%b)",
2115 if (p_s_bh->b_bdev != p_s_sb->s_bdev) { 2121 descr, level, bh);
2116 reiserfs_panic(p_s_sb, 2122
2117 "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", 2123 if (bh->b_size != sb->s_blocksize)
2118 descr, level, p_s_bh); 2124 reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
2119 } 2125 "blocksize %s[%d] (%b)",
2120 2126 descr, level, bh);
2121 if (p_s_bh->b_size != p_s_sb->s_blocksize) { 2127
2122 reiserfs_panic(p_s_sb, 2128 if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
2123 "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", 2129 reiserfs_panic(sb, "jmacd-6", "buffer block "
2124 descr, level, p_s_bh); 2130 "number too high %s[%d] (%b)",
2125 } 2131 descr, level, bh);
2126
2127 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2128 reiserfs_panic(p_s_sb,
2129 "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2130 descr, level, p_s_bh);
2131 }
2132 } 2132 }
2133} 2133}
2134#else 2134#else
2135static void tb_buffer_sanity_check(struct super_block *p_s_sb, 2135static void tb_buffer_sanity_check(struct super_block *sb,
2136 struct buffer_head *p_s_bh, 2136 struct buffer_head *bh,
2137 const char *descr, int level) 2137 const char *descr, int level)
2138{; 2138{;
2139} 2139}
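The tb_buffer_sanity_check() rewrite also shows the reiserfs_panic() convention this series adopts: the assertion id moves out of the format string into its own argument, the function name is dropped (the panic path can report it), and the trailing newline goes away. Side by side, taken from the first check above:

	/* old: id and function name baked into the format string */
	reiserfs_panic(sb, "jmacd-1: tb_buffer_sanity_check(): negative or zero "
		       "reference counter for buffer %s[%d] (%b)\n",
		       descr, level, bh);

	/* new: id as its own argument, no function name, no newline */
	reiserfs_panic(sb, "jmacd-1", "negative or zero reference counter "
		       "for buffer %s[%d] (%b)", descr, level, bh);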
@@ -2144,7 +2144,7 @@ static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2144 return reiserfs_prepare_for_journal(s, bh, 0); 2144 return reiserfs_prepare_for_journal(s, bh, 0);
2145} 2145}
2146 2146
2147static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb) 2147static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2148{ 2148{
2149 struct buffer_head *locked; 2149 struct buffer_head *locked;
2150#ifdef CONFIG_REISERFS_CHECK 2150#ifdef CONFIG_REISERFS_CHECK
@@ -2156,95 +2156,94 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2156 2156
2157 locked = NULL; 2157 locked = NULL;
2158 2158
2159 for (i = p_s_tb->tb_path->path_length; 2159 for (i = tb->tb_path->path_length;
2160 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2160 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2161 if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { 2161 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2162 /* if I understand correctly, we can only be sure the last buffer 2162 /* if I understand correctly, we can only be sure the last buffer
2163 ** in the path is in the tree --clm 2163 ** in the path is in the tree --clm
2164 */ 2164 */
2165#ifdef CONFIG_REISERFS_CHECK 2165#ifdef CONFIG_REISERFS_CHECK
2166 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == 2166 if (PATH_PLAST_BUFFER(tb->tb_path) ==
2167 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { 2167 PATH_OFFSET_PBUFFER(tb->tb_path, i))
2168 tb_buffer_sanity_check(p_s_tb->tb_sb, 2168 tb_buffer_sanity_check(tb->tb_sb,
2169 PATH_OFFSET_PBUFFER 2169 PATH_OFFSET_PBUFFER
2170 (p_s_tb->tb_path, 2170 (tb->tb_path,
2171 i), "S", 2171 i), "S",
2172 p_s_tb->tb_path-> 2172 tb->tb_path->
2173 path_length - i); 2173 path_length - i);
2174 }
2175#endif 2174#endif
2176 if (!clear_all_dirty_bits(p_s_tb->tb_sb, 2175 if (!clear_all_dirty_bits(tb->tb_sb,
2177 PATH_OFFSET_PBUFFER 2176 PATH_OFFSET_PBUFFER
2178 (p_s_tb->tb_path, 2177 (tb->tb_path,
2179 i))) { 2178 i))) {
2180 locked = 2179 locked =
2181 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, 2180 PATH_OFFSET_PBUFFER(tb->tb_path,
2182 i); 2181 i);
2183 } 2182 }
2184 } 2183 }
2185 } 2184 }
2186 2185
2187 for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; 2186 for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
2188 i++) { 2187 i++) {
2189 2188
2190 if (p_s_tb->lnum[i]) { 2189 if (tb->lnum[i]) {
2191 2190
2192 if (p_s_tb->L[i]) { 2191 if (tb->L[i]) {
2193 tb_buffer_sanity_check(p_s_tb->tb_sb, 2192 tb_buffer_sanity_check(tb->tb_sb,
2194 p_s_tb->L[i], 2193 tb->L[i],
2195 "L", i); 2194 "L", i);
2196 if (!clear_all_dirty_bits 2195 if (!clear_all_dirty_bits
2197 (p_s_tb->tb_sb, p_s_tb->L[i])) 2196 (tb->tb_sb, tb->L[i]))
2198 locked = p_s_tb->L[i]; 2197 locked = tb->L[i];
2199 } 2198 }
2200 2199
2201 if (!locked && p_s_tb->FL[i]) { 2200 if (!locked && tb->FL[i]) {
2202 tb_buffer_sanity_check(p_s_tb->tb_sb, 2201 tb_buffer_sanity_check(tb->tb_sb,
2203 p_s_tb->FL[i], 2202 tb->FL[i],
2204 "FL", i); 2203 "FL", i);
2205 if (!clear_all_dirty_bits 2204 if (!clear_all_dirty_bits
2206 (p_s_tb->tb_sb, p_s_tb->FL[i])) 2205 (tb->tb_sb, tb->FL[i]))
2207 locked = p_s_tb->FL[i]; 2206 locked = tb->FL[i];
2208 } 2207 }
2209 2208
2210 if (!locked && p_s_tb->CFL[i]) { 2209 if (!locked && tb->CFL[i]) {
2211 tb_buffer_sanity_check(p_s_tb->tb_sb, 2210 tb_buffer_sanity_check(tb->tb_sb,
2212 p_s_tb->CFL[i], 2211 tb->CFL[i],
2213 "CFL", i); 2212 "CFL", i);
2214 if (!clear_all_dirty_bits 2213 if (!clear_all_dirty_bits
2215 (p_s_tb->tb_sb, p_s_tb->CFL[i])) 2214 (tb->tb_sb, tb->CFL[i]))
2216 locked = p_s_tb->CFL[i]; 2215 locked = tb->CFL[i];
2217 } 2216 }
2218 2217
2219 } 2218 }
2220 2219
2221 if (!locked && (p_s_tb->rnum[i])) { 2220 if (!locked && (tb->rnum[i])) {
2222 2221
2223 if (p_s_tb->R[i]) { 2222 if (tb->R[i]) {
2224 tb_buffer_sanity_check(p_s_tb->tb_sb, 2223 tb_buffer_sanity_check(tb->tb_sb,
2225 p_s_tb->R[i], 2224 tb->R[i],
2226 "R", i); 2225 "R", i);
2227 if (!clear_all_dirty_bits 2226 if (!clear_all_dirty_bits
2228 (p_s_tb->tb_sb, p_s_tb->R[i])) 2227 (tb->tb_sb, tb->R[i]))
2229 locked = p_s_tb->R[i]; 2228 locked = tb->R[i];
2230 } 2229 }
2231 2230
2232 if (!locked && p_s_tb->FR[i]) { 2231 if (!locked && tb->FR[i]) {
2233 tb_buffer_sanity_check(p_s_tb->tb_sb, 2232 tb_buffer_sanity_check(tb->tb_sb,
2234 p_s_tb->FR[i], 2233 tb->FR[i],
2235 "FR", i); 2234 "FR", i);
2236 if (!clear_all_dirty_bits 2235 if (!clear_all_dirty_bits
2237 (p_s_tb->tb_sb, p_s_tb->FR[i])) 2236 (tb->tb_sb, tb->FR[i]))
2238 locked = p_s_tb->FR[i]; 2237 locked = tb->FR[i];
2239 } 2238 }
2240 2239
2241 if (!locked && p_s_tb->CFR[i]) { 2240 if (!locked && tb->CFR[i]) {
2242 tb_buffer_sanity_check(p_s_tb->tb_sb, 2241 tb_buffer_sanity_check(tb->tb_sb,
2243 p_s_tb->CFR[i], 2242 tb->CFR[i],
2244 "CFR", i); 2243 "CFR", i);
2245 if (!clear_all_dirty_bits 2244 if (!clear_all_dirty_bits
2246 (p_s_tb->tb_sb, p_s_tb->CFR[i])) 2245 (tb->tb_sb, tb->CFR[i]))
2247 locked = p_s_tb->CFR[i]; 2246 locked = tb->CFR[i];
2248 } 2247 }
2249 } 2248 }
2250 } 2249 }
@@ -2257,10 +2256,10 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2257 ** --clm 2256 ** --clm
2258 */ 2257 */
2259 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2258 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2260 if (p_s_tb->FEB[i]) { 2259 if (tb->FEB[i]) {
2261 if (!clear_all_dirty_bits 2260 if (!clear_all_dirty_bits
2262 (p_s_tb->tb_sb, p_s_tb->FEB[i])) 2261 (tb->tb_sb, tb->FEB[i]))
2263 locked = p_s_tb->FEB[i]; 2262 locked = tb->FEB[i];
2264 } 2263 }
2265 } 2264 }
2266 2265
@@ -2268,21 +2267,20 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2268#ifdef CONFIG_REISERFS_CHECK 2267#ifdef CONFIG_REISERFS_CHECK
2269 repeat_counter++; 2268 repeat_counter++;
2270 if ((repeat_counter % 10000) == 0) { 2269 if ((repeat_counter % 10000) == 0) {
2271 reiserfs_warning(p_s_tb->tb_sb, 2270 reiserfs_warning(tb->tb_sb, "reiserfs-8200",
2272 "wait_tb_buffers_until_released(): too many " 2271 "too many iterations waiting "
2273 "iterations waiting for buffer to unlock " 2272 "for buffer to unlock "
2274 "(%b)", locked); 2273 "(%b)", locked);
2275 2274
2276 /* Don't loop forever. Try to recover from possible error. */ 2275 /* Don't loop forever. Try to recover from possible error. */
2277 2276
2278 return (FILESYSTEM_CHANGED_TB(p_s_tb)) ? 2277 return (FILESYSTEM_CHANGED_TB(tb)) ?
2279 REPEAT_SEARCH : CARRY_ON; 2278 REPEAT_SEARCH : CARRY_ON;
2280 } 2279 }
2281#endif 2280#endif
2282 __wait_on_buffer(locked); 2281 __wait_on_buffer(locked);
2283 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2282 if (FILESYSTEM_CHANGED_TB(tb))
2284 return REPEAT_SEARCH; 2283 return REPEAT_SEARCH;
2285 }
2286 } 2284 }
2287 2285
2288 } while (locked); 2286 } while (locked);
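Stripped of the per-array bookkeeping, wait_tb_buffers_until_unlocked() reduces to the loop below; locked is whichever buffer clear_all_dirty_bits() last found still busy (a sketch, not the full function):

	do {
		locked = NULL;
		/* scan the path, then L/FL/CFL, R/FR/CFR, then the FEB
		 * array; the first still-locked buffer ends the scan */
		if (locked) {
			__wait_on_buffer(locked);	/* sleep until released */
			if (FILESYSTEM_CHANGED_TB(tb))
				return REPEAT_SEARCH;	/* analysis is stale */
		}
	} while (locked);
	return CARRY_ON;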
@@ -2295,15 +2293,15 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2295 * analyze what and where should be moved; 2293 * analyze what and where should be moved;
2296 * get sufficient number of new nodes; 2294 * get sufficient number of new nodes;
2297 * Balancing will start only after all the needed resources have been collected. 2295 * Balancing will start only after all the needed resources have been collected.
2298 * 2296 *
2299 * When ported to SMP kernels, only at the last moment after all needed nodes 2297 * When ported to SMP kernels, only at the last moment after all needed nodes
2300 * are collected in cache, will the resources be locked using the usual 2298 * are collected in cache, will the resources be locked using the usual
2301 * textbook ordered lock acquisition algorithms. Note that ensuring that 2299 * textbook ordered lock acquisition algorithms. Note that ensuring that
2302 * this code neither write locks what it does not need to write lock nor locks out of order 2300 * this code neither write locks what it does not need to write lock nor locks out of order
2303 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans 2301 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans
2304 * 2302 *
2305 * fix is meant in the sense of render unchanging 2303 * fix is meant in the sense of render unchanging
2306 * 2304 *
2307 * Latency might be improved by first gathering a list of what buffers are needed 2305 * Latency might be improved by first gathering a list of what buffers are needed
2308 * and then getting as many of them in parallel as possible? -Hans 2306 * and then getting as many of them in parallel as possible? -Hans
2309 * 2307 *
@@ -2312,159 +2310,160 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2312 * tb tree_balance structure; 2310 * tb tree_balance structure;
2313 * inum item number in S[h]; 2311 * inum item number in S[h];
2314 * pos_in_item - comment this if you can 2312 * pos_in_item - comment this if you can
2315 * ins_ih & ins_sd are used when inserting 2313 * ins_ih item head of item being inserted
2314 * data inserted item or data to be pasted
2316 * Returns: 1 - schedule occurred while the function worked; 2315 * Returns: 1 - schedule occurred while the function worked;
2317 * 0 - schedule didn't occur while the function worked; 2316 * 0 - schedule didn't occur while the function worked;
2318 * -1 - if no_disk_space 2317 * -1 - if no_disk_space
2319 */ 2318 */
2320 2319
2321int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted 2320int fix_nodes(int op_mode, struct tree_balance *tb,
2322 const void *data // inserted item or data to be pasted 2321 struct item_head *ins_ih, const void *data)
2323 )
2324{ 2322{
2325 int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); 2323 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2326 int n_pos_in_item; 2324 int pos_in_item;
2327 2325
2328 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared 2326 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2329 ** during wait_tb_buffers_run 2327 ** during wait_tb_buffers_run
2330 */ 2328 */
2331 int wait_tb_buffers_run = 0; 2329 int wait_tb_buffers_run = 0;
2332 struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); 2330 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
2333 2331
2334 ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes; 2332 ++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
2335 2333
2336 n_pos_in_item = p_s_tb->tb_path->pos_in_item; 2334 pos_in_item = tb->tb_path->pos_in_item;
2337 2335
2338 p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb); 2336 tb->fs_gen = get_generation(tb->tb_sb);
2339 2337
2340 /* we prepare and log the super here so it will already be in the 2338 /* we prepare and log the super here so it will already be in the
2341 ** transaction when do_balance needs to change it. 2339 ** transaction when do_balance needs to change it.
2342 ** This way do_balance won't have to schedule when trying to prepare 2340 ** This way do_balance won't have to schedule when trying to prepare
2343 ** the super for logging 2341 ** the super for logging
2344 */ 2342 */
2345 reiserfs_prepare_for_journal(p_s_tb->tb_sb, 2343 reiserfs_prepare_for_journal(tb->tb_sb,
2346 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1); 2344 SB_BUFFER_WITH_SB(tb->tb_sb), 1);
2347 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, 2345 journal_mark_dirty(tb->transaction_handle, tb->tb_sb,
2348 SB_BUFFER_WITH_SB(p_s_tb->tb_sb)); 2346 SB_BUFFER_WITH_SB(tb->tb_sb));
2349 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 2347 if (FILESYSTEM_CHANGED_TB(tb))
2350 return REPEAT_SEARCH; 2348 return REPEAT_SEARCH;
2351 2349
2352 /* this is possible during an indirect_to_direct conversion */ 2350 /* this is possible during an indirect_to_direct conversion */
2353 if (buffer_locked(p_s_tbS0)) { 2351 if (buffer_locked(tbS0)) {
2354 __wait_on_buffer(p_s_tbS0); 2352 __wait_on_buffer(tbS0);
2355 if (FILESYSTEM_CHANGED_TB(p_s_tb)) 2353 if (FILESYSTEM_CHANGED_TB(tb))
2356 return REPEAT_SEARCH; 2354 return REPEAT_SEARCH;
2357 } 2355 }
2358#ifdef CONFIG_REISERFS_CHECK 2356#ifdef CONFIG_REISERFS_CHECK
2359 if (cur_tb) { 2357 if (cur_tb) {
2360 print_cur_tb("fix_nodes"); 2358 print_cur_tb("fix_nodes");
2361 reiserfs_panic(p_s_tb->tb_sb, 2359 reiserfs_panic(tb->tb_sb, "PAP-8305",
2362 "PAP-8305: fix_nodes: there is pending do_balance"); 2360 "there is pending do_balance");
2363 } 2361 }
2364 2362
2365 if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) { 2363 if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
2366 reiserfs_panic(p_s_tb->tb_sb, 2364 reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
2367 "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " 2365 "not uptodate at the beginning of fix_nodes "
2368 "at the beginning of fix_nodes or not in tree (mode %c)", 2366 "or not in tree (mode %c)",
2369 p_s_tbS0, p_s_tbS0, n_op_mode); 2367 tbS0, tbS0, op_mode);
2370 }
2371 2368
2372 /* Check parameters. */ 2369 /* Check parameters. */
2373 switch (n_op_mode) { 2370 switch (op_mode) {
2374 case M_INSERT: 2371 case M_INSERT:
2375 if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0)) 2372 if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
2376 reiserfs_panic(p_s_tb->tb_sb, 2373 reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
2377 "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", 2374 "item number %d (in S0 - %d) in case "
2378 n_item_num, B_NR_ITEMS(p_s_tbS0)); 2375 "of insert", item_num,
2376 B_NR_ITEMS(tbS0));
2379 break; 2377 break;
2380 case M_PASTE: 2378 case M_PASTE:
2381 case M_DELETE: 2379 case M_DELETE:
2382 case M_CUT: 2380 case M_CUT:
2383 if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) { 2381 if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
2384 print_block(p_s_tbS0, 0, -1, -1); 2382 print_block(tbS0, 0, -1, -1);
2385 reiserfs_panic(p_s_tb->tb_sb, 2383 reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
2386 "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", 2384 "item number(%d); mode = %c "
2387 n_item_num, n_op_mode, 2385 "insert_size = %d",
2388 p_s_tb->insert_size[0]); 2386 item_num, op_mode,
2387 tb->insert_size[0]);
2389 } 2388 }
2390 break; 2389 break;
2391 default: 2390 default:
2392 reiserfs_panic(p_s_tb->tb_sb, 2391 reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
2393 "PAP-8340: fix_nodes: Incorrect mode of operation"); 2392 "of operation");
2394 } 2393 }
2395#endif 2394#endif
2396 2395
2397 if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH) 2396 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2398 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat 2397 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2399 return REPEAT_SEARCH; 2398 return REPEAT_SEARCH;
2400 2399
2401 /* Starting from the leaf level, handle all levels n_h of the tree. */ 2400 /* Starting from the leaf level, handle all levels h of the tree. */
2402 for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) { 2401 for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
2403 if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) { 2402 ret = get_direct_parent(tb, h);
2403 if (ret != CARRY_ON)
2404 goto repeat; 2404 goto repeat;
2405 }
2406 2405
2407 if ((n_ret_value = 2406 ret = check_balance(op_mode, tb, h, item_num,
2408 check_balance(n_op_mode, p_s_tb, n_h, n_item_num, 2407 pos_in_item, ins_ih, data);
2409 n_pos_in_item, p_s_ins_ih, 2408 if (ret != CARRY_ON) {
2410 data)) != CARRY_ON) { 2409 if (ret == NO_BALANCING_NEEDED) {
2411 if (n_ret_value == NO_BALANCING_NEEDED) {
2412 /* No balancing for higher levels needed. */ 2410 /* No balancing for higher levels needed. */
2413 if ((n_ret_value = 2411 ret = get_neighbors(tb, h);
2414 get_neighbors(p_s_tb, n_h)) != CARRY_ON) { 2412 if (ret != CARRY_ON)
2415 goto repeat; 2413 goto repeat;
2416 } 2414 if (h != MAX_HEIGHT - 1)
2417 if (n_h != MAX_HEIGHT - 1) 2415 tb->insert_size[h + 1] = 0;
2418 p_s_tb->insert_size[n_h + 1] = 0;
2419 /* ok, analysis and resource gathering are complete */ 2416 /* ok, analysis and resource gathering are complete */
2420 break; 2417 break;
2421 } 2418 }
2422 goto repeat; 2419 goto repeat;
2423 } 2420 }
2424 2421
2425 if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) { 2422 ret = get_neighbors(tb, h);
2423 if (ret != CARRY_ON)
2426 goto repeat; 2424 goto repeat;
2427 }
2428 2425
2429 if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) { 2426 /* No disk space, or schedule occurred and analysis may be
2430 goto repeat; /* No disk space, or schedule occurred and 2427 * invalid and needs to be redone. */
2431 analysis may be invalid and needs to be redone. */ 2428 ret = get_empty_nodes(tb, h);
2432 } 2429 if (ret != CARRY_ON)
2430 goto repeat;
2433 2431
2434 if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) { 2432 if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2435 /* We have a positive insert size but no nodes exist on this 2433 /* We have a positive insert size but no nodes exist on this
2436 level; this means that we are creating a new root. */ 2434 level; this means that we are creating a new root. */
2437 2435
2438 RFALSE(p_s_tb->blknum[n_h] != 1, 2436 RFALSE(tb->blknum[h] != 1,
2439 "PAP-8350: creating new empty root"); 2437 "PAP-8350: creating new empty root");
2440 2438
2441 if (n_h < MAX_HEIGHT - 1) 2439 if (h < MAX_HEIGHT - 1)
2442 p_s_tb->insert_size[n_h + 1] = 0; 2440 tb->insert_size[h + 1] = 0;
2443 } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) { 2441 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2444 if (p_s_tb->blknum[n_h] > 1) { 2442 if (tb->blknum[h] > 1) {
2445 /* The tree needs to be grown, so this node S[n_h] 2443 /* The tree needs to be grown, so this node S[h]
2446 which is the root node is split into two nodes, 2444 which is the root node is split into two nodes,
2447 and a new node (S[n_h+1]) will be created to 2445 and a new node (S[h+1]) will be created to
2448 become the root node. */ 2446 become the root node. */
2449 2447
2450 RFALSE(n_h == MAX_HEIGHT - 1, 2448 RFALSE(h == MAX_HEIGHT - 1,
2451 "PAP-8355: attempt to create too high of a tree"); 2449 "PAP-8355: attempt to create too high of a tree");
2452 2450
2453 p_s_tb->insert_size[n_h + 1] = 2451 tb->insert_size[h + 1] =
2454 (DC_SIZE + 2452 (DC_SIZE +
2455 KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + 2453 KEY_SIZE) * (tb->blknum[h] - 1) +
2456 DC_SIZE; 2454 DC_SIZE;
2457 } else if (n_h < MAX_HEIGHT - 1) 2455 } else if (h < MAX_HEIGHT - 1)
2458 p_s_tb->insert_size[n_h + 1] = 0; 2456 tb->insert_size[h + 1] = 0;
2459 } else 2457 } else
2460 p_s_tb->insert_size[n_h + 1] = 2458 tb->insert_size[h + 1] =
2461 (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); 2459 (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
2462 } 2460 }
2463 2461
2464 if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) { 2462 ret = wait_tb_buffers_until_unlocked(tb);
2465 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2463 if (ret == CARRY_ON) {
2464 if (FILESYSTEM_CHANGED_TB(tb)) {
2466 wait_tb_buffers_run = 1; 2465 wait_tb_buffers_run = 1;
2467 n_ret_value = REPEAT_SEARCH; 2466 ret = REPEAT_SEARCH;
2468 goto repeat; 2467 goto repeat;
2469 } else { 2468 } else {
2470 return CARRY_ON; 2469 return CARRY_ON;
@@ -2485,57 +2484,57 @@ int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_
2485 2484
2486 /* Release path buffers. */ 2485 /* Release path buffers. */
2487 if (wait_tb_buffers_run) { 2486 if (wait_tb_buffers_run) {
2488 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path); 2487 pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2489 } else { 2488 } else {
2490 pathrelse(p_s_tb->tb_path); 2489 pathrelse(tb->tb_path);
2491 } 2490 }
2492 /* brelse all resources collected for balancing */ 2491 /* brelse all resources collected for balancing */
2493 for (i = 0; i < MAX_HEIGHT; i++) { 2492 for (i = 0; i < MAX_HEIGHT; i++) {
2494 if (wait_tb_buffers_run) { 2493 if (wait_tb_buffers_run) {
2495 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2494 reiserfs_restore_prepared_buffer(tb->tb_sb,
2496 p_s_tb->L[i]); 2495 tb->L[i]);
2497 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2496 reiserfs_restore_prepared_buffer(tb->tb_sb,
2498 p_s_tb->R[i]); 2497 tb->R[i]);
2499 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2498 reiserfs_restore_prepared_buffer(tb->tb_sb,
2500 p_s_tb->FL[i]); 2499 tb->FL[i]);
2501 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2500 reiserfs_restore_prepared_buffer(tb->tb_sb,
2502 p_s_tb->FR[i]); 2501 tb->FR[i]);
2503 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2502 reiserfs_restore_prepared_buffer(tb->tb_sb,
2504 p_s_tb-> 2503 tb->
2505 CFL[i]); 2504 CFL[i]);
2506 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2505 reiserfs_restore_prepared_buffer(tb->tb_sb,
2507 p_s_tb-> 2506 tb->
2508 CFR[i]); 2507 CFR[i]);
2509 } 2508 }
2510 2509
2511 brelse(p_s_tb->L[i]); 2510 brelse(tb->L[i]);
2512 p_s_tb->L[i] = NULL; 2511 brelse(tb->R[i]);
2513 brelse(p_s_tb->R[i]); 2512 brelse(tb->FL[i]);
2514 p_s_tb->R[i] = NULL; 2513 brelse(tb->FR[i]);
2515 brelse(p_s_tb->FL[i]); 2514 brelse(tb->CFL[i]);
2516 p_s_tb->FL[i] = NULL; 2515 brelse(tb->CFR[i]);
2517 brelse(p_s_tb->FR[i]); 2516
2518 p_s_tb->FR[i] = NULL; 2517 tb->L[i] = NULL;
2519 brelse(p_s_tb->CFL[i]); 2518 tb->R[i] = NULL;
2520 p_s_tb->CFL[i] = NULL; 2519 tb->FL[i] = NULL;
2521 brelse(p_s_tb->CFR[i]); 2520 tb->FR[i] = NULL;
2522 p_s_tb->CFR[i] = NULL; 2521 tb->CFL[i] = NULL;
2522 tb->CFR[i] = NULL;
2523 } 2523 }
2524 2524
2525 if (wait_tb_buffers_run) { 2525 if (wait_tb_buffers_run) {
2526 for (i = 0; i < MAX_FEB_SIZE; i++) { 2526 for (i = 0; i < MAX_FEB_SIZE; i++) {
2527 if (p_s_tb->FEB[i]) { 2527 if (tb->FEB[i])
2528 reiserfs_restore_prepared_buffer 2528 reiserfs_restore_prepared_buffer
2529 (p_s_tb->tb_sb, p_s_tb->FEB[i]); 2529 (tb->tb_sb, tb->FEB[i]);
2530 }
2531 } 2530 }
2532 } 2531 }
2533 return n_ret_value; 2532 return ret;
2534 } 2533 }
2535 2534
2536} 2535}
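For orientation, the protocol fix_nodes() exports to its callers, sketched as a hypothetical loop (the real callers live in stree.c; only the REPEAT_SEARCH / CARRY_ON contract and the do_balance()/unfix_nodes() pairing are taken from this file):

	int ret;
	do {
		/* the caller re-runs search_by_key() to rebuild tb.tb_path
		 * before each attempt */
		ret = fix_nodes(M_INSERT, &tb, ih, body);
	} while (ret == REPEAT_SEARCH);	/* a schedule invalidated the analysis */

	if (ret == CARRY_ON)
		do_balance(&tb, ih, body, M_INSERT);	/* resources are pinned */
	else
		unfix_nodes(&tb);	/* e.g. no disk space: release everything */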
2537 2536
2538/* Anatoly will probably forgive me renaming p_s_tb to tb. I just 2537/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2539 wanted to make lines shorter */ 2538 wanted to make lines shorter */
2540void unfix_nodes(struct tree_balance *tb) 2539void unfix_nodes(struct tree_balance *tb)
2541{ 2540{
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index e664ac16fad9..6471c670743e 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -7,7 +7,7 @@
7 * (see Applied Cryptography, 2nd edition, p448). 7 * (see Applied Cryptography, 2nd edition, p448).
8 * 8 *
9 * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998 9 * Jeremy Fitzhardinge <jeremy@zip.com.au> 1998
10 * 10 *
11 * Jeremy has agreed to the contents of reiserfs/README. -Hans 11 * Jeremy has agreed to the contents of reiserfs/README. -Hans
12 * Yura's function is added (04/07/2000) 12 * Yura's function is added (04/07/2000)
13 */ 13 */
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index de391a82b999..2074fd95046b 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -105,8 +105,8 @@ static void internal_define_dest_src_infos(int shift_mode,
105 break; 105 break;
106 106
107 default: 107 default:
108 reiserfs_panic(tb->tb_sb, 108 reiserfs_panic(tb->tb_sb, "ibalance-1",
109 "internal_define_dest_src_infos: shift type is unknown (%d)", 109 "shift type is unknown (%d)",
110 shift_mode); 110 shift_mode);
111 } 111 }
112} 112}
@@ -278,7 +278,7 @@ static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
278 278
279/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest 279/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
280* last_first == FIRST_TO_LAST means that we copy first items from src to tail of dest 280* last_first == FIRST_TO_LAST means that we copy first items from src to tail of dest
281 * last_first == LAST_TO_FIRST means that we copy last items from src to head of dest 281 * last_first == LAST_TO_FIRST means that we copy last items from src to head of dest
282 */ 282 */
283static void internal_copy_pointers_items(struct buffer_info *dest_bi, 283static void internal_copy_pointers_items(struct buffer_info *dest_bi,
284 struct buffer_head *src, 284 struct buffer_head *src,
@@ -385,7 +385,7 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi,
385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ 385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
386 first_pointer = 0; 386 first_pointer = 0;
387 first_item = 0; 387 first_item = 0;
388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, 388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
389 for key - with first_item */ 389 for key - with first_item */
390 internal_delete_pointers_items(src_bi, first_pointer, 390 internal_delete_pointers_items(src_bi, first_pointer,
391 first_item, cpy_num - del_par); 391 first_item, cpy_num - del_par);
@@ -453,7 +453,7 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b
453 } 453 }
454} 454}
455 455
456/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 456/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. 457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
458 * Replace d_key'th key in buffer cfl. 458 * Replace d_key'th key in buffer cfl.
459 * Delete pointer_amount items and node pointers from buffer src. 459 * Delete pointer_amount items and node pointers from buffer src.
@@ -518,7 +518,7 @@ static void internal_shift1_left(struct tree_balance *tb,
518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ 518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
519} 519}
520 520
521/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. 521/* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
522 * Copy n node pointers and n - 1 items from buffer src to buffer dest. 522 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
523 * Replace d_key'th key in buffer cfr. 523 * Replace d_key'th key in buffer cfr.
524 * Delete n items and node pointers from buffer src. 524 * Delete n items and node pointers from buffer src.
@@ -702,8 +702,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
702 702
703 return; 703 return;
704 } 704 }
705 reiserfs_panic(tb->tb_sb, 705 reiserfs_panic(tb->tb_sb, "ibalance-2",
706 "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", 706 "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
707 h, tb->lnum[h], h, tb->rnum[h]); 707 h, tb->lnum[h], h, tb->rnum[h]);
708} 708}
709 709
@@ -749,7 +749,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
749 this means that new pointers and items must be inserted AFTER * 749 this means that new pointers and items must be inserted AFTER *
750 child_pos 750 child_pos
751 } 751 }
752 else 752 else
753 { 753 {
754 it is the position of the leftmost pointer that must be deleted (together with 754 it is the position of the leftmost pointer that must be deleted (together with
755 its corresponding key to the left of the pointer) 755 its corresponding key to the left of the pointer)
@@ -940,8 +940,8 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
940 struct block_head *blkh; 940 struct block_head *blkh;
941 941
942 if (tb->blknum[h] != 1) 942 if (tb->blknum[h] != 1)
943 reiserfs_panic(NULL, 943 reiserfs_panic(NULL, "ibalance-3", "One new node "
944 "balance_internal: One new node required for creating the new root"); 944 "required for creating the new root");
945 /* S[h] = empty buffer from the list FEB. */ 945 /* S[h] = empty buffer from the list FEB. */
946 tbSh = get_FEB(tb); 946 tbSh = get_FEB(tb);
947 blkh = B_BLK_HEAD(tbSh); 947 blkh = B_BLK_HEAD(tbSh);
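[Editor's note] The hunks above, and most of the hunks that follow, apply one mechanical conversion: the assertion id (e.g. "ibalance-2") moves out of the format string into its own argument of reiserfs_panic()/reiserfs_warning(), and the redundant function name is dropped from the message. A before/after sketch of the convention, restating the ibalance-2 hunk above:

	/* old style: id and function name baked into the format string */
	reiserfs_panic(tb->tb_sb,
		       "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
		       h, tb->lnum[h], h, tb->rnum[h]);

	/* new style: id is a separate argument, the message stands alone */
	reiserfs_panic(tb->tb_sb, "ibalance-2",
		       "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
		       h, tb->lnum[h], h, tb->rnum[h]);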
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 823227a7662a..6fd0f47e45db 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -363,7 +363,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
 	}
 	/* make sure we don't read more bytes than actually exist in
 	** the file. This can happen in odd cases where i_size isn't
 	** correct, and when direct item padding results in a few
 	** extra bytes at the end of the direct item
 	*/
 	if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
@@ -438,15 +438,15 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
 ** -ENOENT instead of a valid buffer. block_prepare_write expects to
 ** be able to do i/o on the buffers returned, unless an error value
 ** is also returned.
 **
 ** So, this allows block_prepare_write to be used for reading a single block
 ** in a page. Where it does not produce a valid page for holes, or past the
 ** end of the file. This turns out to be exactly what we need for reading
 ** tails for conversion.
 **
 ** The point of the wrapper is forcing a certain value for create, even
 ** though the VFS layer is calling this function with create==1. If you
 ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
 ** don't use this function.
 */
 static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
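[Editor's note] The hunk above only touches the comment, but the wrapper it describes is easy to misread. A minimal sketch of what the comment says the function does, i.e. override whatever create value the VFS passed in; the actual body is not shown in this hunk, so treat this as a restatement, not the implementation:

	static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
					       struct buffer_head *bh_result,
					       int create)
	{
		/* ignore create == 1 from the VFS; never fill holes on this path */
		return reiserfs_get_block(inode, block, bh_result,
					  GET_BLOCK_NO_HOLE);
	}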
@@ -602,7 +602,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 	int done;
 	int fs_gen;
 	struct reiserfs_transaction_handle *th = NULL;
 	/* space reserved in transaction batch:
 	   . 3 balancings in direct->indirect conversion
 	   . 1 block involved into reiserfs_update_sd()
 	   XXX in practically impossible worst case direct2indirect()
@@ -754,7 +754,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 		reiserfs_write_unlock(inode->i_sb);
 
 		/* the item was found, so new blocks were not added to the file
 		** there is no need to make sure the inode is updated with this
 		** transaction
 		*/
 		return retval;
@@ -841,10 +841,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 						 tail_offset);
 		if (retval) {
 			if (retval != -ENOSPC)
-				reiserfs_warning(inode->i_sb,
-						 "clm-6004: convert tail failed inode %lu, error %d",
-						 inode->i_ino,
-						 retval);
+				reiserfs_error(inode->i_sb,
+					       "clm-6004",
+					       "convert tail failed "
+					       "inode %lu, error %d",
+					       inode->i_ino,
+					       retval);
 			if (allocated_block_nr) {
 				/* the bitmap, the super, and the stat data == 3 */
 				if (!th)
@@ -984,7 +986,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 
 			/* this loop could log more blocks than we had originally asked
 			** for. So, we have to allow the transaction to end if it is
 			** too big or too full. Update the inode so things are
 			** consistent if we crash before the function returns
 			**
 			** release the path so that anybody waiting on the path before
@@ -995,7 +997,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 				if (retval)
 					goto failure;
 			}
 			/* inserting indirect pointers for a hole can take a
 			** long time. reschedule if needed
 			*/
 			cond_resched();
@@ -1006,8 +1008,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 			goto failure;
 		}
 		if (retval == POSITION_FOUND) {
-			reiserfs_warning(inode->i_sb,
-					 "vs-825: reiserfs_get_block: "
+			reiserfs_warning(inode->i_sb, "vs-825",
 					 "%K should not be found", &key);
 			retval = -EEXIST;
 			if (allocated_block_nr)
@@ -1299,8 +1300,7 @@ static void update_stat_data(struct treepath *path, struct inode *inode,
 	ih = PATH_PITEM_HEAD(path);
 
 	if (!is_statdata_le_ih(ih))
-		reiserfs_panic(inode->i_sb,
-			       "vs-13065: update_stat_data: key %k, found item %h",
+		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
 			       INODE_PKEY(inode), ih);
 
 	if (stat_data_v1(ih)) {
@@ -1332,10 +1332,9 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
 	/* look for the object's stat data */
 	retval = search_item(inode->i_sb, &key, &path);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(inode->i_sb,
-				 "vs-13050: reiserfs_update_sd: "
-				 "i/o failure occurred trying to update %K stat data",
-				 &key);
+		reiserfs_error(inode->i_sb, "vs-13050",
+			       "i/o failure occurred trying to "
+			       "update %K stat data", &key);
 		return;
 	}
 	if (retval == ITEM_NOT_FOUND) {
@@ -1345,9 +1344,9 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
 			/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
 			return;
 		}
-		reiserfs_warning(inode->i_sb,
-				 "vs-13060: reiserfs_update_sd: "
-				 "stat data of object %k (nlink == %d) not found (pos %d)",
+		reiserfs_warning(inode->i_sb, "vs-13060",
+				 "stat data of object %k (nlink == %d) "
+				 "not found (pos %d)",
 				 INODE_PKEY(inode), inode->i_nlink,
 				 pos);
 		reiserfs_check_path(&path);
@@ -1424,10 +1423,9 @@ void reiserfs_read_locked_inode(struct inode *inode,
 	/* look for the object's stat data */
 	retval = search_item(inode->i_sb, &key, &path_to_sd);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(inode->i_sb,
-				 "vs-13070: reiserfs_read_locked_inode: "
-				 "i/o failure occurred trying to find stat data of %K",
-				 &key);
+		reiserfs_error(inode->i_sb, "vs-13070",
+			       "i/o failure occurred trying to find "
+			       "stat data of %K", &key);
 		reiserfs_make_bad_inode(inode);
 		return;
 	}
@@ -1446,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
 	   update sd on unlink all that is required is to check for nlink
 	   here. This bug was first found by Sizif when debugging
 	   SquidNG/Butterfly, forgotten, and found again after Philippe
 	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
 
 	   More logical fix would require changes in fs/inode.c:iput() to
 	   remove inode from hash-table _after_ fs cleaned disk stuff up and
@@ -1457,8 +1455,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
 	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
 	if ((inode->i_nlink == 0) &&
 	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
-		reiserfs_warning(inode->i_sb,
-				 "vs-13075: reiserfs_read_locked_inode: "
+		reiserfs_warning(inode->i_sb, "vs-13075",
 				 "dead inode read from disk %K. "
 				 "This is likely to be race with knfsd. Ignore",
 				 &key);
@@ -1555,7 +1552,7 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 	 */
 	if (fh_type > fh_len) {
 		if (fh_type != 6 || fh_len != 5)
-			reiserfs_warning(sb,
+			reiserfs_warning(sb, "reiserfs-13077",
 					 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
 					 fh_type, fh_len);
 		fh_type = 5;
@@ -1622,7 +1619,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync)
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return -EROFS;
 	/* memory pressure can sometimes initiate write_inode calls with sync == 1,
 	** these cases are just when the system needs ram, not when the
 	** inode needs to reach disk for safety, and they can safely be
 	** ignored because the altered inode has already been logged.
 	*/
@@ -1680,13 +1677,13 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
 	/* look for place in the tree for new item */
 	retval = search_item(sb, &key, path);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: "
+		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new directory");
 		return -EIO;
 	}
 	if (retval == ITEM_FOUND) {
 		pathrelse(path);
-		reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: "
+		reiserfs_warning(sb, "vs-13070",
 				 "object with this key exists (%k)",
 				 &(ih->ih_key));
 		return -EEXIST;
@@ -1720,13 +1717,13 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
 	/* look for place in the tree for new item */
 	retval = search_item(sb, &key, path);
 	if (retval == IO_ERROR) {
-		reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: "
+		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new symlink");
 		return -EIO;
 	}
 	if (retval == ITEM_FOUND) {
 		pathrelse(path);
-		reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: "
+		reiserfs_warning(sb, "vs-13080",
 				 "object with this key exists (%k)",
 				 &(ih->ih_key));
 		return -EEXIST;
@@ -1739,7 +1736,7 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
 /* inserts the stat data into the tree, and then calls
    reiserfs_new_directory (to insert ".", ".." item if new object is
    directory) or reiserfs_new_symlink (to insert symlink body if new
    object is symlink) or nothing (if new object is regular file)
 
    NOTE! uid and gid must already be set in the inode. If we return
    non-zero due to an error, we have to drop the quota previously allocated
@@ -1747,10 +1744,11 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
    if we return non-zero, we also end the transaction. */
 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		       struct inode *dir, int mode, const char *symname,
 		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
 		          strlen (symname) for symlinks) */
 		       loff_t i_size, struct dentry *dentry,
-		       struct inode *inode)
+		       struct inode *inode,
+		       struct reiserfs_security_handle *security)
 {
 	struct super_block *sb;
 	struct reiserfs_iget_args args;
@@ -1796,7 +1794,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		goto out_bad_inode;
 	}
 	if (old_format_only(sb))
 		/* not a perfect generation count, as object ids can be reused, but
 		** this is as good as reiserfs can do right now.
 		** note that the private part of inode isn't filled in yet, we have
 		** to use the directory.
@@ -1917,9 +1915,8 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		goto out_inserted_sd;
 	}
 
-	/* XXX CHECK THIS */
 	if (reiserfs_posixacl(inode->i_sb)) {
-		retval = reiserfs_inherit_default_acl(dir, dentry, inode);
+		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
 		if (retval) {
 			err = retval;
 			reiserfs_check_path(&path_to_key);
@@ -1927,10 +1924,23 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 			goto out_inserted_sd;
 		}
 	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
-		reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
+		reiserfs_warning(inode->i_sb, "jdm-13090",
+				 "ACLs aren't enabled in the fs, "
 				 "but vfs thinks they are!");
-	} else if (is_reiserfs_priv_object(dir)) {
-		reiserfs_mark_inode_private(inode);
+	} else if (IS_PRIVATE(dir))
+		inode->i_flags |= S_PRIVATE;
+
+	if (security->name) {
+		retval = reiserfs_security_write(th, inode, security);
+		if (retval) {
+			err = retval;
+			reiserfs_check_path(&path_to_key);
+			retval = journal_end(th, th->t_super,
+					     th->t_blocks_allocated);
+			if (retval)
+				err = retval;
+			goto out_inserted_sd;
+		}
 	}
 
 	reiserfs_update_sd(th, inode);
@@ -1960,19 +1970,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	inode->i_nlink = 0;
 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
 	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
-
-	/* If we were inheriting an ACL, we need to release the lock so that
-	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
-	 * code really needs to be reworked, but this will take care of it
-	 * for now. -jeffm */
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
-		reiserfs_write_unlock_xattrs(dir->i_sb);
-		iput(inode);
-		reiserfs_write_lock_xattrs(dir->i_sb);
-	} else
-#endif
-		iput(inode);
+	iput(inode);
 	return err;
 }
 
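[Editor's note] The reiserfs_new_inode() hunks above add a struct reiserfs_security_handle argument and, when security->name is set, write the security xattr inside the same transaction via reiserfs_security_write(). A hedged caller-side sketch; real callers (the create/mkdir/symlink paths) also handle quota, locking and dentry setup, all omitted here, and the empty handle for the no-xattr case is an assumption:

	struct reiserfs_security_handle sec = { .name = NULL };	/* no security xattr to write */
	int retval;

	retval = reiserfs_new_inode(&th, dir, mode, NULL /* not a symlink */,
				    0 /* regular file => i_size 0 */,
				    dentry, inode, &sec);
	if (retval)
		return retval;	/* per the comment above, the transaction was already ended */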
@@ -1989,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 **
 ** on failure, nonzero is returned, page_result and bh_result are untouched.
 */
-static int grab_tail_page(struct inode *p_s_inode,
+static int grab_tail_page(struct inode *inode,
 			  struct page **page_result,
 			  struct buffer_head **bh_result)
 {
@@ -1997,11 +1995,11 @@ static int grab_tail_page(struct inode *p_s_inode,
 	/* we want the page with the last byte in the file,
 	** not the page that will hold the next byte for appending
 	*/
-	unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
 	unsigned long pos = 0;
 	unsigned long start = 0;
-	unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
-	unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
+	unsigned long blocksize = inode->i_sb->s_blocksize;
+	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
 	struct buffer_head *bh;
 	struct buffer_head *head;
 	struct page *page;
@@ -2015,7 +2013,7 @@ static int grab_tail_page(struct inode *p_s_inode,
 	if ((offset & (blocksize - 1)) == 0) {
 		return -ENOENT;
 	}
-	page = grab_cache_page(p_s_inode->i_mapping, index);
+	page = grab_cache_page(inode->i_mapping, index);
 	error = -ENOMEM;
 	if (!page) {
 		goto out;
@@ -2044,10 +2042,8 @@ static int grab_tail_page(struct inode *p_s_inode,
 		** I've screwed up the code to find the buffer, or the code to
 		** call prepare_write
 		*/
-		reiserfs_warning(p_s_inode->i_sb,
-				 "clm-6000: error reading block %lu on dev %s",
-				 bh->b_blocknr,
-				 reiserfs_bdevname(p_s_inode->i_sb));
+		reiserfs_error(inode->i_sb, "clm-6000",
			       "error reading block %lu", bh->b_blocknr);
 		error = -EIO;
 		goto unlock;
 	}
@@ -2069,57 +2065,58 @@ static int grab_tail_page(struct inode *p_s_inode,
 **
 ** some code taken from block_truncate_page
 */
-int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
+int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
 {
 	struct reiserfs_transaction_handle th;
 	/* we want the offset for the first byte after the end of the file */
-	unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
-	unsigned blocksize = p_s_inode->i_sb->s_blocksize;
+	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	unsigned blocksize = inode->i_sb->s_blocksize;
 	unsigned length;
 	struct page *page = NULL;
 	int error;
 	struct buffer_head *bh = NULL;
 	int err2;
 
-	reiserfs_write_lock(p_s_inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 
-	if (p_s_inode->i_size > 0) {
-		if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
-			// -ENOENT means we truncated past the end of the file,
+	if (inode->i_size > 0) {
+		error = grab_tail_page(inode, &page, &bh);
+		if (error) {
+			// -ENOENT means we truncated past the end of the file,
 			// and get_block_create_0 could not find a block to read in,
 			// which is ok.
 			if (error != -ENOENT)
-				reiserfs_warning(p_s_inode->i_sb,
-						 "clm-6001: grab_tail_page failed %d",
+				reiserfs_error(inode->i_sb, "clm-6001",
					       "grab_tail_page failed %d",
 						 error);
 			page = NULL;
 			bh = NULL;
 		}
 	}
 
 	/* so, if page != NULL, we have a buffer head for the offset at
 	** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
 	** then we have an unformatted node. Otherwise, we have a direct item,
 	** and no zeroing is required on disk. We zero after the truncate,
 	** because the truncate might pack the item anyway
 	** (it will unmap bh if it packs).
 	*/
 	/* it is enough to reserve space in transaction for 2 balancings:
 	   one for "save" link adding and another for the first
 	   cut_from_item. 1 is for update_sd */
-	error = journal_begin(&th, p_s_inode->i_sb,
+	error = journal_begin(&th, inode->i_sb,
 			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
 	if (error)
 		goto out;
-	reiserfs_update_inode_transaction(p_s_inode);
+	reiserfs_update_inode_transaction(inode);
 	if (update_timestamps)
 		/* we are doing real truncate: if the system crashes before the last
 		   transaction of truncating gets committed - on reboot the file
 		   either appears truncated properly or not truncated at all */
-		add_save_link(&th, p_s_inode, 1);
+		add_save_link(&th, inode, 1);
-	err2 = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
+	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
 	error =
-	    journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
+	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
 	if (error)
 		goto out;
 
@@ -2130,7 +2127,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
 	}
 
 	if (update_timestamps) {
-		error = remove_save_link(p_s_inode, 1 /* truncate */ );
+		error = remove_save_link(inode, 1 /* truncate */);
 		if (error)
 			goto out;
 	}
@@ -2149,14 +2146,14 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
 		page_cache_release(page);
 	}
 
-	reiserfs_write_unlock(p_s_inode->i_sb);
+	reiserfs_write_unlock(inode->i_sb);
 	return 0;
 out:
 	if (page) {
 		unlock_page(page);
 		page_cache_release(page);
 	}
-	reiserfs_write_unlock(p_s_inode->i_sb);
+	reiserfs_write_unlock(inode->i_sb);
 	return error;
 }
 
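[Editor's note] The block estimate passed to journal_begin()/journal_end() in the truncate hunk above is exactly the arithmetic its in-code comment spells out; restated as a one-line sketch (the variable name is illustrative, JOURNAL_PER_BALANCE_CNT is the real constant):

	/* 2 balancings (save-link insert + first cut_from_item), plus 1 for update_sd */
	int reserved = JOURNAL_PER_BALANCE_CNT * 2 + 1;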
@@ -2208,9 +2205,8 @@ static int map_block_for_writepage(struct inode *inode,
 	/* we've found an unformatted node */
 	if (indirect_item_found(retval, ih)) {
 		if (bytes_copied > 0) {
-			reiserfs_warning(inode->i_sb,
-					 "clm-6002: bytes_copied %d",
-					 bytes_copied);
+			reiserfs_warning(inode->i_sb, "clm-6002",
+					 "bytes_copied %d", bytes_copied);
 		}
 		if (!get_block_num(item, pos_in_item)) {
 			/* crap, we are writing to a hole */
@@ -2267,9 +2263,8 @@ static int map_block_for_writepage(struct inode *inode,
 				goto research;
 			}
 		} else {
-			reiserfs_warning(inode->i_sb,
-					 "clm-6003: bad item inode %lu, device %s",
-					 inode->i_ino, reiserfs_bdevname(inode->i_sb));
+			reiserfs_warning(inode->i_sb, "clm-6003",
+					 "bad item inode %lu", inode->i_ino);
 			retval = -EIO;
 			goto out;
 		}
@@ -2312,8 +2307,8 @@ static int map_block_for_writepage(struct inode *inode,
 	return retval;
 }
 
 /*
 * mason@suse.com: updated in 2.5.54 to follow the same general io
 * start/recovery path as __block_write_full_page, along with special
 * code to handle reiserfs tails.
 */
@@ -2453,7 +2448,7 @@ static int reiserfs_write_full_page(struct page *page,
 	unlock_page(page);
 
 	/*
 	 * since any buffer might be the only dirty buffer on the page,
 	 * the first submit_bh can bring the page out of writeback.
 	 * be careful with the buffers.
 	 */
@@ -2472,8 +2467,8 @@ static int reiserfs_write_full_page(struct page *page,
 	if (nr == 0) {
 		/*
 		 * if this page only had a direct item, it is very possible for
 		 * no io to be required without there being an error. Or,
 		 * someone else could have locked them and sent them down the
 		 * pipe without locking the page
 		 */
 		bh = head;
@@ -2492,7 +2487,7 @@ static int reiserfs_write_full_page(struct page *page,
 
 fail:
 	/* catches various errors, we need to make sure any valid dirty blocks
 	 * get to the media. The page is currently locked and not marked for
 	 * writeback
 	 */
 	ClearPageUptodate(page);
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 830332021ed4..0ccc3fdda7bf 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -189,7 +189,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	}
 
 	/* we unpack by finding the page with the tail, and calling
 	** reiserfs_prepare_write on that page. This will force a
 	** reiserfs_get_block to unpack the tail for us.
 	*/
 	index = inode->i_size >> PAGE_CACHE_SHIFT;
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 9475557ab499..72cb1cc51b87 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -97,7 +97,8 @@ static int sd_unit_num(struct virtual_item *vi)
 
 static void sd_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL, "STATDATA, index %d, type 0x%x, %h",
+	reiserfs_warning(NULL, "reiserfs-16100",
+			 "STATDATA, index %d, type 0x%x, %h",
 			 vi->vi_index, vi->vi_type, vi->vi_ih);
 }
 
@@ -190,7 +191,8 @@ static int direct_unit_num(struct virtual_item *vi)
 
 static void direct_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL, "DIRECT, index %d, type 0x%x, %h",
+	reiserfs_warning(NULL, "reiserfs-16101",
+			 "DIRECT, index %d, type 0x%x, %h",
 			 vi->vi_index, vi->vi_type, vi->vi_ih);
 }
 
@@ -278,7 +280,7 @@ static void indirect_print_item(struct item_head *ih, char *item)
 	unp = (__le32 *) item;
 
 	if (ih_item_len(ih) % UNFM_P_SIZE)
-		reiserfs_warning(NULL, "indirect_print_item: invalid item len");
+		reiserfs_warning(NULL, "reiserfs-16102", "invalid item len");
 
 	printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih));
 	for (j = 0; j < I_UNFM_NUM(ih); j++) {
@@ -334,7 +336,8 @@ static int indirect_unit_num(struct virtual_item *vi)
 
 static void indirect_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL, "INDIRECT, index %d, type 0x%x, %h",
+	reiserfs_warning(NULL, "reiserfs-16103",
+			 "INDIRECT, index %d, type 0x%x, %h",
 			 vi->vi_index, vi->vi_type, vi->vi_ih);
 }
 
@@ -359,7 +362,7 @@ static struct item_operations indirect_ops = {
 
 static int direntry_bytes_number(struct item_head *ih, int block_size)
 {
-	reiserfs_warning(NULL, "vs-16090: direntry_bytes_number: "
+	reiserfs_warning(NULL, "vs-16090",
 			 "bytes number is asked for direntry");
 	return 0;
 }
@@ -514,8 +517,9 @@ static int direntry_create_vi(struct virtual_node *vn,
 	    ((is_affected
 	      && (vn->vn_mode == M_PASTE
 		  || vn->vn_mode == M_CUT)) ? insert_size : 0)) {
-		reiserfs_panic(NULL,
-			       "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item",
+		reiserfs_panic(NULL, "vs-8025", "(mode==%c, "
+			       "insert_size==%d), invalid length of "
+			       "directory item",
 			       vn->vn_mode, insert_size);
 	}
 }
@@ -546,7 +550,8 @@ static int direntry_check_left(struct virtual_item *vi, int free,
 	}
 
 	if (entries == dir_u->entry_count) {
-		reiserfs_panic(NULL, "free space %d, entry_count %d\n", free,
+		reiserfs_panic(NULL, "item_ops-1",
+			       "free space %d, entry_count %d", free,
 			       dir_u->entry_count);
 	}
 
@@ -614,7 +619,8 @@ static void direntry_print_vi(struct virtual_item *vi)
 	int i;
 	struct direntry_uarea *dir_u = vi->vi_uarea;
 
-	reiserfs_warning(NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
+	reiserfs_warning(NULL, "reiserfs-16104",
+			 "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
 			 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags);
 	printk("%d entries: ", dir_u->entry_count);
 	for (i = 0; i < dir_u->entry_count; i++)
@@ -642,43 +648,43 @@ static struct item_operations direntry_ops = {
 //
 static int errcatch_bytes_number(struct item_head *ih, int block_size)
 {
-	reiserfs_warning(NULL,
-			 "green-16001: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16001",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static void errcatch_decrement_key(struct cpu_key *key)
 {
-	reiserfs_warning(NULL,
-			 "green-16002: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16002",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static int errcatch_is_left_mergeable(struct reiserfs_key *key,
 				      unsigned long bsize)
 {
-	reiserfs_warning(NULL,
-			 "green-16003: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16003",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static void errcatch_print_item(struct item_head *ih, char *item)
 {
-	reiserfs_warning(NULL,
-			 "green-16004: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16004",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static void errcatch_check_item(struct item_head *ih, char *item)
 {
-	reiserfs_warning(NULL,
-			 "green-16005: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16005",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static int errcatch_create_vi(struct virtual_node *vn,
 			      struct virtual_item *vi,
 			      int is_affected, int insert_size)
 {
-	reiserfs_warning(NULL,
-			 "green-16006: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16006",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;	// We might return -1 here as well, but it won't help as create_virtual_node() from where
 			// this operation is called from is of return type void.
 }
@@ -686,36 +692,36 @@ static int errcatch_create_vi(struct virtual_node *vn,
 static int errcatch_check_left(struct virtual_item *vi, int free,
 			       int start_skip, int end_skip)
 {
-	reiserfs_warning(NULL,
-			 "green-16007: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16007",
+			 "Invalid item type observed, run fsck ASAP");
 	return -1;
 }
 
 static int errcatch_check_right(struct virtual_item *vi, int free)
 {
-	reiserfs_warning(NULL,
-			 "green-16008: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16008",
+			 "Invalid item type observed, run fsck ASAP");
 	return -1;
 }
 
 static int errcatch_part_size(struct virtual_item *vi, int first, int count)
 {
-	reiserfs_warning(NULL,
-			 "green-16009: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16009",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static int errcatch_unit_num(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL,
-			 "green-16010: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16010",
+			 "Invalid item type observed, run fsck ASAP");
 	return 0;
 }
 
 static void errcatch_print_vi(struct virtual_item *vi)
 {
-	reiserfs_warning(NULL,
-			 "green-16011: Invalid item type observed, run fsck ASAP");
+	reiserfs_warning(NULL, "green-16011",
+			 "Invalid item type observed, run fsck ASAP");
 }
 
 static struct item_operations errcatch_ops = {
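[Editor's note] These errcatch_* stubs exist so that an on-disk corruption yielding an unknown item type degrades into a warning instead of a call through a garbage function pointer. As a hedged sketch of how they plug in: the real dispatch table lives elsewhere in fs/reiserfs/item_ops.c and is not part of these hunks, and the names stat_data_ops and direct_ops plus the slot order are assumptions here; only indirect_ops, direntry_ops and errcatch_ops appear in the hunks above:

	/* assumed shape of the per-item-type dispatch table; errcatch_ops
	 * backs the catch-all slot for invalid/unknown item types */
	struct item_operations *item_ops[] = {
		&stat_data_ops,
		&direct_ops,
		&indirect_ops,
		&direntry_ops,
		&errcatch_ops
	};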
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 9643c3bbeb3b..77f5bb746bf0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,36 +1,36 @@
 /*
 ** Write ahead logging implementation copyright Chris Mason 2000
 **
 ** The background commits make this code very interelated, and
 ** overly complex. I need to rethink things a bit....The major players:
 **
 ** journal_begin -- call with the number of blocks you expect to log.
 **                  If the current transaction is too
 **                  old, it will block until the current transaction is
 **                  finished, and then start a new one.
 **                  Usually, your transaction will get joined in with
 **                  previous ones for speed.
 **
 ** journal_join  -- same as journal_begin, but won't block on the current
 **                  transaction regardless of age. Don't ever call
 **                  this. Ever. There are only two places it should be
 **                  called from, and they are both inside this file.
 **
 ** journal_mark_dirty -- adds blocks into this transaction. clears any flags
 **                       that might make them get sent to disk
 **                       and then marks them BH_JDirty. Puts the buffer head
 **                       into the current transaction hash.
 **
 ** journal_end -- if the current transaction is batchable, it does nothing
 **                otherwise, it could do an async/synchronous commit, or
 **                a full flush of all log and real blocks in the
 **                transaction.
 **
 ** flush_old_commits -- if the current transaction is too old, it is ended and
 **                      commit blocks are sent to disk. Forces commit blocks
 **                      to disk for all backgrounded commits that have been
 **                      around too long.
 **                      -- Note, if you call this as an immediate flush from
 **                      from within kupdate, it will ignore the immediate flag
 */
 
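[Editor's note] A hedged sketch of the begin/mark/end pattern the comment above describes. journal_begin(), journal_mark_dirty() and journal_end() are the entry points the comment names; the handle, superblock and block estimate here are illustrative, and error handling is abbreviated:

	struct reiserfs_transaction_handle th;
	int err;

	/* reserve an estimate of the blocks this transaction will log */
	err = journal_begin(&th, sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (err)
		return err;

	/* ... modify buffers, journal_mark_dirty() each one ... */

	/* end with the same count; may batch, commit async, or flush */
	err = journal_end(&th, sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);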
@@ -97,7 +97,7 @@ static int flush_commit_list(struct super_block *s,
 			     struct reiserfs_journal_list *jl, int flushall);
 static int can_dirty(struct reiserfs_journal_cnode *cn);
 static int journal_join(struct reiserfs_transaction_handle *th,
-			struct super_block *p_s_sb, unsigned long nblocks);
+			struct super_block *sb, unsigned long nblocks);
 static int release_journal_dev(struct super_block *super,
 			       struct reiserfs_journal *journal);
 static int dirty_one_transaction(struct super_block *s,
@@ -113,12 +113,12 @@ enum {
 };
 
 static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
-			      struct super_block *p_s_sb,
+			      struct super_block *sb,
 			      unsigned long nblocks, int join);
 
-static void init_journal_hash(struct super_block *p_s_sb)
+static void init_journal_hash(struct super_block *sb)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	memset(journal->j_hash_table, 0,
 	       JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
 }
@@ -145,7 +145,7 @@ static void disable_barrier(struct super_block *s)
 }
 
 static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
-							 *p_s_sb)
+							 *sb)
 {
 	struct reiserfs_bitmap_node *bn;
 	static int id;
@@ -154,7 +154,7 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
 	if (!bn) {
 		return NULL;
 	}
-	bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
+	bn->data = kzalloc(sb->s_blocksize, GFP_NOFS);
 	if (!bn->data) {
 		kfree(bn);
 		return NULL;
@@ -164,9 +164,9 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
 	return bn;
 }
 
-static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
+static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct reiserfs_bitmap_node *bn = NULL;
 	struct list_head *entry = journal->j_bitmap_nodes.next;
 
@@ -176,21 +176,21 @@ static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
 	if (entry != &journal->j_bitmap_nodes) {
 		bn = list_entry(entry, struct reiserfs_bitmap_node, list);
 		list_del(entry);
-		memset(bn->data, 0, p_s_sb->s_blocksize);
+		memset(bn->data, 0, sb->s_blocksize);
 		journal->j_free_bitmap_nodes--;
 		return bn;
 	}
-	bn = allocate_bitmap_node(p_s_sb);
+	bn = allocate_bitmap_node(sb);
 	if (!bn) {
 		yield();
 		goto repeat;
 	}
 	return bn;
 }
-static inline void free_bitmap_node(struct super_block *p_s_sb,
+static inline void free_bitmap_node(struct super_block *sb,
 				    struct reiserfs_bitmap_node *bn)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	journal->j_used_bitmap_nodes--;
 	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
 		kfree(bn->data);
@@ -201,46 +201,46 @@ static inline void free_bitmap_node(struct super_block *p_s_sb,
 	}
 }
 
-static void allocate_bitmap_nodes(struct super_block *p_s_sb)
+static void allocate_bitmap_nodes(struct super_block *sb)
 {
 	int i;
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct reiserfs_bitmap_node *bn = NULL;
 	for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
-		bn = allocate_bitmap_node(p_s_sb);
+		bn = allocate_bitmap_node(sb);
 		if (bn) {
 			list_add(&bn->list, &journal->j_bitmap_nodes);
 			journal->j_free_bitmap_nodes++;
 		} else {
-			break;	// this is ok, we'll try again when more are needed
+			break;	/* this is ok, we'll try again when more are needed */
 		}
 	}
 }
 
-static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
+static int set_bit_in_list_bitmap(struct super_block *sb,
 				  b_blocknr_t block,
 				  struct reiserfs_list_bitmap *jb)
 {
-	unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
-	unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
+	unsigned int bmap_nr = block / (sb->s_blocksize << 3);
+	unsigned int bit_nr = block % (sb->s_blocksize << 3);
 
 	if (!jb->bitmaps[bmap_nr]) {
-		jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
+		jb->bitmaps[bmap_nr] = get_bitmap_node(sb);
 	}
 	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
 	return 0;
 }
 
-static void cleanup_bitmap_list(struct super_block *p_s_sb,
+static void cleanup_bitmap_list(struct super_block *sb,
 				struct reiserfs_list_bitmap *jb)
 {
 	int i;
 	if (jb->bitmaps == NULL)
 		return;
 
-	for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
+	for (i = 0; i < reiserfs_bmap_count(sb); i++) {
 		if (jb->bitmaps[i]) {
-			free_bitmap_node(p_s_sb, jb->bitmaps[i]);
+			free_bitmap_node(sb, jb->bitmaps[i]);
 			jb->bitmaps[i] = NULL;
 		}
 	}
@@ -249,7 +249,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb,
 /*
 ** only call this on FS unmount.
 */
-static int free_list_bitmaps(struct super_block *p_s_sb,
+static int free_list_bitmaps(struct super_block *sb,
 			     struct reiserfs_list_bitmap *jb_array)
 {
 	int i;
@@ -257,16 +257,16 @@ static int free_list_bitmaps(struct super_block *p_s_sb,
 	for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
 		jb = jb_array + i;
 		jb->journal_list = NULL;
-		cleanup_bitmap_list(p_s_sb, jb);
+		cleanup_bitmap_list(sb, jb);
 		vfree(jb->bitmaps);
 		jb->bitmaps = NULL;
 	}
 	return 0;
 }
 
-static int free_bitmap_nodes(struct super_block *p_s_sb)
+static int free_bitmap_nodes(struct super_block *sb)
 {
-	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	struct list_head *next = journal->j_bitmap_nodes.next;
 	struct reiserfs_bitmap_node *bn;
 
@@ -283,10 +283,10 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
283} 283}
284 284
285/* 285/*
286** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 286** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
287** jb_array is the array to be filled in. 287** jb_array is the array to be filled in.
288*/ 288*/
289int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 289int reiserfs_allocate_list_bitmaps(struct super_block *sb,
290 struct reiserfs_list_bitmap *jb_array, 290 struct reiserfs_list_bitmap *jb_array,
291 unsigned int bmap_nr) 291 unsigned int bmap_nr)
292{ 292{
@@ -300,30 +300,30 @@ int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
300 jb->journal_list = NULL; 300 jb->journal_list = NULL;
301 jb->bitmaps = vmalloc(mem); 301 jb->bitmaps = vmalloc(mem);
302 if (!jb->bitmaps) { 302 if (!jb->bitmaps) {
303 reiserfs_warning(p_s_sb, 303 reiserfs_warning(sb, "clm-2000", "unable to "
304 "clm-2000, unable to allocate bitmaps for journal lists"); 304 "allocate bitmaps for journal lists");
305 failed = 1; 305 failed = 1;
306 break; 306 break;
307 } 307 }
308 memset(jb->bitmaps, 0, mem); 308 memset(jb->bitmaps, 0, mem);
309 } 309 }
310 if (failed) { 310 if (failed) {
311 free_list_bitmaps(p_s_sb, jb_array); 311 free_list_bitmaps(sb, jb_array);
312 return -1; 312 return -1;
313 } 313 }
314 return 0; 314 return 0;
315} 315}
316 316
317/* 317/*
318** find an available list bitmap. If you can't find one, flush a commit list 318** find an available list bitmap. If you can't find one, flush a commit list
319** and try again 319** and try again
320*/ 320*/
321static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb, 321static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
322 struct reiserfs_journal_list 322 struct reiserfs_journal_list
323 *jl) 323 *jl)
324{ 324{
325 int i, j; 325 int i, j;
326 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 326 struct reiserfs_journal *journal = SB_JOURNAL(sb);
327 struct reiserfs_list_bitmap *jb = NULL; 327 struct reiserfs_list_bitmap *jb = NULL;
328 328
329 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) { 329 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
@@ -331,7 +331,7 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
331 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS; 331 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
332 jb = journal->j_list_bitmap + i; 332 jb = journal->j_list_bitmap + i;
333 if (journal->j_list_bitmap[i].journal_list) { 333 if (journal->j_list_bitmap[i].journal_list) {
334 flush_commit_list(p_s_sb, 334 flush_commit_list(sb,
335 journal->j_list_bitmap[i]. 335 journal->j_list_bitmap[i].
336 journal_list, 1); 336 journal_list, 1);
337 if (!journal->j_list_bitmap[i].journal_list) { 337 if (!journal->j_list_bitmap[i].journal_list) {
@@ -348,7 +348,7 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
348 return jb; 348 return jb;
349} 349}
350 350
351/* 351/*
352** allocates a new chunk of X nodes, and links them all together as a list. 352** allocates a new chunk of X nodes, and links them all together as a list.
353** Uses the cnode->next and cnode->prev pointers 353** Uses the cnode->next and cnode->prev pointers
354** returns NULL on failure 354** returns NULL on failure
@@ -376,14 +376,14 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
376} 376}
377 377
378/* 378/*
379** pulls a cnode off the free list, or returns NULL on failure 379** pulls a cnode off the free list, or returns NULL on failure
380*/ 380*/
381static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) 381static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
382{ 382{
383 struct reiserfs_journal_cnode *cn; 383 struct reiserfs_journal_cnode *cn;
384 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 384 struct reiserfs_journal *journal = SB_JOURNAL(sb);
385 385
386 reiserfs_check_lock_depth(p_s_sb, "get_cnode"); 386 reiserfs_check_lock_depth(sb, "get_cnode");
387 387
388 if (journal->j_cnode_free <= 0) { 388 if (journal->j_cnode_free <= 0) {
389 return NULL; 389 return NULL;
@@ -403,14 +403,14 @@ static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb)
403} 403}
404 404
405/* 405/*
406** returns a cnode to the free list 406** returns a cnode to the free list
407*/ 407*/
408static void free_cnode(struct super_block *p_s_sb, 408static void free_cnode(struct super_block *sb,
409 struct reiserfs_journal_cnode *cn) 409 struct reiserfs_journal_cnode *cn)
410{ 410{
411 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 411 struct reiserfs_journal *journal = SB_JOURNAL(sb);
412 412
413 reiserfs_check_lock_depth(p_s_sb, "free_cnode"); 413 reiserfs_check_lock_depth(sb, "free_cnode");
414 414
415 journal->j_cnode_used--; 415 journal->j_cnode_used--;
416 journal->j_cnode_free++; 416 journal->j_cnode_free++;
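
get_cnode and free_cnode implement a plain LIFO free list over that chunk, with j_cnode_free/j_cnode_used as bookkeeping. A sketch reusing struct cnode from the allocate_cnodes sketch above; journal_ctr and its field names are mine:

struct journal_ctr {
        struct cnode *free_list;   /* j_cnode_free_list analogue */
        int free_count;            /* j_cnode_free */
        int used_count;            /* j_cnode_used */
};

static struct cnode *get_cnode_sketch(struct journal_ctr *j)
{
        struct cnode *cn = j->free_list;

        if (j->free_count <= 0 || !cn)
                return NULL;
        j->free_list = cn->next;            /* pop the head */
        if (j->free_list)
                j->free_list->prev = NULL;
        cn->next = cn->prev = NULL;
        j->free_count--;
        j->used_count++;
        return cn;
}

static void free_cnode_sketch(struct journal_ctr *j, struct cnode *cn)
{
        j->used_count--;
        j->free_count++;
        cn->prev = NULL;                    /* push back on the head */
        cn->next = j->free_list;
        if (j->free_list)
                j->free_list->prev = cn;
        j->free_list = cn;
}
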
@@ -436,8 +436,8 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
436{ 436{
437#ifdef CONFIG_SMP 437#ifdef CONFIG_SMP
438 if (current->lock_depth < 0) { 438 if (current->lock_depth < 0) {
439 reiserfs_panic(sb, "%s called without kernel lock held", 439 reiserfs_panic(sb, "journal-1", "%s called without kernel "
440 caller); 440 "lock held", caller);
441 } 441 }
442#else 442#else
443 ; 443 ;
@@ -481,11 +481,11 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
481** reject it on the next call to reiserfs_in_journal 481** reject it on the next call to reiserfs_in_journal
482** 482**
483*/ 483*/
484int reiserfs_in_journal(struct super_block *p_s_sb, 484int reiserfs_in_journal(struct super_block *sb,
485 unsigned int bmap_nr, int bit_nr, int search_all, 485 unsigned int bmap_nr, int bit_nr, int search_all,
486 b_blocknr_t * next_zero_bit) 486 b_blocknr_t * next_zero_bit)
487{ 487{
488 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 488 struct reiserfs_journal *journal = SB_JOURNAL(sb);
489 struct reiserfs_journal_cnode *cn; 489 struct reiserfs_journal_cnode *cn;
490 struct reiserfs_list_bitmap *jb; 490 struct reiserfs_list_bitmap *jb;
491 int i; 491 int i;
@@ -493,14 +493,14 @@ int reiserfs_in_journal(struct super_block *p_s_sb,
493 493
494 *next_zero_bit = 0; /* always start this at zero. */ 494 *next_zero_bit = 0; /* always start this at zero. */
495 495
496 PROC_INFO_INC(p_s_sb, journal.in_journal); 496 PROC_INFO_INC(sb, journal.in_journal);
497 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 497 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
498 ** if we crash before the transaction that freed it commits, this transaction won't 498 ** if we crash before the transaction that freed it commits, this transaction won't
499 ** have committed either, and the block will never be written 499 ** have committed either, and the block will never be written
500 */ 500 */
501 if (search_all) { 501 if (search_all) {
502 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 502 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
503 PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap); 503 PROC_INFO_INC(sb, journal.in_journal_bitmap);
504 jb = journal->j_list_bitmap + i; 504 jb = journal->j_list_bitmap + i;
505 if (jb->journal_list && jb->bitmaps[bmap_nr] && 505 if (jb->journal_list && jb->bitmaps[bmap_nr] &&
506 test_bit(bit_nr, 506 test_bit(bit_nr,
@@ -510,28 +510,28 @@ int reiserfs_in_journal(struct super_block *p_s_sb,
510 find_next_zero_bit((unsigned long *) 510 find_next_zero_bit((unsigned long *)
511 (jb->bitmaps[bmap_nr]-> 511 (jb->bitmaps[bmap_nr]->
512 data), 512 data),
513 p_s_sb->s_blocksize << 3, 513 sb->s_blocksize << 3,
514 bit_nr + 1); 514 bit_nr + 1);
515 return 1; 515 return 1;
516 } 516 }
517 } 517 }
518 } 518 }
519 519
520 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; 520 bl = bmap_nr * (sb->s_blocksize << 3) + bit_nr;
521 /* is it in any old transactions? */ 521 /* is it in any old transactions? */
522 if (search_all 522 if (search_all
523 && (cn = 523 && (cn =
524 get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { 524 get_journal_hash_dev(sb, journal->j_list_hash_table, bl))) {
525 return 1; 525 return 1;
526 } 526 }
527 527
528 /* is it in the current transaction. This should never happen */ 528 /* is it in the current transaction. This should never happen */
529 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { 529 if ((cn = get_journal_hash_dev(sb, journal->j_hash_table, bl))) {
530 BUG(); 530 BUG();
531 return 1; 531 return 1;
532 } 532 }
533 533
534 PROC_INFO_INC(p_s_sb, journal.in_journal_reusable); 534 PROC_INFO_INC(sb, journal.in_journal_reusable);
535 /* safe for reuse */ 535 /* safe for reuse */
536 return 0; 536 return 0;
537} 537}
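
reiserfs_in_journal decides whether a block may be reused by checking, in order: the per-transaction list bitmaps (blocks freed by transactions whose commit has not been flushed), the hash of blocks in older transactions, and the current transaction's hash, which is a BUG() if it ever hits. The only arithmetic is flattening a (bitmap block, bit) pair into a linear block number, since each bitmap block covers blocksize * 8 blocks; a one-line sketch with a name of my choosing:

/* linear block number for bit bit_nr of bitmap block bmap_nr */
static unsigned long bit_to_block(unsigned long bmap_nr,
                                  unsigned long bit_nr,
                                  unsigned long blocksize)
{
        return bmap_nr * (blocksize << 3) + bit_nr;  /* << 3: bits per byte */
}
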
@@ -553,16 +553,16 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
553} 553}
554 554
555/* lock the current transaction */ 555/* lock the current transaction */
556static inline void lock_journal(struct super_block *p_s_sb) 556static inline void lock_journal(struct super_block *sb)
557{ 557{
558 PROC_INFO_INC(p_s_sb, journal.lock_journal); 558 PROC_INFO_INC(sb, journal.lock_journal);
559 mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); 559 mutex_lock(&SB_JOURNAL(sb)->j_mutex);
560} 560}
561 561
562/* unlock the current transaction */ 562/* unlock the current transaction */
563static inline void unlock_journal(struct super_block *p_s_sb) 563static inline void unlock_journal(struct super_block *sb)
564{ 564{
565 mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); 565 mutex_unlock(&SB_JOURNAL(sb)->j_mutex);
566} 566}
567 567
568static inline void get_journal_list(struct reiserfs_journal_list *jl) 568static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -574,7 +574,7 @@ static inline void put_journal_list(struct super_block *s,
574 struct reiserfs_journal_list *jl) 574 struct reiserfs_journal_list *jl)
575{ 575{
576 if (jl->j_refcount < 1) { 576 if (jl->j_refcount < 1) {
577 reiserfs_panic(s, "trans id %lu, refcount at %d", 577 reiserfs_panic(s, "journal-2", "trans id %u, refcount at %d",
578 jl->j_trans_id, jl->j_refcount); 578 jl->j_trans_id, jl->j_refcount);
579 } 579 }
580 if (--jl->j_refcount == 0) 580 if (--jl->j_refcount == 0)
@@ -586,20 +586,20 @@ static inline void put_journal_list(struct super_block *s,
586** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 586** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
587** transaction. 587** transaction.
588*/ 588*/
589static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, 589static void cleanup_freed_for_journal_list(struct super_block *sb,
590 struct reiserfs_journal_list *jl) 590 struct reiserfs_journal_list *jl)
591{ 591{
592 592
593 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap; 593 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
594 if (jb) { 594 if (jb) {
595 cleanup_bitmap_list(p_s_sb, jb); 595 cleanup_bitmap_list(sb, jb);
596 } 596 }
597 jl->j_list_bitmap->journal_list = NULL; 597 jl->j_list_bitmap->journal_list = NULL;
598 jl->j_list_bitmap = NULL; 598 jl->j_list_bitmap = NULL;
599} 599}
600 600
601static int journal_list_still_alive(struct super_block *s, 601static int journal_list_still_alive(struct super_block *s,
602 unsigned long trans_id) 602 unsigned int trans_id)
603{ 603{
604 struct reiserfs_journal *journal = SB_JOURNAL(s); 604 struct reiserfs_journal *journal = SB_JOURNAL(s);
605 struct list_head *entry = &journal->j_journal_list; 605 struct list_head *entry = &journal->j_journal_list;
@@ -644,8 +644,8 @@ static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
644 char b[BDEVNAME_SIZE]; 644 char b[BDEVNAME_SIZE];
645 645
646 if (buffer_journaled(bh)) { 646 if (buffer_journaled(bh)) {
647 reiserfs_warning(NULL, 647 reiserfs_warning(NULL, "clm-2084",
648 "clm-2084: pinned buffer %lu:%s sent to disk", 648 "pinned buffer %lu:%s sent to disk",
649 bh->b_blocknr, bdevname(bh->b_bdev, b)); 649 bh->b_blocknr, bdevname(bh->b_bdev, b));
650 } 650 }
651 if (uptodate) 651 if (uptodate)
@@ -933,9 +933,9 @@ static int flush_older_commits(struct super_block *s,
933 struct reiserfs_journal_list *other_jl; 933 struct reiserfs_journal_list *other_jl;
934 struct reiserfs_journal_list *first_jl; 934 struct reiserfs_journal_list *first_jl;
935 struct list_head *entry; 935 struct list_head *entry;
936 unsigned long trans_id = jl->j_trans_id; 936 unsigned int trans_id = jl->j_trans_id;
937 unsigned long other_trans_id; 937 unsigned int other_trans_id;
938 unsigned long first_trans_id; 938 unsigned int first_trans_id;
939 939
940 find_first: 940 find_first:
941 /* 941 /*
@@ -1014,7 +1014,7 @@ static int flush_commit_list(struct super_block *s,
1014 int i; 1014 int i;
1015 b_blocknr_t bn; 1015 b_blocknr_t bn;
1016 struct buffer_head *tbh = NULL; 1016 struct buffer_head *tbh = NULL;
1017 unsigned long trans_id = jl->j_trans_id; 1017 unsigned int trans_id = jl->j_trans_id;
1018 struct reiserfs_journal *journal = SB_JOURNAL(s); 1018 struct reiserfs_journal *journal = SB_JOURNAL(s);
1019 int barrier = 0; 1019 int barrier = 0;
1020 int retval = 0; 1020 int retval = 0;
@@ -1122,7 +1122,8 @@ static int flush_commit_list(struct super_block *s,
1122 sync_dirty_buffer(tbh); 1122 sync_dirty_buffer(tbh);
1123 if (unlikely(!buffer_uptodate(tbh))) { 1123 if (unlikely(!buffer_uptodate(tbh))) {
1124#ifdef CONFIG_REISERFS_CHECK 1124#ifdef CONFIG_REISERFS_CHECK
1125 reiserfs_warning(s, "journal-601, buffer write failed"); 1125 reiserfs_warning(s, "journal-601",
1126 "buffer write failed");
1126#endif 1127#endif
1127 retval = -EIO; 1128 retval = -EIO;
1128 } 1129 }
@@ -1154,14 +1155,14 @@ static int flush_commit_list(struct super_block *s,
1154 * up propagating the write error out to the filesystem. */ 1155 * up propagating the write error out to the filesystem. */
1155 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 1156 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1156#ifdef CONFIG_REISERFS_CHECK 1157#ifdef CONFIG_REISERFS_CHECK
1157 reiserfs_warning(s, "journal-615: buffer write failed"); 1158 reiserfs_warning(s, "journal-615", "buffer write failed");
1158#endif 1159#endif
1159 retval = -EIO; 1160 retval = -EIO;
1160 } 1161 }
1161 bforget(jl->j_commit_bh); 1162 bforget(jl->j_commit_bh);
1162 if (journal->j_last_commit_id != 0 && 1163 if (journal->j_last_commit_id != 0 &&
1163 (jl->j_trans_id - journal->j_last_commit_id) != 1) { 1164 (jl->j_trans_id - journal->j_last_commit_id) != 1) {
1164 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", 1165 reiserfs_warning(s, "clm-2200", "last commit %lu, current %lu",
1165 journal->j_last_commit_id, jl->j_trans_id); 1166 journal->j_last_commit_id, jl->j_trans_id);
1166 } 1167 }
1167 journal->j_last_commit_id = jl->j_trans_id; 1168 journal->j_last_commit_id = jl->j_trans_id;
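
The clm-2200 check insists that commit ids advance by exactly one. This patch also narrows trans ids from unsigned long to unsigned int; the delta test stays correct across wraparound because the subtraction is unsigned and modular (and journal_read_transaction below explicitly checks for trans_id overflow). A standalone demonstration, compilable as-is:

#include <assert.h>

static int commits_are_consecutive(unsigned int last, unsigned int cur)
{
        return cur - last == 1;    /* modular 32-bit arithmetic */
}

int main(void)
{
        assert(commits_are_consecutive(41u, 42u));
        assert(commits_are_consecutive(0xffffffffu, 0u));  /* wraparound */
        assert(!commits_are_consecutive(40u, 42u));
        return 0;
}
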
@@ -1191,8 +1192,8 @@ static int flush_commit_list(struct super_block *s,
1191} 1192}
1192 1193
1193/* 1194/*
1194** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1195** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or
1195** returns NULL if it can't find anything 1196** returns NULL if it can't find anything
1196*/ 1197*/
1197static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1198static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1198 reiserfs_journal_cnode 1199 reiserfs_journal_cnode
@@ -1236,11 +1237,11 @@ static void remove_journal_hash(struct super_block *,
1236** journal list for this transaction. Aside from freeing the cnode, this also allows the 1237** journal list for this transaction. Aside from freeing the cnode, this also allows the
1237** block to be reallocated for data blocks if it had been deleted. 1238** block to be reallocated for data blocks if it had been deleted.
1238*/ 1239*/
1239static void remove_all_from_journal_list(struct super_block *p_s_sb, 1240static void remove_all_from_journal_list(struct super_block *sb,
1240 struct reiserfs_journal_list *jl, 1241 struct reiserfs_journal_list *jl,
1241 int debug) 1242 int debug)
1242{ 1243{
1243 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1244 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1244 struct reiserfs_journal_cnode *cn, *last; 1245 struct reiserfs_journal_cnode *cn, *last;
1245 cn = jl->j_realblock; 1246 cn = jl->j_realblock;
1246 1247
@@ -1250,18 +1251,18 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb,
1250 while (cn) { 1251 while (cn) {
1251 if (cn->blocknr != 0) { 1252 if (cn->blocknr != 0) {
1252 if (debug) { 1253 if (debug) {
1253 reiserfs_warning(p_s_sb, 1254 reiserfs_warning(sb, "reiserfs-2201",
1254 "block %u, bh is %d, state %ld", 1255 "block %u, bh is %d, state %ld",
1255 cn->blocknr, cn->bh ? 1 : 0, 1256 cn->blocknr, cn->bh ? 1 : 0,
1256 cn->state); 1257 cn->state);
1257 } 1258 }
1258 cn->state = 0; 1259 cn->state = 0;
1259 remove_journal_hash(p_s_sb, journal->j_list_hash_table, 1260 remove_journal_hash(sb, journal->j_list_hash_table,
1260 jl, cn->blocknr, 1); 1261 jl, cn->blocknr, 1);
1261 } 1262 }
1262 last = cn; 1263 last = cn;
1263 cn = cn->next; 1264 cn = cn->next;
1264 free_cnode(p_s_sb, last); 1265 free_cnode(sb, last);
1265 } 1266 }
1266 jl->j_realblock = NULL; 1267 jl->j_realblock = NULL;
1267} 1268}
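
remove_all_from_journal_list walks jl->j_realblock once, unhashing and recycling every cnode; note the last/cn two-pointer step so a node is never read after it goes back on the free list. The pattern, reusing the types from the free-list sketch above (unhash stands in for remove_journal_hash):

static void drain_realblock_list(struct journal_ctr *j, struct cnode *cn,
                                 void (*unhash)(struct cnode *))
{
        struct cnode *last;

        while (cn) {
                unhash(cn);         /* remove_journal_hash analogue; the
                                     * kernel also clears cn->state and
                                     * skips blocknr == 0 entries */
                last = cn;
                cn = cn->next;      /* advance before recycling */
                free_cnode_sketch(j, last);
        }
}
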
@@ -1273,12 +1274,12 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb,
1273** called by flush_journal_list, before it calls remove_all_from_journal_list 1274** called by flush_journal_list, before it calls remove_all_from_journal_list
1274** 1275**
1275*/ 1276*/
1276static int _update_journal_header_block(struct super_block *p_s_sb, 1277static int _update_journal_header_block(struct super_block *sb,
1277 unsigned long offset, 1278 unsigned long offset,
1278 unsigned long trans_id) 1279 unsigned int trans_id)
1279{ 1280{
1280 struct reiserfs_journal_header *jh; 1281 struct reiserfs_journal_header *jh;
1281 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1282 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1282 1283
1283 if (reiserfs_is_journal_aborted(journal)) 1284 if (reiserfs_is_journal_aborted(journal))
1284 return -EIO; 1285 return -EIO;
@@ -1288,8 +1289,8 @@ static int _update_journal_header_block(struct super_block *p_s_sb,
1288 wait_on_buffer((journal->j_header_bh)); 1289 wait_on_buffer((journal->j_header_bh));
1289 if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 1290 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1290#ifdef CONFIG_REISERFS_CHECK 1291#ifdef CONFIG_REISERFS_CHECK
1291 reiserfs_warning(p_s_sb, 1292 reiserfs_warning(sb, "journal-699",
1292 "journal-699: buffer write failed"); 1293 "buffer write failed");
1293#endif 1294#endif
1294 return -EIO; 1295 return -EIO;
1295 } 1296 }
@@ -1302,49 +1303,49 @@ static int _update_journal_header_block(struct super_block *p_s_sb,
1302 jh->j_first_unflushed_offset = cpu_to_le32(offset); 1303 jh->j_first_unflushed_offset = cpu_to_le32(offset);
1303 jh->j_mount_id = cpu_to_le32(journal->j_mount_id); 1304 jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
1304 1305
1305 if (reiserfs_barrier_flush(p_s_sb)) { 1306 if (reiserfs_barrier_flush(sb)) {
1306 int ret; 1307 int ret;
1307 lock_buffer(journal->j_header_bh); 1308 lock_buffer(journal->j_header_bh);
1308 ret = submit_barrier_buffer(journal->j_header_bh); 1309 ret = submit_barrier_buffer(journal->j_header_bh);
1309 if (ret == -EOPNOTSUPP) { 1310 if (ret == -EOPNOTSUPP) {
1310 set_buffer_uptodate(journal->j_header_bh); 1311 set_buffer_uptodate(journal->j_header_bh);
1311 disable_barrier(p_s_sb); 1312 disable_barrier(sb);
1312 goto sync; 1313 goto sync;
1313 } 1314 }
1314 wait_on_buffer(journal->j_header_bh); 1315 wait_on_buffer(journal->j_header_bh);
1315 check_barrier_completion(p_s_sb, journal->j_header_bh); 1316 check_barrier_completion(sb, journal->j_header_bh);
1316 } else { 1317 } else {
1317 sync: 1318 sync:
1318 set_buffer_dirty(journal->j_header_bh); 1319 set_buffer_dirty(journal->j_header_bh);
1319 sync_dirty_buffer(journal->j_header_bh); 1320 sync_dirty_buffer(journal->j_header_bh);
1320 } 1321 }
1321 if (!buffer_uptodate(journal->j_header_bh)) { 1322 if (!buffer_uptodate(journal->j_header_bh)) {
1322 reiserfs_warning(p_s_sb, 1323 reiserfs_warning(sb, "journal-837",
1323 "journal-837: IO error during journal replay"); 1324 "IO error during journal replay");
1324 return -EIO; 1325 return -EIO;
1325 } 1326 }
1326 } 1327 }
1327 return 0; 1328 return 0;
1328} 1329}
1329 1330
1330static int update_journal_header_block(struct super_block *p_s_sb, 1331static int update_journal_header_block(struct super_block *sb,
1331 unsigned long offset, 1332 unsigned long offset,
1332 unsigned long trans_id) 1333 unsigned int trans_id)
1333{ 1334{
1334 return _update_journal_header_block(p_s_sb, offset, trans_id); 1335 return _update_journal_header_block(sb, offset, trans_id);
1335} 1336}
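
_update_journal_header_block shows a fallback pattern worth calling out: try a barrier write of the header once, and if the device reports -EOPNOTSUPP, permanently disable barriers for this filesystem and fall back to an ordinary mark-dirty-and-sync write. A sketch of just that control flow, with function-pointer stubs standing in for the buffer-head calls:

#include <errno.h>

static int write_header_sketch(int *barriers_enabled,
                               int (*barrier_write)(void),
                               int (*plain_write)(void))
{
        if (*barriers_enabled) {
                int ret = barrier_write();
                if (ret != -EOPNOTSUPP)
                        return ret;
                *barriers_enabled = 0;  /* disable_barrier() analogue */
        }
        return plain_write();           /* set_buffer_dirty + sync_dirty_buffer */
}
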
1336 1337
1337/* 1338/*
1338** flush any and all journal lists older than you are 1339** flush any and all journal lists older than you are
1339** can only be called from flush_journal_list 1340** can only be called from flush_journal_list
1340*/ 1341*/
1341static int flush_older_journal_lists(struct super_block *p_s_sb, 1342static int flush_older_journal_lists(struct super_block *sb,
1342 struct reiserfs_journal_list *jl) 1343 struct reiserfs_journal_list *jl)
1343{ 1344{
1344 struct list_head *entry; 1345 struct list_head *entry;
1345 struct reiserfs_journal_list *other_jl; 1346 struct reiserfs_journal_list *other_jl;
1346 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1347 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1347 unsigned long trans_id = jl->j_trans_id; 1348 unsigned int trans_id = jl->j_trans_id;
1348 1349
1349 /* we know we are the only ones flushing things, no extra race 1350 /* we know we are the only ones flushing things, no extra race
1350 * protection is required. 1351 * protection is required.
@@ -1358,7 +1359,7 @@ static int flush_older_journal_lists(struct super_block *p_s_sb,
1358 if (other_jl->j_trans_id < trans_id) { 1359 if (other_jl->j_trans_id < trans_id) {
1359 BUG_ON(other_jl->j_refcount <= 0); 1360 BUG_ON(other_jl->j_refcount <= 0);
1360 /* do not flush all */ 1361 /* do not flush all */
1361 flush_journal_list(p_s_sb, other_jl, 0); 1362 flush_journal_list(sb, other_jl, 0);
1362 1363
1363 /* other_jl is now deleted from the list */ 1364 /* other_jl is now deleted from the list */
1364 goto restart; 1365 goto restart;
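
flush_older_journal_lists uses the classic restart-scan idiom: flushing a list entry deletes it and invalidates the iterator, so the loop jumps back to the head after every flush rather than continuing from a freed node. A self-contained sketch (jlist and flush are my names):

struct jlist {
        struct jlist *next;
        unsigned int trans_id;
};

static void flush_older_sketch(struct jlist **head, unsigned int my_id,
                               void (*flush)(struct jlist **, struct jlist *))
{
        struct jlist *e;
restart:
        for (e = *head; e; e = e->next) {
                if (e->trans_id < my_id) {
                        flush(head, e);  /* unlinks and frees e */
                        goto restart;    /* iterator is now invalid */
                }
        }
}
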
@@ -1381,8 +1382,8 @@ static void del_from_work_list(struct super_block *s,
1381** always set flushall to 1, unless you are calling from inside 1382** always set flushall to 1, unless you are calling from inside
1382** flush_journal_list 1383** flush_journal_list
1383** 1384**
1384** IMPORTANT. This can only be called while there are no journal writers, 1385** IMPORTANT. This can only be called while there are no journal writers,
1385** and the journal is locked. That means it can only be called from 1386** and the journal is locked. That means it can only be called from
1386** do_journal_end, or by journal_release 1387** do_journal_end, or by journal_release
1387*/ 1388*/
1388static int flush_journal_list(struct super_block *s, 1389static int flush_journal_list(struct super_block *s,
@@ -1401,8 +1402,7 @@ static int flush_journal_list(struct super_block *s,
1401 BUG_ON(j_len_saved <= 0); 1402 BUG_ON(j_len_saved <= 0);
1402 1403
1403 if (atomic_read(&journal->j_wcount) != 0) { 1404 if (atomic_read(&journal->j_wcount) != 0) {
1404 reiserfs_warning(s, 1405 reiserfs_warning(s, "clm-2048", "called with wcount %d",
1405 "clm-2048: flush_journal_list called with wcount %d",
1406 atomic_read(&journal->j_wcount)); 1406 atomic_read(&journal->j_wcount));
1407 } 1407 }
1408 BUG_ON(jl->j_trans_id == 0); 1408 BUG_ON(jl->j_trans_id == 0);
@@ -1416,8 +1416,7 @@ static int flush_journal_list(struct super_block *s,
1416 1416
1417 count = 0; 1417 count = 0;
1418 if (j_len_saved > journal->j_trans_max) { 1418 if (j_len_saved > journal->j_trans_max) {
1419 reiserfs_panic(s, 1419 reiserfs_panic(s, "journal-715", "length is %lu, trans id %lu",
1420 "journal-715: flush_journal_list, length is %lu, trans id %lu\n",
1421 j_len_saved, jl->j_trans_id); 1420 j_len_saved, jl->j_trans_id);
1422 return 0; 1421 return 0;
1423 } 1422 }
@@ -1430,7 +1429,7 @@ static int flush_journal_list(struct super_block *s,
1430 goto flush_older_and_return; 1429 goto flush_older_and_return;
1431 } 1430 }
1432 1431
1433 /* start by putting the commit list on disk. This will also flush 1432 /* start by putting the commit list on disk. This will also flush
1434 ** the commit lists of any older transactions 1433 ** the commit lists of any older transactions
1435 */ 1434 */
1436 flush_commit_list(s, jl, 1); 1435 flush_commit_list(s, jl, 1);
@@ -1445,12 +1444,12 @@ static int flush_journal_list(struct super_block *s,
1445 goto flush_older_and_return; 1444 goto flush_older_and_return;
1446 } 1445 }
1447 1446
1448 /* loop through each cnode, see if we need to write it, 1447 /* loop through each cnode, see if we need to write it,
1449 ** or wait on a more recent transaction, or just ignore it 1448 ** or wait on a more recent transaction, or just ignore it
1450 */ 1449 */
1451 if (atomic_read(&(journal->j_wcount)) != 0) { 1450 if (atomic_read(&(journal->j_wcount)) != 0) {
1452 reiserfs_panic(s, 1451 reiserfs_panic(s, "journal-844", "journal list is flushing, "
1453 "journal-844: panic journal list is flushing, wcount is not 0\n"); 1452 "wcount is not 0");
1454 } 1453 }
1455 cn = jl->j_realblock; 1454 cn = jl->j_realblock;
1456 while (cn) { 1455 while (cn) {
@@ -1474,8 +1473,8 @@ static int flush_journal_list(struct super_block *s,
1474 if (!pjl && cn->bh) { 1473 if (!pjl && cn->bh) {
1475 saved_bh = cn->bh; 1474 saved_bh = cn->bh;
1476 1475
1477 /* we do this to make sure nobody releases the buffer while 1476 /* we do this to make sure nobody releases the buffer while
1478 ** we are working with it 1477 ** we are working with it
1479 */ 1478 */
1480 get_bh(saved_bh); 1479 get_bh(saved_bh);
1481 1480
@@ -1498,8 +1497,8 @@ static int flush_journal_list(struct super_block *s,
1498 goto free_cnode; 1497 goto free_cnode;
1499 } 1498 }
1500 1499
1501 /* bh == NULL when the block got to disk on its own, OR, 1500 /* bh == NULL when the block got to disk on its own, OR,
1502 ** the block got freed in a future transaction 1501 ** the block got freed in a future transaction
1503 */ 1502 */
1504 if (saved_bh == NULL) { 1503 if (saved_bh == NULL) {
1505 goto free_cnode; 1504 goto free_cnode;
@@ -1510,8 +1509,8 @@ static int flush_journal_list(struct super_block *s,
1510 ** is not marked JDirty_wait 1509 ** is not marked JDirty_wait
1511 */ 1510 */
1512 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1511 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1513 reiserfs_warning(s, 1512 reiserfs_warning(s, "journal-813",
1514 "journal-813: BAD! buffer %llu %cdirty %cjwait, " 1513 "BAD! buffer %llu %cdirty %cjwait, "
1515 "not in a newer tranasction", 1514 "not in a newer tranasction",
1516 (unsigned long long)saved_bh-> 1515 (unsigned long long)saved_bh->
1517 b_blocknr, was_dirty ? ' ' : '!', 1516 b_blocknr, was_dirty ? ' ' : '!',
@@ -1529,8 +1528,8 @@ static int flush_journal_list(struct super_block *s,
1529 unlock_buffer(saved_bh); 1528 unlock_buffer(saved_bh);
1530 count++; 1529 count++;
1531 } else { 1530 } else {
1532 reiserfs_warning(s, 1531 reiserfs_warning(s, "clm-2082",
1533 "clm-2082: Unable to flush buffer %llu in %s", 1532 "Unable to flush buffer %llu in %s",
1534 (unsigned long long)saved_bh-> 1533 (unsigned long long)saved_bh->
1535 b_blocknr, __func__); 1534 b_blocknr, __func__);
1536 } 1535 }
@@ -1541,8 +1540,8 @@ static int flush_journal_list(struct super_block *s,
1541 /* we incremented this to keep others from taking the buffer head away */ 1540 /* we incremented this to keep others from taking the buffer head away */
1542 put_bh(saved_bh); 1541 put_bh(saved_bh);
1543 if (atomic_read(&(saved_bh->b_count)) < 0) { 1542 if (atomic_read(&(saved_bh->b_count)) < 0) {
1544 reiserfs_warning(s, 1543 reiserfs_warning(s, "journal-945",
1545 "journal-945: saved_bh->b_count < 0"); 1544 "saved_bh->b_count < 0");
1546 } 1545 }
1547 } 1546 }
1548 } 1547 }
@@ -1551,18 +1550,18 @@ static int flush_journal_list(struct super_block *s,
1551 while (cn) { 1550 while (cn) {
1552 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 1551 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
1553 if (!cn->bh) { 1552 if (!cn->bh) {
1554 reiserfs_panic(s, 1553 reiserfs_panic(s, "journal-1011",
1555 "journal-1011: cn->bh is NULL\n"); 1554 "cn->bh is NULL");
1556 } 1555 }
1557 wait_on_buffer(cn->bh); 1556 wait_on_buffer(cn->bh);
1558 if (!cn->bh) { 1557 if (!cn->bh) {
1559 reiserfs_panic(s, 1558 reiserfs_panic(s, "journal-1012",
1560 "journal-1012: cn->bh is NULL\n"); 1559 "cn->bh is NULL");
1561 } 1560 }
1562 if (unlikely(!buffer_uptodate(cn->bh))) { 1561 if (unlikely(!buffer_uptodate(cn->bh))) {
1563#ifdef CONFIG_REISERFS_CHECK 1562#ifdef CONFIG_REISERFS_CHECK
1564 reiserfs_warning(s, 1563 reiserfs_warning(s, "journal-949",
1565 "journal-949: buffer write failed\n"); 1564 "buffer write failed");
1566#endif 1565#endif
1567 err = -EIO; 1566 err = -EIO;
1568 } 1567 }
@@ -1587,7 +1586,7 @@ static int flush_journal_list(struct super_block *s,
1587 __func__); 1586 __func__);
1588 flush_older_and_return: 1587 flush_older_and_return:
1589 1588
1590 /* before we can update the journal header block, we _must_ flush all 1589 /* before we can update the journal header block, we _must_ flush all
1591 ** real blocks from all older transactions to disk. This is because 1590 ** real blocks from all older transactions to disk. This is because
1592 ** once the header block is updated, this transaction will not be 1591 ** once the header block is updated, this transaction will not be
1593 ** replayed after a crash 1592 ** replayed after a crash
@@ -1597,7 +1596,7 @@ static int flush_journal_list(struct super_block *s,
1597 } 1596 }
1598 1597
1599 err = journal->j_errno; 1598 err = journal->j_errno;
1600 /* before we can remove everything from the hash tables for this 1599 /* before we can remove everything from the hash tables for this
1601 ** transaction, we must make sure it can never be replayed 1600 ** transaction, we must make sure it can never be replayed
1602 ** 1601 **
1603 ** since we are only called from do_journal_end, we know for sure there 1602 ** since we are only called from do_journal_end, we know for sure there
@@ -1623,7 +1622,7 @@ static int flush_journal_list(struct super_block *s,
1623 1622
1624 if (journal->j_last_flush_id != 0 && 1623 if (journal->j_last_flush_id != 0 &&
1625 (jl->j_trans_id - journal->j_last_flush_id) != 1) { 1624 (jl->j_trans_id - journal->j_last_flush_id) != 1) {
1626 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", 1625 reiserfs_warning(s, "clm-2201", "last flush %lu, current %lu",
1627 journal->j_last_flush_id, jl->j_trans_id); 1626 journal->j_last_flush_id, jl->j_trans_id);
1628 } 1627 }
1629 journal->j_last_flush_id = jl->j_trans_id; 1628 journal->j_last_flush_id = jl->j_trans_id;
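
The comments inside flush_journal_list spell out a strict durability order that is easy to lose in the 200-line function: commit blocks first, then the real blocks of all older transactions, and only then the journal header, since advancing the header makes the transaction unreplayable after a crash. Condensed into a sketch (the stubs are mine, not kernel calls):

static int flush_ordering_sketch(int (*flush_commits)(void),
                                 int (*flush_older_real_blocks)(void),
                                 int (*update_header)(void))
{
        int err;

        err = flush_commits();            /* flush_commit_list(s, jl, 1) */
        if (err)
                return err;
        err = flush_older_real_blocks();  /* flush_older_journal_lists() */
        if (err)
                return err;
        return update_header();           /* update_journal_header_block() */
}
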
@@ -1758,13 +1757,13 @@ static int dirty_one_transaction(struct super_block *s,
1758static int kupdate_transactions(struct super_block *s, 1757static int kupdate_transactions(struct super_block *s,
1759 struct reiserfs_journal_list *jl, 1758 struct reiserfs_journal_list *jl,
1760 struct reiserfs_journal_list **next_jl, 1759 struct reiserfs_journal_list **next_jl,
1761 unsigned long *next_trans_id, 1760 unsigned int *next_trans_id,
1762 int num_blocks, int num_trans) 1761 int num_blocks, int num_trans)
1763{ 1762{
1764 int ret = 0; 1763 int ret = 0;
1765 int written = 0; 1764 int written = 0;
1766 int transactions_flushed = 0; 1765 int transactions_flushed = 0;
1767 unsigned long orig_trans_id = jl->j_trans_id; 1766 unsigned int orig_trans_id = jl->j_trans_id;
1768 struct buffer_chunk chunk; 1767 struct buffer_chunk chunk;
1769 struct list_head *entry; 1768 struct list_head *entry;
1770 struct reiserfs_journal *journal = SB_JOURNAL(s); 1769 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -1833,7 +1832,7 @@ static int flush_used_journal_lists(struct super_block *s,
1833 int limit = 256; 1832 int limit = 256;
1834 struct reiserfs_journal_list *tjl; 1833 struct reiserfs_journal_list *tjl;
1835 struct reiserfs_journal_list *flush_jl; 1834 struct reiserfs_journal_list *flush_jl;
1836 unsigned long trans_id; 1835 unsigned int trans_id;
1837 struct reiserfs_journal *journal = SB_JOURNAL(s); 1836 struct reiserfs_journal *journal = SB_JOURNAL(s);
1838 1837
1839 flush_jl = tjl = jl; 1838 flush_jl = tjl = jl;
@@ -1909,22 +1908,22 @@ void remove_journal_hash(struct super_block *sb,
1909 } 1908 }
1910} 1909}
1911 1910
1912static void free_journal_ram(struct super_block *p_s_sb) 1911static void free_journal_ram(struct super_block *sb)
1913{ 1912{
1914 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1913 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1915 kfree(journal->j_current_jl); 1914 kfree(journal->j_current_jl);
1916 journal->j_num_lists--; 1915 journal->j_num_lists--;
1917 1916
1918 vfree(journal->j_cnode_free_orig); 1917 vfree(journal->j_cnode_free_orig);
1919 free_list_bitmaps(p_s_sb, journal->j_list_bitmap); 1918 free_list_bitmaps(sb, journal->j_list_bitmap);
1920 free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */ 1919 free_bitmap_nodes(sb); /* must be after free_list_bitmaps */
1921 if (journal->j_header_bh) { 1920 if (journal->j_header_bh) {
1922 brelse(journal->j_header_bh); 1921 brelse(journal->j_header_bh);
1923 } 1922 }
1924 /* j_header_bh is on the journal dev, make sure not to release the journal 1923 /* j_header_bh is on the journal dev, make sure not to release the journal
1925 * dev until we brelse j_header_bh 1924 * dev until we brelse j_header_bh
1926 */ 1925 */
1927 release_journal_dev(p_s_sb, journal); 1926 release_journal_dev(sb, journal);
1928 vfree(journal); 1927 vfree(journal);
1929} 1928}
1930 1929
@@ -1933,27 +1932,27 @@ static void free_journal_ram(struct super_block *p_s_sb)
1933** of read_super() yet. Any other caller must keep error at 0. 1932** of read_super() yet. Any other caller must keep error at 0.
1934*/ 1933*/
1935static int do_journal_release(struct reiserfs_transaction_handle *th, 1934static int do_journal_release(struct reiserfs_transaction_handle *th,
1936 struct super_block *p_s_sb, int error) 1935 struct super_block *sb, int error)
1937{ 1936{
1938 struct reiserfs_transaction_handle myth; 1937 struct reiserfs_transaction_handle myth;
1939 int flushed = 0; 1938 int flushed = 0;
1940 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1939 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1941 1940
1942 /* we only want to flush out transactions if we were called with error == 0 1941 /* we only want to flush out transactions if we were called with error == 0
1943 */ 1942 */
1944 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { 1943 if (!error && !(sb->s_flags & MS_RDONLY)) {
1945 /* end the current trans */ 1944 /* end the current trans */
1946 BUG_ON(!th->t_trans_id); 1945 BUG_ON(!th->t_trans_id);
1947 do_journal_end(th, p_s_sb, 10, FLUSH_ALL); 1946 do_journal_end(th, sb, 10, FLUSH_ALL);
1948 1947
1949 /* make sure something gets logged to force our way into the flush code */ 1948 /* make sure something gets logged to force our way into the flush code */
1950 if (!journal_join(&myth, p_s_sb, 1)) { 1949 if (!journal_join(&myth, sb, 1)) {
1951 reiserfs_prepare_for_journal(p_s_sb, 1950 reiserfs_prepare_for_journal(sb,
1952 SB_BUFFER_WITH_SB(p_s_sb), 1951 SB_BUFFER_WITH_SB(sb),
1953 1); 1952 1);
1954 journal_mark_dirty(&myth, p_s_sb, 1953 journal_mark_dirty(&myth, sb,
1955 SB_BUFFER_WITH_SB(p_s_sb)); 1954 SB_BUFFER_WITH_SB(sb));
1956 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 1955 do_journal_end(&myth, sb, 1, FLUSH_ALL);
1957 flushed = 1; 1956 flushed = 1;
1958 } 1957 }
1959 } 1958 }
@@ -1961,26 +1960,26 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1961 /* this also catches errors during the do_journal_end above */ 1960 /* this also catches errors during the do_journal_end above */
1962 if (!error && reiserfs_is_journal_aborted(journal)) { 1961 if (!error && reiserfs_is_journal_aborted(journal)) {
1963 memset(&myth, 0, sizeof(myth)); 1962 memset(&myth, 0, sizeof(myth));
1964 if (!journal_join_abort(&myth, p_s_sb, 1)) { 1963 if (!journal_join_abort(&myth, sb, 1)) {
1965 reiserfs_prepare_for_journal(p_s_sb, 1964 reiserfs_prepare_for_journal(sb,
1966 SB_BUFFER_WITH_SB(p_s_sb), 1965 SB_BUFFER_WITH_SB(sb),
1967 1); 1966 1);
1968 journal_mark_dirty(&myth, p_s_sb, 1967 journal_mark_dirty(&myth, sb,
1969 SB_BUFFER_WITH_SB(p_s_sb)); 1968 SB_BUFFER_WITH_SB(sb));
1970 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL); 1969 do_journal_end(&myth, sb, 1, FLUSH_ALL);
1971 } 1970 }
1972 } 1971 }
1973 1972
1974 reiserfs_mounted_fs_count--; 1973 reiserfs_mounted_fs_count--;
1975 /* wait for all commits to finish */ 1974 /* wait for all commits to finish */
1976 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); 1975 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
1977 flush_workqueue(commit_wq); 1976 flush_workqueue(commit_wq);
1978 if (!reiserfs_mounted_fs_count) { 1977 if (!reiserfs_mounted_fs_count) {
1979 destroy_workqueue(commit_wq); 1978 destroy_workqueue(commit_wq);
1980 commit_wq = NULL; 1979 commit_wq = NULL;
1981 } 1980 }
1982 1981
1983 free_journal_ram(p_s_sb); 1982 free_journal_ram(sb);
1984 1983
1985 return 0; 1984 return 0;
1986} 1985}
@@ -1989,41 +1988,41 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1989** call on unmount. flush all journal trans, release all alloc'd ram 1988** call on unmount. flush all journal trans, release all alloc'd ram
1990*/ 1989*/
1991int journal_release(struct reiserfs_transaction_handle *th, 1990int journal_release(struct reiserfs_transaction_handle *th,
1992 struct super_block *p_s_sb) 1991 struct super_block *sb)
1993{ 1992{
1994 return do_journal_release(th, p_s_sb, 0); 1993 return do_journal_release(th, sb, 0);
1995} 1994}
1996 1995
1997/* 1996/*
1998** only call from an error condition inside reiserfs_read_super! 1997** only call from an error condition inside reiserfs_read_super!
1999*/ 1998*/
2000int journal_release_error(struct reiserfs_transaction_handle *th, 1999int journal_release_error(struct reiserfs_transaction_handle *th,
2001 struct super_block *p_s_sb) 2000 struct super_block *sb)
2002{ 2001{
2003 return do_journal_release(th, p_s_sb, 1); 2002 return do_journal_release(th, sb, 1);
2004} 2003}
2005 2004
2006/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 2005/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */
2007static int journal_compare_desc_commit(struct super_block *p_s_sb, 2006static int journal_compare_desc_commit(struct super_block *sb,
2008 struct reiserfs_journal_desc *desc, 2007 struct reiserfs_journal_desc *desc,
2009 struct reiserfs_journal_commit *commit) 2008 struct reiserfs_journal_commit *commit)
2010{ 2009{
2011 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) || 2010 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
2012 get_commit_trans_len(commit) != get_desc_trans_len(desc) || 2011 get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
2013 get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max || 2012 get_commit_trans_len(commit) > SB_JOURNAL(sb)->j_trans_max ||
2014 get_commit_trans_len(commit) <= 0) { 2013 get_commit_trans_len(commit) <= 0) {
2015 return 1; 2014 return 1;
2016 } 2015 }
2017 return 0; 2016 return 0;
2018} 2017}
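
journal_compare_desc_commit reduces to a four-way sanity predicate on the descriptor/commit pair. Restated standalone, assuming an unsigned length, so the kernel's <= 0 test becomes == 0:

#include <stdint.h>

/* returns 1 when desc and commit cannot describe one valid transaction */
static int desc_commit_mismatch(uint32_t desc_id, uint32_t desc_len,
                                uint32_t commit_id, uint32_t commit_len,
                                uint32_t trans_max)
{
        return commit_id != desc_id ||
               commit_len != desc_len ||
               commit_len > trans_max ||
               commit_len == 0;
}
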
2019 2018
2020/* returns 0 if it did not find a description block 2019/* returns 0 if it did not find a description block
2021** returns -1 if it found a corrupt commit block 2020** returns -1 if it found a corrupt commit block
2022** returns 1 if both desc and commit were valid 2021** returns 1 if both desc and commit were valid
2023*/ 2022*/
2024static int journal_transaction_is_valid(struct super_block *p_s_sb, 2023static int journal_transaction_is_valid(struct super_block *sb,
2025 struct buffer_head *d_bh, 2024 struct buffer_head *d_bh,
2026 unsigned long *oldest_invalid_trans_id, 2025 unsigned int *oldest_invalid_trans_id,
2027 unsigned long *newest_mount_id) 2026 unsigned long *newest_mount_id)
2028{ 2027{
2029 struct reiserfs_journal_desc *desc; 2028 struct reiserfs_journal_desc *desc;
@@ -2039,7 +2038,7 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2039 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) { 2038 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
2040 if (oldest_invalid_trans_id && *oldest_invalid_trans_id 2039 if (oldest_invalid_trans_id && *oldest_invalid_trans_id
2041 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 2040 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
2042 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2041 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2043 "journal-986: transaction " 2042 "journal-986: transaction "
2044 "is valid returning because trans_id %d is greater than " 2043 "is valid returning because trans_id %d is greater than "
2045 "oldest_invalid %lu", 2044 "oldest_invalid %lu",
@@ -2049,7 +2048,7 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2049 } 2048 }
2050 if (newest_mount_id 2049 if (newest_mount_id
2051 && *newest_mount_id > get_desc_mount_id(desc)) { 2050 && *newest_mount_id > get_desc_mount_id(desc)) {
2052 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2051 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2053 "journal-1087: transaction " 2052 "journal-1087: transaction "
2054 "is valid returning because mount_id %d is less than " 2053 "is valid returning because mount_id %d is less than "
2055 "newest_mount_id %lu", 2054 "newest_mount_id %lu",
@@ -2057,36 +2056,37 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2057 *newest_mount_id); 2056 *newest_mount_id);
2058 return -1; 2057 return -1;
2059 } 2058 }
2060 if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) { 2059 if (get_desc_trans_len(desc) > SB_JOURNAL(sb)->j_trans_max) {
2061 reiserfs_warning(p_s_sb, 2060 reiserfs_warning(sb, "journal-2018",
2062 "journal-2018: Bad transaction length %d encountered, ignoring transaction", 2061 "Bad transaction length %d "
2062 "encountered, ignoring transaction",
2063 get_desc_trans_len(desc)); 2063 get_desc_trans_len(desc));
2064 return -1; 2064 return -1;
2065 } 2065 }
2066 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2066 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2067 2067
2068 /* ok, we have a journal description block, let's see if the transaction was valid */ 2068 /* ok, we have a journal description block, let's see if the transaction was valid */
2069 c_bh = 2069 c_bh =
2070 journal_bread(p_s_sb, 2070 journal_bread(sb,
2071 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2071 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2072 ((offset + get_desc_trans_len(desc) + 2072 ((offset + get_desc_trans_len(desc) +
2073 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 2073 1) % SB_ONDISK_JOURNAL_SIZE(sb)));
2074 if (!c_bh) 2074 if (!c_bh)
2075 return 0; 2075 return 0;
2076 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2076 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2077 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2077 if (journal_compare_desc_commit(sb, desc, commit)) {
2078 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2078 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2079 "journal_transaction_is_valid, commit offset %ld had bad " 2079 "journal_transaction_is_valid, commit offset %ld had bad "
2080 "time %d or length %d", 2080 "time %d or length %d",
2081 c_bh->b_blocknr - 2081 c_bh->b_blocknr -
2082 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2082 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2083 get_commit_trans_id(commit), 2083 get_commit_trans_id(commit),
2084 get_commit_trans_len(commit)); 2084 get_commit_trans_len(commit));
2085 brelse(c_bh); 2085 brelse(c_bh);
2086 if (oldest_invalid_trans_id) { 2086 if (oldest_invalid_trans_id) {
2087 *oldest_invalid_trans_id = 2087 *oldest_invalid_trans_id =
2088 get_desc_trans_id(desc); 2088 get_desc_trans_id(desc);
2089 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2089 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2090 "journal-1004: " 2090 "journal-1004: "
2091 "transaction_is_valid setting oldest invalid trans_id " 2091 "transaction_is_valid setting oldest invalid trans_id "
2092 "to %d", 2092 "to %d",
@@ -2095,11 +2095,11 @@ static int journal_transaction_is_valid(struct super_block *p_s_sb,
2095 return -1; 2095 return -1;
2096 } 2096 }
2097 brelse(c_bh); 2097 brelse(c_bh);
2098 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2098 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2099 "journal-1006: found valid " 2099 "journal-1006: found valid "
2100 "transaction start offset %llu, len %d id %d", 2100 "transaction start offset %llu, len %d id %d",
2101 d_bh->b_blocknr - 2101 d_bh->b_blocknr -
2102 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2102 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2103 get_desc_trans_len(desc), 2103 get_desc_trans_len(desc),
2104 get_desc_trans_id(desc)); 2104 get_desc_trans_id(desc));
2105 return 1; 2105 return 1;
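
The offset math here is the heart of the circular log: a transaction is the descriptor block, then trans_len logged blocks, then the commit block, all taken modulo the on-disk journal size and rebased on the journal's first block. Factored out as a helper (my name):

/* on-disk block number of the commit block for a descriptor at
 * desc_offset (relative to the journal start) with trans_len blocks */
static unsigned long commit_blocknr(unsigned long journal_first_block,
                                    unsigned long desc_offset,
                                    unsigned long trans_len,
                                    unsigned long journal_size)
{
        return journal_first_block +
               (desc_offset + trans_len + 1) % journal_size;
}
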
@@ -2121,63 +2121,63 @@ static void brelse_array(struct buffer_head **heads, int num)
2121** this either reads in a replays a transaction, or returns because the transaction 2121** this either reads in a replays a transaction, or returns because the transaction
2122** is invalid, or too old. 2122** is invalid, or too old.
2123*/ 2123*/
2124static int journal_read_transaction(struct super_block *p_s_sb, 2124static int journal_read_transaction(struct super_block *sb,
2125 unsigned long cur_dblock, 2125 unsigned long cur_dblock,
2126 unsigned long oldest_start, 2126 unsigned long oldest_start,
2127 unsigned long oldest_trans_id, 2127 unsigned int oldest_trans_id,
2128 unsigned long newest_mount_id) 2128 unsigned long newest_mount_id)
2129{ 2129{
2130 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2130 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2131 struct reiserfs_journal_desc *desc; 2131 struct reiserfs_journal_desc *desc;
2132 struct reiserfs_journal_commit *commit; 2132 struct reiserfs_journal_commit *commit;
2133 unsigned long trans_id = 0; 2133 unsigned int trans_id = 0;
2134 struct buffer_head *c_bh; 2134 struct buffer_head *c_bh;
2135 struct buffer_head *d_bh; 2135 struct buffer_head *d_bh;
2136 struct buffer_head **log_blocks = NULL; 2136 struct buffer_head **log_blocks = NULL;
2137 struct buffer_head **real_blocks = NULL; 2137 struct buffer_head **real_blocks = NULL;
2138 unsigned long trans_offset; 2138 unsigned int trans_offset;
2139 int i; 2139 int i;
2140 int trans_half; 2140 int trans_half;
2141 2141
2142 d_bh = journal_bread(p_s_sb, cur_dblock); 2142 d_bh = journal_bread(sb, cur_dblock);
2143 if (!d_bh) 2143 if (!d_bh)
2144 return 1; 2144 return 1;
2145 desc = (struct reiserfs_journal_desc *)d_bh->b_data; 2145 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2146 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2146 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2147 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " 2147 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1037: "
2148 "journal_read_transaction, offset %llu, len %d mount_id %d", 2148 "journal_read_transaction, offset %llu, len %d mount_id %d",
2149 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2149 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2150 get_desc_trans_len(desc), get_desc_mount_id(desc)); 2150 get_desc_trans_len(desc), get_desc_mount_id(desc));
2151 if (get_desc_trans_id(desc) < oldest_trans_id) { 2151 if (get_desc_trans_id(desc) < oldest_trans_id) {
2152 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " 2152 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1039: "
2153 "journal_read_trans skipping because %lu is too old", 2153 "journal_read_trans skipping because %lu is too old",
2154 cur_dblock - 2154 cur_dblock -
2155 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 2155 SB_ONDISK_JOURNAL_1st_BLOCK(sb));
2156 brelse(d_bh); 2156 brelse(d_bh);
2157 return 1; 2157 return 1;
2158 } 2158 }
2159 if (get_desc_mount_id(desc) != newest_mount_id) { 2159 if (get_desc_mount_id(desc) != newest_mount_id) {
2160 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " 2160 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1146: "
2161 "journal_read_trans skipping because %d is != " 2161 "journal_read_trans skipping because %d is != "
2162 "newest_mount_id %lu", get_desc_mount_id(desc), 2162 "newest_mount_id %lu", get_desc_mount_id(desc),
2163 newest_mount_id); 2163 newest_mount_id);
2164 brelse(d_bh); 2164 brelse(d_bh);
2165 return 1; 2165 return 1;
2166 } 2166 }
2167 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2167 c_bh = journal_bread(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2168 ((trans_offset + get_desc_trans_len(desc) + 1) % 2168 ((trans_offset + get_desc_trans_len(desc) + 1) %
2169 SB_ONDISK_JOURNAL_SIZE(p_s_sb))); 2169 SB_ONDISK_JOURNAL_SIZE(sb)));
2170 if (!c_bh) { 2170 if (!c_bh) {
2171 brelse(d_bh); 2171 brelse(d_bh);
2172 return 1; 2172 return 1;
2173 } 2173 }
2174 commit = (struct reiserfs_journal_commit *)c_bh->b_data; 2174 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
2175 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2175 if (journal_compare_desc_commit(sb, desc, commit)) {
2176 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2176 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2177 "journal_read_transaction, " 2177 "journal_read_transaction, "
2178 "commit offset %llu had bad time %d or length %d", 2178 "commit offset %llu had bad time %d or length %d",
2179 c_bh->b_blocknr - 2179 c_bh->b_blocknr -
2180 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2180 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2181 get_commit_trans_id(commit), 2181 get_commit_trans_id(commit),
2182 get_commit_trans_len(commit)); 2182 get_commit_trans_len(commit));
2183 brelse(c_bh); 2183 brelse(c_bh);
@@ -2195,38 +2195,41 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2195 brelse(d_bh); 2195 brelse(d_bh);
2196 kfree(log_blocks); 2196 kfree(log_blocks);
2197 kfree(real_blocks); 2197 kfree(real_blocks);
2198 reiserfs_warning(p_s_sb, 2198 reiserfs_warning(sb, "journal-1169",
2199 "journal-1169: kmalloc failed, unable to mount FS"); 2199 "kmalloc failed, unable to mount FS");
2200 return -1; 2200 return -1;
2201 } 2201 }
2202 /* get all the buffer heads */ 2202 /* get all the buffer heads */
2203 trans_half = journal_trans_half(p_s_sb->s_blocksize); 2203 trans_half = journal_trans_half(sb->s_blocksize);
2204 for (i = 0; i < get_desc_trans_len(desc); i++) { 2204 for (i = 0; i < get_desc_trans_len(desc); i++) {
2205 log_blocks[i] = 2205 log_blocks[i] =
2206 journal_getblk(p_s_sb, 2206 journal_getblk(sb,
2207 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2207 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2208 (trans_offset + 1 + 2208 (trans_offset + 1 +
2209 i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2209 i) % SB_ONDISK_JOURNAL_SIZE(sb));
2210 if (i < trans_half) { 2210 if (i < trans_half) {
2211 real_blocks[i] = 2211 real_blocks[i] =
2212 sb_getblk(p_s_sb, 2212 sb_getblk(sb,
2213 le32_to_cpu(desc->j_realblock[i])); 2213 le32_to_cpu(desc->j_realblock[i]));
2214 } else { 2214 } else {
2215 real_blocks[i] = 2215 real_blocks[i] =
2216 sb_getblk(p_s_sb, 2216 sb_getblk(sb,
2217 le32_to_cpu(commit-> 2217 le32_to_cpu(commit->
2218 j_realblock[i - trans_half])); 2218 j_realblock[i - trans_half]));
2219 } 2219 }
2220 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { 2220 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(sb)) {
2221 reiserfs_warning(p_s_sb, 2221 reiserfs_warning(sb, "journal-1207",
2222 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); 2222 "REPLAY FAILURE fsck required! "
2223 "Block to replay is outside of "
2224 "filesystem");
2223 goto abort_replay; 2225 goto abort_replay;
2224 } 2226 }
2225 /* make sure we don't try to replay onto log or reserved area */ 2227 /* make sure we don't try to replay onto log or reserved area */
2226 if (is_block_in_log_or_reserved_area 2228 if (is_block_in_log_or_reserved_area
2227 (p_s_sb, real_blocks[i]->b_blocknr)) { 2229 (sb, real_blocks[i]->b_blocknr)) {
2228 reiserfs_warning(p_s_sb, 2230 reiserfs_warning(sb, "journal-1204",
2229 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block"); 2231 "REPLAY FAILURE fsck required! "
2232 "Trying to replay onto a log block");
2230 abort_replay: 2233 abort_replay:
2231 brelse_array(log_blocks, i); 2234 brelse_array(log_blocks, i);
2232 brelse_array(real_blocks, i); 2235 brelse_array(real_blocks, i);
@@ -2242,8 +2245,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2242 for (i = 0; i < get_desc_trans_len(desc); i++) { 2245 for (i = 0; i < get_desc_trans_len(desc); i++) {
2243 wait_on_buffer(log_blocks[i]); 2246 wait_on_buffer(log_blocks[i]);
2244 if (!buffer_uptodate(log_blocks[i])) { 2247 if (!buffer_uptodate(log_blocks[i])) {
2245 reiserfs_warning(p_s_sb, 2248 reiserfs_warning(sb, "journal-1212",
2246 "journal-1212: REPLAY FAILURE fsck required! buffer write failed"); 2249 "REPLAY FAILURE fsck required! "
2250 "buffer write failed");
2247 brelse_array(log_blocks + i, 2251 brelse_array(log_blocks + i,
2248 get_desc_trans_len(desc) - i); 2252 get_desc_trans_len(desc) - i);
2249 brelse_array(real_blocks, get_desc_trans_len(desc)); 2253 brelse_array(real_blocks, get_desc_trans_len(desc));
@@ -2266,8 +2270,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2266 for (i = 0; i < get_desc_trans_len(desc); i++) { 2270 for (i = 0; i < get_desc_trans_len(desc); i++) {
2267 wait_on_buffer(real_blocks[i]); 2271 wait_on_buffer(real_blocks[i]);
2268 if (!buffer_uptodate(real_blocks[i])) { 2272 if (!buffer_uptodate(real_blocks[i])) {
2269 reiserfs_warning(p_s_sb, 2273 reiserfs_warning(sb, "journal-1226",
2270 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed"); 2274 "REPLAY FAILURE, fsck required! "
2275 "buffer write failed");
2271 brelse_array(real_blocks + i, 2276 brelse_array(real_blocks + i,
2272 get_desc_trans_len(desc) - i); 2277 get_desc_trans_len(desc) - i);
2273 brelse(c_bh); 2278 brelse(c_bh);
@@ -2279,15 +2284,15 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2279 brelse(real_blocks[i]); 2284 brelse(real_blocks[i]);
2280 } 2285 }
2281 cur_dblock = 2286 cur_dblock =
2282 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2287 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2283 ((trans_offset + get_desc_trans_len(desc) + 2288 ((trans_offset + get_desc_trans_len(desc) +
2284 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2289 2) % SB_ONDISK_JOURNAL_SIZE(sb));
2285 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2290 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2286 "journal-1095: setting journal " "start to offset %ld", 2291 "journal-1095: setting journal " "start to offset %ld",
2287 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)); 2292 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
2288 2293
2289 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2294 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2290 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2295 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2291 journal->j_last_flush_trans_id = trans_id; 2296 journal->j_last_flush_trans_id = trans_id;
2292 journal->j_trans_id = trans_id + 1; 2297 journal->j_trans_id = trans_id + 1;
2293 /* check for trans_id overflow */ 2298 /* check for trans_id overflow */
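
One detail of replay worth making explicit: the list of real (home-location) block numbers is split across the transaction, with the first trans_half entries in the descriptor and the remainder in the commit block, where trans_half is derived from the block size by journal_trans_half(). The indexing, as a sketch (le32_to_cpu conversion omitted):

#include <stdint.h>

static uint32_t real_blocknr_sketch(const uint32_t *desc_realblock,
                                    const uint32_t *commit_realblock,
                                    int i, int trans_half)
{
        return i < trans_half ? desc_realblock[i]
                              : commit_realblock[i - trans_half];
}
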
@@ -2352,12 +2357,12 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
2352** 2357**
2353** On exit, it sets things up so the first transaction will work correctly. 2358** On exit, it sets things up so the first transaction will work correctly.
2354*/ 2359*/
2355static int journal_read(struct super_block *p_s_sb) 2360static int journal_read(struct super_block *sb)
2356{ 2361{
2357 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2362 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2358 struct reiserfs_journal_desc *desc; 2363 struct reiserfs_journal_desc *desc;
2359 unsigned long oldest_trans_id = 0; 2364 unsigned int oldest_trans_id = 0;
2360 unsigned long oldest_invalid_trans_id = 0; 2365 unsigned int oldest_invalid_trans_id = 0;
2361 time_t start; 2366 time_t start;
2362 unsigned long oldest_start = 0; 2367 unsigned long oldest_start = 0;
2363 unsigned long cur_dblock = 0; 2368 unsigned long cur_dblock = 0;
@@ -2370,46 +2375,46 @@ static int journal_read(struct super_block *p_s_sb)
2370 int ret; 2375 int ret;
2371 char b[BDEVNAME_SIZE]; 2376 char b[BDEVNAME_SIZE];
2372 2377
2373 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb); 2378 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2374 reiserfs_info(p_s_sb, "checking transaction log (%s)\n", 2379 reiserfs_info(sb, "checking transaction log (%s)\n",
2375 bdevname(journal->j_dev_bd, b)); 2380 bdevname(journal->j_dev_bd, b));
2376 start = get_seconds(); 2381 start = get_seconds();
2377 2382
2378 /* step 1, read in the journal header block. Check the transaction it says 2383 /* step 1, read in the journal header block. Check the transaction it says
2379 ** is the first unflushed, and if that transaction is not valid, 2384 ** is the first unflushed, and if that transaction is not valid,
2380 ** replay is done 2385 ** replay is done
2381 */ 2386 */
2382 journal->j_header_bh = journal_bread(p_s_sb, 2387 journal->j_header_bh = journal_bread(sb,
2383 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) 2388 SB_ONDISK_JOURNAL_1st_BLOCK(sb)
2384 + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2389 + SB_ONDISK_JOURNAL_SIZE(sb));
2385 if (!journal->j_header_bh) { 2390 if (!journal->j_header_bh) {
2386 return 1; 2391 return 1;
2387 } 2392 }
2388 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data); 2393 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2389 if (le32_to_cpu(jh->j_first_unflushed_offset) < 2394 if (le32_to_cpu(jh->j_first_unflushed_offset) <
2390 SB_ONDISK_JOURNAL_SIZE(p_s_sb) 2395 SB_ONDISK_JOURNAL_SIZE(sb)
2391 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2396 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2392 oldest_start = 2397 oldest_start =
2393 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2398 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2394 le32_to_cpu(jh->j_first_unflushed_offset); 2399 le32_to_cpu(jh->j_first_unflushed_offset);
2395 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2400 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2396 newest_mount_id = le32_to_cpu(jh->j_mount_id); 2401 newest_mount_id = le32_to_cpu(jh->j_mount_id);
2397 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2402 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2398 "journal-1153: found in " 2403 "journal-1153: found in "
2399 "header: first_unflushed_offset %d, last_flushed_trans_id " 2404 "header: first_unflushed_offset %d, last_flushed_trans_id "
2400 "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 2405 "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
2401 le32_to_cpu(jh->j_last_flush_trans_id)); 2406 le32_to_cpu(jh->j_last_flush_trans_id));
2402 valid_journal_header = 1; 2407 valid_journal_header = 1;
2403 2408
2404 /* now, we try to read the first unflushed offset. If it is not valid, 2409 /* now, we try to read the first unflushed offset. If it is not valid,
2405 ** there is nothing more we can do, and it makes no sense to read 2410 ** there is nothing more we can do, and it makes no sense to read
2406 ** through the whole log. 2411 ** through the whole log.
2407 */ 2412 */
2408 d_bh = 2413 d_bh =
2409 journal_bread(p_s_sb, 2414 journal_bread(sb,
2410 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2415 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2411 le32_to_cpu(jh->j_first_unflushed_offset)); 2416 le32_to_cpu(jh->j_first_unflushed_offset));
2412 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL); 2417 ret = journal_transaction_is_valid(sb, d_bh, NULL, NULL);
2413 if (!ret) { 2418 if (!ret) {
2414 continue_replay = 0; 2419 continue_replay = 0;
2415 } 2420 }
@@ -2417,9 +2422,9 @@ static int journal_read(struct super_block *p_s_sb)
2417 goto start_log_replay; 2422 goto start_log_replay;
2418 } 2423 }
2419 2424
2420 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { 2425 if (continue_replay && bdev_read_only(sb->s_bdev)) {
2421 reiserfs_warning(p_s_sb, 2426 reiserfs_warning(sb, "clm-2076",
2422 "clm-2076: device is readonly, unable to replay log"); 2427 "device is readonly, unable to replay log");
2423 return -1; 2428 return -1;
2424 } 2429 }
2425 2430
@@ -2428,17 +2433,17 @@ static int journal_read(struct super_block *p_s_sb)
2428 */ 2433 */
2429 while (continue_replay 2434 while (continue_replay
2430 && cur_dblock < 2435 && cur_dblock <
2431 (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2436 (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2432 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { 2437 SB_ONDISK_JOURNAL_SIZE(sb))) {
2433 /* Note that the blocksize of the primary fs device and the journal 2438 /* Note that the blocksize of the primary fs device and the journal
2434 device must be the same */ 2439 device must be the same */
2435 d_bh = 2440 d_bh =
2436 reiserfs_breada(journal->j_dev_bd, cur_dblock, 2441 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2437 p_s_sb->s_blocksize, 2442 sb->s_blocksize,
2438 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2443 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2439 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2444 SB_ONDISK_JOURNAL_SIZE(sb));
2440 ret = 2445 ret =
2441 journal_transaction_is_valid(p_s_sb, d_bh, 2446 journal_transaction_is_valid(sb, d_bh,
2442 &oldest_invalid_trans_id, 2447 &oldest_invalid_trans_id,
2443 &newest_mount_id); 2448 &newest_mount_id);
2444 if (ret == 1) { 2449 if (ret == 1) {
@@ -2447,26 +2452,26 @@ static int journal_read(struct super_block *p_s_sb)
2447 oldest_trans_id = get_desc_trans_id(desc); 2452 oldest_trans_id = get_desc_trans_id(desc);
2448 oldest_start = d_bh->b_blocknr; 2453 oldest_start = d_bh->b_blocknr;
2449 newest_mount_id = get_desc_mount_id(desc); 2454 newest_mount_id = get_desc_mount_id(desc);
2450 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2455 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2451 "journal-1179: Setting " 2456 "journal-1179: Setting "
2452 "oldest_start to offset %llu, trans_id %lu", 2457 "oldest_start to offset %llu, trans_id %lu",
2453 oldest_start - 2458 oldest_start -
2454 SB_ONDISK_JOURNAL_1st_BLOCK 2459 SB_ONDISK_JOURNAL_1st_BLOCK
2455 (p_s_sb), oldest_trans_id); 2460 (sb), oldest_trans_id);
2456 } else if (oldest_trans_id > get_desc_trans_id(desc)) { 2461 } else if (oldest_trans_id > get_desc_trans_id(desc)) {
2457 /* one we just read was older */ 2462 /* one we just read was older */
2458 oldest_trans_id = get_desc_trans_id(desc); 2463 oldest_trans_id = get_desc_trans_id(desc);
2459 oldest_start = d_bh->b_blocknr; 2464 oldest_start = d_bh->b_blocknr;
2460 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2465 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2461 "journal-1180: Resetting " 2466 "journal-1180: Resetting "
2462 "oldest_start to offset %lu, trans_id %lu", 2467 "oldest_start to offset %lu, trans_id %lu",
2463 oldest_start - 2468 oldest_start -
2464 SB_ONDISK_JOURNAL_1st_BLOCK 2469 SB_ONDISK_JOURNAL_1st_BLOCK
2465 (p_s_sb), oldest_trans_id); 2470 (sb), oldest_trans_id);
2466 } 2471 }
2467 if (newest_mount_id < get_desc_mount_id(desc)) { 2472 if (newest_mount_id < get_desc_mount_id(desc)) {
2468 newest_mount_id = get_desc_mount_id(desc); 2473 newest_mount_id = get_desc_mount_id(desc);
2469 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2474 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2470 "journal-1299: Setting " 2475 "journal-1299: Setting "
2471 "newest_mount_id to %d", 2476 "newest_mount_id to %d",
2472 get_desc_mount_id(desc)); 2477 get_desc_mount_id(desc));
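While scanning, the loop above keeps two running extremes: the smallest trans_id seen (where replay must begin) and the largest mount_id (so blocks left over from earlier mounts can be recognized as stale). A toy version of that bookkeeping over hypothetical descriptors:

#include <stdint.h>
#include <stdio.h>

struct desc { uint32_t trans_id, mount_id, blocknr; };

int main(void)
{
	/* pretend these descriptors were found while walking the log */
	struct desc found[] = { {12, 3, 110}, {9, 3, 64}, {15, 4, 200} };
	uint32_t oldest_trans_id = 0, newest_mount_id = 0, oldest_start = 0;

	for (unsigned i = 0; i < sizeof(found) / sizeof(found[0]); i++) {
		struct desc *d = &found[i];
		if (oldest_trans_id == 0 || d->trans_id < oldest_trans_id) {
			oldest_trans_id = d->trans_id; /* earliest replay point */
			oldest_start = d->blocknr;
		}
		if (d->mount_id > newest_mount_id)
			newest_mount_id = d->mount_id; /* newest mount seen */
	}
	printf("replay starts at block %u, trans %u; newest mount %u\n",
	       oldest_start, oldest_trans_id, newest_mount_id);
	return 0;
}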
@@ -2481,17 +2486,17 @@ static int journal_read(struct super_block *p_s_sb)
2481 start_log_replay: 2486 start_log_replay:
2482 cur_dblock = oldest_start; 2487 cur_dblock = oldest_start;
2483 if (oldest_trans_id) { 2488 if (oldest_trans_id) {
2484 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2489 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2485 "journal-1206: Starting replay " 2490 "journal-1206: Starting replay "
2486 "from offset %llu, trans_id %lu", 2491 "from offset %llu, trans_id %lu",
2487 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2492 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2488 oldest_trans_id); 2493 oldest_trans_id);
2489 2494
2490 } 2495 }
2491 replay_count = 0; 2496 replay_count = 0;
2492 while (continue_replay && oldest_trans_id > 0) { 2497 while (continue_replay && oldest_trans_id > 0) {
2493 ret = 2498 ret =
2494 journal_read_transaction(p_s_sb, cur_dblock, oldest_start, 2499 journal_read_transaction(sb, cur_dblock, oldest_start,
2495 oldest_trans_id, newest_mount_id); 2500 oldest_trans_id, newest_mount_id);
2496 if (ret < 0) { 2501 if (ret < 0) {
2497 return ret; 2502 return ret;
@@ -2499,14 +2504,14 @@ static int journal_read(struct super_block *p_s_sb)
2499 break; 2504 break;
2500 } 2505 }
2501 cur_dblock = 2506 cur_dblock =
2502 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start; 2507 SB_ONDISK_JOURNAL_1st_BLOCK(sb) + journal->j_start;
2503 replay_count++; 2508 replay_count++;
2504 if (cur_dblock == oldest_start) 2509 if (cur_dblock == oldest_start)
2505 break; 2510 break;
2506 } 2511 }
2507 2512
2508 if (oldest_trans_id == 0) { 2513 if (oldest_trans_id == 0) {
2509 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 2514 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2510 "journal-1225: No valid " "transactions found"); 2515 "journal-1225: No valid " "transactions found");
2511 } 2516 }
2512 /* j_start does not get set correctly if we don't replay any transactions. 2517 /* j_start does not get set correctly if we don't replay any transactions.
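The replay loop walks a circular region: after each transaction is replayed, the cursor is recomputed from journal->j_start, and the walk stops once it would land back on its starting block. A compact model of that wrap-around walk; replay_one() is a stand-in for journal_read_transaction, and the block geometry is invented:

#include <stdio.h>

#define LOG_FIRST 18u   /* example journal start block */
#define LOG_SIZE  32u   /* example journal length in blocks */

/* stand-in for journal_read_transaction(): pretend each transaction
 * occupies four blocks and report where the next one would begin */
static unsigned replay_one(unsigned cur, unsigned *j_start)
{
	*j_start = (cur - LOG_FIRST + 4) % LOG_SIZE;
	return 0;
}

int main(void)
{
	unsigned j_start = 10, replay_count = 0;
	unsigned oldest_start = LOG_FIRST + j_start;
	unsigned cur = oldest_start;

	do {
		if (replay_one(cur, &j_start))
			break;
		replay_count++;
		cur = LOG_FIRST + j_start;   /* recompute, modulo the log */
	} while (cur != oldest_start);       /* full circle: stop */

	printf("replayed %u transactions\n", replay_count);
	return 0;
}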
@@ -2526,16 +2531,16 @@ static int journal_read(struct super_block *p_s_sb)
2526 } else { 2531 } else {
2527 journal->j_mount_id = newest_mount_id + 1; 2532 journal->j_mount_id = newest_mount_id + 1;
2528 } 2533 }
2529 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 2534 reiserfs_debug(sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2530 "newest_mount_id to %lu", journal->j_mount_id); 2535 "newest_mount_id to %lu", journal->j_mount_id);
2531 journal->j_first_unflushed_offset = journal->j_start; 2536 journal->j_first_unflushed_offset = journal->j_start;
2532 if (replay_count > 0) { 2537 if (replay_count > 0) {
2533 reiserfs_info(p_s_sb, 2538 reiserfs_info(sb,
2534 "replayed %d transactions in %lu seconds\n", 2539 "replayed %d transactions in %lu seconds\n",
2535 replay_count, get_seconds() - start); 2540 replay_count, get_seconds() - start);
2536 } 2541 }
2537 if (!bdev_read_only(p_s_sb->s_bdev) && 2542 if (!bdev_read_only(sb->s_bdev) &&
2538 _update_journal_header_block(p_s_sb, journal->j_start, 2543 _update_journal_header_block(sb, journal->j_start,
2539 journal->j_last_flush_trans_id)) { 2544 journal->j_last_flush_trans_id)) {
2540 /* replay failed, caller must call free_journal_ram and abort 2545 /* replay failed, caller must call free_journal_ram and abort
2541 ** the mount 2546 ** the mount
@@ -2560,9 +2565,9 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2560 return jl; 2565 return jl;
2561} 2566}
2562 2567
2563static void journal_list_init(struct super_block *p_s_sb) 2568static void journal_list_init(struct super_block *sb)
2564{ 2569{
2565 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); 2570 SB_JOURNAL(sb)->j_current_jl = alloc_journal_list(sb);
2566} 2571}
2567 2572
2568static int release_journal_dev(struct super_block *super, 2573static int release_journal_dev(struct super_block *super,
@@ -2580,9 +2585,8 @@ static int release_journal_dev(struct super_block *super,
2580 } 2585 }
2581 2586
2582 if (result != 0) { 2587 if (result != 0) {
2583 reiserfs_warning(super, 2588 reiserfs_warning(super, "sh-457",
2584 "sh-457: release_journal_dev: Cannot release journal device: %i", 2589 "Cannot release journal device: %i", result);
2585 result);
2586 } 2590 }
2587 return result; 2591 return result;
2588} 2592}
@@ -2612,7 +2616,7 @@ static int journal_init_dev(struct super_block *super,
2612 if (IS_ERR(journal->j_dev_bd)) { 2616 if (IS_ERR(journal->j_dev_bd)) {
2613 result = PTR_ERR(journal->j_dev_bd); 2617 result = PTR_ERR(journal->j_dev_bd);
2614 journal->j_dev_bd = NULL; 2618 journal->j_dev_bd = NULL;
2615 reiserfs_warning(super, "sh-458: journal_init_dev: " 2619 reiserfs_warning(super, "sh-458",
2616 "cannot init journal device '%s': %i", 2620 "cannot init journal device '%s': %i",
2617 __bdevname(jdev, b), result); 2621 __bdevname(jdev, b), result);
2618 return result; 2622 return result;
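Most hunks in this file repeat one mechanical change: the message id ("sh-458", "clm-2076", ...) moves out of the format string into a separate argument so it can be printed uniformly. The real reiserfs_warning prototype lives elsewhere in the tree; the following is only a guess at the shape of such a wrapper, to show why the call sites get shorter:

#include <stdarg.h>
#include <stdio.h>

/* hypothetical stand-in for reiserfs_warning(sb, id, fmt, ...) */
static void fs_warning(const char *dev, const char *id,
		       const char *fmt, ...)
{
	va_list ap;

	printf("REISERFS warning (device %s): %s ", dev, id);
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
	printf("\n");
}

int main(void)
{
	/* new style: id separate, message free of boilerplate */
	fs_warning("sda1", "sh-458", "cannot init journal device '%s': %i",
		   "sdb1", -19);
	return 0;
}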
@@ -2662,30 +2666,30 @@ static int journal_init_dev(struct super_block *super,
2662 */ 2666 */
2663#define REISERFS_STANDARD_BLKSIZE (4096) 2667#define REISERFS_STANDARD_BLKSIZE (4096)
2664 2668
2665static int check_advise_trans_params(struct super_block *p_s_sb, 2669static int check_advise_trans_params(struct super_block *sb,
2666 struct reiserfs_journal *journal) 2670 struct reiserfs_journal *journal)
2667{ 2671{
2668 if (journal->j_trans_max) { 2672 if (journal->j_trans_max) {
2669 /* Non-default journal params. 2673 /* Non-default journal params.
2670 Do a sanity check on them. */ 2674 Do a sanity check on them. */
2671 int ratio = 1; 2675 int ratio = 1;
2672 if (p_s_sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2676 if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2673 ratio = REISERFS_STANDARD_BLKSIZE / p_s_sb->s_blocksize; 2677 ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
2674 2678
2675 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio || 2679 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
2676 journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio || 2680 journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
2677 SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max < 2681 SB_ONDISK_JOURNAL_SIZE(sb) / journal->j_trans_max <
2678 JOURNAL_MIN_RATIO) { 2682 JOURNAL_MIN_RATIO) {
2679 reiserfs_warning(p_s_sb, 2683 reiserfs_warning(sb, "sh-462",
2680 "sh-462: bad transaction max size (%u). FSCK?", 2684 "bad transaction max size (%u). "
2681 journal->j_trans_max); 2685 "FSCK?", journal->j_trans_max);
2682 return 1; 2686 return 1;
2683 } 2687 }
2684 if (journal->j_max_batch != (journal->j_trans_max) * 2688 if (journal->j_max_batch != (journal->j_trans_max) *
2685 JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) { 2689 JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT) {
2686 reiserfs_warning(p_s_sb, 2690 reiserfs_warning(sb, "sh-463",
2687 "sh-463: bad transaction max batch (%u). FSCK?", 2691 "bad transaction max batch (%u). "
2688 journal->j_max_batch); 2692 "FSCK?", journal->j_max_batch);
2689 return 1; 2693 return 1;
2690 } 2694 }
2691 } else { 2695 } else {
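The guard above scales the 4KB-tuned defaults for smaller block sizes: with 1KB blocks the ratio is 4, so j_trans_max must fall within [JOURNAL_TRANS_MIN_DEFAULT/4, JOURNAL_TRANS_MAX_DEFAULT/4] and the journal must still hold at least JOURNAL_MIN_RATIO transactions. A standalone sketch; the constant values are quoted from memory of reiserfs_fs.h and should be verified:

#include <stdio.h>

#define JOURNAL_TRANS_MAX_DEFAULT 1024
#define JOURNAL_TRANS_MIN_DEFAULT 256
#define JOURNAL_MIN_RATIO 2
#define REISERFS_STANDARD_BLKSIZE 4096

/* returns 0 if trans_max is acceptable for this blocksize/journal size */
static int trans_max_ok(unsigned blocksize, unsigned journal_size,
			unsigned trans_max)
{
	unsigned ratio = 1;

	if (blocksize < REISERFS_STANDARD_BLKSIZE)
		ratio = REISERFS_STANDARD_BLKSIZE / blocksize;

	if (trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio ||
	    trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio ||
	    journal_size / trans_max < JOURNAL_MIN_RATIO)
		return -1;
	return 0;
}

int main(void)
{
	/* 1KB blocks: the allowed range becomes [64, 256] */
	printf("1KB, trans_max 256: %s\n",
	       trans_max_ok(1024, 8192, 256) ? "bad" : "ok");
	printf("1KB, trans_max 1024: %s\n",
	       trans_max_ok(1024, 8192, 1024) ? "bad" : "ok");
	return 0;
}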
@@ -2693,9 +2697,11 @@ static int check_advise_trans_params(struct super_block *p_s_sb,
2693 The file system was created by an old version 2697 The file system was created by an old version
2694 of mkreiserfs, so some fields contain zeros, 2698 of mkreiserfs, so some fields contain zeros,
2695 and we need to advise proper values for them */ 2699 and we need to advise proper values for them */
2696 if (p_s_sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) 2700 if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2697 reiserfs_panic(p_s_sb, "sh-464: bad blocksize (%u)", 2701 reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2698 p_s_sb->s_blocksize); 2702 sb->s_blocksize);
2703 return 1;
2704 }
2699 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT; 2705 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2700 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT; 2706 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2701 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE; 2707 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
@@ -2706,10 +2712,10 @@ static int check_advise_trans_params(struct super_block *p_s_sb,
2706/* 2712/*
2707** must be called once on fs mount. calls journal_read for you 2713** must be called once on fs mount. calls journal_read for you
2708*/ 2714*/
2709int journal_init(struct super_block *p_s_sb, const char *j_dev_name, 2715int journal_init(struct super_block *sb, const char *j_dev_name,
2710 int old_format, unsigned int commit_max_age) 2716 int old_format, unsigned int commit_max_age)
2711{ 2717{
2712 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2; 2718 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(sb) * 2;
2713 struct buffer_head *bhjh; 2719 struct buffer_head *bhjh;
2714 struct reiserfs_super_block *rs; 2720 struct reiserfs_super_block *rs;
2715 struct reiserfs_journal_header *jh; 2721 struct reiserfs_journal_header *jh;
@@ -2717,10 +2723,10 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2717 struct reiserfs_journal_list *jl; 2723 struct reiserfs_journal_list *jl;
2718 char b[BDEVNAME_SIZE]; 2724 char b[BDEVNAME_SIZE];
2719 2725
2720 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal)); 2726 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2721 if (!journal) { 2727 if (!journal) {
2722 reiserfs_warning(p_s_sb, 2728 reiserfs_warning(sb, "journal-1256",
2723 "journal-1256: unable to get memory for journal structure"); 2729 "unable to get memory for journal structure");
2724 return 1; 2730 return 1;
2725 } 2731 }
2726 memset(journal, 0, sizeof(struct reiserfs_journal)); 2732 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2729,51 +2735,51 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2729 INIT_LIST_HEAD(&journal->j_working_list); 2735 INIT_LIST_HEAD(&journal->j_working_list);
2730 INIT_LIST_HEAD(&journal->j_journal_list); 2736 INIT_LIST_HEAD(&journal->j_journal_list);
2731 journal->j_persistent_trans = 0; 2737 journal->j_persistent_trans = 0;
2732 if (reiserfs_allocate_list_bitmaps(p_s_sb, 2738 if (reiserfs_allocate_list_bitmaps(sb,
2733 journal->j_list_bitmap, 2739 journal->j_list_bitmap,
2734 reiserfs_bmap_count(p_s_sb))) 2740 reiserfs_bmap_count(sb)))
2735 goto free_and_return; 2741 goto free_and_return;
2736 allocate_bitmap_nodes(p_s_sb); 2742 allocate_bitmap_nodes(sb);
2737 2743
2738 /* reserved for journal area support */ 2744 /* reserved for journal area support */
2739 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2745 SB_JOURNAL_1st_RESERVED_BLOCK(sb) = (old_format ?
2740 REISERFS_OLD_DISK_OFFSET_IN_BYTES 2746 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2741 / p_s_sb->s_blocksize + 2747 / sb->s_blocksize +
2742 reiserfs_bmap_count(p_s_sb) + 2748 reiserfs_bmap_count(sb) +
2743 1 : 2749 1 :
2744 REISERFS_DISK_OFFSET_IN_BYTES / 2750 REISERFS_DISK_OFFSET_IN_BYTES /
2745 p_s_sb->s_blocksize + 2); 2751 sb->s_blocksize + 2);
2746 2752
2747 /* Sanity check to see if the standard journal fits within the first bitmap 2753 /* Sanity check to see if the standard journal fits within the first bitmap
2748 (relevant for small blocksizes) */ 2754 (relevant for small blocksizes) */
2749 if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) && 2755 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2750 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + 2756 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2751 SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) { 2757 SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
2752 reiserfs_warning(p_s_sb, 2758 reiserfs_warning(sb, "journal-1393",
2753 "journal-1393: journal does not fit for area " 2759 "journal does not fit for area addressed "
2754 "addressed by first of bitmap blocks. It starts at " 2760 "by first of bitmap blocks. It starts at "
2755 "%u and its size is %u. Block size %ld", 2761 "%u and its size is %u. Block size %ld",
2756 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), 2762 SB_JOURNAL_1st_RESERVED_BLOCK(sb),
2757 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2763 SB_ONDISK_JOURNAL_SIZE(sb),
2758 p_s_sb->s_blocksize); 2764 sb->s_blocksize);
2759 goto free_and_return; 2765 goto free_and_return;
2760 } 2766 }
2761 2767
2762 if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) { 2768 if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2763 reiserfs_warning(p_s_sb, 2769 reiserfs_warning(sb, "sh-462",
2764 "sh-462: unable to initialize jornal device"); 2770 "unable to initialize jornal device");
2765 goto free_and_return; 2771 goto free_and_return;
2766 } 2772 }
2767 2773
2768 rs = SB_DISK_SUPER_BLOCK(p_s_sb); 2774 rs = SB_DISK_SUPER_BLOCK(sb);
2769 2775
2770 /* read journal header */ 2776 /* read journal header */
2771 bhjh = journal_bread(p_s_sb, 2777 bhjh = journal_bread(sb,
2772 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2778 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2773 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2779 SB_ONDISK_JOURNAL_SIZE(sb));
2774 if (!bhjh) { 2780 if (!bhjh) {
2775 reiserfs_warning(p_s_sb, 2781 reiserfs_warning(sb, "sh-459",
2776 "sh-459: unable to read journal header"); 2782 "unable to read journal header");
2777 goto free_and_return; 2783 goto free_and_return;
2778 } 2784 }
2779 jh = (struct reiserfs_journal_header *)(bhjh->b_data); 2785 jh = (struct reiserfs_journal_header *)(bhjh->b_data);
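Two computations in the hunk above decide where the journal may live: the reserved-block formula places it after the super block (old or new on-disk offset) plus the bitmap blocks, and the follow-up check insists the whole journal stays within the blocksize * 8 blocks that the first bitmap block can address. A self-contained sketch of both (the byte offsets are the reiserfs on-disk constants as best recalled; verify before relying on them):

#include <stdio.h>

#define REISERFS_DISK_OFFSET_IN_BYTES     (64 * 1024)
#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)

int main(void)
{
	unsigned blocksize = 4096, bmap_count = 1, journal_size = 8193;
	int old_format = 0;

	/* first block reserved for the journal area, as computed above */
	unsigned first_reserved = old_format
	    ? REISERFS_OLD_DISK_OFFSET_IN_BYTES / blocksize + bmap_count + 1
	    : REISERFS_DISK_OFFSET_IN_BYTES / blocksize + 2;

	/* one bitmap block addresses blocksize * 8 blocks */
	if (first_reserved + journal_size > blocksize * 8)
		printf("journal does not fit in the first bitmap's area\n");
	else
		printf("journal fits: blocks %u..%u\n",
		       first_reserved, first_reserved + journal_size - 1);
	return 0;
}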
@@ -2782,10 +2788,10 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2782 if (is_reiserfs_jr(rs) 2788 if (is_reiserfs_jr(rs)
2783 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != 2789 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2784 sb_jp_journal_magic(rs))) { 2790 sb_jp_journal_magic(rs))) {
2785 reiserfs_warning(p_s_sb, 2791 reiserfs_warning(sb, "sh-460",
2786 "sh-460: journal header magic %x " 2792 "journal header magic %x (device %s) does "
2787 "(device %s) does not match to magic found in super " 2793 "not match to magic found in super block %x",
2788 "block %x", jh->jh_journal.jp_journal_magic, 2794 jh->jh_journal.jp_journal_magic,
2789 bdevname(journal->j_dev_bd, b), 2795 bdevname(journal->j_dev_bd, b),
2790 sb_jp_journal_magic(rs)); 2796 sb_jp_journal_magic(rs));
2791 brelse(bhjh); 2797 brelse(bhjh);
@@ -2798,7 +2804,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2798 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age); 2804 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
2799 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 2805 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
2800 2806
2801 if (check_advise_trans_params(p_s_sb, journal) != 0) 2807 if (check_advise_trans_params(sb, journal) != 0)
2802 goto free_and_return; 2808 goto free_and_return;
2803 journal->j_default_max_commit_age = journal->j_max_commit_age; 2809 journal->j_default_max_commit_age = journal->j_max_commit_age;
2804 2810
@@ -2807,12 +2813,12 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2807 journal->j_max_trans_age = commit_max_age; 2813 journal->j_max_trans_age = commit_max_age;
2808 } 2814 }
2809 2815
2810 reiserfs_info(p_s_sb, "journal params: device %s, size %u, " 2816 reiserfs_info(sb, "journal params: device %s, size %u, "
2811 "journal first block %u, max trans len %u, max batch %u, " 2817 "journal first block %u, max trans len %u, max batch %u, "
2812 "max commit age %u, max trans age %u\n", 2818 "max commit age %u, max trans age %u\n",
2813 bdevname(journal->j_dev_bd, b), 2819 bdevname(journal->j_dev_bd, b),
2814 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2820 SB_ONDISK_JOURNAL_SIZE(sb),
2815 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2821 SB_ONDISK_JOURNAL_1st_BLOCK(sb),
2816 journal->j_trans_max, 2822 journal->j_trans_max,
2817 journal->j_max_batch, 2823 journal->j_max_batch,
2818 journal->j_max_commit_age, journal->j_max_trans_age); 2824 journal->j_max_commit_age, journal->j_max_trans_age);
@@ -2820,7 +2826,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2820 brelse(bhjh); 2826 brelse(bhjh);
2821 2827
2822 journal->j_list_bitmap_index = 0; 2828 journal->j_list_bitmap_index = 0;
2823 journal_list_init(p_s_sb); 2829 journal_list_init(sb);
2824 2830
2825 memset(journal->j_list_hash_table, 0, 2831 memset(journal->j_list_hash_table, 0,
2826 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)); 2832 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
@@ -2852,7 +2858,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2852 journal->j_must_wait = 0; 2858 journal->j_must_wait = 0;
2853 2859
2854 if (journal->j_cnode_free == 0) { 2860 if (journal->j_cnode_free == 0) {
2855 reiserfs_warning(p_s_sb, "journal-2004: Journal cnode memory " 2861 reiserfs_warning(sb, "journal-2004", "Journal cnode memory "
2856 "allocation failed (%ld bytes). Journal is " 2862 "allocation failed (%ld bytes). Journal is "
2857 "too large for available memory. Usually " 2863 "too large for available memory. Usually "
2858 "this is due to a journal that is too large.", 2864 "this is due to a journal that is too large.",
@@ -2860,16 +2866,17 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2860 goto free_and_return; 2866 goto free_and_return;
2861 } 2867 }
2862 2868
2863 init_journal_hash(p_s_sb); 2869 init_journal_hash(sb);
2864 jl = journal->j_current_jl; 2870 jl = journal->j_current_jl;
2865 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); 2871 jl->j_list_bitmap = get_list_bitmap(sb, jl);
2866 if (!jl->j_list_bitmap) { 2872 if (!jl->j_list_bitmap) {
2867 reiserfs_warning(p_s_sb, 2873 reiserfs_warning(sb, "journal-2005",
2868 "journal-2005, get_list_bitmap failed for journal list 0"); 2874 "get_list_bitmap failed for journal list 0");
2869 goto free_and_return; 2875 goto free_and_return;
2870 } 2876 }
2871 if (journal_read(p_s_sb) < 0) { 2877 if (journal_read(sb) < 0) {
2872 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount"); 2878 reiserfs_warning(sb, "reiserfs-2006",
2879 "Replay Failure, unable to mount");
2873 goto free_and_return; 2880 goto free_and_return;
2874 } 2881 }
2875 2882
@@ -2878,10 +2885,10 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2878 commit_wq = create_workqueue("reiserfs"); 2885 commit_wq = create_workqueue("reiserfs");
2879 2886
2880 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2887 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2881 journal->j_work_sb = p_s_sb; 2888 journal->j_work_sb = sb;
2882 return 0; 2889 return 0;
2883 free_and_return: 2890 free_and_return:
2884 free_journal_ram(p_s_sb); 2891 free_journal_ram(sb);
2885 return 1; 2892 return 1;
2886} 2893}
2887 2894
@@ -2912,7 +2919,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2912 return 0; 2919 return 0;
2913} 2920}
2914 2921
2915/* this must be called inside a transaction, and requires the 2922/* this must be called inside a transaction, and requires the
2916** kernel_lock to be held 2923** kernel_lock to be held
2917*/ 2924*/
2918void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2925void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
@@ -2970,7 +2977,7 @@ static void wake_queued_writers(struct super_block *s)
2970 wake_up(&journal->j_join_wait); 2977 wake_up(&journal->j_join_wait);
2971} 2978}
2972 2979
2973static void let_transaction_grow(struct super_block *sb, unsigned long trans_id) 2980static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2974{ 2981{
2975 struct reiserfs_journal *journal = SB_JOURNAL(sb); 2982 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2976 unsigned long bcount = journal->j_bcount; 2983 unsigned long bcount = journal->j_bcount;
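The quiet type change here (unsigned long -> unsigned int for trans_id, repeated for old_trans_id and i_trans_id below) appears to track the on-disk format: transaction ids are stored as 32-bit little-endian values (note the le32_to_cpu() calls in the replay code above), so a 64-bit unsigned long only hides where the counter wraps. A tiny illustration of the width mismatch, assuming an LP64 machine:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t on_disk = 0xffffffffu;  /* a trans_id as stored on disk */
	unsigned long wide = on_disk;    /* 64 bits on an LP64 kernel */

	uint32_t next32 = on_disk + 1;   /* the 32-bit counter wraps to 0 */
	unsigned long next64 = wide + 1; /* the wide copy keeps counting */

	printf("32-bit next: %u, 64-bit next: %lu\n", next32, next64);
	return 0;
}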
@@ -2997,43 +3004,43 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
2997** expect to use in nblocks. 3004** expect to use in nblocks.
2998*/ 3005*/
2999static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 3006static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3000 struct super_block *p_s_sb, unsigned long nblocks, 3007 struct super_block *sb, unsigned long nblocks,
3001 int join) 3008 int join)
3002{ 3009{
3003 time_t now = get_seconds(); 3010 time_t now = get_seconds();
3004 int old_trans_id; 3011 unsigned int old_trans_id;
3005 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3012 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3006 struct reiserfs_transaction_handle myth; 3013 struct reiserfs_transaction_handle myth;
3007 int sched_count = 0; 3014 int sched_count = 0;
3008 int retval; 3015 int retval;
3009 3016
3010 reiserfs_check_lock_depth(p_s_sb, "journal_begin"); 3017 reiserfs_check_lock_depth(sb, "journal_begin");
3011 BUG_ON(nblocks > journal->j_trans_max); 3018 BUG_ON(nblocks > journal->j_trans_max);
3012 3019
3013 PROC_INFO_INC(p_s_sb, journal.journal_being); 3020 PROC_INFO_INC(sb, journal.journal_being);
3014 /* set here for journal_join */ 3021 /* set here for journal_join */
3015 th->t_refcount = 1; 3022 th->t_refcount = 1;
3016 th->t_super = p_s_sb; 3023 th->t_super = sb;
3017 3024
3018 relock: 3025 relock:
3019 lock_journal(p_s_sb); 3026 lock_journal(sb);
3020 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 3027 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
3021 unlock_journal(p_s_sb); 3028 unlock_journal(sb);
3022 retval = journal->j_errno; 3029 retval = journal->j_errno;
3023 goto out_fail; 3030 goto out_fail;
3024 } 3031 }
3025 journal->j_bcount++; 3032 journal->j_bcount++;
3026 3033
3027 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 3034 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3028 unlock_journal(p_s_sb); 3035 unlock_journal(sb);
3029 reiserfs_wait_on_write_block(p_s_sb); 3036 reiserfs_wait_on_write_block(sb);
3030 PROC_INFO_INC(p_s_sb, journal.journal_relock_writers); 3037 PROC_INFO_INC(sb, journal.journal_relock_writers);
3031 goto relock; 3038 goto relock;
3032 } 3039 }
3033 now = get_seconds(); 3040 now = get_seconds();
3034 3041
3035 /* if there is no room in the journal OR 3042 /* if there is no room in the journal OR
3036 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning; 3043 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning;
3037 ** we don't sleep if there aren't other writers 3044 ** we don't sleep if there aren't other writers
3038 */ 3045 */
3039 3046
@@ -3048,7 +3055,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3048 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 3055 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
3049 3056
3050 old_trans_id = journal->j_trans_id; 3057 old_trans_id = journal->j_trans_id;
3051 unlock_journal(p_s_sb); /* allow others to finish this transaction */ 3058 unlock_journal(sb); /* allow others to finish this transaction */
3052 3059
3053 if (!join && (journal->j_len_alloc + nblocks + 2) >= 3060 if (!join && (journal->j_len_alloc + nblocks + 2) >=
3054 journal->j_max_batch && 3061 journal->j_max_batch &&
@@ -3056,7 +3063,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3056 (journal->j_len_alloc * 75)) { 3063 (journal->j_len_alloc * 75)) {
3057 if (atomic_read(&journal->j_wcount) > 10) { 3064 if (atomic_read(&journal->j_wcount) > 10) {
3058 sched_count++; 3065 sched_count++;
3059 queue_log_writer(p_s_sb); 3066 queue_log_writer(sb);
3060 goto relock; 3067 goto relock;
3061 } 3068 }
3062 } 3069 }
@@ -3066,25 +3073,25 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3066 if (atomic_read(&journal->j_jlock)) { 3073 if (atomic_read(&journal->j_jlock)) {
3067 while (journal->j_trans_id == old_trans_id && 3074 while (journal->j_trans_id == old_trans_id &&
3068 atomic_read(&journal->j_jlock)) { 3075 atomic_read(&journal->j_jlock)) {
3069 queue_log_writer(p_s_sb); 3076 queue_log_writer(sb);
3070 } 3077 }
3071 goto relock; 3078 goto relock;
3072 } 3079 }
3073 retval = journal_join(&myth, p_s_sb, 1); 3080 retval = journal_join(&myth, sb, 1);
3074 if (retval) 3081 if (retval)
3075 goto out_fail; 3082 goto out_fail;
3076 3083
3077 /* someone might have ended the transaction while we joined */ 3084 /* someone might have ended the transaction while we joined */
3078 if (old_trans_id != journal->j_trans_id) { 3085 if (old_trans_id != journal->j_trans_id) {
3079 retval = do_journal_end(&myth, p_s_sb, 1, 0); 3086 retval = do_journal_end(&myth, sb, 1, 0);
3080 } else { 3087 } else {
3081 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW); 3088 retval = do_journal_end(&myth, sb, 1, COMMIT_NOW);
3082 } 3089 }
3083 3090
3084 if (retval) 3091 if (retval)
3085 goto out_fail; 3092 goto out_fail;
3086 3093
3087 PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount); 3094 PROC_INFO_INC(sb, journal.journal_relock_wcount);
3088 goto relock; 3095 goto relock;
3089 } 3096 }
3090 /* we are the first writer, set trans_id */ 3097 /* we are the first writer, set trans_id */
@@ -3096,7 +3103,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3096 th->t_blocks_logged = 0; 3103 th->t_blocks_logged = 0;
3097 th->t_blocks_allocated = nblocks; 3104 th->t_blocks_allocated = nblocks;
3098 th->t_trans_id = journal->j_trans_id; 3105 th->t_trans_id = journal->j_trans_id;
3099 unlock_journal(p_s_sb); 3106 unlock_journal(sb);
3100 INIT_LIST_HEAD(&th->t_list); 3107 INIT_LIST_HEAD(&th->t_list);
3101 get_fs_excl(); 3108 get_fs_excl();
3102 return 0; 3109 return 0;
@@ -3106,7 +3113,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3106 /* Re-set th->t_super, so we can properly keep track of how many 3113 /* Re-set th->t_super, so we can properly keep track of how many
3107 * persistent transactions there are. We need to do this so if this 3114 * persistent transactions there are. We need to do this so if this
3108 * call is part of a failed restart_transaction, we can free it later */ 3115 * call is part of a failed restart_transaction, we can free it later */
3109 th->t_super = p_s_sb; 3116 th->t_super = sb;
3110 return retval; 3117 return retval;
3111} 3118}
3112 3119
@@ -3157,7 +3164,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3157} 3164}
3158 3165
3159static int journal_join(struct reiserfs_transaction_handle *th, 3166static int journal_join(struct reiserfs_transaction_handle *th,
3160 struct super_block *p_s_sb, unsigned long nblocks) 3167 struct super_block *sb, unsigned long nblocks)
3161{ 3168{
3162 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3169 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3163 3170
@@ -3166,11 +3173,11 @@ static int journal_join(struct reiserfs_transaction_handle *th,
3166 */ 3173 */
3167 th->t_handle_save = cur_th; 3174 th->t_handle_save = cur_th;
3168 BUG_ON(cur_th && cur_th->t_refcount > 1); 3175 BUG_ON(cur_th && cur_th->t_refcount > 1);
3169 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN); 3176 return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN);
3170} 3177}
3171 3178
3172int journal_join_abort(struct reiserfs_transaction_handle *th, 3179int journal_join_abort(struct reiserfs_transaction_handle *th,
3173 struct super_block *p_s_sb, unsigned long nblocks) 3180 struct super_block *sb, unsigned long nblocks)
3174{ 3181{
3175 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3182 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3176 3183
@@ -3179,11 +3186,11 @@ int journal_join_abort(struct reiserfs_transaction_handle *th,
3179 */ 3186 */
3180 th->t_handle_save = cur_th; 3187 th->t_handle_save = cur_th;
3181 BUG_ON(cur_th && cur_th->t_refcount > 1); 3188 BUG_ON(cur_th && cur_th->t_refcount > 1);
3182 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT); 3189 return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT);
3183} 3190}
3184 3191
3185int journal_begin(struct reiserfs_transaction_handle *th, 3192int journal_begin(struct reiserfs_transaction_handle *th,
3186 struct super_block *p_s_sb, unsigned long nblocks) 3193 struct super_block *sb, unsigned long nblocks)
3187{ 3194{
3188 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3195 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3189 int ret; 3196 int ret;
@@ -3191,28 +3198,29 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3191 th->t_handle_save = NULL; 3198 th->t_handle_save = NULL;
3192 if (cur_th) { 3199 if (cur_th) {
3193 /* we are nesting into the current transaction */ 3200 /* we are nesting into the current transaction */
3194 if (cur_th->t_super == p_s_sb) { 3201 if (cur_th->t_super == sb) {
3195 BUG_ON(!cur_th->t_refcount); 3202 BUG_ON(!cur_th->t_refcount);
3196 cur_th->t_refcount++; 3203 cur_th->t_refcount++;
3197 memcpy(th, cur_th, sizeof(*th)); 3204 memcpy(th, cur_th, sizeof(*th));
3198 if (th->t_refcount <= 1) 3205 if (th->t_refcount <= 1)
3199 reiserfs_warning(p_s_sb, 3206 reiserfs_warning(sb, "reiserfs-2005",
3200 "BAD: refcount <= 1, but journal_info != 0"); 3207 "BAD: refcount <= 1, but "
3208 "journal_info != 0");
3201 return 0; 3209 return 0;
3202 } else { 3210 } else {
3203 /* we've ended up with a handle from a different filesystem. 3211 /* we've ended up with a handle from a different filesystem.
3204 ** save it and restore on journal_end. This should never 3212 ** save it and restore on journal_end. This should never
3205 ** really happen... 3213 ** really happen...
3206 */ 3214 */
3207 reiserfs_warning(p_s_sb, 3215 reiserfs_warning(sb, "clm-2100",
3208 "clm-2100: nesting info a different FS"); 3216 "nesting info a different FS");
3209 th->t_handle_save = current->journal_info; 3217 th->t_handle_save = current->journal_info;
3210 current->journal_info = th; 3218 current->journal_info = th;
3211 } 3219 }
3212 } else { 3220 } else {
3213 current->journal_info = th; 3221 current->journal_info = th;
3214 } 3222 }
3215 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG); 3223 ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
3216 BUG_ON(current->journal_info != th); 3224 BUG_ON(current->journal_info != th);
3217 3225
3218 /* I guess this boils down to being the reciprocal of clm-2100 above. 3226 /* I guess this boils down to being the reciprocal of clm-2100 above.
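The nesting branch above is plain reference counting on the per-task handle: a second journal_begin against the same super block bumps t_refcount and copies the live handle rather than opening a new transaction, and only the outermost end commits. A stripped-down, lock-free model of that rule with invented types:

#include <stdio.h>
#include <string.h>

struct handle { int refcount; const char *super; };

static struct handle *journal_info;   /* models current->journal_info */

/* returns 0 on success, mirroring the nesting branch above */
static int begin(struct handle *th, const char *sb)
{
	if (journal_info && strcmp(journal_info->super, sb) == 0) {
		journal_info->refcount++;     /* nested begin: share handle */
		*th = *journal_info;
		return 0;
	}
	th->refcount = 1;                     /* outermost begin */
	th->super = sb;
	journal_info = th;
	return 0;
}

static void end(struct handle *th)
{
	(void)th;                             /* real code re-checks the handle */
	if (--journal_info->refcount == 0)    /* last end commits */
		journal_info = NULL;
}

int main(void)
{
	struct handle outer, inner;
	begin(&outer, "sda1");
	begin(&inner, "sda1");                /* nests, refcount becomes 2 */
	printf("refcount after nested begin: %d\n", journal_info->refcount);
	end(&inner);
	end(&outer);
	printf("open handle after both ends: %s\n",
	       journal_info ? "yes" : "no");
	return 0;
}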
@@ -3232,32 +3240,32 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3232** 3240**
3233** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 3241** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the
3234** transaction is committed. 3242** transaction is committed.
3235** 3243**
3236** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 3244** if j_len is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
3237*/ 3245*/
3238int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3246int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3239 struct super_block *p_s_sb, struct buffer_head *bh) 3247 struct super_block *sb, struct buffer_head *bh)
3240{ 3248{
3241 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3249 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3242 struct reiserfs_journal_cnode *cn = NULL; 3250 struct reiserfs_journal_cnode *cn = NULL;
3243 int count_already_incd = 0; 3251 int count_already_incd = 0;
3244 int prepared = 0; 3252 int prepared = 0;
3245 BUG_ON(!th->t_trans_id); 3253 BUG_ON(!th->t_trans_id);
3246 3254
3247 PROC_INFO_INC(p_s_sb, journal.mark_dirty); 3255 PROC_INFO_INC(sb, journal.mark_dirty);
3248 if (th->t_trans_id != journal->j_trans_id) { 3256 if (th->t_trans_id != journal->j_trans_id) {
3249 reiserfs_panic(th->t_super, 3257 reiserfs_panic(th->t_super, "journal-1577",
3250 "journal-1577: handle trans id %ld != current trans id %ld\n", 3258 "handle trans id %ld != current trans id %ld",
3251 th->t_trans_id, journal->j_trans_id); 3259 th->t_trans_id, journal->j_trans_id);
3252 } 3260 }
3253 3261
3254 p_s_sb->s_dirt = 1; 3262 sb->s_dirt = 1;
3255 3263
3256 prepared = test_clear_buffer_journal_prepared(bh); 3264 prepared = test_clear_buffer_journal_prepared(bh);
3257 clear_buffer_journal_restore_dirty(bh); 3265 clear_buffer_journal_restore_dirty(bh);
3258 /* already in this transaction, we are done */ 3266 /* already in this transaction, we are done */
3259 if (buffer_journaled(bh)) { 3267 if (buffer_journaled(bh)) {
3260 PROC_INFO_INC(p_s_sb, journal.mark_dirty_already); 3268 PROC_INFO_INC(sb, journal.mark_dirty_already);
3261 return 0; 3269 return 0;
3262 } 3270 }
3263 3271
@@ -3266,7 +3274,8 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3266 ** could get to disk too early. NOT GOOD. 3274 ** could get to disk too early. NOT GOOD.
3267 */ 3275 */
3268 if (!prepared || buffer_dirty(bh)) { 3276 if (!prepared || buffer_dirty(bh)) {
3269 reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state " 3277 reiserfs_warning(sb, "journal-1777",
3278 "buffer %llu bad state "
3270 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3279 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
3271 (unsigned long long)bh->b_blocknr, 3280 (unsigned long long)bh->b_blocknr,
3272 prepared ? ' ' : '!', 3281 prepared ? ' ' : '!',
@@ -3276,23 +3285,23 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3276 } 3285 }
3277 3286
3278 if (atomic_read(&(journal->j_wcount)) <= 0) { 3287 if (atomic_read(&(journal->j_wcount)) <= 0) {
3279 reiserfs_warning(p_s_sb, 3288 reiserfs_warning(sb, "journal-1409",
3280 "journal-1409: journal_mark_dirty returning because j_wcount was %d", 3289 "returning because j_wcount was %d",
3281 atomic_read(&(journal->j_wcount))); 3290 atomic_read(&(journal->j_wcount)));
3282 return 1; 3291 return 1;
3283 } 3292 }
3284 /* this error means I've screwed up, and we've overflowed the transaction. 3293 /* this error means I've screwed up, and we've overflowed the transaction.
3285 ** Nothing can be done here, except make the FS readonly or panic. 3294 ** Nothing can be done here, except make the FS readonly or panic.
3286 */ 3295 */
3287 if (journal->j_len >= journal->j_trans_max) { 3296 if (journal->j_len >= journal->j_trans_max) {
3288 reiserfs_panic(th->t_super, 3297 reiserfs_panic(th->t_super, "journal-1413",
3289 "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", 3298 "j_len (%lu) is too big",
3290 journal->j_len); 3299 journal->j_len);
3291 } 3300 }
3292 3301
3293 if (buffer_journal_dirty(bh)) { 3302 if (buffer_journal_dirty(bh)) {
3294 count_already_incd = 1; 3303 count_already_incd = 1;
3295 PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal); 3304 PROC_INFO_INC(sb, journal.mark_dirty_notjournal);
3296 clear_buffer_journal_dirty(bh); 3305 clear_buffer_journal_dirty(bh);
3297 } 3306 }
3298 3307
@@ -3304,9 +3313,9 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3304 3313
3305 /* now put this guy on the end */ 3314 /* now put this guy on the end */
3306 if (!cn) { 3315 if (!cn) {
3307 cn = get_cnode(p_s_sb); 3316 cn = get_cnode(sb);
3308 if (!cn) { 3317 if (!cn) {
3309 reiserfs_panic(p_s_sb, "get_cnode failed!\n"); 3318 reiserfs_panic(sb, "journal-4", "get_cnode failed!");
3310 } 3319 }
3311 3320
3312 if (th->t_blocks_logged == th->t_blocks_allocated) { 3321 if (th->t_blocks_logged == th->t_blocks_allocated) {
@@ -3318,7 +3327,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3318 3327
3319 cn->bh = bh; 3328 cn->bh = bh;
3320 cn->blocknr = bh->b_blocknr; 3329 cn->blocknr = bh->b_blocknr;
3321 cn->sb = p_s_sb; 3330 cn->sb = sb;
3322 cn->jlist = NULL; 3331 cn->jlist = NULL;
3323 insert_journal_hash(journal->j_hash_table, cn); 3332 insert_journal_hash(journal->j_hash_table, cn);
3324 if (!count_already_incd) { 3333 if (!count_already_incd) {
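The cnode machinery above amounts to a chained hash table keyed by block number, so a later journal_mark_dirty or journal_mark_freed can find a transaction's copy of a block without walking the whole list. A minimal sketch of that kind of insert and lookup (not the kernel's insert_journal_hash, just the idea):

#include <stdio.h>

#define HASH_SIZE 97

struct cnode {
	unsigned long blocknr;
	struct cnode *hnext;          /* chain within one bucket */
};

static struct cnode *table[HASH_SIZE];

static void insert(struct cnode *cn)
{
	unsigned h = cn->blocknr % HASH_SIZE;
	cn->hnext = table[h];          /* push onto the bucket's chain */
	table[h] = cn;
}

static struct cnode *lookup(unsigned long blocknr)
{
	for (struct cnode *cn = table[blocknr % HASH_SIZE]; cn; cn = cn->hnext)
		if (cn->blocknr == blocknr)
			return cn;
	return NULL;
}

int main(void)
{
	struct cnode a = { 4242, NULL };
	insert(&a);
	printf("block 4242 %s\n", lookup(4242) ? "found" : "missing");
	printf("block 4243 %s\n", lookup(4243) ? "found" : "missing");
	return 0;
}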
@@ -3339,11 +3348,11 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3339} 3348}
3340 3349
3341int journal_end(struct reiserfs_transaction_handle *th, 3350int journal_end(struct reiserfs_transaction_handle *th,
3342 struct super_block *p_s_sb, unsigned long nblocks) 3351 struct super_block *sb, unsigned long nblocks)
3343{ 3352{
3344 if (!current->journal_info && th->t_refcount > 1) 3353 if (!current->journal_info && th->t_refcount > 1)
3345 reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d", 3354 reiserfs_warning(sb, "REISER-NESTING",
3346 th->t_refcount); 3355 "th NULL, refcount %d", th->t_refcount);
3347 3356
3348 if (!th->t_trans_id) { 3357 if (!th->t_trans_id) {
3349 WARN_ON(1); 3358 WARN_ON(1);
@@ -3366,26 +3375,26 @@ int journal_end(struct reiserfs_transaction_handle *th,
3366 } 3375 }
3367 return 0; 3376 return 0;
3368 } else { 3377 } else {
3369 return do_journal_end(th, p_s_sb, nblocks, 0); 3378 return do_journal_end(th, sb, nblocks, 0);
3370 } 3379 }
3371} 3380}
3372 3381
3373/* removes from the current transaction, relsing and decrementing any counters. 3382/* removes from the current transaction, relsing and decrementing any counters.
3374** also files the removed buffer directly onto the clean list 3383** also files the removed buffer directly onto the clean list
3375** 3384**
3376** called by journal_mark_freed when a block has been deleted 3385** called by journal_mark_freed when a block has been deleted
3377** 3386**
3378** returns 1 if it cleaned and relsed the buffer. 0 otherwise 3387** returns 1 if it cleaned and relsed the buffer. 0 otherwise
3379*/ 3388*/
3380static int remove_from_transaction(struct super_block *p_s_sb, 3389static int remove_from_transaction(struct super_block *sb,
3381 b_blocknr_t blocknr, int already_cleaned) 3390 b_blocknr_t blocknr, int already_cleaned)
3382{ 3391{
3383 struct buffer_head *bh; 3392 struct buffer_head *bh;
3384 struct reiserfs_journal_cnode *cn; 3393 struct reiserfs_journal_cnode *cn;
3385 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3394 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3386 int ret = 0; 3395 int ret = 0;
3387 3396
3388 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3397 cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
3389 if (!cn || !cn->bh) { 3398 if (!cn || !cn->bh) {
3390 return ret; 3399 return ret;
3391 } 3400 }
@@ -3403,7 +3412,7 @@ static int remove_from_transaction(struct super_block *p_s_sb,
3403 journal->j_last = cn->prev; 3412 journal->j_last = cn->prev;
3404 } 3413 }
3405 if (bh) 3414 if (bh)
3406 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, 3415 remove_journal_hash(sb, journal->j_hash_table, NULL,
3407 bh->b_blocknr, 0); 3416 bh->b_blocknr, 0);
3408 clear_buffer_journaled(bh); /* don't log this one */ 3417 clear_buffer_journaled(bh); /* don't log this one */
3409 3418
@@ -3413,14 +3422,14 @@ static int remove_from_transaction(struct super_block *p_s_sb,
3413 clear_buffer_journal_test(bh); 3422 clear_buffer_journal_test(bh);
3414 put_bh(bh); 3423 put_bh(bh);
3415 if (atomic_read(&(bh->b_count)) < 0) { 3424 if (atomic_read(&(bh->b_count)) < 0) {
3416 reiserfs_warning(p_s_sb, 3425 reiserfs_warning(sb, "journal-1752",
3417 "journal-1752: remove from trans, b_count < 0"); 3426 "b_count < 0");
3418 } 3427 }
3419 ret = 1; 3428 ret = 1;
3420 } 3429 }
3421 journal->j_len--; 3430 journal->j_len--;
3422 journal->j_len_alloc--; 3431 journal->j_len_alloc--;
3423 free_cnode(p_s_sb, cn); 3432 free_cnode(sb, cn);
3424 return ret; 3433 return ret;
3425} 3434}
3426 3435
@@ -3468,22 +3477,22 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3468} 3477}
3469 3478
3470/* syncs the commit blocks, but does not force the real buffers to disk 3479/* syncs the commit blocks, but does not force the real buffers to disk
3471** will wait until the current transaction is done/committed before returning 3480** will wait until the current transaction is done/committed before returning
3472*/ 3481*/
3473int journal_end_sync(struct reiserfs_transaction_handle *th, 3482int journal_end_sync(struct reiserfs_transaction_handle *th,
3474 struct super_block *p_s_sb, unsigned long nblocks) 3483 struct super_block *sb, unsigned long nblocks)
3475{ 3484{
3476 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3485 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3477 3486
3478 BUG_ON(!th->t_trans_id); 3487 BUG_ON(!th->t_trans_id);
3479 /* you can sync while nested, very, very bad */ 3488 /* you can sync while nested, very, very bad */
3480 BUG_ON(th->t_refcount > 1); 3489 BUG_ON(th->t_refcount > 1);
3481 if (journal->j_len == 0) { 3490 if (journal->j_len == 0) {
3482 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 3491 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3483 1); 3492 1);
3484 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)); 3493 journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
3485 } 3494 }
3486 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT); 3495 return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
3487} 3496}
3488 3497
3489/* 3498/*
@@ -3493,7 +3502,7 @@ static void flush_async_commits(struct work_struct *work)
3493{ 3502{
3494 struct reiserfs_journal *journal = 3503 struct reiserfs_journal *journal =
3495 container_of(work, struct reiserfs_journal, j_work.work); 3504 container_of(work, struct reiserfs_journal, j_work.work);
3496 struct super_block *p_s_sb = journal->j_work_sb; 3505 struct super_block *sb = journal->j_work_sb;
3497 struct reiserfs_journal_list *jl; 3506 struct reiserfs_journal_list *jl;
3498 struct list_head *entry; 3507 struct list_head *entry;
3499 3508
@@ -3502,7 +3511,7 @@ static void flush_async_commits(struct work_struct *work)
3502 /* last entry is the youngest, commit it and you get everything */ 3511 /* last entry is the youngest, commit it and you get everything */
3503 entry = journal->j_journal_list.prev; 3512 entry = journal->j_journal_list.prev;
3504 jl = JOURNAL_LIST_ENTRY(entry); 3513 jl = JOURNAL_LIST_ENTRY(entry);
3505 flush_commit_list(p_s_sb, jl, 1); 3514 flush_commit_list(sb, jl, 1);
3506 } 3515 }
3507 unlock_kernel(); 3516 unlock_kernel();
3508} 3517}
@@ -3511,11 +3520,11 @@ static void flush_async_commits(struct work_struct *work)
3511** flushes any old transactions to disk 3520** flushes any old transactions to disk
3512** ends the current transaction if it is too old 3521** ends the current transaction if it is too old
3513*/ 3522*/
3514int reiserfs_flush_old_commits(struct super_block *p_s_sb) 3523int reiserfs_flush_old_commits(struct super_block *sb)
3515{ 3524{
3516 time_t now; 3525 time_t now;
3517 struct reiserfs_transaction_handle th; 3526 struct reiserfs_transaction_handle th;
3518 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3527 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3519 3528
3520 now = get_seconds(); 3529 now = get_seconds();
3521 /* safety check so we don't flush while we are replaying the log during 3530 /* safety check so we don't flush while we are replaying the log during
@@ -3532,35 +3541,35 @@ int reiserfs_flush_old_commits(struct super_block *p_s_sb)
3532 journal->j_trans_start_time > 0 && 3541 journal->j_trans_start_time > 0 &&
3533 journal->j_len > 0 && 3542 journal->j_len > 0 &&
3534 (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3543 (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3535 if (!journal_join(&th, p_s_sb, 1)) { 3544 if (!journal_join(&th, sb, 1)) {
3536 reiserfs_prepare_for_journal(p_s_sb, 3545 reiserfs_prepare_for_journal(sb,
3537 SB_BUFFER_WITH_SB(p_s_sb), 3546 SB_BUFFER_WITH_SB(sb),
3538 1); 3547 1);
3539 journal_mark_dirty(&th, p_s_sb, 3548 journal_mark_dirty(&th, sb,
3540 SB_BUFFER_WITH_SB(p_s_sb)); 3549 SB_BUFFER_WITH_SB(sb));
3541 3550
3542 /* we're only being called from kreiserfsd, it makes no sense to do 3551 /* we're only being called from kreiserfsd, it makes no sense to do
3543 ** an async commit so that kreiserfsd can do it later 3552 ** an async commit so that kreiserfsd can do it later
3544 */ 3553 */
3545 do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT); 3554 do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
3546 } 3555 }
3547 } 3556 }
3548 return p_s_sb->s_dirt; 3557 return sb->s_dirt;
3549} 3558}
3550 3559
3551/* 3560/*
3552** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 3561** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
3553** 3562**
3554** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 3563** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
3555** the writers are done. By the time it wakes up, the transaction it was called in has already ended, so it just 3564** the writers are done. By the time it wakes up, the transaction it was called in has already ended, so it just
3556** flushes the commit list and returns 0. 3565** flushes the commit list and returns 0.
3557** 3566**
3558** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 3567** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait.
3559** 3568**
3560** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3569** Note, we can't allow the journal_end to proceed while there are still writers in the log.
3561*/ 3570*/
3562static int check_journal_end(struct reiserfs_transaction_handle *th, 3571static int check_journal_end(struct reiserfs_transaction_handle *th,
3563 struct super_block *p_s_sb, unsigned long nblocks, 3572 struct super_block *sb, unsigned long nblocks,
3564 int flags) 3573 int flags)
3565{ 3574{
3566 3575
@@ -3569,13 +3578,13 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3569 int commit_now = flags & COMMIT_NOW; 3578 int commit_now = flags & COMMIT_NOW;
3570 int wait_on_commit = flags & WAIT; 3579 int wait_on_commit = flags & WAIT;
3571 struct reiserfs_journal_list *jl; 3580 struct reiserfs_journal_list *jl;
3572 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3581 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3573 3582
3574 BUG_ON(!th->t_trans_id); 3583 BUG_ON(!th->t_trans_id);
3575 3584
3576 if (th->t_trans_id != journal->j_trans_id) { 3585 if (th->t_trans_id != journal->j_trans_id) {
3577 reiserfs_panic(th->t_super, 3586 reiserfs_panic(th->t_super, "journal-1577",
3578 "journal-1577: handle trans id %ld != current trans id %ld\n", 3587 "handle trans id %ld != current trans id %ld",
3579 th->t_trans_id, journal->j_trans_id); 3588 th->t_trans_id, journal->j_trans_id);
3580 } 3589 }
3581 3590
@@ -3584,7 +3593,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3584 atomic_dec(&(journal->j_wcount)); 3593 atomic_dec(&(journal->j_wcount));
3585 } 3594 }
3586 3595
3587 /* BUG, deal with the case where j_len is 0 but blocks freed earlier still need to be released; 3596 /* BUG, deal with the case where j_len is 0 but blocks freed earlier still need to be released;
3588 ** this will be dealt with by the next transaction that actually writes something, but should be taken 3597 ** this will be dealt with by the next transaction that actually writes something, but should be taken
3589 ** care of in this trans 3598 ** care of in this trans
3590 */ 3599 */
@@ -3593,7 +3602,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3593 /* if wcount > 0, and we are called to with flush or commit_now, 3602 /* if wcount > 0, and we are called to with flush or commit_now,
3594 ** we wait on j_join_wait. We will wake up when the last writer has 3603 ** we wait on j_join_wait. We will wake up when the last writer has
3595 ** finished the transaction, and started it on its way to the disk. 3604 ** finished the transaction, and started it on its way to the disk.
3596 ** Then, we flush the commit or journal list, and just return 0 3605 ** Then, we flush the commit or journal list, and just return 0
3597 ** because the rest of journal end was already done for this transaction. 3606 ** because the rest of journal end was already done for this transaction.
3598 */ 3607 */
3599 if (atomic_read(&(journal->j_wcount)) > 0) { 3608 if (atomic_read(&(journal->j_wcount)) > 0) {
@@ -3608,31 +3617,31 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3608 if (flush) { 3617 if (flush) {
3609 journal->j_next_full_flush = 1; 3618 journal->j_next_full_flush = 1;
3610 } 3619 }
3611 unlock_journal(p_s_sb); 3620 unlock_journal(sb);
3612 3621
3613 /* sleep while the current transaction is still j_jlocked */ 3622 /* sleep while the current transaction is still j_jlocked */
3614 while (journal->j_trans_id == trans_id) { 3623 while (journal->j_trans_id == trans_id) {
3615 if (atomic_read(&journal->j_jlock)) { 3624 if (atomic_read(&journal->j_jlock)) {
3616 queue_log_writer(p_s_sb); 3625 queue_log_writer(sb);
3617 } else { 3626 } else {
3618 lock_journal(p_s_sb); 3627 lock_journal(sb);
3619 if (journal->j_trans_id == trans_id) { 3628 if (journal->j_trans_id == trans_id) {
3620 atomic_set(&(journal->j_jlock), 3629 atomic_set(&(journal->j_jlock),
3621 1); 3630 1);
3622 } 3631 }
3623 unlock_journal(p_s_sb); 3632 unlock_journal(sb);
3624 } 3633 }
3625 } 3634 }
3626 BUG_ON(journal->j_trans_id == trans_id); 3635 BUG_ON(journal->j_trans_id == trans_id);
3627 3636
3628 if (commit_now 3637 if (commit_now
3629 && journal_list_still_alive(p_s_sb, trans_id) 3638 && journal_list_still_alive(sb, trans_id)
3630 && wait_on_commit) { 3639 && wait_on_commit) {
3631 flush_commit_list(p_s_sb, jl, 1); 3640 flush_commit_list(sb, jl, 1);
3632 } 3641 }
3633 return 0; 3642 return 0;
3634 } 3643 }
3635 unlock_journal(p_s_sb); 3644 unlock_journal(sb);
3636 return 0; 3645 return 0;
3637 } 3646 }
3638 3647
@@ -3649,13 +3658,13 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3649 && journal->j_len_alloc < journal->j_max_batch 3658 && journal->j_len_alloc < journal->j_max_batch
3650 && journal->j_cnode_free > (journal->j_trans_max * 3)) { 3659 && journal->j_cnode_free > (journal->j_trans_max * 3)) {
3651 journal->j_bcount++; 3660 journal->j_bcount++;
3652 unlock_journal(p_s_sb); 3661 unlock_journal(sb);
3653 return 0; 3662 return 0;
3654 } 3663 }
3655 3664
3656 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 3665 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(sb)) {
3657 reiserfs_panic(p_s_sb, 3666 reiserfs_panic(sb, "journal-003",
3658 "journal-003: journal_end: j_start (%ld) is too high\n", 3667 "j_start (%ld) is too high",
3659 journal->j_start); 3668 journal->j_start);
3660 } 3669 }
3661 return 1; 3670 return 1;
@@ -3664,7 +3673,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3664/* 3673/*
3665** Does all the work that makes deleting blocks safe. 3674** Does all the work that makes deleting blocks safe.
3666** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on. 3675** when deleting a block marked BH_JNew, just remove it from the current transaction, clean its buffer_head and move on.
3667** 3676**
3668** otherwise: 3677** otherwise:
3669** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 3678** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes
3670** before this transaction has finished. 3679** before this transaction has finished.
@@ -3676,16 +3685,16 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3676** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3685** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
3677*/ 3686*/
3678int journal_mark_freed(struct reiserfs_transaction_handle *th, 3687int journal_mark_freed(struct reiserfs_transaction_handle *th,
3679 struct super_block *p_s_sb, b_blocknr_t blocknr) 3688 struct super_block *sb, b_blocknr_t blocknr)
3680{ 3689{
3681 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 3690 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3682 struct reiserfs_journal_cnode *cn = NULL; 3691 struct reiserfs_journal_cnode *cn = NULL;
3683 struct buffer_head *bh = NULL; 3692 struct buffer_head *bh = NULL;
3684 struct reiserfs_list_bitmap *jb = NULL; 3693 struct reiserfs_list_bitmap *jb = NULL;
3685 int cleaned = 0; 3694 int cleaned = 0;
3686 BUG_ON(!th->t_trans_id); 3695 BUG_ON(!th->t_trans_id);
3687 3696
3688 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3697 cn = get_journal_hash_dev(sb, journal->j_hash_table, blocknr);
3689 if (cn && cn->bh) { 3698 if (cn && cn->bh) {
3690 bh = cn->bh; 3699 bh = cn->bh;
3691 get_bh(bh); 3700 get_bh(bh);
@@ -3695,15 +3704,15 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3695 clear_buffer_journal_new(bh); 3704 clear_buffer_journal_new(bh);
3696 clear_prepared_bits(bh); 3705 clear_prepared_bits(bh);
3697 reiserfs_clean_and_file_buffer(bh); 3706 reiserfs_clean_and_file_buffer(bh);
3698 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned); 3707 cleaned = remove_from_transaction(sb, blocknr, cleaned);
3699 } else { 3708 } else {
3700 /* set the bit for this block in the journal bitmap for this transaction */ 3709 /* set the bit for this block in the journal bitmap for this transaction */
3701 jb = journal->j_current_jl->j_list_bitmap; 3710 jb = journal->j_current_jl->j_list_bitmap;
3702 if (!jb) { 3711 if (!jb) {
3703 reiserfs_panic(p_s_sb, 3712 reiserfs_panic(sb, "journal-1702",
3704 "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n"); 3713 "journal_list_bitmap is NULL");
3705 } 3714 }
3706 set_bit_in_list_bitmap(p_s_sb, blocknr, jb); 3715 set_bit_in_list_bitmap(sb, blocknr, jb);
3707 3716
3708 /* Note, the entire while loop is not allowed to schedule. */ 3717 /* Note, the entire while loop is not allowed to schedule. */
3709 3718
@@ -3711,13 +3720,13 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
                 clear_prepared_bits(bh);
                 reiserfs_clean_and_file_buffer(bh);
         }
-        cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
+        cleaned = remove_from_transaction(sb, blocknr, cleaned);
 
         /* find all older transactions with this block, make sure they don't try to write it out */
-        cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
+        cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
                                   blocknr);
         while (cn) {
-                if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
+                if (sb == cn->sb && blocknr == cn->blocknr) {
                         set_bit(BLOCK_FREED, &cn->state);
                         if (cn->bh) {
                                 if (!cleaned) {
@@ -3733,8 +3742,9 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
                                         put_bh(cn->bh);
                                         if (atomic_read
                                             (&(cn->bh->b_count)) < 0) {
-                                                reiserfs_warning(p_s_sb,
-                                                                 "journal-2138: cn->bh->b_count < 0");
+                                                reiserfs_warning(sb,
+                                                                 "journal-2138",
+                                                                 "cn->bh->b_count < 0");
                                         }
                                 }
                                 if (cn->jlist) {        /* since we are clearing the bh, we MUST dec nonzerolen */
@@ -3824,7 +3834,7 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id,
 
 int reiserfs_commit_for_inode(struct inode *inode)
 {
-        unsigned long id = REISERFS_I(inode)->i_trans_id;
+        unsigned int id = REISERFS_I(inode)->i_trans_id;
         struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
 
         /* for the whole inode, assume unset id means it was
@@ -3839,18 +3849,18 @@ int reiserfs_commit_for_inode(struct inode *inode)
         return __commit_trans_jl(inode, id, jl);
 }
 
-void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
+void reiserfs_restore_prepared_buffer(struct super_block *sb,
                                       struct buffer_head *bh)
 {
-        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
-        PROC_INFO_INC(p_s_sb, journal.restore_prepared);
+        struct reiserfs_journal *journal = SB_JOURNAL(sb);
+        PROC_INFO_INC(sb, journal.restore_prepared);
         if (!bh) {
                 return;
         }
         if (test_clear_buffer_journal_restore_dirty(bh) &&
             buffer_journal_dirty(bh)) {
                 struct reiserfs_journal_cnode *cn;
-                cn = get_journal_hash_dev(p_s_sb,
+                cn = get_journal_hash_dev(sb,
                                           journal->j_list_hash_table,
                                           bh->b_blocknr);
                 if (cn && can_dirty(cn)) {
@@ -3867,12 +3877,12 @@ extern struct tree_balance *cur_tb;
 ** be written to disk while we are altering it. So, we must:
 ** clean it
 ** wait on it.
 **
 */
-int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
+int reiserfs_prepare_for_journal(struct super_block *sb,
                                  struct buffer_head *bh, int wait)
 {
-        PROC_INFO_INC(p_s_sb, journal.prepare);
+        PROC_INFO_INC(sb, journal.prepare);
 
         if (!trylock_buffer(bh)) {
                 if (!wait)
@@ -3909,7 +3919,7 @@ static void flush_old_journal_lists(struct super_block *s)
         }
 }
 
 /*
 ** long and ugly. If flush, will not return until all commit
 ** blocks and all real buffers in the trans are on disk.
 ** If no_async, won't return until all commit blocks are on disk.
@@ -3920,10 +3930,10 @@ static void flush_old_journal_lists(struct super_block *s)
 ** journal lists, etc just won't happen.
 */
 static int do_journal_end(struct reiserfs_transaction_handle *th,
-                          struct super_block *p_s_sb, unsigned long nblocks,
+                          struct super_block *sb, unsigned long nblocks,
                           int flags)
 {
-        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
+        struct reiserfs_journal *journal = SB_JOURNAL(sb);
         struct reiserfs_journal_cnode *cn, *next, *jl_cn;
         struct reiserfs_journal_cnode *last_cn = NULL;
         struct reiserfs_journal_desc *desc;
@@ -3938,7 +3948,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         struct reiserfs_journal_list *jl, *temp_jl;
         struct list_head *entry, *safe;
         unsigned long jindex;
-        unsigned long commit_trans_id;
+        unsigned int commit_trans_id;
         int trans_half;
 
         BUG_ON(th->t_refcount > 1);
@@ -3946,21 +3956,21 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 
         /* protect flush_older_commits from doing mistakes if the
            transaction ID counter gets overflowed. */
-        if (th->t_trans_id == ~0UL)
+        if (th->t_trans_id == ~0U)
                 flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
         flush = flags & FLUSH_ALL;
         wait_on_commit = flags & WAIT;
 
         put_fs_excl();
         current->journal_info = th->t_handle_save;
-        reiserfs_check_lock_depth(p_s_sb, "journal end");
+        reiserfs_check_lock_depth(sb, "journal end");
         if (journal->j_len == 0) {
-                reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
+                reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
                                              1);
-                journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
+                journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb));
         }
 
-        lock_journal(p_s_sb);
+        lock_journal(sb);
         if (journal->j_next_full_flush) {
                 flags |= FLUSH_ALL;
                 flush = 1;
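The switch from ~0UL to ~0U matters because the transaction id is now a 32-bit unsigned int: on an LP64 machine the old comparison could never be true, so the forced full flush at wraparound would silently never trigger. A standalone demo of the promotion trap, not kernel code:

#include <stdio.h>

int main(void)
{
        unsigned int trans_id = ~0U;      /* counter at its 32-bit maximum */

        if (trans_id == ~0UL)             /* promoted compare: false on LP64 */
                printf("~0UL guard fires\n");
        else
                printf("~0UL guard misses at wraparound!\n");

        if (trans_id == ~0U)              /* correct 32-bit guard */
                printf("~0U guard fires, forcing a full flush\n");
        return 0;
}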
@@ -3970,13 +3980,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                 wait_on_commit = 1;
         }
 
         /* check_journal_end locks the journal, and unlocks if it does not return 1
         ** it tells us if we should continue with the journal_end, or just return
         */
-        if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
-                p_s_sb->s_dirt = 1;
-                wake_queued_writers(p_s_sb);
-                reiserfs_async_progress_wait(p_s_sb);
+        if (!check_journal_end(th, sb, nblocks, flags)) {
+                sb->s_dirt = 1;
+                wake_queued_writers(sb);
+                reiserfs_async_progress_wait(sb);
                 goto out;
         }
 
@@ -4005,8 +4015,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
 
         /* setup description block */
         d_bh =
-            journal_getblk(p_s_sb,
-                           SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
+            journal_getblk(sb,
+                           SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
                            journal->j_start);
         set_buffer_uptodate(d_bh);
         desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
@@ -4015,9 +4025,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         set_desc_trans_id(desc, journal->j_trans_id);
 
         /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */
-        c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
+        c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
                               ((journal->j_start + journal->j_len +
-                                1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
+                                1) % SB_ONDISK_JOURNAL_SIZE(sb)));
         commit = (struct reiserfs_journal_commit *)c_bh->b_data;
         memset(c_bh->b_data, 0, c_bh->b_size);
         set_commit_trans_id(commit, journal->j_trans_id);
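The description and commit blocks bracket the logged blocks inside the circular on-disk log: the desc block sits at j_start, the commit block at (j_start + j_len + 1) modulo the journal size, which is why a later hunk advances the next j_start by j_len + 2. A toy calculation with an assumed journal size:

#include <stdio.h>

#define JOURNAL_SIZE 8192                /* blocks in the circular log (assumed) */

int main(void)
{
        unsigned long j_start = 8190;    /* near the end, to show wrapping */
        unsigned long j_len = 5;         /* real blocks in this transaction */

        unsigned long desc = j_start % JOURNAL_SIZE;
        unsigned long commit = (j_start + j_len + 1) % JOURNAL_SIZE;

        printf("desc block   at log offset %lu\n", desc);    /* 8190 */
        printf("commit block at log offset %lu\n", commit);  /* 4: wrapped */

        /* the next transaction starts one past the commit block */
        printf("next j_start = %lu\n", (j_start + j_len + 2) % JOURNAL_SIZE);
        return 0;
}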
@@ -4050,13 +4060,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         ** for each real block, add it to the journal list hash,
         ** copy into real block index array in the commit or desc block
         */
-        trans_half = journal_trans_half(p_s_sb->s_blocksize);
+        trans_half = journal_trans_half(sb->s_blocksize);
         for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
                 if (buffer_journaled(cn->bh)) {
-                        jl_cn = get_cnode(p_s_sb);
+                        jl_cn = get_cnode(sb);
                         if (!jl_cn) {
-                                reiserfs_panic(p_s_sb,
-                                               "journal-1676, get_cnode returned NULL\n");
+                                reiserfs_panic(sb, "journal-1676",
+                                               "get_cnode returned NULL");
                         }
                         if (i == 0) {
                                 jl->j_realblock = jl_cn;
@@ -4067,18 +4077,19 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                                 last_cn->next = jl_cn;
                         }
                         last_cn = jl_cn;
                         /* make sure the block we are trying to log is not a block
                            of journal or reserved area */
 
                         if (is_block_in_log_or_reserved_area
-                            (p_s_sb, cn->bh->b_blocknr)) {
-                                reiserfs_panic(p_s_sb,
-                                               "journal-2332: Trying to log block %lu, which is a log block\n",
+                            (sb, cn->bh->b_blocknr)) {
+                                reiserfs_panic(sb, "journal-2332",
+                                               "Trying to log block %lu, "
+                                               "which is a log block",
                                                cn->bh->b_blocknr);
                         }
                         jl_cn->blocknr = cn->bh->b_blocknr;
                         jl_cn->state = 0;
-                        jl_cn->sb = p_s_sb;
+                        jl_cn->sb = sb;
                         jl_cn->bh = cn->bh;
                         jl_cn->jlist = jl;
                         insert_journal_hash(journal->j_list_hash_table, jl_cn);
@@ -4119,11 +4130,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         char *addr;
                         struct page *page;
                         tmp_bh =
-                            journal_getblk(p_s_sb,
-                                           SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
+                            journal_getblk(sb,
+                                           SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
                                            ((cur_write_start +
                                              jindex) %
-                                            SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
+                                            SB_ONDISK_JOURNAL_SIZE(sb)));
                         set_buffer_uptodate(tmp_bh);
                         page = cn->bh->b_page;
                         addr = kmap(page);
@@ -4137,12 +4148,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         clear_buffer_journaled(cn->bh);
                 } else {
                         /* JDirty cleared sometime during transaction. don't log this one */
-                        reiserfs_warning(p_s_sb,
-                                         "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!");
+                        reiserfs_warning(sb, "journal-2048",
+                                         "BAD, buffer in journal hash, "
+                                         "but not JDirty!");
                         brelse(cn->bh);
                 }
                 next = cn->next;
-                free_cnode(p_s_sb, cn);
+                free_cnode(sb, cn);
                 cn = next;
                 cond_resched();
         }
@@ -4152,7 +4164,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
         */
 
-        journal->j_current_jl = alloc_journal_list(p_s_sb);
+        journal->j_current_jl = alloc_journal_list(sb);
 
         /* now it is safe to insert this transaction on the main list */
         list_add_tail(&jl->j_list, &journal->j_journal_list);
@@ -4163,7 +4175,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         old_start = journal->j_start;
         journal->j_start =
             (journal->j_start + journal->j_len +
-             2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
+             2) % SB_ONDISK_JOURNAL_SIZE(sb);
         atomic_set(&(journal->j_wcount), 0);
         journal->j_bcount = 0;
         journal->j_last = NULL;
@@ -4178,7 +4190,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         journal->j_len_alloc = 0;
         journal->j_next_full_flush = 0;
         journal->j_next_async_flush = 0;
-        init_journal_hash(p_s_sb);
+        init_journal_hash(sb);
 
         // make sure reiserfs_add_jh sees the new current_jl before we
         // write out the tails
@@ -4207,14 +4219,14 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         ** queue don't wait for this proc to flush journal lists and such.
         */
         if (flush) {
-                flush_commit_list(p_s_sb, jl, 1);
-                flush_journal_list(p_s_sb, jl, 1);
+                flush_commit_list(sb, jl, 1);
+                flush_journal_list(sb, jl, 1);
         } else if (!(jl->j_state & LIST_COMMIT_PENDING))
                 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
 
         /* if the next transaction has any chance of wrapping, flush
         ** transactions that might get overwritten. If any journal lists are very
         ** old flush them as well.
         */
       first_jl:
         list_for_each_safe(entry, safe, &journal->j_journal_list) {
@@ -4222,11 +4234,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                 if (journal->j_start <= temp_jl->j_start) {
                         if ((journal->j_start + journal->j_trans_max + 1) >=
                             temp_jl->j_start) {
-                                flush_used_journal_lists(p_s_sb, temp_jl);
+                                flush_used_journal_lists(sb, temp_jl);
                                 goto first_jl;
                         } else if ((journal->j_start +
                                     journal->j_trans_max + 1) <
-                                   SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
+                                   SB_ONDISK_JOURNAL_SIZE(sb)) {
                                 /* if we don't cross into the next transaction and we don't
                                  * wrap, there is no way we can overlap any later transactions
                                  * break now
@@ -4235,11 +4247,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         }
                 } else if ((journal->j_start +
                             journal->j_trans_max + 1) >
-                           SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
+                           SB_ONDISK_JOURNAL_SIZE(sb)) {
                         if (((journal->j_start + journal->j_trans_max + 1) %
-                             SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
+                             SB_ONDISK_JOURNAL_SIZE(sb)) >=
                             temp_jl->j_start) {
-                                flush_used_journal_lists(p_s_sb, temp_jl);
+                                flush_used_journal_lists(sb, temp_jl);
                                 goto first_jl;
                         } else {
                                 /* we don't overlap anything from out start to the end of the
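The two branches above answer one question: could the next transaction, up to j_trans_max blocks starting at j_start, overwrite a journal list that begins at temp_jl->j_start, given that offsets wrap modulo the journal size? A condensed sketch of that predicate, as a hypothetical standalone helper rather than the kernel's:

#include <stdio.h>

static int next_trans_overlaps(unsigned long j_start,
                               unsigned long trans_max,
                               unsigned long journal_size,
                               unsigned long list_start)
{
        unsigned long end = j_start + trans_max + 1;

        if (j_start <= list_start)
                /* same pass around the ring: overlap iff the new
                 * transaction reaches list_start */
                return end >= list_start;
        if (end > journal_size)
                /* we wrap: the tail lands at end % journal_size and
                 * overlaps anything at or before it */
                return (end % journal_size) >= list_start;
        return 0;       /* we neither reach list_start nor wrap */
}

int main(void)
{
        printf("%d\n", next_trans_overlaps(100, 50, 8192, 120));   /* 1 */
        printf("%d\n", next_trans_overlaps(8100, 200, 8192, 50));  /* 1: wraps */
        printf("%d\n", next_trans_overlaps(100, 50, 8192, 4000));  /* 0 */
        return 0;
}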
@@ -4250,46 +4262,47 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                         }
                 }
         }
-        flush_old_journal_lists(p_s_sb);
+        flush_old_journal_lists(sb);
 
         journal->j_current_jl->j_list_bitmap =
-            get_list_bitmap(p_s_sb, journal->j_current_jl);
+            get_list_bitmap(sb, journal->j_current_jl);
 
         if (!(journal->j_current_jl->j_list_bitmap)) {
-                reiserfs_panic(p_s_sb,
-                               "journal-1996: do_journal_end, could not get a list bitmap\n");
+                reiserfs_panic(sb, "journal-1996",
+                               "could not get a list bitmap");
         }
 
         atomic_set(&(journal->j_jlock), 0);
-        unlock_journal(p_s_sb);
+        unlock_journal(sb);
         /* wake up any body waiting to join. */
         clear_bit(J_WRITERS_QUEUED, &journal->j_state);
         wake_up(&(journal->j_join_wait));
 
         if (!flush && wait_on_commit &&
-            journal_list_still_alive(p_s_sb, commit_trans_id)) {
-                flush_commit_list(p_s_sb, jl, 1);
+            journal_list_still_alive(sb, commit_trans_id)) {
+                flush_commit_list(sb, jl, 1);
         }
       out:
-        reiserfs_check_lock_depth(p_s_sb, "journal end2");
+        reiserfs_check_lock_depth(sb, "journal end2");
 
         memset(th, 0, sizeof(*th));
         /* Re-set th->t_super, so we can properly keep track of how many
          * persistent transactions there are. We need to do this so if this
          * call is part of a failed restart_transaction, we can free it later */
-        th->t_super = p_s_sb;
+        th->t_super = sb;
 
         return journal->j_errno;
 }
 
-static void __reiserfs_journal_abort_hard(struct super_block *sb)
+/* Send the file system read only and refuse new transactions */
+void reiserfs_abort_journal(struct super_block *sb, int errno)
 {
         struct reiserfs_journal *journal = SB_JOURNAL(sb);
         if (test_bit(J_ABORTED, &journal->j_state))
                 return;
 
-        printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
-               reiserfs_bdevname(sb));
+        if (!journal->j_errno)
+                journal->j_errno = errno;
 
         sb->s_flags |= MS_RDONLY;
         set_bit(J_ABORTED, &journal->j_state);
@@ -4299,19 +4312,3 @@ static void __reiserfs_journal_abort_hard(struct super_block *sb)
 #endif
 }
 
-static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
-{
-        struct reiserfs_journal *journal = SB_JOURNAL(sb);
-        if (test_bit(J_ABORTED, &journal->j_state))
-                return;
-
-        if (!journal->j_errno)
-                journal->j_errno = errno;
-
-        __reiserfs_journal_abort_hard(sb);
-}
-
-void reiserfs_journal_abort(struct super_block *sb, int errno)
-{
-        __reiserfs_journal_abort_soft(sb, errno);
-}
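With __reiserfs_journal_abort_soft() and _hard() folded into a single exported reiserfs_abort_journal(), the abort path becomes: record the first errno, remount read-only, and latch J_ABORTED so later calls are no-ops. A userspace model of that control flow, with simplified stand-in types:

#include <stdio.h>

struct journal { int errno_saved; int aborted; };
struct super   { unsigned flags; struct journal j; };
#define RDONLY 0x1

static void abort_journal(struct super *sb, int errno_val)
{
        if (sb->j.aborted)
                return;                  /* already aborted: keep first errno */
        if (!sb->j.errno_saved)
                sb->j.errno_saved = errno_val;
        sb->flags |= RDONLY;             /* refuse further writes */
        sb->j.aborted = 1;               /* refuse new transactions */
}

int main(void)
{
        struct super s = {0};
        abort_journal(&s, -5);           /* first error (-EIO) wins */
        abort_journal(&s, -22);          /* ignored: already aborted */
        printf("errno=%d rdonly=%u\n", s.j.errno_saved, s.flags & RDONLY);
        return 0;
}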
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 6de060a6aa7f..381750a155f6 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -111,7 +111,7 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
         item_num_in_dest =
             (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0;
 
-        leaf_paste_entries(dest_bi->bi_bh, item_num_in_dest,
+        leaf_paste_entries(dest_bi, item_num_in_dest,
                            (last_first ==
                             FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest,
                                                                           item_num_in_dest))
@@ -119,8 +119,8 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
                            DEH_SIZE * copy_count + copy_records_len);
 }
 
 /* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or
    part of it or nothing (see the return 0 below) from SOURCE to the end
    (if last_first) or beginning (!last_first) of the DEST */
 /* returns 1 if anything was copied, else 0 */
 static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
@@ -168,10 +168,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
                 if (bytes_or_entries == ih_item_len(ih)
                     && is_indirect_le_ih(ih))
                         if (get_ih_free_space(ih))
-                                reiserfs_panic(NULL,
-                                               "vs-10020: leaf_copy_boundary_item: "
-                                               "last unformatted node must be filled entirely (%h)",
-                                               ih);
+                                reiserfs_panic(sb_from_bi(dest_bi),
+                                               "vs-10020",
+                                               "last unformatted node "
+                                               "must be filled "
+                                               "entirely (%h)", ih);
         }
 #endif
 
@@ -395,7 +396,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
         else {
                 struct item_head n_ih;
 
                 /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST
                    part defined by 'cpy_bytes'; create new item header; change old item_header (????);
                    n_ih = new item_header;
                  */
@@ -425,7 +426,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
         else {
                 struct item_head n_ih;
 
                 /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST
                    part defined by 'cpy_bytes'; create new item header;
                    n_ih = new item_header;
                  */
@@ -622,9 +623,8 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
                 break;
 
         default:
-                reiserfs_panic(NULL,
-                               "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)",
-                               shift_mode);
+                reiserfs_panic(sb_from_bi(src_bi), "vs-10250",
+                               "shift type is unknown (%d)", shift_mode);
         }
         RFALSE(!src_bi->bi_bh || !dest_bi->bi_bh,
                "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly",
@@ -674,9 +674,9 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
 #ifdef CONFIG_REISERFS_CHECK
         if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) {
                 print_cur_tb("vs-10275");
-                reiserfs_panic(tb->tb_sb,
-                               "vs-10275: leaf_shift_left: balance condition corrupted (%c)",
-                               tb->tb_mode);
+                reiserfs_panic(tb->tb_sb, "vs-10275",
+                               "balance condition corrupted "
+                               "(%c)", tb->tb_mode);
         }
 #endif
 
@@ -724,7 +724,7 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
 static void leaf_delete_items_entirely(struct buffer_info *bi,
                                        int first, int del_num);
 /* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR.
    If not.
    If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of
    the first item. Part defined by del_bytes. Don't delete first item header
    If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of
@@ -783,7 +783,7 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
                 /* len = body len of item */
                 len = ih_item_len(ih);
 
                 /* delete the part of the last item of the bh
                    do not delete item header
                  */
                 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
@@ -865,7 +865,7 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before,
         }
 }
 
 /* paste paste_size bytes to affected_item_num-th item.
    When item is a directory, this only prepare space for new entries */
 void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
                           int pos_in_item, int paste_size,
@@ -889,9 +889,12 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
 
 #ifdef CONFIG_REISERFS_CHECK
         if (zeros_number > paste_size) {
+                struct super_block *sb = NULL;
+                if (bi && bi->tb)
+                        sb = bi->tb->tb_sb;
                 print_cur_tb("10177");
-                reiserfs_panic(NULL,
-                               "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d",
+                reiserfs_panic(sb, "vs-10177",
+                               "zeros_number == %d, paste_size == %d",
                                zeros_number, paste_size);
         }
 #endif                          /* CONFIG_REISERFS_CHECK */
@@ -1019,7 +1022,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
 /* when cut item is part of regular file
    pos_in_item - first byte that must be cut
    cut_size - number of bytes to be cut beginning from pos_in_item
 
    when cut item is part of directory
    pos_in_item - number of first deleted entry
    cut_size - count of deleted entries
@@ -1191,7 +1194,7 @@ static void leaf_delete_items_entirely(struct buffer_info *bi,
 }
 
 /* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */
-void leaf_paste_entries(struct buffer_head *bh,
+void leaf_paste_entries(struct buffer_info *bi,
                         int item_num,
                         int before,
                         int new_entry_count,
@@ -1203,6 +1206,7 @@ void leaf_paste_entries(struct buffer_head *bh,
         struct reiserfs_de_head *deh;
         char *insert_point;
         int i, old_entry_num;
+        struct buffer_head *bh = bi->bi_bh;
 
         if (new_entry_count == 0)
                 return;
@@ -1271,7 +1275,7 @@ void leaf_paste_entries(struct buffer_head *bh,
         /* change item key if necessary (when we paste before 0-th entry */
         if (!before) {
                 set_le_ih_k_offset(ih, deh_offset(new_dehs));
 /*      memcpy (&ih->ih_key.k_offset,
            &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
         }
 #ifdef CONFIG_REISERFS_CHECK
@@ -1287,13 +1291,17 @@ void leaf_paste_entries(struct buffer_head *bh,
                         prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0;
 
                         if (prev && prev <= deh_location(&(deh[i])))
-                                reiserfs_warning(NULL,
-                                                 "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)",
-                                                 ih, deh + i - 1, i, deh + i);
+                                reiserfs_error(sb_from_bi(bi), "vs-10240",
+                                               "directory item (%h) "
+                                               "corrupted (prev %a, "
+                                               "cur(%d) %a)",
+                                               ih, deh + i - 1, i, deh + i);
                         if (next && next >= deh_location(&(deh[i])))
-                                reiserfs_warning(NULL,
-                                                 "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)",
-                                                 ih, i, deh + i, deh + i + 1);
+                                reiserfs_error(sb_from_bi(bi), "vs-10250",
+                                               "directory item (%h) "
+                                               "corrupted (cur(%d) %a, "
+                                               "next %a)",
+                                               ih, i, deh + i, deh + i + 1);
                 }
         }
 #endif
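Both lbalance.c conversions above route a super_block into reiserfs_panic()/reiserfs_error() through the buffer_info. A plausible shape for the sb_from_bi()/sb_from_tb() helpers they call — an assumption here, since the real definitions live in the reiserfs headers touched elsewhere in this series:

#include <stddef.h>

struct super_block;                           /* opaque in this sketch */
struct tree_balance { struct super_block *tb_sb; /* ... */ };
struct buffer_info  { struct tree_balance *tb;   /* ... */ };

/* Tolerate NULLs at every hop so error paths with no balance in
 * progress can still report without a device name. */
static inline struct super_block *sb_from_tb(struct tree_balance *tb)
{
        return tb ? tb->tb_sb : NULL;
}

static inline struct super_block *sb_from_bi(struct buffer_info *bi)
{
        return bi ? sb_from_tb(bi->tb) : NULL;
}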
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 639d635d9d4b..efd4d720718e 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -106,7 +106,7 @@ key of the first directory entry in it.
 This function first calls search_by_key, then, if item whose first
 entry matches is not found it looks for the entry inside directory
 item found by search_by_key. Fills the path to the entry, and to the
 entry position in the item
 
 */
 
@@ -120,8 +120,8 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
         switch (retval) {
         case ITEM_NOT_FOUND:
                 if (!PATH_LAST_POSITION(path)) {
-                        reiserfs_warning(sb,
-                                         "vs-7000: search_by_entry_key: search_by_key returned item position == 0");
+                        reiserfs_error(sb, "vs-7000", "search_by_key "
+                                       "returned item position == 0");
                         pathrelse(path);
                         return IO_ERROR;
                 }
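This hunk shows the message convention the whole series converts to: callers pass a short id string ("vs-7000") plus a bare format, and the helper composes the final line, instead of every call site hand-embedding "id: function:" in its format string. A hypothetical standalone version of such a helper:

#include <stdarg.h>
#include <stdio.h>

struct super_block;                   /* opaque here */

static void report(struct super_block *sb, const char *level,
                   const char *id, const char *fmt, ...)
{
        va_list args;

        (void)sb;  /* a real implementation would print the device name */
        fprintf(stderr, "REISERFS %s (device ?): %s ", level, id);
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
        fputc('\n', stderr);
}

int main(void)
{
        report((struct super_block *)0, "error", "vs-7000",
               "search_by_key returned item position == %d", 0);
        return 0;
}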
@@ -135,8 +135,7 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
 
         default:
                 pathrelse(path);
-                reiserfs_warning(sb,
-                                 "vs-7002: search_by_entry_key: no path to here");
+                reiserfs_error(sb, "vs-7002", "no path to here");
                 return IO_ERROR;
         }
 
@@ -146,10 +145,9 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
         if (!is_direntry_le_ih(de->de_ih) ||
             COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) {
                 print_block(de->de_bh, 0, -1, -1);
-                reiserfs_panic(sb,
-                               "vs-7005: search_by_entry_key: found item %h is not directory item or "
-                               "does not belong to the same directory as key %K",
-                               de->de_ih, key);
+                reiserfs_panic(sb, "vs-7005", "found item %h is not directory "
+                               "item or does not belong to the same directory "
+                               "as key %K", de->de_ih, key);
         }
 #endif                          /* CONFIG_REISERFS_CHECK */
 
@@ -300,8 +298,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
                     search_by_entry_key(dir->i_sb, &key_to_search,
                                         path_to_entry, de);
                 if (retval == IO_ERROR) {
-                        reiserfs_warning(dir->i_sb, "zam-7001: io error in %s",
-                                         __func__);
+                        reiserfs_error(dir->i_sb, "zam-7001", "io error");
                         return IO_ERROR;
                 }
 
@@ -361,9 +358,10 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
                         return ERR_PTR(-EACCES);
                 }
 
-                /* Propogate the priv_object flag so we know we're in the priv tree */
-                if (is_reiserfs_priv_object(dir))
-                        reiserfs_mark_inode_private(inode);
+                /* Propagate the private flag so we know we're
+                 * in the priv tree */
+                if (IS_PRIVATE(dir))
+                        inode->i_flags |= S_PRIVATE;
         }
         reiserfs_write_unlock(dir->i_sb);
         if (retval == IO_ERROR) {
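reiserfs_lookup() now uses the generic VFS private-inode machinery, IS_PRIVATE() and S_PRIVATE, in place of the filesystem's own is_reiserfs_priv_object()/reiserfs_mark_inode_private() pair, so anything found under the private xattr tree inherits the flag. A toy model of the propagation, with a simplified inode and an illustrative flag value:

#include <stdio.h>

#define S_PRIV 0x200                     /* stand-in for S_PRIVATE */
struct ino { unsigned long i_flags; };

/* Children looked up under a private directory become private too,
 * so the security layer skips them. */
static void propagate_private(const struct ino *dir, struct ino *inode)
{
        if (dir->i_flags & S_PRIV)       /* IS_PRIVATE(dir) analogue */
                inode->i_flags |= S_PRIV;
}

int main(void)
{
        struct ino dir = { S_PRIV }, child = { 0 };
        propagate_private(&dir, &child);
        printf("child private: %d\n", !!(child.i_flags & S_PRIV));
        return 0;
}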
@@ -373,7 +371,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
         return d_splice_alias(inode, dentry);
 }
 
 /*
 ** looks up the dentry of the parent directory for child.
 ** taken from ext2_get_parent
 */
@@ -403,7 +401,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
         return d_obtain_alias(inode);
 }
 
 /* add entry to the directory (entry can be hidden).
 
 insert definition of when hidden directories are used here -Hans
 
@@ -484,10 +482,9 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
         }
 
         if (retval != NAME_FOUND) {
-                reiserfs_warning(dir->i_sb,
-                                 "zam-7002:%s: \"reiserfs_find_entry\" "
-                                 "has returned unexpected value (%d)",
-                                 __func__, retval);
+                reiserfs_error(dir->i_sb, "zam-7002",
+                               "reiserfs_find_entry() returned "
+                               "unexpected value (%d)", retval);
         }
 
         return -EEXIST;
@@ -498,8 +495,9 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
                                   MAX_GENERATION_NUMBER + 1);
         if (gen_number > MAX_GENERATION_NUMBER) {
                 /* there is no free generation number */
-                reiserfs_warning(dir->i_sb,
-                                 "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
+                reiserfs_warning(dir->i_sb, "reiserfs-7010",
+                                 "Congratulations! we have got hash function "
+                                 "screwed up");
                 if (buffer != small_buf)
                         kfree(buffer);
                 pathrelse(&path);
@@ -515,10 +513,9 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
         if (gen_number != 0) {  /* we need to re-search for the insertion point */
                 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
                     NAME_NOT_FOUND) {
-                        reiserfs_warning(dir->i_sb,
-                                         "vs-7032: reiserfs_add_entry: "
-                                         "entry with this key (%K) already exists",
-                                         &entry_key);
+                        reiserfs_warning(dir->i_sb, "vs-7032",
+                                         "entry with this key (%K) already "
+                                         "exists", &entry_key);
 
                         if (buffer != small_buf)
                                 kfree(buffer);
@@ -562,7 +559,7 @@ static int drop_new_inode(struct inode *inode)
         return 0;
 }
 
 /* utility function that does setup for reiserfs_new_inode.
 ** vfs_dq_init needs lots of credits so it's better to have it
 ** outside of a transaction, so we had to pull some bits of
 ** reiserfs_new_inode out into this func.
@@ -601,20 +598,22 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
             2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
                  REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
         struct reiserfs_transaction_handle th;
-        int locked;
+        struct reiserfs_security_handle security;
 
         if (!(inode = new_inode(dir->i_sb))) {
                 return -ENOMEM;
         }
         new_inode_init(inode, dir, mode);
 
-        locked = reiserfs_cache_default_acl(dir);
-
+        jbegin_count += reiserfs_cache_default_acl(dir);
+        retval = reiserfs_security_init(dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
         reiserfs_write_lock(dir->i_sb);
 
-        if (locked)
-                reiserfs_write_lock_xattrs(dir->i_sb);
-
         retval = journal_begin(&th, dir->i_sb, jbegin_count);
         if (retval) {
                 drop_new_inode(inode);
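The create/mknod/mkdir/symlink paths all gain the same shape: reiserfs_security_init() runs before the transaction is opened and returns the extra journal credits the security xattr will need, which are folded into jbegin_count; a negative return drops the half-initialized inode. A sketch of that ordering with stub functions; the credit numbers are illustrative, not the kernel's:

#include <stdio.h>

static int security_init(void)         /* models reiserfs_security_init() */
{
        return 2;       /* >= 0: extra journal credits; < 0: error */
}

static int journal_begin(int credits)
{
        printf("journal_begin with %d credits\n", credits);
        return 0;
}

int main(void)
{
        int jbegin_count = 10;          /* base credits for the create */
        int ret = security_init();      /* sized outside the transaction */

        if (ret < 0)
                return 1;               /* would drop the half-built inode */
        jbegin_count += ret;            /* reserve room for the xattr */
        return journal_begin(jbegin_count) ? 1 : 0;
}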
@@ -623,15 +622,10 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
         retval =
             reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
-                               inode);
+                               inode, &security);
         if (retval)
                 goto out_failed;
 
-        if (locked) {
-                reiserfs_write_unlock_xattrs(dir->i_sb);
-                locked = 0;
-        }
-
         inode->i_op = &reiserfs_file_inode_operations;
         inode->i_fop = &reiserfs_file_operations;
         inode->i_mapping->a_ops = &reiserfs_address_space_operations;
@@ -658,8 +652,6 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
         retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
-        if (locked)
-                reiserfs_write_unlock_xattrs(dir->i_sb);
         reiserfs_write_unlock(dir->i_sb);
         return retval;
 }
@@ -670,12 +662,12 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         int retval;
         struct inode *inode;
         struct reiserfs_transaction_handle th;
+        struct reiserfs_security_handle security;
         /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
         int jbegin_count =
             JOURNAL_PER_BALANCE_CNT * 3 +
             2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
                  REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
-        int locked;
 
         if (!new_valid_dev(rdev))
                 return -EINVAL;
@@ -685,13 +677,15 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         }
         new_inode_init(inode, dir, mode);
 
-        locked = reiserfs_cache_default_acl(dir);
-
+        jbegin_count += reiserfs_cache_default_acl(dir);
+        retval = reiserfs_security_init(dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
         reiserfs_write_lock(dir->i_sb);
 
-        if (locked)
-                reiserfs_write_lock_xattrs(dir->i_sb);
-
         retval = journal_begin(&th, dir->i_sb, jbegin_count);
         if (retval) {
                 drop_new_inode(inode);
@@ -700,16 +694,11 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 
         retval =
             reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
-                               inode);
+                               inode, &security);
         if (retval) {
                 goto out_failed;
         }
 
-        if (locked) {
-                reiserfs_write_unlock_xattrs(dir->i_sb);
-                locked = 0;
-        }
-
         inode->i_op = &reiserfs_special_inode_operations;
         init_special_inode(inode, inode->i_mode, rdev);
 
@@ -739,8 +728,6 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
         retval = journal_end(&th, dir->i_sb, jbegin_count);
 
       out_failed:
-        if (locked)
-                reiserfs_write_unlock_xattrs(dir->i_sb);
         reiserfs_write_unlock(dir->i_sb);
         return retval;
 }
@@ -750,12 +737,12 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         int retval;
         struct inode *inode;
         struct reiserfs_transaction_handle th;
+        struct reiserfs_security_handle security;
         /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
         int jbegin_count =
             JOURNAL_PER_BALANCE_CNT * 3 +
             2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
                  REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
-        int locked;
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
         /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */
@@ -767,11 +754,14 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         }
         new_inode_init(inode, dir, mode);
 
-        locked = reiserfs_cache_default_acl(dir);
-
+        jbegin_count += reiserfs_cache_default_acl(dir);
+        retval = reiserfs_security_init(dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
         reiserfs_write_lock(dir->i_sb);
-        if (locked)
-                reiserfs_write_lock_xattrs(dir->i_sb);
 
         retval = journal_begin(&th, dir->i_sb, jbegin_count);
         if (retval) {
@@ -787,17 +777,12 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ ,
                                     old_format_only(dir->i_sb) ?
                                     EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
-                                    dentry, inode);
+                                    dentry, inode, &security);
         if (retval) {
                 dir->i_nlink--;
                 goto out_failed;
         }
 
-        if (locked) {
-                reiserfs_write_unlock_xattrs(dir->i_sb);
-                locked = 0;
-        }
-
         reiserfs_update_inode_transaction(inode);
         reiserfs_update_inode_transaction(dir);
 
@@ -827,8 +812,6 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         unlock_new_inode(inode);
         retval = journal_end(&th, dir->i_sb, jbegin_count);
       out_failed:
-        if (locked)
-                reiserfs_write_unlock_xattrs(dir->i_sb);
         reiserfs_write_unlock(dir->i_sb);
         return retval;
 }
@@ -837,7 +820,7 @@ static inline int reiserfs_empty_dir(struct inode *inode)
 {
         /* we can cheat because an old format dir cannot have
         ** EMPTY_DIR_SIZE, and a new format dir cannot have
         ** EMPTY_DIR_SIZE_V1. So, if the inode is either size,
         ** regardless of disk format version, the directory is empty.
         */
         if (inode->i_size != EMPTY_DIR_SIZE &&
@@ -903,8 +886,9 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
                 goto end_rmdir;
 
         if (inode->i_nlink != 2 && inode->i_nlink != 1)
-                reiserfs_warning(inode->i_sb, "%s: empty directory has nlink "
-                                 "!= 2 (%d)", __func__, inode->i_nlink);
+                reiserfs_error(inode->i_sb, "reiserfs-7040",
+                               "empty directory has nlink != 2 (%d)",
+                               inode->i_nlink);
 
         clear_nlink(inode);
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -980,10 +964,9 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
         }
 
         if (!inode->i_nlink) {
-                reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file "
-                                 "(%s:%lu), %d", __func__,
-                                 reiserfs_bdevname(inode->i_sb), inode->i_ino,
-                                 inode->i_nlink);
+                reiserfs_warning(inode->i_sb, "reiserfs-7042",
+                                 "deleting nonexistent file (%lu), %d",
+                                 inode->i_ino, inode->i_nlink);
                 inode->i_nlink = 1;
         }
 
@@ -1037,6 +1020,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
         char *name;
         int item_len;
         struct reiserfs_transaction_handle th;
+        struct reiserfs_security_handle security;
         int mode = S_IFLNK | S_IRWXUGO;
         /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
         int jbegin_count =
@@ -1049,6 +1033,13 @@ static int reiserfs_symlink(struct inode *parent_dir,
         }
         new_inode_init(inode, parent_dir, mode);
 
+        retval = reiserfs_security_init(parent_dir, inode, &security);
+        if (retval < 0) {
+                drop_new_inode(inode);
+                return retval;
+        }
+        jbegin_count += retval;
+
         reiserfs_write_lock(parent_dir->i_sb);
         item_len = ROUND_UP(strlen(symname));
         if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) {
@@ -1066,8 +1057,6 @@ static int reiserfs_symlink(struct inode *parent_dir,
         memcpy(name, symname, strlen(symname));
         padd_item(name, item_len, strlen(symname));
 
-        /* We would inherit the default ACL here, but symlinks don't get ACLs */
-
         retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
         if (retval) {
                 drop_new_inode(inode);
@@ -1077,7 +1066,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
 
         retval =
             reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
-                               dentry, inode);
+                               dentry, inode, &security);
         kfree(name);
         if (retval) {           /* reiserfs_new_inode iputs for us */
                 goto out_failed;
@@ -1173,7 +1162,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
         return retval;
 }
 
-// de contains information pointing to an entry which
+/* de contains information pointing to an entry which */
 static int de_still_valid(const char *name, int len,
                           struct reiserfs_dir_entry *de)
 {
@@ -1196,15 +1185,14 @@ static int entry_points_to_object(const char *name, int len,
 
         if (inode) {
                 if (!de_visible(de->de_deh + de->de_entry_num))
-                        reiserfs_panic(NULL,
-                                       "vs-7042: entry_points_to_object: entry must be visible");
+                        reiserfs_panic(inode->i_sb, "vs-7042",
+                                       "entry must be visible");
                 return (de->de_objectid == inode->i_ino) ? 1 : 0;
         }
 
         /* this must be added hidden entry */
         if (de_visible(de->de_deh + de->de_entry_num))
-                reiserfs_panic(NULL,
-                               "vs-7043: entry_points_to_object: entry must be visible");
+                reiserfs_panic(NULL, "vs-7043", "entry must be visible");
 
         return 1;
 }
@@ -1218,10 +1206,10 @@ static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
         de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
 }
 
 /*
  * process, that is going to call fix_nodes/do_balance must hold only
  * one path. If it holds 2 or more, it can get into endless waiting in
  * get_empty_nodes or its clones
  */
 static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                            struct inode *new_dir, struct dentry *new_dentry)
@@ -1275,7 +1263,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
         old_inode_mode = old_inode->i_mode;
         if (S_ISDIR(old_inode_mode)) {
                 // make sure, that directory being renamed has correct ".."
                 // and that its new parent directory has not too many links
                 // already
 
@@ -1286,8 +1274,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 }
         }
 
         /* directory is renamed, its parent directory will be changed,
         ** so find ".." entry
         */
         dot_dot_de.de_gen_number_bit_string = NULL;
         retval =
@@ -1318,8 +1306,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                 new_dentry->d_name.len, old_inode, 0);
         if (retval == -EEXIST) {
                 if (!new_dentry_inode) {
-                        reiserfs_panic(old_dir->i_sb,
-                                       "vs-7050: new entry is found, new inode == 0\n");
+                        reiserfs_panic(old_dir->i_sb, "vs-7050",
+                                       "new entry is found, new inode == 0");
                 }
         } else if (retval) {
                 int err = journal_end(&th, old_dir->i_sb, jbegin_count);
@@ -1397,9 +1385,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                            this stuff, yes? Then, having
                            gathered everything into RAM we
                            should lock the buffers, yes? -Hans */
                         /* probably. our rename needs to hold more
                         ** than one path at once. The seals would
                         ** have to be written to deal with multi-path
                         ** issues -chris
                         */
                         /* sanity checking before doing the rename - avoid races many
@@ -1477,7 +1465,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1477 } 1465 }
1478 1466
1479 if (S_ISDIR(old_inode_mode)) { 1467 if (S_ISDIR(old_inode_mode)) {
1480 // adjust ".." of renamed directory 1468 /* adjust ".." of renamed directory */
1481 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); 1469 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
1482 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); 1470 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
1483 1471
@@ -1499,8 +1487,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1499 if (reiserfs_cut_from_item 1487 if (reiserfs_cut_from_item
1500 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 1488 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
1501 0) < 0) 1489 0) < 0)
1502 reiserfs_warning(old_dir->i_sb, 1490 reiserfs_error(old_dir->i_sb, "vs-7060",
1503 "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?"); 1491 "couldn't not cut old name. Fsck later?");
1504 1492
1505 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; 1493 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
1506 1494
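The pattern running through the hunks above is mechanical: the old calls embedded an error id and function name in the format string itself ("vs-7060: reiserfs_rename: ..."), while the new calls pass the id as a separate argument and leave the function name to the wrapper. This diff only shows the __reiserfs_panic()/__reiserfs_error() implementations, so the macro shapes below are an assumption, a minimal sketch of how the call sites can pick up __func__ automatically:

/* Assumed macro shapes (the real ones live in the reiserfs headers,
 * not in this diff); they forward the call site's __func__. */
#define reiserfs_panic(s, id, fmt, args...) \
	__reiserfs_panic(s, id, __func__, fmt, ##args)
#define reiserfs_error(s, id, fmt, args...) \
	__reiserfs_error(s, id, __func__, fmt, ##args)

That keeps every message prefix consistent without each caller retyping its own name, which is exactly the repetition these hunks delete.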
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index ea0cf8c28a99..3a6de810bd61 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -18,8 +18,7 @@
18static void check_objectid_map(struct super_block *s, __le32 * map) 18static void check_objectid_map(struct super_block *s, __le32 * map)
19{ 19{
20 if (le32_to_cpu(map[0]) != 1) 20 if (le32_to_cpu(map[0]) != 1)
21 reiserfs_panic(s, 21 reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
22 "vs-15010: check_objectid_map: map corrupted: %lx",
23 (long unsigned int)le32_to_cpu(map[0])); 22 (long unsigned int)le32_to_cpu(map[0]));
24 23
25 // FIXME: add something else here 24 // FIXME: add something else here
@@ -61,7 +60,7 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
61 /* comment needed -Hans */ 60 /* comment needed -Hans */
62 unused_objectid = le32_to_cpu(map[1]); 61 unused_objectid = le32_to_cpu(map[1]);
63 if (unused_objectid == U32_MAX) { 62 if (unused_objectid == U32_MAX) {
64 reiserfs_warning(s, "%s: no more object ids", __func__); 63 reiserfs_warning(s, "reiserfs-15100", "no more object ids");
65 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); 64 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
66 return 0; 65 return 0;
67 } 66 }
@@ -160,9 +159,8 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
160 i += 2; 159 i += 2;
161 } 160 }
162 161
163 reiserfs_warning(s, 162 reiserfs_error(s, "vs-15011", "tried to free free object id (%lu)",
164 "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", 163 (long unsigned)objectid_to_release);
165 (long unsigned)objectid_to_release);
166} 164}
167 165
168int reiserfs_convert_objectid_map_v1(struct super_block *s) 166int reiserfs_convert_objectid_map_v1(struct super_block *s)
@@ -182,7 +180,7 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s)
182 180
183 if (cur_size > new_size) { 181 if (cur_size > new_size) {
184 /* mark as used every id that was listed as free at the end of the 182 /* mark as used every id that was listed as free at the end of the
185 ** objectid map 183 ** objectid map
186 */ 184 */
187 objectid_map[new_size - 1] = objectid_map[cur_size - 1]; 185 objectid_map[new_size - 1] = objectid_map[cur_size - 1];
188 set_sb_oid_cursize(disk_sb, new_size); 186 set_sb_oid_cursize(disk_sb, new_size);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 740bb8c0c1ae..536eacaeb710 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -157,19 +157,16 @@ static void sprintf_disk_child(char *buf, struct disk_child *dc)
157 dc_size(dc)); 157 dc_size(dc));
158} 158}
159 159
160static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip) 160static char *is_there_reiserfs_struct(char *fmt, int *what)
161{ 161{
162 char *k = fmt; 162 char *k = fmt;
163 163
164 *skip = 0;
165
166 while ((k = strchr(k, '%')) != NULL) { 164 while ((k = strchr(k, '%')) != NULL) {
167 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || 165 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
168 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') { 166 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') {
169 *what = k[1]; 167 *what = k[1];
170 break; 168 break;
171 } 169 }
172 (*skip)++;
173 k++; 170 k++;
174 } 171 }
175 return k; 172 return k;
@@ -181,30 +178,29 @@ static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip)
181 appropriative printk. With this reiserfs_warning you can use format 178 appropriative printk. With this reiserfs_warning you can use format
182 specification for complex structures like you used to do with 179 specification for complex structures like you used to do with
183 printfs for integers, doubles and pointers. For instance, to print 180 printfs for integers, doubles and pointers. For instance, to print
184 out key structure you have to write just: 181 out key structure you have to write just:
185 reiserfs_warning ("bad key %k", key); 182 reiserfs_warning ("bad key %k", key);
186 instead of 183 instead of
187 printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, 184 printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
188 key->k_offset, key->k_uniqueness); 185 key->k_offset, key->k_uniqueness);
189*/ 186*/
190 187static DEFINE_SPINLOCK(error_lock);
191static void prepare_error_buf(const char *fmt, va_list args) 188static void prepare_error_buf(const char *fmt, va_list args)
192{ 189{
193 char *fmt1 = fmt_buf; 190 char *fmt1 = fmt_buf;
194 char *k; 191 char *k;
195 char *p = error_buf; 192 char *p = error_buf;
196 int i, j, what, skip; 193 int what;
194
195 spin_lock(&error_lock);
197 196
198 strcpy(fmt1, fmt); 197 strcpy(fmt1, fmt);
199 198
200 while ((k = is_there_reiserfs_struct(fmt1, &what, &skip)) != NULL) { 199 while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
201 *k = 0; 200 *k = 0;
202 201
203 p += vsprintf(p, fmt1, args); 202 p += vsprintf(p, fmt1, args);
204 203
205 for (i = 0; i < skip; i++)
206 j = va_arg(args, int);
207
208 switch (what) { 204 switch (what) {
209 case 'k': 205 case 'k':
210 sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); 206 sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
@@ -243,15 +239,16 @@ static void prepare_error_buf(const char *fmt, va_list args)
243 fmt1 = k + 2; 239 fmt1 = k + 2;
244 } 240 }
245 vsprintf(p, fmt1, args); 241 vsprintf(p, fmt1, args);
242 spin_unlock(&error_lock);
246 243
247} 244}
248 245
249/* in addition to usual conversion specifiers this accepts reiserfs 246/* in addition to usual conversion specifiers this accepts reiserfs
250 specific conversion specifiers: 247 specific conversion specifiers:
251 %k to print little endian key, 248 %k to print little endian key,
252 %K to print cpu key, 249 %K to print cpu key,
253 %h to print item_head, 250 %h to print item_head,
254 %t to print directory entry 251 %t to print directory entry
255 %z to print block head (arg must be struct buffer_head * 252 %z to print block head (arg must be struct buffer_head *
256 %b to print buffer_head 253 %b to print buffer_head
257*/ 254*/
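fmt_buf and error_buf are static scratch buffers shared by every reporting caller, so the new DEFINE_SPINLOCK(error_lock) serializes the whole format step; without it, two CPUs could interleave their halves of a message. A self-contained sketch of the same pattern, with hypothetical buffer and function names (and with the lock held across the printk as well, a slightly stricter choice than the code above makes):

/* Sketch only: serialize a shared scratch buffer between reporters. */
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <stdarg.h>

static char scratch_buf[1024];			/* size is illustrative */
static DEFINE_SPINLOCK(scratch_lock);

static void report(const char *fmt, ...)
{
	va_list args;

	spin_lock(&scratch_lock);		/* one writer at a time */
	va_start(args, fmt);
	vsnprintf(scratch_buf, sizeof(scratch_buf), fmt, args);
	va_end(args);
	printk(KERN_WARNING "%s\n", scratch_buf);
	spin_unlock(&scratch_lock);
}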
@@ -264,14 +261,17 @@ static void prepare_error_buf(const char *fmt, va_list args)
264 va_end( args );\ 261 va_end( args );\
265} 262}
266 263
267void reiserfs_warning(struct super_block *sb, const char *fmt, ...) 264void __reiserfs_warning(struct super_block *sb, const char *id,
265 const char *function, const char *fmt, ...)
268{ 266{
269 do_reiserfs_warning(fmt); 267 do_reiserfs_warning(fmt);
270 if (sb) 268 if (sb)
271 printk(KERN_WARNING "ReiserFS: %s: warning: %s\n", 269 printk(KERN_WARNING "REISERFS warning (device %s): %s%s%s: "
272 reiserfs_bdevname(sb), error_buf); 270 "%s\n", sb->s_id, id ? id : "", id ? " " : "",
271 function, error_buf);
273 else 272 else
274 printk(KERN_WARNING "ReiserFS: warning: %s\n", error_buf); 273 printk(KERN_WARNING "REISERFS warning: %s%s%s: %s\n",
274 id ? id : "", id ? " " : "", function, error_buf);
275} 275}
276 276
277/* No newline.. reiserfs_info calls can be followed by printk's */ 277/* No newline.. reiserfs_info calls can be followed by printk's */
@@ -279,10 +279,10 @@ void reiserfs_info(struct super_block *sb, const char *fmt, ...)
279{ 279{
280 do_reiserfs_warning(fmt); 280 do_reiserfs_warning(fmt);
281 if (sb) 281 if (sb)
282 printk(KERN_NOTICE "ReiserFS: %s: %s", 282 printk(KERN_NOTICE "REISERFS (device %s): %s",
283 reiserfs_bdevname(sb), error_buf); 283 sb->s_id, error_buf);
284 else 284 else
285 printk(KERN_NOTICE "ReiserFS: %s", error_buf); 285 printk(KERN_NOTICE "REISERFS: %s", error_buf);
286} 286}
287 287
288/* No newline.. reiserfs_printk calls can be followed by printk's */ 288/* No newline.. reiserfs_printk calls can be followed by printk's */
@@ -297,10 +297,10 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
297#ifdef CONFIG_REISERFS_CHECK 297#ifdef CONFIG_REISERFS_CHECK
298 do_reiserfs_warning(fmt); 298 do_reiserfs_warning(fmt);
299 if (s) 299 if (s)
300 printk(KERN_DEBUG "ReiserFS: %s: %s\n", 300 printk(KERN_DEBUG "REISERFS debug (device %s): %s\n",
301 reiserfs_bdevname(s), error_buf); 301 s->s_id, error_buf);
302 else 302 else
303 printk(KERN_DEBUG "ReiserFS: %s\n", error_buf); 303 printk(KERN_DEBUG "REISERFS debug: %s\n", error_buf);
304#endif 304#endif
305} 305}
306 306
@@ -314,17 +314,17 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
314 maintainer-errorid. Don't bother with reusing errorids, there are 314 maintainer-errorid. Don't bother with reusing errorids, there are
315 lots of numbers out there. 315 lots of numbers out there.
316 316
317 Example: 317 Example:
318 318
319 reiserfs_panic( 319 reiserfs_panic(
320 p_sb, "reiser-29: reiserfs_new_blocknrs: " 320 p_sb, "reiser-29: reiserfs_new_blocknrs: "
321 "one of search_start or rn(%d) is equal to MAX_B_NUM," 321 "one of search_start or rn(%d) is equal to MAX_B_NUM,"
322 "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", 322 "which means that we are optimizing location based on the bogus location of a temp buffer (%p).",
323 rn, bh 323 rn, bh
324 ); 324 );
325 325
326 Regular panic()s sometimes clear the screen before the message can 326 Regular panic()s sometimes clear the screen before the message can
327 be read, thus the need for the while loop. 327 be read, thus the need for the while loop.
328 328
329 Numbering scheme for panic used by Vladimir and Anatoly (Hans completely ignores this scheme, and considers it 329 Numbering scheme for panic used by Vladimir and Anatoly (Hans completely ignores this scheme, and considers it
330 pointless complexity): 330 pointless complexity):
@@ -353,14 +353,46 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
353extern struct tree_balance *cur_tb; 353extern struct tree_balance *cur_tb;
354#endif 354#endif
355 355
356void reiserfs_panic(struct super_block *sb, const char *fmt, ...) 356void __reiserfs_panic(struct super_block *sb, const char *id,
357 const char *function, const char *fmt, ...)
357{ 358{
358 do_reiserfs_warning(fmt); 359 do_reiserfs_warning(fmt);
359 360
361#ifdef CONFIG_REISERFS_CHECK
360 dump_stack(); 362 dump_stack();
363#endif
364 if (sb)
365 panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
366 sb->s_id, id ? id : "", id ? " " : "",
367 function, error_buf);
368 else
369 panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
370 id ? id : "", id ? " " : "", function, error_buf);
371}
372
373void __reiserfs_error(struct super_block *sb, const char *id,
374 const char *function, const char *fmt, ...)
375{
376 do_reiserfs_warning(fmt);
361 377
362 panic(KERN_EMERG "REISERFS: panic (device %s): %s\n", 378 BUG_ON(sb == NULL);
363 reiserfs_bdevname(sb), error_buf); 379
380 if (reiserfs_error_panic(sb))
381 __reiserfs_panic(sb, id, function, error_buf);
382
383 if (id && id[0])
384 printk(KERN_CRIT "REISERFS error (device %s): %s %s: %s\n",
385 sb->s_id, id, function, error_buf);
386 else
387 printk(KERN_CRIT "REISERFS error (device %s): %s: %s\n",
388 sb->s_id, function, error_buf);
389
390 if (sb->s_flags & MS_RDONLY)
391 return;
392
393 reiserfs_info(sb, "Remounting filesystem read-only\n");
394 sb->s_flags |= MS_RDONLY;
395 reiserfs_abort_journal(sb, -EIO);
364} 396}
365 397
366void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) 398void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
@@ -368,18 +400,18 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
368 do_reiserfs_warning(fmt); 400 do_reiserfs_warning(fmt);
369 401
370 if (reiserfs_error_panic(sb)) { 402 if (reiserfs_error_panic(sb)) {
371 panic(KERN_CRIT "REISERFS: panic (device %s): %s\n", 403 panic(KERN_CRIT "REISERFS panic (device %s): %s\n", sb->s_id,
372 reiserfs_bdevname(sb), error_buf); 404 error_buf);
373 } 405 }
374 406
375 if (sb->s_flags & MS_RDONLY) 407 if (reiserfs_is_journal_aborted(SB_JOURNAL(sb)))
376 return; 408 return;
377 409
378 printk(KERN_CRIT "REISERFS: abort (device %s): %s\n", 410 printk(KERN_CRIT "REISERFS abort (device %s): %s\n", sb->s_id,
379 reiserfs_bdevname(sb), error_buf); 411 error_buf);
380 412
381 sb->s_flags |= MS_RDONLY; 413 sb->s_flags |= MS_RDONLY;
382 reiserfs_journal_abort(sb, errno); 414 reiserfs_abort_journal(sb, errno);
383} 415}
384 416
385/* this prints internal nodes (4 keys/items in line) (dc_number, 417/* this prints internal nodes (4 keys/items in line) (dc_number,
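Taken together, __reiserfs_error() above and reiserfs_abort() apply one escalation policy: honor an errors=panic style mount option first, stop if the damage is already contained (filesystem read-only, or journal already aborted), and otherwise log, flip MS_RDONLY, and abort the journal. Reduced to a skeleton, with the logging details elided:

/* Policy skeleton only; see __reiserfs_error() above for the real code. */
static void fs_error_policy(struct super_block *sb)
{
	if (reiserfs_error_panic(sb))		/* mounted to panic on error */
		panic("REISERFS: unrecoverable error");

	if (sb->s_flags & MS_RDONLY)		/* damage already contained */
		return;

	sb->s_flags |= MS_RDONLY;		/* stop further writes */
	reiserfs_abort_journal(sb, -EIO);	/* and further journaling */
}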
@@ -681,12 +713,10 @@ static void check_leaf_block_head(struct buffer_head *bh)
681 blkh = B_BLK_HEAD(bh); 713 blkh = B_BLK_HEAD(bh);
682 nr = blkh_nr_item(blkh); 714 nr = blkh_nr_item(blkh);
683 if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE) 715 if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
684 reiserfs_panic(NULL, 716 reiserfs_panic(NULL, "vs-6010", "invalid item number %z",
685 "vs-6010: check_leaf_block_head: invalid item number %z",
686 bh); 717 bh);
687 if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr) 718 if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr)
688 reiserfs_panic(NULL, 719 reiserfs_panic(NULL, "vs-6020", "invalid free space %z",
689 "vs-6020: check_leaf_block_head: invalid free space %z",
690 bh); 720 bh);
691 721
692} 722}
@@ -697,21 +727,15 @@ static void check_internal_block_head(struct buffer_head *bh)
697 727
698 blkh = B_BLK_HEAD(bh); 728 blkh = B_BLK_HEAD(bh);
699 if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT)) 729 if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
700 reiserfs_panic(NULL, 730 reiserfs_panic(NULL, "vs-6025", "invalid level %z", bh);
701 "vs-6025: check_internal_block_head: invalid level %z",
702 bh);
703 731
704 if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) 732 if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE)
705 reiserfs_panic(NULL, 733 reiserfs_panic(NULL, "vs-6030", "invalid item number %z", bh);
706 "vs-6030: check_internal_block_head: invalid item number %z",
707 bh);
708 734
709 if (B_FREE_SPACE(bh) != 735 if (B_FREE_SPACE(bh) !=
710 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) - 736 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) -
711 DC_SIZE * (B_NR_ITEMS(bh) + 1)) 737 DC_SIZE * (B_NR_ITEMS(bh) + 1))
712 reiserfs_panic(NULL, 738 reiserfs_panic(NULL, "vs-6040", "invalid free space %z", bh);
713 "vs-6040: check_internal_block_head: invalid free space %z",
714 bh);
715 739
716} 740}
717 741
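Both checkers enforce the same bookkeeping identity: every byte of a node is block head, per-item metadata, item body, or free space. For an internal node holding nr keys that is free = blocksize - BLKH_SIZE - KEY_SIZE * nr - DC_SIZE * (nr + 1), since nr keys delimit nr + 1 child pointers. A worked instance of the arithmetic; the constant values below are illustrative, not quoted from the reiserfs headers:

/* Illustrative arithmetic for the internal-node free space check. */
#include <stdio.h>

int main(void)
{
	unsigned blocksize = 4096;
	unsigned blkh = 24, key = 16, dc = 8;	/* assumed struct sizes */
	unsigned nr = 100;			/* keys in the node */

	/* nr keys delimit nr + 1 child pointers */
	unsigned used = blkh + key * nr + dc * (nr + 1);

	printf("expected free space: %u\n", blocksize - used);
	return 0;
}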
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 37173fa07d15..9229e5514a4e 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -321,7 +321,7 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
321 /* incore fields */ 321 /* incore fields */
322 "j_1st_reserved_block: \t%i\n" 322 "j_1st_reserved_block: \t%i\n"
323 "j_state: \t%li\n" 323 "j_state: \t%li\n"
324 "j_trans_id: \t%lu\n" 324 "j_trans_id: \t%u\n"
325 "j_mount_id: \t%lu\n" 325 "j_mount_id: \t%lu\n"
326 "j_start: \t%lu\n" 326 "j_start: \t%lu\n"
327 "j_len: \t%lu\n" 327 "j_len: \t%lu\n"
@@ -329,7 +329,7 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
329 "j_wcount: \t%i\n" 329 "j_wcount: \t%i\n"
330 "j_bcount: \t%lu\n" 330 "j_bcount: \t%lu\n"
331 "j_first_unflushed_offset: \t%lu\n" 331 "j_first_unflushed_offset: \t%lu\n"
332 "j_last_flush_trans_id: \t%lu\n" 332 "j_last_flush_trans_id: \t%u\n"
333 "j_trans_start_time: \t%li\n" 333 "j_trans_start_time: \t%li\n"
334 "j_list_bitmap_index: \t%i\n" 334 "j_list_bitmap_index: \t%i\n"
335 "j_must_wait: \t%i\n" 335 "j_must_wait: \t%i\n"
@@ -492,7 +492,6 @@ int reiserfs_proc_info_init(struct super_block *sb)
492 spin_lock_init(&__PINFO(sb).lock); 492 spin_lock_init(&__PINFO(sb).lock);
493 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); 493 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
494 if (REISERFS_SB(sb)->procdir) { 494 if (REISERFS_SB(sb)->procdir) {
495 REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
496 REISERFS_SB(sb)->procdir->data = sb; 495 REISERFS_SB(sb)->procdir->data = sb;
497 add_file(sb, "version", show_version); 496 add_file(sb, "version", show_version);
498 add_file(sb, "super", show_super); 497 add_file(sb, "super", show_super);
@@ -503,7 +502,7 @@ int reiserfs_proc_info_init(struct super_block *sb)
503 add_file(sb, "journal", show_journal); 502 add_file(sb, "journal", show_journal);
504 return 0; 503 return 0;
505 } 504 }
506 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", 505 reiserfs_warning(sb, "cannot create /proc/%s/%s",
507 proc_info_root_name, b); 506 proc_info_root_name, b);
508 return 1; 507 return 1;
509} 508}
@@ -556,11 +555,8 @@ int reiserfs_proc_info_global_init(void)
556{ 555{
557 if (proc_info_root == NULL) { 556 if (proc_info_root == NULL) {
558 proc_info_root = proc_mkdir(proc_info_root_name, NULL); 557 proc_info_root = proc_mkdir(proc_info_root_name, NULL);
559 if (proc_info_root) { 558 if (!proc_info_root) {
560 proc_info_root->owner = THIS_MODULE; 559 reiserfs_warning(NULL, "cannot create /proc/%s",
561 } else {
562 reiserfs_warning(NULL,
563 "reiserfs: cannot create /proc/%s",
564 proc_info_root_name); 560 proc_info_root_name);
565 return 1; 561 return 1;
566 } 562 }
@@ -634,7 +630,7 @@ int reiserfs_global_version_in_proc(char *buffer, char **start,
634 * 630 *
635 */ 631 */
636 632
637/* 633/*
638 * Make Linus happy. 634 * Make Linus happy.
639 * Local variables: 635 * Local variables:
640 * c-indentation-style: "K&R" 636 * c-indentation-style: "K&R"
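The dropped ->owner assignments in this file track a kernel-wide cleanup: proc_dir_entry stopped carrying an owner module once the proc core handled that refcounting itself, so only the private data cookie is still set by hand. The surviving idiom, shortened from reiserfs_proc_info_init() above:

/* Shortened from reiserfs_proc_info_init() above. */
#include <linux/fs.h>
#include <linux/proc_fs.h>

static int make_proc_dir(struct super_block *sb)
{
	struct proc_dir_entry *dir = proc_mkdir("reiserfs", NULL);

	if (!dir)
		return 1;	/* caller warns, as above */
	dir->data = sb;		/* private cookie, still set by hand */
	return 0;
}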
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index f71c3948edef..238e9d9b31e0 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/* 5/*
6 * Written by Alexander Zarochentcev. 6 * Written by Alexander Zarochentcev.
7 * 7 *
8 * The kernel part of the (on-line) reiserfs resizer. 8 * The kernel part of the (on-line) reiserfs resizer.
@@ -101,7 +101,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
101 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); 101 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
102 102
103 /* just in case vfree schedules on us, copy the new 103 /* just in case vfree schedules on us, copy the new
104 ** pointer into the journal struct before freeing the 104 ** pointer into the journal struct before freeing the
105 ** old one 105 ** old one
106 */ 106 */
107 node_tmp = jb->bitmaps; 107 node_tmp = jb->bitmaps;
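The comment in the resize hunk describes a classic ordering trick: because vfree() may schedule, the replacement bitmap pointer is stored into the journal struct before the old allocation is freed, so anything that peeks at the field while we sleep sees a valid pointer. In sketch form, with placeholder struct and field names:

/* Publish-before-free sketch; the type and field are placeholders. */
#include <linux/vmalloc.h>

struct jbitmap { void *bitmaps; };

static void swap_bitmaps(struct jbitmap *jb, void *new_bitmaps)
{
	void *old = jb->bitmaps;

	jb->bitmaps = new_bitmaps;	/* publish the replacement first */
	vfree(old);			/* safe even if vfree sleeps */
}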
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 73aaa33f6735..d036ee5b1c81 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -23,7 +23,6 @@
23 * get_rkey 23 * get_rkey
24 * key_in_buffer 24 * key_in_buffer
25 * decrement_bcount 25 * decrement_bcount
26 * decrement_counters_in_path
27 * reiserfs_check_path 26 * reiserfs_check_path
28 * pathrelse_and_restore 27 * pathrelse_and_restore
29 * pathrelse 28 * pathrelse
@@ -57,28 +56,28 @@
57#include <linux/quotaops.h> 56#include <linux/quotaops.h>
58 57
59/* Does the buffer contain a disk block which is in the tree? */ 58/* Does the buffer contain a disk block which is in the tree? */
60inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh) 59inline int B_IS_IN_TREE(const struct buffer_head *bh)
61{ 60{
62 61
63 RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT, 62 RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
64 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); 63 "PAP-1010: block (%b) has too big level (%z)", bh, bh);
65 64
66 return (B_LEVEL(p_s_bh) != FREE_LEVEL); 65 return (B_LEVEL(bh) != FREE_LEVEL);
67} 66}
68 67
69// 68//
70// to get item head in le form 69// to get item head in le form
71// 70//
72inline void copy_item_head(struct item_head *p_v_to, 71inline void copy_item_head(struct item_head *to,
73 const struct item_head *p_v_from) 72 const struct item_head *from)
74{ 73{
75 memcpy(p_v_to, p_v_from, IH_SIZE); 74 memcpy(to, from, IH_SIZE);
76} 75}
77 76
78/* k1 is a pointer to an on-disk structure stored in little-endian 77/* k1 is a pointer to an on-disk structure stored in little-endian
79 form. k2 is a pointer to a cpu variable. For keys of items of the same 78 form. k2 is a pointer to a cpu variable. For keys of items of the same
80 object this returns 0. 79 object this returns 0.
81 Returns: -1 if key1 < key2 80 Returns: -1 if key1 < key2
82 0 if key1 == key2 81 0 if key1 == key2
83 1 if key1 > key2 */ 82 1 if key1 > key2 */
84inline int comp_short_keys(const struct reiserfs_key *le_key, 83inline int comp_short_keys(const struct reiserfs_key *le_key,
@@ -136,15 +135,15 @@ static inline int comp_keys(const struct reiserfs_key *le_key,
136inline int comp_short_le_keys(const struct reiserfs_key *key1, 135inline int comp_short_le_keys(const struct reiserfs_key *key1,
137 const struct reiserfs_key *key2) 136 const struct reiserfs_key *key2)
138{ 137{
139 __u32 *p_s_1_u32, *p_s_2_u32; 138 __u32 *k1_u32, *k2_u32;
140 int n_key_length = REISERFS_SHORT_KEY_LEN; 139 int key_length = REISERFS_SHORT_KEY_LEN;
141 140
142 p_s_1_u32 = (__u32 *) key1; 141 k1_u32 = (__u32 *) key1;
143 p_s_2_u32 = (__u32 *) key2; 142 k2_u32 = (__u32 *) key2;
144 for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) { 143 for (; key_length--; ++k1_u32, ++k2_u32) {
145 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32)) 144 if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
146 return -1; 145 return -1;
147 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32)) 146 if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
148 return 1; 147 return 1;
149 } 148 }
150 return 0; 149 return 0;
@@ -175,52 +174,51 @@ inline int comp_le_keys(const struct reiserfs_key *k1,
175 * Binary search toolkit function * 174 * Binary search toolkit function *
176 * Search for an item in the array by the item key * 175 * Search for an item in the array by the item key *
177 * Returns: 1 if found, 0 if not found; * 176 * Returns: 1 if found, 0 if not found; *
178 * *p_n_pos = number of the searched element if found, else the * 177 * *pos = number of the searched element if found, else the *
179 * number of the first element that is larger than p_v_key. * 178 * number of the first element that is larger than key. *
180 **************************************************************************/ 179 **************************************************************************/
181/* For those not familiar with binary search: n_lbound is the leftmost item that it 180/* For those not familiar with binary search: lbound is the leftmost item that it
182 could be, n_rbound the rightmost item that it could be. We examine the item 181 could be, rbound the rightmost item that it could be. We examine the item
183 halfway between n_lbound and n_rbound, and that tells us either that we can increase 182 halfway between lbound and rbound, and that tells us either that we can increase
184 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound > n_rbound that 183 lbound, or decrease rbound, or that we have found it, or if lbound > rbound that
185 there are no possible items, and we have not found it. With each examination we 184 there are no possible items, and we have not found it. With each examination we
186 cut the number of possible items it could be by one more than half rounded down, 185 cut the number of possible items it could be by one more than half rounded down,
187 or we find it. */ 186 or we find it. */
188static inline int bin_search(const void *p_v_key, /* Key to search for. */ 187static inline int bin_search(const void *key, /* Key to search for. */
189 const void *p_v_base, /* First item in the array. */ 188 const void *base, /* First item in the array. */
190 int p_n_num, /* Number of items in the array. */ 189 int num, /* Number of items in the array. */
191 int p_n_width, /* Item size in the array 190 int width, /* Item size in the array
192 searched. Lest the reader be 191 searched. Lest the reader be
193 confused, note that this is crafted 192 confused, note that this is crafted
194 as a general function, and when it 193 as a general function, and when it
195 is applied specifically to the array 194 is applied specifically to the array
196 of item headers in a node, p_n_width 195 of item headers in a node, width
197 is actually the item header size not 196 is actually the item header size not
198 the item size. */ 197 the item size. */
199 int *p_n_pos /* Number of the searched for element. */ 198 int *pos /* Number of the searched for element. */
200 ) 199 )
201{ 200{
202 int n_rbound, n_lbound, n_j; 201 int rbound, lbound, j;
203 202
204 for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2; 203 for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
205 n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2) 204 lbound <= rbound; j = (rbound + lbound) / 2)
206 switch (comp_keys 205 switch (comp_keys
207 ((struct reiserfs_key *)((char *)p_v_base + 206 ((struct reiserfs_key *)((char *)base + j * width),
208 n_j * p_n_width), 207 (struct cpu_key *)key)) {
209 (struct cpu_key *)p_v_key)) {
210 case -1: 208 case -1:
211 n_lbound = n_j + 1; 209 lbound = j + 1;
212 continue; 210 continue;
213 case 1: 211 case 1:
214 n_rbound = n_j - 1; 212 rbound = j - 1;
215 continue; 213 continue;
216 case 0: 214 case 0:
217 *p_n_pos = n_j; 215 *pos = j;
218 return ITEM_FOUND; /* Key found in the array. */ 216 return ITEM_FOUND; /* Key found in the array. */
219 } 217 }
220 218
221 /* bin_search did not find the given key; it returns the position 219 /* bin_search did not find the given key; it returns the position
222 of the smallest key greater than the given one. */ 220 of the smallest key greater than the given one. */
223 *p_n_pos = n_lbound; 221 *pos = lbound;
224 return ITEM_NOT_FOUND; 222 return ITEM_NOT_FOUND;
225} 223}
226 224
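Note what the width parameter buys: the same routine walks an array of item headers (width IH_SIZE) at the leaf level and an array of keys (width KEY_SIZE) in internal nodes, exactly the two widths search_by_key() passes further down. A cut-down generic version of the same loop, with an ordinary comparator in place of comp_keys() (return values follow the same -1/0/1 convention):

/* Generic fixed-width binary search mirroring bin_search() above. */
static int find_pos(const void *key, const void *base, int num, int width,
		    int (*cmp)(const void *elem, const void *key), int *pos)
{
	int lbound = 0, rbound = num - 1, j;

	while (lbound <= rbound) {
		j = (lbound + rbound) / 2;
		switch (cmp((const char *)base + j * width, key)) {
		case -1:
			lbound = j + 1;
			break;
		case 1:
			rbound = j - 1;
			break;
		default:
			*pos = j;
			return 1;	/* found */
		}
	}
	*pos = lbound;	/* first element greater than the key */
	return 0;
}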
@@ -243,90 +241,88 @@ static const struct reiserfs_key MAX_KEY = {
243 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 241 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
244 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this 242 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
245 case we return a special key, either MIN_KEY or MAX_KEY. */ 243 case we return a special key, either MIN_KEY or MAX_KEY. */
246static inline const struct reiserfs_key *get_lkey(const struct treepath 244static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
247 *p_s_chk_path, 245 const struct super_block *sb)
248 const struct super_block
249 *p_s_sb)
250{ 246{
251 int n_position, n_path_offset = p_s_chk_path->path_length; 247 int position, path_offset = chk_path->path_length;
252 struct buffer_head *p_s_parent; 248 struct buffer_head *parent;
253 249
254 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 250 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
255 "PAP-5010: invalid offset in the path"); 251 "PAP-5010: invalid offset in the path");
256 252
257 /* While not higher in path than first element. */ 253 /* While not higher in path than first element. */
258 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { 254 while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
259 255
260 RFALSE(!buffer_uptodate 256 RFALSE(!buffer_uptodate
261 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 257 (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
262 "PAP-5020: parent is not uptodate"); 258 "PAP-5020: parent is not uptodate");
263 259
264 /* Parent at the path is not in the tree now. */ 260 /* Parent at the path is not in the tree now. */
265 if (!B_IS_IN_TREE 261 if (!B_IS_IN_TREE
266 (p_s_parent = 262 (parent =
267 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) 263 PATH_OFFSET_PBUFFER(chk_path, path_offset)))
268 return &MAX_KEY; 264 return &MAX_KEY;
269 /* Check whether position in the parent is correct. */ 265 /* Check whether position in the parent is correct. */
270 if ((n_position = 266 if ((position =
271 PATH_OFFSET_POSITION(p_s_chk_path, 267 PATH_OFFSET_POSITION(chk_path,
272 n_path_offset)) > 268 path_offset)) >
273 B_NR_ITEMS(p_s_parent)) 269 B_NR_ITEMS(parent))
274 return &MAX_KEY; 270 return &MAX_KEY;
275 /* Check whether parent at the path really points to the child. */ 271 /* Check whether parent at the path really points to the child. */
276 if (B_N_CHILD_NUM(p_s_parent, n_position) != 272 if (B_N_CHILD_NUM(parent, position) !=
277 PATH_OFFSET_PBUFFER(p_s_chk_path, 273 PATH_OFFSET_PBUFFER(chk_path,
278 n_path_offset + 1)->b_blocknr) 274 path_offset + 1)->b_blocknr)
279 return &MAX_KEY; 275 return &MAX_KEY;
280 /* Return delimiting key if position in the parent is not equal to zero. */ 276 /* Return delimiting key if position in the parent is not equal to zero. */
281 if (n_position) 277 if (position)
282 return B_N_PDELIM_KEY(p_s_parent, n_position - 1); 278 return B_N_PDELIM_KEY(parent, position - 1);
283 } 279 }
284 /* Return MIN_KEY if we are in the root of the buffer tree. */ 280 /* Return MIN_KEY if we are in the root of the buffer tree. */
285 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 281 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
286 b_blocknr == SB_ROOT_BLOCK(p_s_sb)) 282 b_blocknr == SB_ROOT_BLOCK(sb))
287 return &MIN_KEY; 283 return &MIN_KEY;
288 return &MAX_KEY; 284 return &MAX_KEY;
289} 285}
290 286
291/* Get delimiting key of the buffer at the path and its right neighbor. */ 287/* Get delimiting key of the buffer at the path and its right neighbor. */
292inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path, 288inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
293 const struct super_block *p_s_sb) 289 const struct super_block *sb)
294{ 290{
295 int n_position, n_path_offset = p_s_chk_path->path_length; 291 int position, path_offset = chk_path->path_length;
296 struct buffer_head *p_s_parent; 292 struct buffer_head *parent;
297 293
298 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 294 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
299 "PAP-5030: invalid offset in the path"); 295 "PAP-5030: invalid offset in the path");
300 296
301 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) { 297 while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
302 298
303 RFALSE(!buffer_uptodate 299 RFALSE(!buffer_uptodate
304 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 300 (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
305 "PAP-5040: parent is not uptodate"); 301 "PAP-5040: parent is not uptodate");
306 302
307 /* Parent at the path is not in the tree now. */ 303 /* Parent at the path is not in the tree now. */
308 if (!B_IS_IN_TREE 304 if (!B_IS_IN_TREE
309 (p_s_parent = 305 (parent =
310 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset))) 306 PATH_OFFSET_PBUFFER(chk_path, path_offset)))
311 return &MIN_KEY; 307 return &MIN_KEY;
312 /* Check whether position in the parent is correct. */ 308 /* Check whether position in the parent is correct. */
313 if ((n_position = 309 if ((position =
314 PATH_OFFSET_POSITION(p_s_chk_path, 310 PATH_OFFSET_POSITION(chk_path,
315 n_path_offset)) > 311 path_offset)) >
316 B_NR_ITEMS(p_s_parent)) 312 B_NR_ITEMS(parent))
317 return &MIN_KEY; 313 return &MIN_KEY;
318 /* Check whether parent at the path really points to the child. */ 314 /* Check whether parent at the path really points to the child. */
319 if (B_N_CHILD_NUM(p_s_parent, n_position) != 315 if (B_N_CHILD_NUM(parent, position) !=
320 PATH_OFFSET_PBUFFER(p_s_chk_path, 316 PATH_OFFSET_PBUFFER(chk_path,
321 n_path_offset + 1)->b_blocknr) 317 path_offset + 1)->b_blocknr)
322 return &MIN_KEY; 318 return &MIN_KEY;
323 /* Return delimiting key if position in the parent is not the last one. */ 319 /* Return delimiting key if position in the parent is not the last one. */
324 if (n_position != B_NR_ITEMS(p_s_parent)) 320 if (position != B_NR_ITEMS(parent))
325 return B_N_PDELIM_KEY(p_s_parent, n_position); 321 return B_N_PDELIM_KEY(parent, position);
326 } 322 }
327 /* Return MAX_KEY if we are in the root of the buffer tree. */ 323 /* Return MAX_KEY if we are in the root of the buffer tree. */
328 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 324 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
329 b_blocknr == SB_ROOT_BLOCK(p_s_sb)) 325 b_blocknr == SB_ROOT_BLOCK(sb))
330 return &MAX_KEY; 326 return &MAX_KEY;
331 return &MIN_KEY; 327 return &MIN_KEY;
332} 328}
@@ -336,60 +332,29 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *p_s_chk_path,
336 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the 332 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
337 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in 333 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
338 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ 334 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
339static inline int key_in_buffer(struct treepath *p_s_chk_path, /* Path which should be checked. */ 335static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */
340 const struct cpu_key *p_s_key, /* Key which should be checked. */ 336 const struct cpu_key *key, /* Key which should be checked. */
341 struct super_block *p_s_sb /* Super block pointer. */ 337 struct super_block *sb
342 ) 338 )
343{ 339{
344 340
345 RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET 341 RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
346 || p_s_chk_path->path_length > MAX_HEIGHT, 342 || chk_path->path_length > MAX_HEIGHT,
347 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)", 343 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
348 p_s_key, p_s_chk_path->path_length); 344 key, chk_path->path_length);
349 RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev, 345 RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
350 "PAP-5060: device must not be NODEV"); 346 "PAP-5060: device must not be NODEV");
351 347
352 if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1) 348 if (comp_keys(get_lkey(chk_path, sb), key) == 1)
353 /* left delimiting key is bigger than the key we look for */ 349 /* left delimiting key is bigger than the key we look for */
354 return 0; 350 return 0;
355 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) 351 /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
356 if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1) 352 if (comp_keys(get_rkey(chk_path, sb), key) != 1)
357 /* p_s_key must be less than right delimiting key */ 353 /* key must be less than right delimiting key */
358 return 0; 354 return 0;
359 return 1; 355 return 1;
360} 356}
361 357
362inline void decrement_bcount(struct buffer_head *p_s_bh)
363{
364 if (p_s_bh) {
365 if (atomic_read(&(p_s_bh->b_count))) {
366 put_bh(p_s_bh);
367 return;
368 }
369 reiserfs_panic(NULL,
370 "PAP-5070: decrement_bcount: trying to free free buffer %b",
371 p_s_bh);
372 }
373}
374
375/* Decrement b_count field of the all buffers in the path. */
376void decrement_counters_in_path(struct treepath *p_s_search_path)
377{
378 int n_path_offset = p_s_search_path->path_length;
379
380 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
381 n_path_offset > EXTENDED_MAX_HEIGHT - 1,
382 "PAP-5080: invalid path offset of %d", n_path_offset);
383
384 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
385 struct buffer_head *bh;
386
387 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
388 decrement_bcount(bh);
389 }
390 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
391}
392
393int reiserfs_check_path(struct treepath *p) 358int reiserfs_check_path(struct treepath *p)
394{ 359{
395 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, 360 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
@@ -397,40 +362,38 @@ int reiserfs_check_path(struct treepath *p)
397 return 0; 362 return 0;
398} 363}
399 364
400/* Release all buffers in the path. Restore dirty bits clean 365/* Drop the reference to each buffer in a path and restore
401** when preparing the buffer for the log 366 * the dirty bits cleared while preparing the buffer for the log.
402** 367 * This version should only be called from fix_nodes() */
403** only called from fix_nodes() 368void pathrelse_and_restore(struct super_block *sb,
404*/ 369 struct treepath *search_path)
405void pathrelse_and_restore(struct super_block *s, struct treepath *p_s_search_path)
406{ 370{
407 int n_path_offset = p_s_search_path->path_length; 371 int path_offset = search_path->path_length;
408 372
409 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 373 RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
410 "clm-4000: invalid path offset"); 374 "clm-4000: invalid path offset");
411 375
412 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) { 376 while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
413 reiserfs_restore_prepared_buffer(s, 377 struct buffer_head *bh;
414 PATH_OFFSET_PBUFFER 378 bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
415 (p_s_search_path, 379 reiserfs_restore_prepared_buffer(sb, bh);
416 n_path_offset)); 380 brelse(bh);
417 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
418 } 381 }
419 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 382 search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
420} 383}
421 384
422/* Release all buffers in the path. */ 385/* Drop the reference to each buffer in a path */
423void pathrelse(struct treepath *p_s_search_path) 386void pathrelse(struct treepath *search_path)
424{ 387{
425 int n_path_offset = p_s_search_path->path_length; 388 int path_offset = search_path->path_length;
426 389
427 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 390 RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
428 "PAP-5090: invalid path offset"); 391 "PAP-5090: invalid path offset");
429 392
430 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) 393 while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
431 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); 394 brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
432 395
433 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 396 search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
434} 397}
435 398
436static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) 399static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
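pathrelse() and pathrelse_and_restore() above are the put side of a get/put discipline: search_by_key() stores one referenced buffer per path element (sb_getblk() returns the buffer held), and release walks the path back down, dropping one reference per element before marking the path illegal. The pairing, reduced to simplified types:

/* Simplified get/put pairing for a buffer path; not the tree code. */
#include <linux/buffer_head.h>

struct toy_path { struct buffer_head *buf[8]; int len; };

static void toy_path_push(struct toy_path *p, struct buffer_head *bh)
{
	get_bh(bh);			/* hold while on the path */
	p->buf[p->len++] = bh;
}

static void toy_path_release(struct toy_path *p)
{
	while (p->len > 0)
		brelse(p->buf[--p->len]);	/* drop in LIFO order */
}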
@@ -444,23 +407,24 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
444 407
445 blkh = (struct block_head *)buf; 408 blkh = (struct block_head *)buf;
446 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) { 409 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
447 reiserfs_warning(NULL, 410 reiserfs_warning(NULL, "reiserfs-5080",
448 "is_leaf: this should be caught earlier"); 411 "this should be caught earlier");
449 return 0; 412 return 0;
450 } 413 }
451 414
452 nr = blkh_nr_item(blkh); 415 nr = blkh_nr_item(blkh);
453 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { 416 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
454 /* item number is too big or too small */ 417 /* item number is too big or too small */
455 reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh); 418 reiserfs_warning(NULL, "reiserfs-5081",
419 "nr_item seems wrong: %z", bh);
456 return 0; 420 return 0;
457 } 421 }
458 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 422 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
459 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); 423 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
460 if (used_space != blocksize - blkh_free_space(blkh)) { 424 if (used_space != blocksize - blkh_free_space(blkh)) {
461 /* free space does not match the calculated amount of used space */ 425 /* free space does not match the calculated amount of used space */
462 reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z", 426 reiserfs_warning(NULL, "reiserfs-5082",
463 bh); 427 "free space seems wrong: %z", bh);
464 return 0; 428 return 0;
465 } 429 }
466 // FIXME: is_leaf will hit performance too much - we may have 430 // FIXME: is_leaf will hit performance too much - we may have
@@ -471,29 +435,29 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
471 prev_location = blocksize; 435 prev_location = blocksize;
472 for (i = 0; i < nr; i++, ih++) { 436 for (i = 0; i < nr; i++, ih++) {
473 if (le_ih_k_type(ih) == TYPE_ANY) { 437 if (le_ih_k_type(ih) == TYPE_ANY) {
474 reiserfs_warning(NULL, 438 reiserfs_warning(NULL, "reiserfs-5083",
475 "is_leaf: wrong item type for item %h", 439 "wrong item type for item %h",
476 ih); 440 ih);
477 return 0; 441 return 0;
478 } 442 }
479 if (ih_location(ih) >= blocksize 443 if (ih_location(ih) >= blocksize
480 || ih_location(ih) < IH_SIZE * nr) { 444 || ih_location(ih) < IH_SIZE * nr) {
481 reiserfs_warning(NULL, 445 reiserfs_warning(NULL, "reiserfs-5084",
482 "is_leaf: item location seems wrong: %h", 446 "item location seems wrong: %h",
483 ih); 447 ih);
484 return 0; 448 return 0;
485 } 449 }
486 if (ih_item_len(ih) < 1 450 if (ih_item_len(ih) < 1
487 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) { 451 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
488 reiserfs_warning(NULL, 452 reiserfs_warning(NULL, "reiserfs-5085",
489 "is_leaf: item length seems wrong: %h", 453 "item length seems wrong: %h",
490 ih); 454 ih);
491 return 0; 455 return 0;
492 } 456 }
493 if (prev_location - ih_location(ih) != ih_item_len(ih)) { 457 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
494 reiserfs_warning(NULL, 458 reiserfs_warning(NULL, "reiserfs-5086",
495 "is_leaf: item location seems wrong (second one): %h", 459 "item location seems wrong "
496 ih); 460 "(second one): %h", ih);
497 return 0; 461 return 0;
498 } 462 }
499 prev_location = ih_location(ih); 463 prev_location = ih_location(ih);
@@ -514,24 +478,23 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
514 nr = blkh_level(blkh); 478 nr = blkh_level(blkh);
515 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { 479 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
516 /* this level is not possible for internal nodes */ 480 /* this level is not possible for internal nodes */
517 reiserfs_warning(NULL, 481 reiserfs_warning(NULL, "reiserfs-5087",
518 "is_internal: this should be caught earlier"); 482 "this should be caught earlier");
519 return 0; 483 return 0;
520 } 484 }
521 485
522 nr = blkh_nr_item(blkh); 486 nr = blkh_nr_item(blkh);
523 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { 487 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
524 /* for an internal node which is not the root we might check the min number of keys */ 488 /* for an internal node which is not the root we might check the min number of keys */
525 reiserfs_warning(NULL, 489 reiserfs_warning(NULL, "reiserfs-5088",
526 "is_internal: number of key seems wrong: %z", 490 "number of key seems wrong: %z", bh);
527 bh);
528 return 0; 491 return 0;
529 } 492 }
530 493
531 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); 494 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
532 if (used_space != blocksize - blkh_free_space(blkh)) { 495 if (used_space != blocksize - blkh_free_space(blkh)) {
533 reiserfs_warning(NULL, 496 reiserfs_warning(NULL, "reiserfs-5089",
534 "is_internal: free space seems wrong: %z", bh); 497 "free space seems wrong: %z", bh);
535 return 0; 498 return 0;
536 } 499 }
537 // one may imagine many more checks 500 // one may imagine many more checks
@@ -543,8 +506,8 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
543static int is_tree_node(struct buffer_head *bh, int level) 506static int is_tree_node(struct buffer_head *bh, int level)
544{ 507{
545 if (B_LEVEL(bh) != level) { 508 if (B_LEVEL(bh) != level) {
546 reiserfs_warning(NULL, 509 reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
547 "is_tree_node: node level %d does not match to the expected one %d", 510 "not match to the expected one %d",
548 B_LEVEL(bh), level); 511 B_LEVEL(bh), level);
549 return 0; 512 return 0;
550 } 513 }
@@ -580,10 +543,10 @@ static void search_by_key_reada(struct super_block *s,
580/************************************************************************** 543/**************************************************************************
581 * Algorithm SearchByKey * 544 * Algorithm SearchByKey *
582 * look for item in the Disk S+Tree by its key * 545 * look for item in the Disk S+Tree by its key *
583 * Input: p_s_sb - super block * 546 * Input: sb - super block *
584 * p_s_key - pointer to the key to search * 547 * key - pointer to the key to search *
585 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * 548 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
586 * p_s_search_path - path from the root to the needed leaf * 549 * search_path - path from the root to the needed leaf *
587 **************************************************************************/ 550 **************************************************************************/
588 551
589/* This function fills up the path from the root to the leaf as it 552/* This function fills up the path from the root to the leaf as it
@@ -600,22 +563,22 @@ static void search_by_key_reada(struct super_block *s,
600 correctness of the top of the path but need not be checked for the 563 correctness of the top of the path but need not be checked for the
601 correctness of the bottom of the path */ 564 correctness of the bottom of the path */
602/* The function is NOT SCHEDULE-SAFE! */ 565/* The function is NOT SCHEDULE-SAFE! */
603int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* Key to search. */ 566int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */
604 struct treepath *p_s_search_path,/* This structure was 567 struct treepath *search_path,/* This structure was
605 allocated and initialized 568 allocated and initialized
606 by the calling 569 by the calling
607 function. It is filled up 570 function. It is filled up
608 by this function. */ 571 by this function. */
609 int n_stop_level /* How far down the tree to search. To 572 int stop_level /* How far down the tree to search. To
610 stop at leaf level - set to 573 stop at leaf level - set to
611 DISK_LEAF_NODE_LEVEL */ 574 DISK_LEAF_NODE_LEVEL */
612 ) 575 )
613{ 576{
614 b_blocknr_t n_block_number; 577 b_blocknr_t block_number;
615 int expected_level; 578 int expected_level;
616 struct buffer_head *p_s_bh; 579 struct buffer_head *bh;
617 struct path_element *p_s_last_element; 580 struct path_element *last_element;
618 int n_node_level, n_retval; 581 int node_level, retval;
619 int right_neighbor_of_leaf_node; 582 int right_neighbor_of_leaf_node;
620 int fs_gen; 583 int fs_gen;
621 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; 584 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
@@ -623,80 +586,79 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
623 int reada_count = 0; 586 int reada_count = 0;
624 587
625#ifdef CONFIG_REISERFS_CHECK 588#ifdef CONFIG_REISERFS_CHECK
626 int n_repeat_counter = 0; 589 int repeat_counter = 0;
627#endif 590#endif
628 591
629 PROC_INFO_INC(p_s_sb, search_by_key); 592 PROC_INFO_INC(sb, search_by_key);
630 593
631 /* As we add each node to a path we increase its count. This means that 594 /* As we add each node to a path we increase its count. This means that
632 we must be careful to release all nodes in a path before we either 595 we must be careful to release all nodes in a path before we either
633 discard the path struct or re-use the path struct, as we do here. */ 596 discard the path struct or re-use the path struct, as we do here. */
634 597
635 decrement_counters_in_path(p_s_search_path); 598 pathrelse(search_path);
636 599
637 right_neighbor_of_leaf_node = 0; 600 right_neighbor_of_leaf_node = 0;
638 601
639 /* With each iteration of this loop we search through the items in the 602 /* With each iteration of this loop we search through the items in the
640 current node, and calculate the next current node (next path element) 603 current node, and calculate the next current node (next path element)
641 for the next iteration of this loop. */ 604 for the next iteration of this loop. */
642 n_block_number = SB_ROOT_BLOCK(p_s_sb); 605 block_number = SB_ROOT_BLOCK(sb);
643 expected_level = -1; 606 expected_level = -1;
644 while (1) { 607 while (1) {
645 608
646#ifdef CONFIG_REISERFS_CHECK 609#ifdef CONFIG_REISERFS_CHECK
647 if (!(++n_repeat_counter % 50000)) 610 if (!(++repeat_counter % 50000))
648 reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:" 611 reiserfs_warning(sb, "PAP-5100",
649 "there were %d iterations of while loop " 612 "%s: there were %d iterations of "
650 "looking for key %K", 613 "while loop looking for key %K",
651 current->comm, n_repeat_counter, 614 current->comm, repeat_counter,
652 p_s_key); 615 key);
653#endif 616#endif
654 617
655 /* prep path to have another element added to it. */ 618 /* prep path to have another element added to it. */
656 p_s_last_element = 619 last_element =
657 PATH_OFFSET_PELEMENT(p_s_search_path, 620 PATH_OFFSET_PELEMENT(search_path,
658 ++p_s_search_path->path_length); 621 ++search_path->path_length);
659 fs_gen = get_generation(p_s_sb); 622 fs_gen = get_generation(sb);
660 623
661 /* Read the next tree node, and set the last element in the path to 624 /* Read the next tree node, and set the last element in the path to
662 have a pointer to it. */ 625 have a pointer to it. */
663 if ((p_s_bh = p_s_last_element->pe_buffer = 626 if ((bh = last_element->pe_buffer =
664 sb_getblk(p_s_sb, n_block_number))) { 627 sb_getblk(sb, block_number))) {
665 if (!buffer_uptodate(p_s_bh) && reada_count > 1) { 628 if (!buffer_uptodate(bh) && reada_count > 1)
666 search_by_key_reada(p_s_sb, reada_bh, 629 search_by_key_reada(sb, reada_bh,
667 reada_blocks, reada_count); 630 reada_blocks, reada_count);
668 } 631 ll_rw_block(READ, 1, &bh);
669 ll_rw_block(READ, 1, &p_s_bh); 632 wait_on_buffer(bh);
670 wait_on_buffer(p_s_bh); 633 if (!buffer_uptodate(bh))
671 if (!buffer_uptodate(p_s_bh))
672 goto io_error; 634 goto io_error;
673 } else { 635 } else {
674 io_error: 636 io_error:
675 p_s_search_path->path_length--; 637 search_path->path_length--;
676 pathrelse(p_s_search_path); 638 pathrelse(search_path);
677 return IO_ERROR; 639 return IO_ERROR;
678 } 640 }
679 reada_count = 0; 641 reada_count = 0;
680 if (expected_level == -1) 642 if (expected_level == -1)
681 expected_level = SB_TREE_HEIGHT(p_s_sb); 643 expected_level = SB_TREE_HEIGHT(sb);
682 expected_level--; 644 expected_level--;
683 645
684 /* It is possible that a schedule occurred. We must check whether the 646 /* It is possible that a schedule occurred. We must check whether the
685 key to search is still in the tree rooted at the current buffer. If 647 key to search is still in the tree rooted at the current buffer. If
686 not, then repeat the search from the root. */ 648 not, then repeat the search from the root. */
687 if (fs_changed(fs_gen, p_s_sb) && 649 if (fs_changed(fs_gen, sb) &&
688 (!B_IS_IN_TREE(p_s_bh) || 650 (!B_IS_IN_TREE(bh) ||
689 B_LEVEL(p_s_bh) != expected_level || 651 B_LEVEL(bh) != expected_level ||
690 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { 652 !key_in_buffer(search_path, key, sb))) {
691 PROC_INFO_INC(p_s_sb, search_by_key_fs_changed); 653 PROC_INFO_INC(sb, search_by_key_fs_changed);
692 PROC_INFO_INC(p_s_sb, search_by_key_restarted); 654 PROC_INFO_INC(sb, search_by_key_restarted);
693 PROC_INFO_INC(p_s_sb, 655 PROC_INFO_INC(sb,
694 sbk_restarted[expected_level - 1]); 656 sbk_restarted[expected_level - 1]);
695 decrement_counters_in_path(p_s_search_path); 657 pathrelse(search_path);
696 658
697 /* Get the root block number so that we can repeat the search 659 /* Get the root block number so that we can repeat the search
698 starting from the root. */ 660 starting from the root. */
699 n_block_number = SB_ROOT_BLOCK(p_s_sb); 661 block_number = SB_ROOT_BLOCK(sb);
700 expected_level = -1; 662 expected_level = -1;
701 right_neighbor_of_leaf_node = 0; 663 right_neighbor_of_leaf_node = 0;
702 664
@@ -704,53 +666,53 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
704 continue; 666 continue;
705 } 667 }
706 668
707 /* only check that the key is in the buffer if p_s_key is not 669 /* only check that the key is in the buffer if key is not
708 equal to the MAX_KEY. The latter case is only possible in 670 equal to the MAX_KEY. The latter case is only possible in
709 "finish_unfinished()" processing during mount. */ 671 "finish_unfinished()" processing during mount. */
710 RFALSE(comp_keys(&MAX_KEY, p_s_key) && 672 RFALSE(comp_keys(&MAX_KEY, key) &&
711 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb), 673 !key_in_buffer(search_path, key, sb),
712 "PAP-5130: key is not in the buffer"); 674 "PAP-5130: key is not in the buffer");
713#ifdef CONFIG_REISERFS_CHECK 675#ifdef CONFIG_REISERFS_CHECK
714 if (cur_tb) { 676 if (cur_tb) {
715 print_cur_tb("5140"); 677 print_cur_tb("5140");
716 reiserfs_panic(p_s_sb, 678 reiserfs_panic(sb, "PAP-5140",
717 "PAP-5140: search_by_key: schedule occurred in do_balance!"); 679 "schedule occurred in do_balance!");
718 } 680 }
719#endif 681#endif
720 682
721 // make sure that the node contents look like a node of the 683 // make sure that the node contents look like a node of the
722 // expected level 684 // expected level
723 if (!is_tree_node(p_s_bh, expected_level)) { 685 if (!is_tree_node(bh, expected_level)) {
724 reiserfs_warning(p_s_sb, "vs-5150: search_by_key: " 686 reiserfs_error(sb, "vs-5150",
725 "invalid format found in block %ld. Fsck?", 687 "invalid format found in block %ld. "
726 p_s_bh->b_blocknr); 688 "Fsck?", bh->b_blocknr);
727 pathrelse(p_s_search_path); 689 pathrelse(search_path);
728 return IO_ERROR; 690 return IO_ERROR;
729 } 691 }
730 692
731 /* ok, we have acquired next formatted node in the tree */ 693 /* ok, we have acquired next formatted node in the tree */
732 n_node_level = B_LEVEL(p_s_bh); 694 node_level = B_LEVEL(bh);
733 695
734 PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1); 696 PROC_INFO_BH_STAT(sb, bh, node_level - 1);
735 697
736 RFALSE(n_node_level < n_stop_level, 698 RFALSE(node_level < stop_level,
737 "vs-5152: tree level (%d) is less than stop level (%d)", 699 "vs-5152: tree level (%d) is less than stop level (%d)",
738 n_node_level, n_stop_level); 700 node_level, stop_level);
739 701
740 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), 702 retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
741 B_NR_ITEMS(p_s_bh), 703 B_NR_ITEMS(bh),
742 (n_node_level == 704 (node_level ==
743 DISK_LEAF_NODE_LEVEL) ? IH_SIZE : 705 DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
744 KEY_SIZE, 706 KEY_SIZE,
745 &(p_s_last_element->pe_position)); 707 &(last_element->pe_position));
746 if (n_node_level == n_stop_level) { 708 if (node_level == stop_level) {
747 return n_retval; 709 return retval;
748 } 710 }
749 711
750 /* we are not in the stop level */ 712 /* we are not in the stop level */
751 if (n_retval == ITEM_FOUND) 713 if (retval == ITEM_FOUND)
752 /* item has been found, so we choose the pointer which is to the right of the found one */ 714 /* item has been found, so we choose the pointer which is to the right of the found one */
753 p_s_last_element->pe_position++; 715 last_element->pe_position++;
754 716
755 /* if item was not found we choose the position which is to 717 /* if item was not found we choose the position which is to
756 the left of the found item. This requires no code, 718 the left of the found item. This requires no code,
@@ -759,24 +721,24 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
759 /* So we have chosen a position in the current node which is 721 /* So we have chosen a position in the current node which is
760 an internal node. Now we calculate child block number by 722 an internal node. Now we calculate child block number by
761 position in the node. */ 723 position in the node. */
762 n_block_number = 724 block_number =
763 B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); 725 B_N_CHILD_NUM(bh, last_element->pe_position);
764 726
765 /* if we are going to read leaf nodes, try for read ahead as well */ 727 /* if we are going to read leaf nodes, try for read ahead as well */
766 if ((p_s_search_path->reada & PATH_READA) && 728 if ((search_path->reada & PATH_READA) &&
767 n_node_level == DISK_LEAF_NODE_LEVEL + 1) { 729 node_level == DISK_LEAF_NODE_LEVEL + 1) {
768 int pos = p_s_last_element->pe_position; 730 int pos = last_element->pe_position;
769 int limit = B_NR_ITEMS(p_s_bh); 731 int limit = B_NR_ITEMS(bh);
770 struct reiserfs_key *le_key; 732 struct reiserfs_key *le_key;
771 733
772 if (p_s_search_path->reada & PATH_READA_BACK) 734 if (search_path->reada & PATH_READA_BACK)
773 limit = 0; 735 limit = 0;
774 while (reada_count < SEARCH_BY_KEY_READA) { 736 while (reada_count < SEARCH_BY_KEY_READA) {
775 if (pos == limit) 737 if (pos == limit)
776 break; 738 break;
777 reada_blocks[reada_count++] = 739 reada_blocks[reada_count++] =
778 B_N_CHILD_NUM(p_s_bh, pos); 740 B_N_CHILD_NUM(bh, pos);
779 if (p_s_search_path->reada & PATH_READA_BACK) 741 if (search_path->reada & PATH_READA_BACK)
780 pos--; 742 pos--;
781 else 743 else
782 pos++; 744 pos++;
@@ -784,9 +746,9 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
784 /* 746 /*
785 * check to make sure we're in the same object 747 * check to make sure we're in the same object
786 */ 748 */
787 le_key = B_N_PDELIM_KEY(p_s_bh, pos); 749 le_key = B_N_PDELIM_KEY(bh, pos);
788 if (le32_to_cpu(le_key->k_objectid) != 750 if (le32_to_cpu(le_key->k_objectid) !=
789 p_s_key->on_disk_key.k_objectid) { 751 key->on_disk_key.k_objectid) {
790 break; 752 break;
791 } 753 }
792 } 754 }
@@ -795,11 +757,11 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
795} 757}
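
Inside the descent, the child choice after bin_search() is deliberately asymmetric: an exact match in an internal node takes the pointer to the right of the matching delimiting key, while a miss needs no adjustment because a lower-bound search already leaves the position at the correct left child. A compact sketch of that rule, assuming an array-based internal node with invented key values:

#include <stdio.h>

#define FOUND     1
#define NOT_FOUND 0

/* Classic lower-bound binary search: on NOT_FOUND, *pos is the slot
 * of the first key greater than the target, i.e. already the child
 * pointer to the left of where the key would live. */
static int bin_search(const int *keys, int nr, int key, int *pos)
{
        int lo = 0, hi = nr;

        while (lo < hi) {
                int mid = lo + (hi - lo) / 2;
                if (keys[mid] < key)
                        lo = mid + 1;
                else
                        hi = mid;
        }
        *pos = lo;
        return (lo < nr && keys[lo] == key) ? FOUND : NOT_FOUND;
}

int main(void)
{
        int keys[] = { 10, 20, 30 };  /* delimiting keys of an internal node */
        int pos;

        /* children[i] covers keys below keys[i]; children[3] covers >= 30 */
        if (bin_search(keys, 3, 20, &pos) == FOUND)
                pos++;  /* exact match: take the pointer to the right */

        printf("descend through child slot %d\n", pos);  /* prints 2 */
        return 0;
}
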
796 758
797/* Form the path to an item and position in this item which contains 759/* Form the path to an item and position in this item which contains
798 file byte defined by p_s_key. If there is no such item 760 file byte defined by key. If there is no such item
799 corresponding to the key, we point the path to the item with 761 corresponding to the key, we point the path to the item with
800 maximal key less than p_s_key, and *p_n_pos_in_item is set to one 762 maximal key less than key, and *pos_in_item is set to one
801 past the last entry/byte in the item. If searching for entry in a 763 past the last entry/byte in the item. If searching for entry in a
802 directory item, and it is not found, *p_n_pos_in_item is set to one 764 directory item, and it is not found, *pos_in_item is set to one
803 entry more than the entry with maximal key which is less than the 765 entry more than the entry with maximal key which is less than the
804 sought key. 766 sought key.
805 767
@@ -810,48 +772,48 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /*
810 units of directory entries. */ 772 units of directory entries. */
811 773
812/* The function is NOT SCHEDULE-SAFE! */ 774/* The function is NOT SCHEDULE-SAFE! */
813int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the super block. */ 775int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */
814 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ 776 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
815 struct treepath *p_s_search_path /* Filled up by this function. */ 777 struct treepath *search_path /* Filled up by this function. */
816 ) 778 )
817{ 779{
818 struct item_head *p_le_ih; /* pointer to on-disk structure */ 780 struct item_head *p_le_ih; /* pointer to on-disk structure */
819 int n_blk_size; 781 int blk_size;
820 loff_t item_offset, offset; 782 loff_t item_offset, offset;
821 struct reiserfs_dir_entry de; 783 struct reiserfs_dir_entry de;
822 int retval; 784 int retval;
823 785
824 /* If searching for directory entry. */ 786 /* If searching for directory entry. */
825 if (is_direntry_cpu_key(p_cpu_key)) 787 if (is_direntry_cpu_key(p_cpu_key))
826 return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path, 788 return search_by_entry_key(sb, p_cpu_key, search_path,
827 &de); 789 &de);
828 790
829 /* If not searching for directory entry. */ 791 /* If not searching for directory entry. */
830 792
831 /* If item is found. */ 793 /* If item is found. */
832 retval = search_item(p_s_sb, p_cpu_key, p_s_search_path); 794 retval = search_item(sb, p_cpu_key, search_path);
833 if (retval == IO_ERROR) 795 if (retval == IO_ERROR)
834 return retval; 796 return retval;
835 if (retval == ITEM_FOUND) { 797 if (retval == ITEM_FOUND) {
836 798
837 RFALSE(!ih_item_len 799 RFALSE(!ih_item_len
838 (B_N_PITEM_HEAD 800 (B_N_PITEM_HEAD
839 (PATH_PLAST_BUFFER(p_s_search_path), 801 (PATH_PLAST_BUFFER(search_path),
840 PATH_LAST_POSITION(p_s_search_path))), 802 PATH_LAST_POSITION(search_path))),
841 "PAP-5165: item length equals zero"); 803 "PAP-5165: item length equals zero");
842 804
843 pos_in_item(p_s_search_path) = 0; 805 pos_in_item(search_path) = 0;
844 return POSITION_FOUND; 806 return POSITION_FOUND;
845 } 807 }
846 808
847 RFALSE(!PATH_LAST_POSITION(p_s_search_path), 809 RFALSE(!PATH_LAST_POSITION(search_path),
848 "PAP-5170: position equals zero"); 810 "PAP-5170: position equals zero");
849 811
850 /* Item is not found. Set path to the previous item. */ 812 /* Item is not found. Set path to the previous item. */
851 p_le_ih = 813 p_le_ih =
852 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), 814 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
853 --PATH_LAST_POSITION(p_s_search_path)); 815 --PATH_LAST_POSITION(search_path));
854 n_blk_size = p_s_sb->s_blocksize; 816 blk_size = sb->s_blocksize;
855 817
856 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { 818 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
857 return FILE_NOT_FOUND; 819 return FILE_NOT_FOUND;
@@ -863,10 +825,10 @@ int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the sup
863 825
864 /* Needed byte is contained in the item pointed to by the path. */ 826 /* Needed byte is contained in the item pointed to by the path. */
865 if (item_offset <= offset && 827 if (item_offset <= offset &&
866 item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) { 828 item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
867 pos_in_item(p_s_search_path) = offset - item_offset; 829 pos_in_item(search_path) = offset - item_offset;
868 if (is_indirect_le_ih(p_le_ih)) { 830 if (is_indirect_le_ih(p_le_ih)) {
869 pos_in_item(p_s_search_path) /= n_blk_size; 831 pos_in_item(search_path) /= blk_size;
870 } 832 }
871 return POSITION_FOUND; 833 return POSITION_FOUND;
872 } 834 }
@@ -874,30 +836,30 @@ int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the sup
874 /* Needed byte is not contained in the item pointed to by the 836 /* Needed byte is not contained in the item pointed to by the
875 path. Set pos_in_item out of the item. */ 837 path. Set pos_in_item out of the item. */
876 if (is_indirect_le_ih(p_le_ih)) 838 if (is_indirect_le_ih(p_le_ih))
877 pos_in_item(p_s_search_path) = 839 pos_in_item(search_path) =
878 ih_item_len(p_le_ih) / UNFM_P_SIZE; 840 ih_item_len(p_le_ih) / UNFM_P_SIZE;
879 else 841 else
880 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih); 842 pos_in_item(search_path) = ih_item_len(p_le_ih);
881 843
882 return POSITION_NOT_FOUND; 844 return POSITION_NOT_FOUND;
883} 845}
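
The tail of search_for_position_by_key() converts a file byte offset into a position inside the found item; for indirect items the position counts unformatted-node pointers rather than bytes, hence the division by the block size. The arithmetic in isolation (offsets simplified to 0-based here; all values invented):

#include <stdio.h>

/* Model of the pos_in_item computation: item_offset is the file
 * offset of the item's first byte, item_bytes its coverage in bytes. */
static long pos_in_item_model(long offset, long item_offset,
                              long item_bytes, int indirect, int blk_size)
{
        if (offset < item_offset || offset >= item_offset + item_bytes)
                return -1;                      /* POSITION_NOT_FOUND */

        if (indirect)                           /* count block pointers */
                return (offset - item_offset) / blk_size;
        return offset - item_offset;            /* count bytes */
}

int main(void)
{
        /* byte 9000 of a file, inside an indirect item that starts at
         * offset 4096 and covers three 4 KiB blocks */
        printf("pos_in_item = %ld\n",
               pos_in_item_model(9000, 4096, 3 * 4096, 1, 4096));  /* 1 */
        return 0;
}
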
884 846
885/* Compare given item and item pointed to by the path. */ 847/* Compare given item and item pointed to by the path. */
886int comp_items(const struct item_head *stored_ih, const struct treepath *p_s_path) 848int comp_items(const struct item_head *stored_ih, const struct treepath *path)
887{ 849{
888 struct buffer_head *p_s_bh; 850 struct buffer_head *bh = PATH_PLAST_BUFFER(path);
889 struct item_head *ih; 851 struct item_head *ih;
890 852
891 /* Last buffer at the path is not in the tree. */ 853 /* Last buffer at the path is not in the tree. */
892 if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path))) 854 if (!B_IS_IN_TREE(bh))
893 return 1; 855 return 1;
894 856
895 /* Last path position is invalid. */ 857 /* Last path position is invalid. */
896 if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh)) 858 if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
897 return 1; 859 return 1;
898 860
899 /* we need only to know, whether it is the same item */ 861 /* we need only to know, whether it is the same item */
900 ih = get_ih(p_s_path); 862 ih = get_ih(path);
901 return memcmp(stored_ih, ih, IH_SIZE); 863 return memcmp(stored_ih, ih, IH_SIZE);
902} 864}
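
comp_items() exists for one purpose: callers snapshot an item head with copy_item_head() before an operation that can schedule, then compare the stored head against whatever the path points at afterwards, and repeat the search on mismatch. A user-space model of that save-and-recheck idiom, with a stand-in struct instead of the real struct item_head:

#include <stdio.h>
#include <string.h>

struct item_head_model {        /* invented stand-in for struct item_head */
        unsigned int objectid;
        unsigned int offset;
        unsigned short len;
};

/* 0: same item still under the path; nonzero: it moved, re-search */
static int comp_items_model(const struct item_head_model *stored,
                            const struct item_head_model *current)
{
        return memcmp(stored, current, sizeof(*stored)) != 0;
}

int main(void)
{
        struct item_head_model on_disk = { 42, 4097, 4096 };
        struct item_head_model saved;

        memcpy(&saved, &on_disk, sizeof(saved));   /* copy_item_head() */
        on_disk.offset = 1;                        /* balancing moved the item */

        if (comp_items_model(&saved, &on_disk))
                printf("item moved: repeat the search\n");
        return 0;
}
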
903 865
@@ -924,9 +886,9 @@ static inline int prepare_for_direct_item(struct treepath *path,
924 } 886 }
925 // new file gets truncated 887 // new file gets truncated
926 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { 888 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
927 // 889 //
928 round_len = ROUND_UP(new_file_length); 890 round_len = ROUND_UP(new_file_length);
929 /* this was n_new_file_length < le_ih ... */ 891 /* this was new_file_length < le_ih ... */
930 if (round_len < le_ih_k_offset(le_ih)) { 892 if (round_len < le_ih_k_offset(le_ih)) {
931 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 893 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
932 return M_DELETE; /* Delete this item. */ 894 return M_DELETE; /* Delete this item. */
@@ -986,96 +948,95 @@ static inline int prepare_for_direntry_item(struct treepath *path,
986 In case of file truncate calculate whether this item must be deleted/truncated or last 948 In case of file truncate calculate whether this item must be deleted/truncated or last
987 unformatted node of this item will be converted to a direct item. 949 unformatted node of this item will be converted to a direct item.
988 This function returns a determination of what balance mode the calling function should employ. */ 950 This function returns a determination of what balance mode the calling function should employ. */
989static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed 951static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed
990 from end of the file. */ 952 from end of the file. */
991 int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ 953 int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */
992 ) 954 )
993{ 955{
994 struct super_block *p_s_sb = inode->i_sb; 956 struct super_block *sb = inode->i_sb;
995 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path); 957 struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
996 struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path); 958 struct buffer_head *bh = PATH_PLAST_BUFFER(path);
997 959
998 BUG_ON(!th->t_trans_id); 960 BUG_ON(!th->t_trans_id);
999 961
1000 /* Stat_data item. */ 962 /* Stat_data item. */
1001 if (is_statdata_le_ih(p_le_ih)) { 963 if (is_statdata_le_ih(p_le_ih)) {
1002 964
1003 RFALSE(n_new_file_length != max_reiserfs_offset(inode), 965 RFALSE(new_file_length != max_reiserfs_offset(inode),
1004 "PAP-5210: mode must be M_DELETE"); 966 "PAP-5210: mode must be M_DELETE");
1005 967
1006 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih)); 968 *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1007 return M_DELETE; 969 return M_DELETE;
1008 } 970 }
1009 971
1010 /* Directory item. */ 972 /* Directory item. */
1011 if (is_direntry_le_ih(p_le_ih)) 973 if (is_direntry_le_ih(p_le_ih))
1012 return prepare_for_direntry_item(p_s_path, p_le_ih, inode, 974 return prepare_for_direntry_item(path, p_le_ih, inode,
1013 n_new_file_length, 975 new_file_length,
1014 p_n_cut_size); 976 cut_size);
1015 977
1016 /* Direct item. */ 978 /* Direct item. */
1017 if (is_direct_le_ih(p_le_ih)) 979 if (is_direct_le_ih(p_le_ih))
1018 return prepare_for_direct_item(p_s_path, p_le_ih, inode, 980 return prepare_for_direct_item(path, p_le_ih, inode,
1019 n_new_file_length, p_n_cut_size); 981 new_file_length, cut_size);
1020 982
1021 /* Case of an indirect item. */ 983 /* Case of an indirect item. */
1022 { 984 {
1023 int blk_size = p_s_sb->s_blocksize; 985 int blk_size = sb->s_blocksize;
1024 struct item_head s_ih; 986 struct item_head s_ih;
1025 int need_re_search; 987 int need_re_search;
1026 int delete = 0; 988 int delete = 0;
1027 int result = M_CUT; 989 int result = M_CUT;
1028 int pos = 0; 990 int pos = 0;
1029 991
1030 if ( n_new_file_length == max_reiserfs_offset (inode) ) { 992 if ( new_file_length == max_reiserfs_offset (inode) ) {
1031 /* prepare_for_delete_or_cut() is called by 993 /* prepare_for_delete_or_cut() is called by
1032 * reiserfs_delete_item() */ 994 * reiserfs_delete_item() */
1033 n_new_file_length = 0; 995 new_file_length = 0;
1034 delete = 1; 996 delete = 1;
1035 } 997 }
1036 998
1037 do { 999 do {
1038 need_re_search = 0; 1000 need_re_search = 0;
1039 *p_n_cut_size = 0; 1001 *cut_size = 0;
1040 p_s_bh = PATH_PLAST_BUFFER(p_s_path); 1002 bh = PATH_PLAST_BUFFER(path);
1041 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 1003 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1042 pos = I_UNFM_NUM(&s_ih); 1004 pos = I_UNFM_NUM(&s_ih);
1043 1005
1044 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) { 1006 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1045 __le32 *unfm; 1007 __le32 *unfm;
1046 __u32 block; 1008 __u32 block;
1047 1009
1048 /* Each unformatted block deletion may involve one additional 1010 /* Each unformatted block deletion may involve one additional
1049 * bitmap block into the transaction, thereby the initial 1011 * bitmap block into the transaction, thereby the initial
1050 * journal space reservation might not be enough. */ 1012 * journal space reservation might not be enough. */
1051 if (!delete && (*p_n_cut_size) != 0 && 1013 if (!delete && (*cut_size) != 0 &&
1052 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1014 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1053 break; 1015 break;
1054 }
1055 1016
1056 unfm = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1; 1017 unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
1057 block = get_block_num(unfm, 0); 1018 block = get_block_num(unfm, 0);
1058 1019
1059 if (block != 0) { 1020 if (block != 0) {
1060 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); 1021 reiserfs_prepare_for_journal(sb, bh, 1);
1061 put_block_num(unfm, 0, 0); 1022 put_block_num(unfm, 0, 0);
1062 journal_mark_dirty (th, p_s_sb, p_s_bh); 1023 journal_mark_dirty(th, sb, bh);
1063 reiserfs_free_block(th, inode, block, 1); 1024 reiserfs_free_block(th, inode, block, 1);
1064 } 1025 }
1065 1026
1066 cond_resched(); 1027 cond_resched();
1067 1028
1068 if (item_moved (&s_ih, p_s_path)) { 1029 if (item_moved (&s_ih, path)) {
1069 need_re_search = 1; 1030 need_re_search = 1;
1070 break; 1031 break;
1071 } 1032 }
1072 1033
1073 pos --; 1034 pos --;
1074 (*p_n_removed) ++; 1035 (*removed)++;
1075 (*p_n_cut_size) -= UNFM_P_SIZE; 1036 (*cut_size) -= UNFM_P_SIZE;
1076 1037
1077 if (pos == 0) { 1038 if (pos == 0) {
1078 (*p_n_cut_size) -= IH_SIZE; 1039 (*cut_size) -= IH_SIZE;
1079 result = M_DELETE; 1040 result = M_DELETE;
1080 break; 1041 break;
1081 } 1042 }
@@ -1083,12 +1044,12 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1083 /* a trick. If the buffer has been logged, this will do nothing. If 1044 /* a trick. If the buffer has been logged, this will do nothing. If
1084 ** we've broken the loop without logging it, it will restore the 1045 ** we've broken the loop without logging it, it will restore the
1085 ** buffer */ 1046 ** buffer */
1086 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); 1047 reiserfs_restore_prepared_buffer(sb, bh);
1087 } while (need_re_search && 1048 } while (need_re_search &&
1088 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND); 1049 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1089 pos_in_item(p_s_path) = pos * UNFM_P_SIZE; 1050 pos_in_item(path) = pos * UNFM_P_SIZE;
1090 1051
1091 if (*p_n_cut_size == 0) { 1052 if (*cut_size == 0) {
1092 /* Nothing was cut. Maybe convert last unformatted node to the 1053 /* Nothing was cut. Maybe convert last unformatted node to the
1093 * direct item? */ 1054 * direct item? */
1094 result = M_CONVERT; 1055 result = M_CONVERT;
@@ -1098,45 +1059,45 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1098} 1059}
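
For the indirect case above, the loop walks pointer positions from the item's tail towards its front: a pointer at 1-based position pos covers bytes starting at le_ih_k_offset + (pos - 1) * blk_size, and is freed once that start lies beyond the new length; if every pointer goes, the item head goes with it and the mode becomes M_DELETE. The planning arithmetic, extracted into a sketch (IH_SIZE and the other sizes here are illustrative, not authoritative):

#include <stdio.h>

#define UNFM_P_SIZE 4           /* size of one on-disk block pointer */
#define IH_SIZE     24          /* size of an item head, illustrative */

/* Walk an indirect item from its tail, as prepare_for_delete_or_cut()
 * does, and report how the cut would be accounted. */
static void plan_cut(long item_offset, int nr_ptrs, int blk_size,
                     long new_file_length)
{
        int pos = nr_ptrs;      /* 1-based position of the last pointer */
        int cut_size = 0, removed = 0;

        while (pos > 0 &&
               item_offset + (long)(pos - 1) * blk_size > new_file_length) {
                /* kernel: zero the pointer, log the buffer, free the block */
                pos--;
                removed++;
                cut_size -= UNFM_P_SIZE;
        }
        if (pos == 0)
                cut_size -= IH_SIZE;    /* whole item goes: M_DELETE */

        printf("mode=%s removed=%d cut_size=%d\n",
               pos ? (cut_size ? "M_CUT" : "M_CONVERT") : "M_DELETE",
               removed, cut_size);
}

int main(void)
{
        /* item at offset 4097 with four pointers to 4 KiB blocks,
         * truncating the file down to 6000 bytes */
        plan_cut(4097, 4, 4096, 6000);  /* cuts the last three pointers */
        return 0;
}
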
1099 1060
1100/* Calculate number of bytes which will be deleted or cut during balance */ 1061/* Calculate number of bytes which will be deleted or cut during balance */
1101static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode) 1062static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1102{ 1063{
1103 int n_del_size; 1064 int del_size;
1104 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); 1065 struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
1105 1066
1106 if (is_statdata_le_ih(p_le_ih)) 1067 if (is_statdata_le_ih(p_le_ih))
1107 return 0; 1068 return 0;
1108 1069
1109 n_del_size = 1070 del_size =
1110 (c_mode == 1071 (mode ==
1111 M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; 1072 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1112 if (is_direntry_le_ih(p_le_ih)) { 1073 if (is_direntry_le_ih(p_le_ih)) {
1113 // return EMPTY_DIR_SIZE; /* We delete empty directories only. */ 1074 /* return EMPTY_DIR_SIZE; We delete empty directories only.
1114 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different 1075 * we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1115 // empty size. ick. FIXME, is this right? 1076 * empty size. ick. FIXME, is this right? */
1116 // 1077 return del_size;
1117 return n_del_size;
1118 } 1078 }
1119 1079
1120 if (is_indirect_le_ih(p_le_ih)) 1080 if (is_indirect_le_ih(p_le_ih))
1121 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih); 1081 del_size = (del_size / UNFM_P_SIZE) *
1122 return n_del_size; 1082 (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1083 return del_size;
1123} 1084}
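
calc_deleted_bytes_number() maps the balance's insert_size back into file bytes: direct bytes count one for one, while every indirect pointer stands for a whole unformatted block. The cut case, reduced to a sketch with invented values:

#include <stdio.h>

#define UNFM_P_SIZE 4

/* File bytes that cutting `cut_size` on-disk bytes deletes: for an
 * indirect item every pointer represents one data block. */
static long deleted_bytes(int cut_size, int indirect, int blk_size)
{
        long del = -cut_size;                   /* cuts carry negative size */
        if (indirect)
                del = del / UNFM_P_SIZE * blk_size;
        return del;
}

int main(void)
{
        printf("%ld\n", deleted_bytes(-12, 1, 4096));   /* 3 blocks: 12288 */
        return 0;
}
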
1124 1085
1125static void init_tb_struct(struct reiserfs_transaction_handle *th, 1086static void init_tb_struct(struct reiserfs_transaction_handle *th,
1126 struct tree_balance *p_s_tb, 1087 struct tree_balance *tb,
1127 struct super_block *p_s_sb, 1088 struct super_block *sb,
1128 struct treepath *p_s_path, int n_size) 1089 struct treepath *path, int size)
1129{ 1090{
1130 1091
1131 BUG_ON(!th->t_trans_id); 1092 BUG_ON(!th->t_trans_id);
1132 1093
1133 memset(p_s_tb, '\0', sizeof(struct tree_balance)); 1094 memset(tb, '\0', sizeof(struct tree_balance));
1134 p_s_tb->transaction_handle = th; 1095 tb->transaction_handle = th;
1135 p_s_tb->tb_sb = p_s_sb; 1096 tb->tb_sb = sb;
1136 p_s_tb->tb_path = p_s_path; 1097 tb->tb_path = path;
1137 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; 1098 PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1138 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; 1099 PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1139 p_s_tb->insert_size[0] = n_size; 1100 tb->insert_size[0] = size;
1140} 1101}
1141 1102
1142void padd_item(char *item, int total_length, int length) 1103void padd_item(char *item, int total_length, int length)
@@ -1175,73 +1136,77 @@ char head2type(struct item_head *ih)
1175} 1136}
1176#endif 1137#endif
1177 1138
1178/* Delete object item. */ 1139/* Delete object item.
1179int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the deleted item. */ 1140 * th - active transaction handle
1180 const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */ 1141 * path - path to the deleted item
1181 struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */ 1142 * item_key - key to search for the deleted item
1182 struct buffer_head *p_s_un_bh) 1143 * indode - used for updating i_blocks and quotas
1183{ /* NULL or unformatted node pointer. */ 1144 * un_bh - NULL or unformatted node pointer
1184 struct super_block *p_s_sb = p_s_inode->i_sb; 1145 */
1146int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1147 struct treepath *path, const struct cpu_key *item_key,
1148 struct inode *inode, struct buffer_head *un_bh)
1149{
1150 struct super_block *sb = inode->i_sb;
1185 struct tree_balance s_del_balance; 1151 struct tree_balance s_del_balance;
1186 struct item_head s_ih; 1152 struct item_head s_ih;
1187 struct item_head *q_ih; 1153 struct item_head *q_ih;
1188 int quota_cut_bytes; 1154 int quota_cut_bytes;
1189 int n_ret_value, n_del_size, n_removed; 1155 int ret_value, del_size, removed;
1190 1156
1191#ifdef CONFIG_REISERFS_CHECK 1157#ifdef CONFIG_REISERFS_CHECK
1192 char c_mode; 1158 char mode;
1193 int n_iter = 0; 1159 int iter = 0;
1194#endif 1160#endif
1195 1161
1196 BUG_ON(!th->t_trans_id); 1162 BUG_ON(!th->t_trans_id);
1197 1163
1198 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 1164 init_tb_struct(th, &s_del_balance, sb, path,
1199 0 /*size is unknown */ ); 1165 0 /*size is unknown */ );
1200 1166
1201 while (1) { 1167 while (1) {
1202 n_removed = 0; 1168 removed = 0;
1203 1169
1204#ifdef CONFIG_REISERFS_CHECK 1170#ifdef CONFIG_REISERFS_CHECK
1205 n_iter++; 1171 iter++;
1206 c_mode = 1172 mode =
1207#endif 1173#endif
1208 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, 1174 prepare_for_delete_or_cut(th, inode, path,
1209 p_s_item_key, &n_removed, 1175 item_key, &removed,
1210 &n_del_size, 1176 &del_size,
1211 max_reiserfs_offset(p_s_inode)); 1177 max_reiserfs_offset(inode));
1212 1178
1213 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); 1179 RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1214 1180
1215 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 1181 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1216 s_del_balance.insert_size[0] = n_del_size; 1182 s_del_balance.insert_size[0] = del_size;
1217 1183
1218 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); 1184 ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1219 if (n_ret_value != REPEAT_SEARCH) 1185 if (ret_value != REPEAT_SEARCH)
1220 break; 1186 break;
1221 1187
1222 PROC_INFO_INC(p_s_sb, delete_item_restarted); 1188 PROC_INFO_INC(sb, delete_item_restarted);
1223 1189
1224 // file system changed, repeat search 1190 // file system changed, repeat search
1225 n_ret_value = 1191 ret_value =
1226 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1192 search_for_position_by_key(sb, item_key, path);
1227 if (n_ret_value == IO_ERROR) 1193 if (ret_value == IO_ERROR)
1228 break; 1194 break;
1229 if (n_ret_value == FILE_NOT_FOUND) { 1195 if (ret_value == FILE_NOT_FOUND) {
1230 reiserfs_warning(p_s_sb, 1196 reiserfs_warning(sb, "vs-5340",
1231 "vs-5340: reiserfs_delete_item: "
1232 "no items of the file %K found", 1197 "no items of the file %K found",
1233 p_s_item_key); 1198 item_key);
1234 break; 1199 break;
1235 } 1200 }
1236 } /* while (1) */ 1201 } /* while (1) */
1237 1202
1238 if (n_ret_value != CARRY_ON) { 1203 if (ret_value != CARRY_ON) {
1239 unfix_nodes(&s_del_balance); 1204 unfix_nodes(&s_del_balance);
1240 return 0; 1205 return 0;
1241 } 1206 }
1242 // reiserfs_delete_item returns item length when success 1207 // reiserfs_delete_item returns item length when success
1243 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1208 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1244 q_ih = get_ih(p_s_path); 1209 q_ih = get_ih(path);
1245 quota_cut_bytes = ih_item_len(q_ih); 1210 quota_cut_bytes = ih_item_len(q_ih);
1246 1211
1247 /* hack so the quota code doesn't have to guess if the file 1212 /* hack so the quota code doesn't have to guess if the file
@@ -1250,15 +1215,15 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath
1250 ** split into multiple items, and we only want to decrement for 1215 ** split into multiple items, and we only want to decrement for
1251 ** the unfm node once 1216 ** the unfm node once
1252 */ 1217 */
1253 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { 1218 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1254 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { 1219 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1255 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; 1220 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1256 } else { 1221 } else {
1257 quota_cut_bytes = 0; 1222 quota_cut_bytes = 0;
1258 } 1223 }
1259 } 1224 }
1260 1225
1261 if (p_s_un_bh) { 1226 if (un_bh) {
1262 int off; 1227 int off;
1263 char *data; 1228 char *data;
1264 1229
@@ -1276,31 +1241,31 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath
1276 ** The unformatted node must be dirtied later on. We can't be 1241 ** The unformatted node must be dirtied later on. We can't be
1277 ** sure here if the entire tail has been deleted yet. 1242 ** sure here if the entire tail has been deleted yet.
1278 ** 1243 **
1279 ** p_s_un_bh is from the page cache (all unformatted nodes are 1244 ** un_bh is from the page cache (all unformatted nodes are
1280 ** from the page cache) and might be a highmem page. So, we 1245 ** from the page cache) and might be a highmem page. So, we
1281 ** can't use p_s_un_bh->b_data. 1246 ** can't use un_bh->b_data.
1282 ** -clm 1247 ** -clm
1283 */ 1248 */
1284 1249
1285 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0); 1250 data = kmap_atomic(un_bh->b_page, KM_USER0);
1286 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); 1251 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1287 memcpy(data + off, 1252 memcpy(data + off,
1288 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), 1253 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1289 n_ret_value); 1254 ret_value);
1290 kunmap_atomic(data, KM_USER0); 1255 kunmap_atomic(data, KM_USER0);
1291 } 1256 }
1292 /* Perform balancing after all resources have been collected at once. */ 1257 /* Perform balancing after all resources have been collected at once. */
1293 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1258 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1294 1259
1295#ifdef REISERQUOTA_DEBUG 1260#ifdef REISERQUOTA_DEBUG
1296 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 1261 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1297 "reiserquota delete_item(): freeing %u, id=%u type=%c", 1262 "reiserquota delete_item(): freeing %u, id=%u type=%c",
1298 quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); 1263 quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1299#endif 1264#endif
1300 vfs_dq_free_space_nodirty(p_s_inode, quota_cut_bytes); 1265 vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
1301 1266
1302 /* Return deleted body length */ 1267 /* Return deleted body length */
1303 return n_ret_value; 1268 return ret_value;
1304} 1269}
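
reiserfs_delete_item() is the canonical shape of the do_balance protocol: initialise a tree_balance, loop fix_nodes() for as long as it answers REPEAT_SEARCH (re-searching the key because the tree shifted under a schedule), then either unfix_nodes() on failure or call do_balance() exactly once with all resources pinned. The control flow as a user-space skeleton; the stubs are invented and rigged to force one retry:

#include <stdio.h>

enum { CARRY_ON, REPEAT_SEARCH, NO_DISK_SPACE };

static int attempts;

static int fix_nodes_stub(void)     /* succeed on the second try */
{
        return ++attempts < 2 ? REPEAT_SEARCH : CARRY_ON;
}

static int research_key(void)       /* stand-in for re-searching the key */
{
        return 0;                    /* 0: key found again */
}

int main(void)
{
        int ret;

        while (1) {
                /* prepare_for_delete_or_cut() would run here */
                ret = fix_nodes_stub();
                if (ret != REPEAT_SEARCH)
                        break;
                if (research_key())  /* tree changed: find the item again */
                        return 1;    /* IO_ERROR / FILE_NOT_FOUND path */
        }

        if (ret != CARRY_ON)
                return 0;            /* unfix_nodes() and bail out */

        /* all resources pinned: balance exactly once */
        printf("do_balance(M_DELETE)\n");
        return 0;
}
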
1305 1270
1306/* Summary Of Mechanisms For Handling Collisions Between Processes: 1271/* Summary Of Mechanisms For Handling Collisions Between Processes:
@@ -1338,10 +1303,9 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1338 while (1) { 1303 while (1) {
1339 retval = search_item(th->t_super, &cpu_key, &path); 1304 retval = search_item(th->t_super, &cpu_key, &path);
1340 if (retval == IO_ERROR) { 1305 if (retval == IO_ERROR) {
1341 reiserfs_warning(th->t_super, 1306 reiserfs_error(th->t_super, "vs-5350",
1342 "vs-5350: reiserfs_delete_solid_item: " 1307 "i/o failure occurred trying "
1343 "i/o failure occurred trying to delete %K", 1308 "to delete %K", &cpu_key);
1344 &cpu_key);
1345 break; 1309 break;
1346 } 1310 }
1347 if (retval != ITEM_FOUND) { 1311 if (retval != ITEM_FOUND) {
@@ -1355,9 +1319,8 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1355 GET_GENERATION_NUMBER(le_key_k_offset 1319 GET_GENERATION_NUMBER(le_key_k_offset
1356 (le_key_version(key), 1320 (le_key_version(key),
1357 key)) == 1)) 1321 key)) == 1))
1358 reiserfs_warning(th->t_super, 1322 reiserfs_warning(th->t_super, "vs-5355",
1359 "vs-5355: reiserfs_delete_solid_item: %k not found", 1323 "%k not found", key);
1360 key);
1361 break; 1324 break;
1362 } 1325 }
1363 if (!tb_init) { 1326 if (!tb_init) {
@@ -1389,8 +1352,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1389 break; 1352 break;
1390 } 1353 }
1391 // IO_ERROR, NO_DISK_SPACE, etc 1354 // IO_ERROR, NO_DISK_SPACE, etc
1392 reiserfs_warning(th->t_super, 1355 reiserfs_warning(th->t_super, "vs-5360",
1393 "vs-5360: reiserfs_delete_solid_item: "
1394 "could not delete %K due to fix_nodes failure", 1356 "could not delete %K due to fix_nodes failure",
1395 &cpu_key); 1357 &cpu_key);
1396 unfix_nodes(&tb); 1358 unfix_nodes(&tb);
@@ -1462,36 +1424,37 @@ static void unmap_buffers(struct page *page, loff_t pos)
1462} 1424}
1463 1425
1464static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, 1426static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1465 struct inode *p_s_inode, 1427 struct inode *inode,
1466 struct page *page, 1428 struct page *page,
1467 struct treepath *p_s_path, 1429 struct treepath *path,
1468 const struct cpu_key *p_s_item_key, 1430 const struct cpu_key *item_key,
1469 loff_t n_new_file_size, char *p_c_mode) 1431 loff_t new_file_size, char *mode)
1470{ 1432{
1471 struct super_block *p_s_sb = p_s_inode->i_sb; 1433 struct super_block *sb = inode->i_sb;
1472 int n_block_size = p_s_sb->s_blocksize; 1434 int block_size = sb->s_blocksize;
1473 int cut_bytes; 1435 int cut_bytes;
1474 BUG_ON(!th->t_trans_id); 1436 BUG_ON(!th->t_trans_id);
1475 BUG_ON(n_new_file_size != p_s_inode->i_size); 1437 BUG_ON(new_file_size != inode->i_size);
1476 1438
1477 /* the page being sent in could be NULL if there was an i/o error 1439 /* the page being sent in could be NULL if there was an i/o error
1478 ** reading in the last block. The user will hit problems trying to 1440 ** reading in the last block. The user will hit problems trying to
1479 ** read the file, but for now we just skip the indirect2direct 1441 ** read the file, but for now we just skip the indirect2direct
1480 */ 1442 */
1481 if (atomic_read(&p_s_inode->i_count) > 1 || 1443 if (atomic_read(&inode->i_count) > 1 ||
1482 !tail_has_to_be_packed(p_s_inode) || 1444 !tail_has_to_be_packed(inode) ||
1483 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) { 1445 !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1484 // leave tail in an unformatted node 1446 /* leave tail in an unformatted node */
1485 *p_c_mode = M_SKIP_BALANCING; 1447 *mode = M_SKIP_BALANCING;
1486 cut_bytes = 1448 cut_bytes =
1487 n_block_size - (n_new_file_size & (n_block_size - 1)); 1449 block_size - (new_file_size & (block_size - 1));
1488 pathrelse(p_s_path); 1450 pathrelse(path);
1489 return cut_bytes; 1451 return cut_bytes;
1490 } 1452 }
1491 /* Permorm the conversion to a direct_item. */ 1453 /* Perform the conversion to a direct_item. */
1492 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */ 1454 /* return indirect_to_direct(inode, path, item_key,
1493 return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key, 1455 new_file_size, mode); */
1494 n_new_file_size, p_c_mode); 1456 return indirect2direct(th, inode, page, path, item_key,
1457 new_file_size, mode);
1495} 1458}
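
When the tail is left unpacked, the value handed back to the truncate path is simply the slack between the new size and the end of its last block, block_size - (new_file_size & (block_size - 1)). In isolation (power-of-two block size assumed, as in the caller):

#include <stdio.h>

/* Bytes to cut from the last block when the tail stays in an
 * unformatted node; block_size must be a power of two. */
static int tail_slack(long new_file_size, int block_size)
{
        return block_size - (int)(new_file_size & (block_size - 1));
}

int main(void)
{
        printf("%d\n", tail_slack(6000, 4096));   /* 4096 - 1904 = 2192 */
        return 0;
}
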
1496 1459
1497/* we did indirect_to_direct conversion. And we have inserted direct 1460/* we did indirect_to_direct conversion. And we have inserted direct
@@ -1515,8 +1478,8 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1515 /* look for the last byte of the tail */ 1478 /* look for the last byte of the tail */
1516 if (search_for_position_by_key(inode->i_sb, &tail_key, path) == 1479 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1517 POSITION_NOT_FOUND) 1480 POSITION_NOT_FOUND)
1518 reiserfs_panic(inode->i_sb, 1481 reiserfs_panic(inode->i_sb, "vs-5615",
1519 "vs-5615: indirect_to_direct_roll_back: found invalid item"); 1482 "found invalid item");
1520 RFALSE(path->pos_in_item != 1483 RFALSE(path->pos_in_item !=
1521 ih_item_len(PATH_PITEM_HEAD(path)) - 1, 1484 ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1522 "vs-5616: appended bytes found"); 1485 "vs-5616: appended bytes found");
@@ -1533,38 +1496,39 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1533 set_cpu_key_k_offset(&tail_key, 1496 set_cpu_key_k_offset(&tail_key,
1534 cpu_key_k_offset(&tail_key) - removed); 1497 cpu_key_k_offset(&tail_key) - removed);
1535 } 1498 }
1536 reiserfs_warning(inode->i_sb, 1499 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1537 "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); 1500 "conversion has been rolled back due to "
1501 "lack of disk space");
1538 //mark_file_without_tail (inode); 1502 //mark_file_without_tail (inode);
1539 mark_inode_dirty(inode); 1503 mark_inode_dirty(inode);
1540} 1504}
1541 1505
1542/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ 1506/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1543int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, 1507int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1544 struct treepath *p_s_path, 1508 struct treepath *path,
1545 struct cpu_key *p_s_item_key, 1509 struct cpu_key *item_key,
1546 struct inode *p_s_inode, 1510 struct inode *inode,
1547 struct page *page, loff_t n_new_file_size) 1511 struct page *page, loff_t new_file_size)
1548{ 1512{
1549 struct super_block *p_s_sb = p_s_inode->i_sb; 1513 struct super_block *sb = inode->i_sb;
1550 /* Every function which is going to call do_balance must first 1514 /* Every function which is going to call do_balance must first
1551 create a tree_balance structure. Then it must fill up this 1515 create a tree_balance structure. Then it must fill up this
1552 structure by using the init_tb_struct and fix_nodes functions. 1516 structure by using the init_tb_struct and fix_nodes functions.
1553 After that we can make tree balancing. */ 1517 After that we can make tree balancing. */
1554 struct tree_balance s_cut_balance; 1518 struct tree_balance s_cut_balance;
1555 struct item_head *p_le_ih; 1519 struct item_head *p_le_ih;
1556 int n_cut_size = 0, /* Amount to be cut. */ 1520 int cut_size = 0, /* Amount to be cut. */
1557 n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */ 1521 ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */
1558 n_is_inode_locked = 0; 1522 is_inode_locked = 0;
1559 char c_mode; /* Mode of the balance. */ 1523 char mode; /* Mode of the balance. */
1560 int retval2 = -1; 1524 int retval2 = -1;
1561 int quota_cut_bytes; 1525 int quota_cut_bytes;
1562 loff_t tail_pos = 0; 1526 loff_t tail_pos = 0;
1563 1527
1564 BUG_ON(!th->t_trans_id); 1528 BUG_ON(!th->t_trans_id);
1565 1529
1566 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, 1530 init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1567 n_cut_size); 1531 cut_size);
1568 1532
1569 /* Repeat this loop until we either cut the item without needing 1533 /* Repeat this loop until we either cut the item without needing
1570 to balance, or we fix_nodes without schedule occurring */ 1534 to balance, or we fix_nodes without schedule occurring */
@@ -1574,144 +1538,142 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1574 free unformatted nodes which are pointed to by the cut 1538 free unformatted nodes which are pointed to by the cut
1575 pointers. */ 1539 pointers. */
1576 1540
1577 c_mode = 1541 mode =
1578 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, 1542 prepare_for_delete_or_cut(th, inode, path,
1579 p_s_item_key, &n_removed, 1543 item_key, &removed,
1580 &n_cut_size, n_new_file_size); 1544 &cut_size, new_file_size);
1581 if (c_mode == M_CONVERT) { 1545 if (mode == M_CONVERT) {
1582 /* convert last unformatted node to direct item or leave 1546 /* convert last unformatted node to direct item or leave
1583 tail in the unformatted node */ 1547 tail in the unformatted node */
1584 RFALSE(n_ret_value != CARRY_ON, 1548 RFALSE(ret_value != CARRY_ON,
1585 "PAP-5570: can not convert twice"); 1549 "PAP-5570: can not convert twice");
1586 1550
1587 n_ret_value = 1551 ret_value =
1588 maybe_indirect_to_direct(th, p_s_inode, page, 1552 maybe_indirect_to_direct(th, inode, page,
1589 p_s_path, p_s_item_key, 1553 path, item_key,
1590 n_new_file_size, &c_mode); 1554 new_file_size, &mode);
1591 if (c_mode == M_SKIP_BALANCING) 1555 if (mode == M_SKIP_BALANCING)
1592 /* tail has been left in the unformatted node */ 1556 /* tail has been left in the unformatted node */
1593 return n_ret_value; 1557 return ret_value;
1594 1558
1595 n_is_inode_locked = 1; 1559 is_inode_locked = 1;
1596 1560
1597 /* removing of last unformatted node will change value we 1561 /* removing of last unformatted node will change value we
1598 have to return to truncate. Save it */ 1562 have to return to truncate. Save it */
1599 retval2 = n_ret_value; 1563 retval2 = ret_value;
1600 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */ 1564 /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1601 1565
1602 /* So, we have performed the first part of the conversion: 1566 /* So, we have performed the first part of the conversion:
1603 inserting the new direct item. Now we are removing the 1567 inserting the new direct item. Now we are removing the
1604 last unformatted node pointer. Set key to search for 1568 last unformatted node pointer. Set key to search for
1605 it. */ 1569 it. */
1606 set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT); 1570 set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1607 p_s_item_key->key_length = 4; 1571 item_key->key_length = 4;
1608 n_new_file_size -= 1572 new_file_size -=
1609 (n_new_file_size & (p_s_sb->s_blocksize - 1)); 1573 (new_file_size & (sb->s_blocksize - 1));
1610 tail_pos = n_new_file_size; 1574 tail_pos = new_file_size;
1611 set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1); 1575 set_cpu_key_k_offset(item_key, new_file_size + 1);
1612 if (search_for_position_by_key 1576 if (search_for_position_by_key
1613 (p_s_sb, p_s_item_key, 1577 (sb, item_key,
1614 p_s_path) == POSITION_NOT_FOUND) { 1578 path) == POSITION_NOT_FOUND) {
1615 print_block(PATH_PLAST_BUFFER(p_s_path), 3, 1579 print_block(PATH_PLAST_BUFFER(path), 3,
1616 PATH_LAST_POSITION(p_s_path) - 1, 1580 PATH_LAST_POSITION(path) - 1,
1617 PATH_LAST_POSITION(p_s_path) + 1); 1581 PATH_LAST_POSITION(path) + 1);
1618 reiserfs_panic(p_s_sb, 1582 reiserfs_panic(sb, "PAP-5580", "item to "
1619 "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", 1583 "convert does not exist (%K)",
1620 p_s_item_key); 1584 item_key);
1621 } 1585 }
1622 continue; 1586 continue;
1623 } 1587 }
1624 if (n_cut_size == 0) { 1588 if (cut_size == 0) {
1625 pathrelse(p_s_path); 1589 pathrelse(path);
1626 return 0; 1590 return 0;
1627 } 1591 }
1628 1592
1629 s_cut_balance.insert_size[0] = n_cut_size; 1593 s_cut_balance.insert_size[0] = cut_size;
1630 1594
1631 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); 1595 ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1632 if (n_ret_value != REPEAT_SEARCH) 1596 if (ret_value != REPEAT_SEARCH)
1633 break; 1597 break;
1634 1598
1635 PROC_INFO_INC(p_s_sb, cut_from_item_restarted); 1599 PROC_INFO_INC(sb, cut_from_item_restarted);
1636 1600
1637 n_ret_value = 1601 ret_value =
1638 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1602 search_for_position_by_key(sb, item_key, path);
1639 if (n_ret_value == POSITION_FOUND) 1603 if (ret_value == POSITION_FOUND)
1640 continue; 1604 continue;
1641 1605
1642 reiserfs_warning(p_s_sb, 1606 reiserfs_warning(sb, "PAP-5610", "item %K not found",
1643 "PAP-5610: reiserfs_cut_from_item: item %K not found", 1607 item_key);
1644 p_s_item_key);
1645 unfix_nodes(&s_cut_balance); 1608 unfix_nodes(&s_cut_balance);
1646 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; 1609 return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1647 } /* while */ 1610 } /* while */
1648 1611
1649 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) 1612 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1650 if (n_ret_value != CARRY_ON) { 1613 if (ret_value != CARRY_ON) {
1651 if (n_is_inode_locked) { 1614 if (is_inode_locked) {
1652 // FIXME: this seems to be not needed: we are always able 1615 // FIXME: this seems to be not needed: we are always able
1653 // to cut item 1616 // to cut item
1654 indirect_to_direct_roll_back(th, p_s_inode, p_s_path); 1617 indirect_to_direct_roll_back(th, inode, path);
1655 } 1618 }
1656 if (n_ret_value == NO_DISK_SPACE) 1619 if (ret_value == NO_DISK_SPACE)
1657 reiserfs_warning(p_s_sb, "NO_DISK_SPACE"); 1620 reiserfs_warning(sb, "reiserfs-5092",
1621 "NO_DISK_SPACE");
1658 unfix_nodes(&s_cut_balance); 1622 unfix_nodes(&s_cut_balance);
1659 return -EIO; 1623 return -EIO;
1660 } 1624 }
1661 1625
1662 /* go ahead and perform balancing */ 1626 /* go ahead and perform balancing */
1663 1627
1664 RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); 1628 RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1665 1629
1666 /* Calculate number of bytes that need to be cut from the item. */ 1630 /* Calculate number of bytes that need to be cut from the item. */
1667 quota_cut_bytes = 1631 quota_cut_bytes =
1668 (c_mode == 1632 (mode ==
1669 M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance. 1633 M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
1670 insert_size[0]; 1634 insert_size[0];
1671 if (retval2 == -1) 1635 if (retval2 == -1)
1672 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); 1636 ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1673 else 1637 else
1674 n_ret_value = retval2; 1638 ret_value = retval2;
1675 1639
1676 /* For direct items, we only change the quota when deleting the last 1640 /* For direct items, we only change the quota when deleting the last
1677 ** item. 1641 ** item.
1678 */ 1642 */
1679 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); 1643 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1680 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1644 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1681 if (c_mode == M_DELETE && 1645 if (mode == M_DELETE &&
1682 (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1646 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1683 1) { 1647 1) {
1684 // FIXME: this is to keep 3.5 happy 1648 // FIXME: this is to keep 3.5 happy
1685 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; 1649 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1686 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; 1650 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1687 } else { 1651 } else {
1688 quota_cut_bytes = 0; 1652 quota_cut_bytes = 0;
1689 } 1653 }
1690 } 1654 }
1691#ifdef CONFIG_REISERFS_CHECK 1655#ifdef CONFIG_REISERFS_CHECK
1692 if (n_is_inode_locked) { 1656 if (is_inode_locked) {
1693 struct item_head *le_ih = 1657 struct item_head *le_ih =
1694 PATH_PITEM_HEAD(s_cut_balance.tb_path); 1658 PATH_PITEM_HEAD(s_cut_balance.tb_path);
1695 /* we are going to complete indirect2direct conversion. Make 1659 /* we are going to complete indirect2direct conversion. Make
1696 sure, that we exactly remove last unformatted node pointer 1660 sure, that we exactly remove last unformatted node pointer
1697 of the item */ 1661 of the item */
1698 if (!is_indirect_le_ih(le_ih)) 1662 if (!is_indirect_le_ih(le_ih))
1699 reiserfs_panic(p_s_sb, 1663 reiserfs_panic(sb, "vs-5652",
1700 "vs-5652: reiserfs_cut_from_item: "
1701 "item must be indirect %h", le_ih); 1664 "item must be indirect %h", le_ih);
1702 1665
1703 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) 1666 if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1704 reiserfs_panic(p_s_sb, 1667 reiserfs_panic(sb, "vs-5653", "completing "
1705 "vs-5653: reiserfs_cut_from_item: " 1668 "indirect2direct conversion indirect "
1706 "completing indirect2direct conversion indirect item %h " 1669 "item %h being deleted must be of "
1707 "being deleted must be of 4 byte long", 1670 "4 byte long", le_ih);
1708 le_ih);
1709 1671
1710 if (c_mode == M_CUT 1672 if (mode == M_CUT
1711 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { 1673 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1712 reiserfs_panic(p_s_sb, 1674 reiserfs_panic(sb, "vs-5654", "can not complete "
1713 "vs-5654: reiserfs_cut_from_item: " 1675 "indirect2direct conversion of %h "
1714 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", 1676 "(CUT, insert_size==%d)",
1715 le_ih, s_cut_balance.insert_size[0]); 1677 le_ih, s_cut_balance.insert_size[0]);
1716 } 1678 }
1717 /* it would be useful to make sure, that right neighboring 1679 /* it would be useful to make sure, that right neighboring
@@ -1719,23 +1681,23 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1719 } 1681 }
1720#endif 1682#endif
1721 1683
1722 do_balance(&s_cut_balance, NULL, NULL, c_mode); 1684 do_balance(&s_cut_balance, NULL, NULL, mode);
1723 if (n_is_inode_locked) { 1685 if (is_inode_locked) {
1724 /* we've done an indirect->direct conversion. when the data block 1686 /* we've done an indirect->direct conversion. when the data block
1725 ** was freed, it was removed from the list of blocks that must 1687 ** was freed, it was removed from the list of blocks that must
1726 ** be flushed before the transaction commits, make sure to 1688 ** be flushed before the transaction commits, make sure to
1727 ** unmap and invalidate it 1689 ** unmap and invalidate it
1728 */ 1690 */
1729 unmap_buffers(page, tail_pos); 1691 unmap_buffers(page, tail_pos);
1730 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask; 1692 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1731 } 1693 }
1732#ifdef REISERQUOTA_DEBUG 1694#ifdef REISERQUOTA_DEBUG
1733 reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE, 1695 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1734 "reiserquota cut_from_item(): freeing %u id=%u type=%c", 1696 "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1735 quota_cut_bytes, p_s_inode->i_uid, '?'); 1697 quota_cut_bytes, inode->i_uid, '?');
1736#endif 1698#endif
1737 vfs_dq_free_space_nodirty(p_s_inode, quota_cut_bytes); 1699 vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
1738 return n_ret_value; 1700 return ret_value;
1739} 1701}
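
Both deletion paths share the quota hack seen above: a direct item whose 1-based key offset sits on a block boundary ((offset & (blocksize - 1)) == 1) is the remnant of an indirect-to-direct conversion, so deleting it releases a full block plus one on-disk pointer; any other direct item holds tail bytes whose quota was already charged with the block. The rule as a predicate, with invented sizes:

#include <stdio.h>

#define UNFM_P_SIZE 4

/* Quota bytes to release for a deleted direct item, following the
 * rule in reiserfs_delete_item()/reiserfs_cut_from_item().  Offsets
 * are 1-based and block_size is a power of two. */
static int quota_cut_bytes(long key_offset, int block_size)
{
        if ((key_offset & (block_size - 1)) == 1)
                return block_size + UNFM_P_SIZE;  /* converted tail block */
        return 0;                                 /* already accounted */
}

int main(void)
{
        printf("%d\n", quota_cut_bytes(4097, 4096));  /* 4100 */
        printf("%d\n", quota_cut_bytes(4200, 4096));  /* 0 */
        return 0;
}
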
1740 1702
1741static void truncate_directory(struct reiserfs_transaction_handle *th, 1703static void truncate_directory(struct reiserfs_transaction_handle *th,
@@ -1743,8 +1705,7 @@ static void truncate_directory(struct reiserfs_transaction_handle *th,
1743{ 1705{
1744 BUG_ON(!th->t_trans_id); 1706 BUG_ON(!th->t_trans_id);
1745 if (inode->i_nlink) 1707 if (inode->i_nlink)
1746 reiserfs_warning(inode->i_sb, 1708 reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1747 "vs-5655: truncate_directory: link count != 0");
1748 1709
1749 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET); 1710 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1750 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY); 1711 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
@@ -1756,8 +1717,8 @@ static void truncate_directory(struct reiserfs_transaction_handle *th,
1756 1717
1757/* Truncate file to the new size. Note, this must be called with a transaction 1718/* Truncate file to the new size. Note, this must be called with a transaction
1758 already started */ 1719 already started */
1759int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new 1720int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1760 size */ 1721 struct inode *inode, /* ->i_size contains new size */
1761 struct page *page, /* up to date for last block */ 1722 struct page *page, /* up to date for last block */
1762 int update_timestamps /* when it is called by 1723 int update_timestamps /* when it is called by
1763 file_release to convert 1724 file_release to convert
@@ -1768,47 +1729,45 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1768 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ 1729 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1769 struct item_head *p_le_ih; /* Pointer to an item header. */ 1730 struct item_head *p_le_ih; /* Pointer to an item header. */
1770 struct cpu_key s_item_key; /* Key to search for a previous file item. */ 1731 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1771 loff_t n_file_size, /* Old file size. */ 1732 loff_t file_size, /* Old file size. */
1772 n_new_file_size; /* New file size. */ 1733 new_file_size; /* New file size. */
1773 int n_deleted; /* Number of deleted or truncated bytes. */ 1734 int deleted; /* Number of deleted or truncated bytes. */
1774 int retval; 1735 int retval;
1775 int err = 0; 1736 int err = 0;
1776 1737
1777 BUG_ON(!th->t_trans_id); 1738 BUG_ON(!th->t_trans_id);
1778 if (! 1739 if (!
1779 (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) 1740 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1780 || S_ISLNK(p_s_inode->i_mode))) 1741 || S_ISLNK(inode->i_mode)))
1781 return 0; 1742 return 0;
1782 1743
1783 if (S_ISDIR(p_s_inode->i_mode)) { 1744 if (S_ISDIR(inode->i_mode)) {
1784 // deletion of directory - no need to update timestamps 1745 // deletion of directory - no need to update timestamps
1785 truncate_directory(th, p_s_inode); 1746 truncate_directory(th, inode);
1786 return 0; 1747 return 0;
1787 } 1748 }
1788 1749
1789 /* Get new file size. */ 1750 /* Get new file size. */
1790 n_new_file_size = p_s_inode->i_size; 1751 new_file_size = inode->i_size;
1791 1752
1792 // FIXME: note, that key type is unimportant here 1753 // FIXME: note, that key type is unimportant here
1793 make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode), 1754 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1794 TYPE_DIRECT, 3); 1755 TYPE_DIRECT, 3);
1795 1756
1796 retval = 1757 retval =
1797 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, 1758 search_for_position_by_key(inode->i_sb, &s_item_key,
1798 &s_search_path); 1759 &s_search_path);
1799 if (retval == IO_ERROR) { 1760 if (retval == IO_ERROR) {
1800 reiserfs_warning(p_s_inode->i_sb, 1761 reiserfs_error(inode->i_sb, "vs-5657",
1801 "vs-5657: reiserfs_do_truncate: " 1762 "i/o failure occurred trying to truncate %K",
1802 "i/o failure occurred trying to truncate %K", 1763 &s_item_key);
1803 &s_item_key);
1804 err = -EIO; 1764 err = -EIO;
1805 goto out; 1765 goto out;
1806 } 1766 }
1807 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { 1767 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1808 reiserfs_warning(p_s_inode->i_sb, 1768 reiserfs_error(inode->i_sb, "PAP-5660",
1809 "PAP-5660: reiserfs_do_truncate: " 1769 "wrong result %d of search for %K", retval,
1810 "wrong result %d of search for %K", retval, 1770 &s_item_key);
1811 &s_item_key);
1812 1771
1813 err = -EIO; 1772 err = -EIO;
1814 goto out; 1773 goto out;
@@ -1819,56 +1778,56 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1819 /* Get real file size (total length of all file items) */ 1778 /* Get real file size (total length of all file items) */
1820 p_le_ih = PATH_PITEM_HEAD(&s_search_path); 1779 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1821 if (is_statdata_le_ih(p_le_ih)) 1780 if (is_statdata_le_ih(p_le_ih))
1822 n_file_size = 0; 1781 file_size = 0;
1823 else { 1782 else {
1824 loff_t offset = le_ih_k_offset(p_le_ih); 1783 loff_t offset = le_ih_k_offset(p_le_ih);
1825 int bytes = 1784 int bytes =
1826 op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize); 1785 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1827 1786
1828 /* this may mismatch with real file size: if last direct item 1787 /* this may mismatch with real file size: if last direct item
1829 had no padding zeros and last unformatted node had no free 1788 had no padding zeros and last unformatted node had no free
1830 space, this file would have this file size */ 1789 space, this file would have this file size */
1831 n_file_size = offset + bytes - 1; 1790 file_size = offset + bytes - 1;
1832 } 1791 }
1833 /* 1792 /*
1834 * are we doing a full truncate or delete, if so 1793 * are we doing a full truncate or delete, if so
1835 * kick in the reada code 1794 * kick in the reada code
1836 */ 1795 */
1837 if (n_new_file_size == 0) 1796 if (new_file_size == 0)
1838 s_search_path.reada = PATH_READA | PATH_READA_BACK; 1797 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1839 1798
1840 if (n_file_size == 0 || n_file_size < n_new_file_size) { 1799 if (file_size == 0 || file_size < new_file_size) {
1841 goto update_and_out; 1800 goto update_and_out;
1842 } 1801 }
1843 1802
1844 /* Update key to search for the last file item. */ 1803 /* Update key to search for the last file item. */
1845 set_cpu_key_k_offset(&s_item_key, n_file_size); 1804 set_cpu_key_k_offset(&s_item_key, file_size);
1846 1805
1847 do { 1806 do {
1848 /* Cut or delete file item. */ 1807 /* Cut or delete file item. */
1849 n_deleted = 1808 deleted =
1850 reiserfs_cut_from_item(th, &s_search_path, &s_item_key, 1809 reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1851 p_s_inode, page, n_new_file_size); 1810 inode, page, new_file_size);
1852 if (n_deleted < 0) { 1811 if (deleted < 0) {
1853 reiserfs_warning(p_s_inode->i_sb, 1812 reiserfs_warning(inode->i_sb, "vs-5665",
1854 "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed"); 1813 "reiserfs_cut_from_item failed");
1855 reiserfs_check_path(&s_search_path); 1814 reiserfs_check_path(&s_search_path);
1856 return 0; 1815 return 0;
1857 } 1816 }
1858 1817
1859 RFALSE(n_deleted > n_file_size, 1818 RFALSE(deleted > file_size,
1860 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", 1819 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1861 n_deleted, n_file_size, &s_item_key); 1820 deleted, file_size, &s_item_key);
1862 1821
1863 /* Change key to search the last file item. */ 1822 /* Change key to search the last file item. */
1864 n_file_size -= n_deleted; 1823 file_size -= deleted;
1865 1824
1866 set_cpu_key_k_offset(&s_item_key, n_file_size); 1825 set_cpu_key_k_offset(&s_item_key, file_size);
1867 1826
1868 /* While there are bytes to truncate and previous file item is present in the tree. */ 1827 /* While there are bytes to truncate and previous file item is present in the tree. */
1869 1828
1870 /* 1829 /*
1871 ** This loop could take a really long time, and could log 1830 ** This loop could take a really long time, and could log
1872 ** many more blocks than a transaction can hold. So, we do a polite 1831 ** many more blocks than a transaction can hold. So, we do a polite
1873 ** journal end here, and if the transaction needs ending, we make 1832 ** journal end here, and if the transaction needs ending, we make
1874 ** sure the file is consistent before ending the current trans 1833 ** sure the file is consistent before ending the current trans
@@ -1877,37 +1836,38 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1877 if (journal_transaction_should_end(th, 0) || 1836 if (journal_transaction_should_end(th, 0) ||
1878 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1837 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1879 int orig_len_alloc = th->t_blocks_allocated; 1838 int orig_len_alloc = th->t_blocks_allocated;
1880 decrement_counters_in_path(&s_search_path); 1839 pathrelse(&s_search_path);
1881 1840
1882 if (update_timestamps) { 1841 if (update_timestamps) {
1883 p_s_inode->i_mtime = p_s_inode->i_ctime = 1842 inode->i_mtime = CURRENT_TIME_SEC;
1884 CURRENT_TIME_SEC; 1843 inode->i_ctime = CURRENT_TIME_SEC;
1885 } 1844 }
1886 reiserfs_update_sd(th, p_s_inode); 1845 reiserfs_update_sd(th, inode);
1887 1846
1888 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc); 1847 err = journal_end(th, inode->i_sb, orig_len_alloc);
1889 if (err) 1848 if (err)
1890 goto out; 1849 goto out;
1891 err = journal_begin(th, p_s_inode->i_sb, 1850 err = journal_begin(th, inode->i_sb,
1892 JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ; 1851 JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1893 if (err) 1852 if (err)
1894 goto out; 1853 goto out;
1895 reiserfs_update_inode_transaction(p_s_inode); 1854 reiserfs_update_inode_transaction(inode);
1896 } 1855 }
1897 } while (n_file_size > ROUND_UP(n_new_file_size) && 1856 } while (file_size > ROUND_UP(new_file_size) &&
1898 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, 1857 search_for_position_by_key(inode->i_sb, &s_item_key,
1899 &s_search_path) == POSITION_FOUND); 1858 &s_search_path) == POSITION_FOUND);
1900 1859
1901 RFALSE(n_file_size > ROUND_UP(n_new_file_size), 1860 RFALSE(file_size > ROUND_UP(new_file_size),
1902 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", 1861 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1903 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); 1862 new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1904 1863
1905 update_and_out: 1864 update_and_out:
1906 if (update_timestamps) { 1865 if (update_timestamps) {
1907 // this is truncate, not file closing 1866 // this is truncate, not file closing
1908 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; 1867 inode->i_mtime = CURRENT_TIME_SEC;
1868 inode->i_ctime = CURRENT_TIME_SEC;
1909 } 1869 }
1910 reiserfs_update_sd(th, p_s_inode); 1870 reiserfs_update_sd(th, inode);
1911 1871
1912 out: 1872 out:
1913 pathrelse(&s_search_path); 1873 pathrelse(&s_search_path);
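The comment carried through the hunk above explains the problem this loop solves: cutting a large file can log more blocks than a single transaction can hold, so the loop politely ends the transaction and opens a fresh one whenever it runs low. Below is a minimal sketch of that pattern, using only the journal calls visible in this hunk; the helper name is hypothetical, and the pathrelse and error handling are abbreviated.

        /* Hypothetical helper condensing the restart logic above. */
        static int polite_journal_restart(struct reiserfs_transaction_handle *th,
                                          struct inode *inode,
                                          int update_timestamps)
        {
                int err;

                /* keep going while the current transaction has room */
                if (!journal_transaction_should_end(th, 0) &&
                    reiserfs_transaction_free_space(th) >
                    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
                        return 0;

                /* leave the on-disk stat data consistent before closing */
                if (update_timestamps) {
                        inode->i_mtime = CURRENT_TIME_SEC;
                        inode->i_ctime = CURRENT_TIME_SEC;
                }
                reiserfs_update_sd(th, inode);

                err = journal_end(th, inode->i_sb, th->t_blocks_allocated);
                if (err)
                        return err;
                err = journal_begin(th, inode->i_sb,
                                    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD +
                                    JOURNAL_PER_BALANCE_CNT * 4);
                if (err)
                        return err;
                /* tie the inode to the new transaction before continuing */
                reiserfs_update_inode_transaction(inode);
                return 0;
        }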
@@ -1917,7 +1877,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p
1917 #ifdef CONFIG_REISERFS_CHECK 1877 #ifdef CONFIG_REISERFS_CHECK
1918// this makes sure, that we __append__, not overwrite or add holes 1878// this makes sure, that we __append__, not overwrite or add holes
1919static void check_research_for_paste(struct treepath *path, 1879static void check_research_for_paste(struct treepath *path,
1920 const struct cpu_key *p_s_key) 1880 const struct cpu_key *key)
1921{ 1881{
1922 struct item_head *found_ih = get_ih(path); 1882 struct item_head *found_ih = get_ih(path);
1923 1883
@@ -1925,36 +1885,36 @@ static void check_research_for_paste(struct treepath *path,
1925 if (le_ih_k_offset(found_ih) + 1885 if (le_ih_k_offset(found_ih) +
1926 op_bytes_number(found_ih, 1886 op_bytes_number(found_ih,
1927 get_last_bh(path)->b_size) != 1887 get_last_bh(path)->b_size) !=
1928 cpu_key_k_offset(p_s_key) 1888 cpu_key_k_offset(key)
1929 || op_bytes_number(found_ih, 1889 || op_bytes_number(found_ih,
1930 get_last_bh(path)->b_size) != 1890 get_last_bh(path)->b_size) !=
1931 pos_in_item(path)) 1891 pos_in_item(path))
1932 reiserfs_panic(NULL, 1892 reiserfs_panic(NULL, "PAP-5720", "found direct item "
1933 "PAP-5720: check_research_for_paste: " 1893 "%h or position (%d) does not match "
1934 "found direct item %h or position (%d) does not match to key %K", 1894 "to key %K", found_ih,
1935 found_ih, pos_in_item(path), p_s_key); 1895 pos_in_item(path), key);
1936 } 1896 }
1937 if (is_indirect_le_ih(found_ih)) { 1897 if (is_indirect_le_ih(found_ih)) {
1938 if (le_ih_k_offset(found_ih) + 1898 if (le_ih_k_offset(found_ih) +
1939 op_bytes_number(found_ih, 1899 op_bytes_number(found_ih,
1940 get_last_bh(path)->b_size) != 1900 get_last_bh(path)->b_size) !=
1941 cpu_key_k_offset(p_s_key) 1901 cpu_key_k_offset(key)
1942 || I_UNFM_NUM(found_ih) != pos_in_item(path) 1902 || I_UNFM_NUM(found_ih) != pos_in_item(path)
1943 || get_ih_free_space(found_ih) != 0) 1903 || get_ih_free_space(found_ih) != 0)
1944 reiserfs_panic(NULL, 1904 reiserfs_panic(NULL, "PAP-5730", "found indirect "
1945 "PAP-5730: check_research_for_paste: " 1905 "item (%h) or position (%d) does not "
1946 "found indirect item (%h) or position (%d) does not match to key (%K)", 1906 "match to key (%K)",
1947 found_ih, pos_in_item(path), p_s_key); 1907 found_ih, pos_in_item(path), key);
1948 } 1908 }
1949 } 1909 }
1950 #endif /* config reiserfs check */ 1910 #endif /* config reiserfs check */
1951 1911
1952/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ 1912/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1953int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_search_path, /* Path to the pasted item. */ 1913int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */
1954 const struct cpu_key *p_s_key, /* Key to search for the needed item. */ 1914 const struct cpu_key *key, /* Key to search for the needed item. */
1955 struct inode *inode, /* Inode item belongs to */ 1915 struct inode *inode, /* Inode item belongs to */
1956 const char *p_c_body, /* Pointer to the bytes to paste. */ 1916 const char *body, /* Pointer to the bytes to paste. */
1957 int n_pasted_size) 1917 int pasted_size)
1958 { /* Size of pasted bytes. */ 1918 { /* Size of pasted bytes. */
1959 struct tree_balance s_paste_balance; 1919 struct tree_balance s_paste_balance;
1960 int retval; 1920 int retval;
@@ -1967,18 +1927,18 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1967 #ifdef REISERQUOTA_DEBUG 1927 #ifdef REISERQUOTA_DEBUG
1968 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 1928 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1969 "reiserquota paste_into_item(): allocating %u id=%u type=%c", 1929 "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1970 n_pasted_size, inode->i_uid, 1930 pasted_size, inode->i_uid,
1971 key2type(&(p_s_key->on_disk_key))); 1931 key2type(&(key->on_disk_key)));
1972 #endif 1932 #endif
1973 1933
1974 if (vfs_dq_alloc_space_nodirty(inode, n_pasted_size)) { 1934 if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) {
1975 pathrelse(p_s_search_path); 1935 pathrelse(search_path);
1976 return -EDQUOT; 1936 return -EDQUOT;
1977 } 1937 }
1978 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, 1938 init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1979 n_pasted_size); 1939 pasted_size);
1980 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 1940 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1981 s_paste_balance.key = p_s_key->on_disk_key; 1941 s_paste_balance.key = key->on_disk_key;
1982 #endif 1942 #endif
1983 1943
1984 /* DQUOT_* can schedule, must check before the fix_nodes */ 1944 /* DQUOT_* can schedule, must check before the fix_nodes */
@@ -1988,33 +1948,33 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1988 1948
1989 while ((retval = 1949 while ((retval =
1990 fix_nodes(M_PASTE, &s_paste_balance, NULL, 1950 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1991 p_c_body)) == REPEAT_SEARCH) { 1951 body)) == REPEAT_SEARCH) {
1992 search_again: 1952 search_again:
1993 /* file system changed while we were in the fix_nodes */ 1953 /* file system changed while we were in the fix_nodes */
1994 PROC_INFO_INC(th->t_super, paste_into_item_restarted); 1954 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1995 retval = 1955 retval =
1996 search_for_position_by_key(th->t_super, p_s_key, 1956 search_for_position_by_key(th->t_super, key,
1997 p_s_search_path); 1957 search_path);
1998 if (retval == IO_ERROR) { 1958 if (retval == IO_ERROR) {
1999 retval = -EIO; 1959 retval = -EIO;
2000 goto error_out; 1960 goto error_out;
2001 } 1961 }
2002 if (retval == POSITION_FOUND) { 1962 if (retval == POSITION_FOUND) {
2003 reiserfs_warning(inode->i_sb, 1963 reiserfs_warning(inode->i_sb, "PAP-5710",
2004 "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", 1964 "entry or pasted byte (%K) exists",
2005 p_s_key); 1965 key);
2006 retval = -EEXIST; 1966 retval = -EEXIST;
2007 goto error_out; 1967 goto error_out;
2008 } 1968 }
2009 #ifdef CONFIG_REISERFS_CHECK 1969 #ifdef CONFIG_REISERFS_CHECK
2010 check_research_for_paste(p_s_search_path, p_s_key); 1970 check_research_for_paste(search_path, key);
2011 #endif 1971 #endif
2012 } 1972 }
2013 1973
2014 /* Perform balancing after all resources are collected by fix_nodes, and 1974 /* Perform balancing after all resources are collected by fix_nodes, and
2015 accessing them will not risk triggering schedule. */ 1975 accessing them will not risk triggering schedule. */
2016 if (retval == CARRY_ON) { 1976 if (retval == CARRY_ON) {
2017 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE); 1977 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2018 return 0; 1978 return 0;
2019 } 1979 }
2020 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 1980 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
@@ -2024,18 +1984,24 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2024 #ifdef REISERQUOTA_DEBUG 1984 #ifdef REISERQUOTA_DEBUG
2025 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 1985 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2026 "reiserquota paste_into_item(): freeing %u id=%u type=%c", 1986 "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2027 n_pasted_size, inode->i_uid, 1987 pasted_size, inode->i_uid,
2028 key2type(&(p_s_key->on_disk_key))); 1988 key2type(&(key->on_disk_key)));
2029 #endif 1989 #endif
2030 vfs_dq_free_space_nodirty(inode, n_pasted_size); 1990 vfs_dq_free_space_nodirty(inode, pasted_size);
2031 return retval; 1991 return retval;
2032 } 1992 }
2033 1993
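reiserfs_paste_into_item() above and reiserfs_insert_item() below share the same retry protocol around fix_nodes(): if the filesystem changed while node resources were being collected, the search is redone and fix_nodes() retried, and only a CARRY_ON result lets do_balance() run, at which point nothing can schedule anymore. A control-flow outline of the paste flavour (quota bookkeeping and the error_out label are dropped for brevity):

        while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL,
                                   body)) == REPEAT_SEARCH) {
                /* tree changed under us: redo the search, then retry */
                retval = search_for_position_by_key(th->t_super, key,
                                                    search_path);
                if (retval == IO_ERROR)
                        return -EIO;
                if (retval == POSITION_FOUND)
                        return -EEXIST; /* target byte appeared meanwhile */
        }
        if (retval == CARRY_ON) {
                /* all resources pinned; balancing can no longer schedule */
                do_balance(&s_paste_balance, NULL /* ih */, body, M_PASTE);
                return 0;
        }
        return (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;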
2034 /* Insert new item into the buffer at the path. */ 1994 /* Insert new item into the buffer at the path.
2035 int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *p_s_path, /* Path to the inserteded item. */ 1995 * th - active transaction handle
2036 const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */ 1996 * path - path to the inserted item
2037 struct inode *inode, const char *p_c_body) 1997 * ih - pointer to the item header to insert
2038 { /* Pointer to the bytes to insert. */ 1998 * body - pointer to the bytes to insert
 1999 */
2000 int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2001 struct treepath *path, const struct cpu_key *key,
2002 struct item_head *ih, struct inode *inode,
2003 const char *body)
2004 {
2039 struct tree_balance s_ins_balance; 2005 struct tree_balance s_ins_balance;
2040 int retval; 2006 int retval;
2041 int fs_gen = 0; 2007 int fs_gen = 0;
@@ -2045,28 +2011,27 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2045 2011
2046 if (inode) { /* Do we count quotas for item? */ 2012 if (inode) { /* Do we count quotas for item? */
2047 fs_gen = get_generation(inode->i_sb); 2013 fs_gen = get_generation(inode->i_sb);
2048 quota_bytes = ih_item_len(p_s_ih); 2014 quota_bytes = ih_item_len(ih);
2049 2015
2050 /* hack so the quota code doesn't have to guess if the file has 2016 /* hack so the quota code doesn't have to guess if the file has
2051 ** a tail, links are always tails, so there's no guessing needed 2017 ** a tail, links are always tails, so there's no guessing needed
2052 */ 2018 */
2053 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) { 2019 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2054 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; 2020 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2055 }
2056 #ifdef REISERQUOTA_DEBUG 2021 #ifdef REISERQUOTA_DEBUG
2057 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2022 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2058 "reiserquota insert_item(): allocating %u id=%u type=%c", 2023 "reiserquota insert_item(): allocating %u id=%u type=%c",
2059 quota_bytes, inode->i_uid, head2type(p_s_ih)); 2024 quota_bytes, inode->i_uid, head2type(ih));
2060 #endif 2025 #endif
2061 /* We can't dirty inode here. It would be immediately written but 2026 /* We can't dirty inode here. It would be immediately written but
2062 * appropriate stat item isn't inserted yet... */ 2027 * appropriate stat item isn't inserted yet... */
2063 if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) { 2028 if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) {
2064 pathrelse(p_s_path); 2029 pathrelse(path);
2065 return -EDQUOT; 2030 return -EDQUOT;
2066 } 2031 }
2067 } 2032 }
2068 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, 2033 init_tb_struct(th, &s_ins_balance, th->t_super, path,
2069 IH_SIZE + ih_item_len(p_s_ih)); 2034 IH_SIZE + ih_item_len(ih));
2070 #ifdef DISPLACE_NEW_PACKING_LOCALITIES 2035 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2071 s_ins_balance.key = key->on_disk_key; 2036 s_ins_balance.key = key->on_disk_key;
2072 #endif 2037 #endif
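The quota charge computed in the hunk above sidesteps guessing whether the file has a tail: symlinks are always tails, and for anything else a direct item is billed as a whole block plus an unformatted-node pointer, so a later tail conversion presumably leaves the accounting unchanged. The decision in isolation (identifiers as in the new-side code; an illustrative fragment, not a new helper):

        int quota_bytes = ih_item_len(ih);

        /* links are always tails; other direct items are billed as a
         * full block plus the pointer a tail conversion would add */
        if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
                quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;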
@@ -2076,19 +2041,18 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2076 } 2041 }
2077 2042
2078 while ((retval = 2043 while ((retval =
2079 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, 2044 fix_nodes(M_INSERT, &s_ins_balance, ih,
2080 p_c_body)) == REPEAT_SEARCH) { 2045 body)) == REPEAT_SEARCH) {
2081 search_again: 2046 search_again:
2082 /* file system changed while we were in the fix_nodes */ 2047 /* file system changed while we were in the fix_nodes */
2083 PROC_INFO_INC(th->t_super, insert_item_restarted); 2048 PROC_INFO_INC(th->t_super, insert_item_restarted);
2084 retval = search_item(th->t_super, key, p_s_path); 2049 retval = search_item(th->t_super, key, path);
2085 if (retval == IO_ERROR) { 2050 if (retval == IO_ERROR) {
2086 retval = -EIO; 2051 retval = -EIO;
2087 goto error_out; 2052 goto error_out;
2088 } 2053 }
2089 if (retval == ITEM_FOUND) { 2054 if (retval == ITEM_FOUND) {
2090 reiserfs_warning(th->t_super, 2055 reiserfs_warning(th->t_super, "PAP-5760",
2091 "PAP-5760: reiserfs_insert_item: "
2092 "key %K already exists in the tree", 2056 "key %K already exists in the tree",
2093 key); 2057 key);
2094 retval = -EEXIST; 2058 retval = -EEXIST;
@@ -2098,7 +2062,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2098 2062
2099 /* make balancing after all resources will be collected at a time */ 2063 /* make balancing after all resources will be collected at a time */
2100 if (retval == CARRY_ON) { 2064 if (retval == CARRY_ON) {
2101 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT); 2065 do_balance(&s_ins_balance, ih, body, M_INSERT);
2102 return 0; 2066 return 0;
2103 } 2067 }
2104 2068
@@ -2109,7 +2073,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath
2109 #ifdef REISERQUOTA_DEBUG 2073 #ifdef REISERQUOTA_DEBUG
2110 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, 2074 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2111 "reiserquota insert_item(): freeing %u id=%u type=%c", 2075 "reiserquota insert_item(): freeing %u id=%u type=%c",
2112 quota_bytes, inode->i_uid, head2type(p_s_ih)); 2076 quota_bytes, inode->i_uid, head2type(ih));
2113 #endif 2077 #endif
2114 if (inode) 2078 if (inode)
2115 vfs_dq_free_space_nodirty(inode, quota_bytes); 2079 vfs_dq_free_space_nodirty(inode, quota_bytes);
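The change repeated throughout this file, and in super.c below, is a calling-convention cleanup: reiserfs_warning(), reiserfs_error() and reiserfs_panic() now take the unique message ID as a separate argument instead of embedding it, together with the function name, in the format string. Schematically, using the vs-5665 case from above:

        /* old convention: ID and function name baked into the format */
        reiserfs_warning(inode->i_sb,
                         "vs-5665: reiserfs_do_truncate: "
                         "reiserfs_cut_from_item failed");

        /* new convention: the ID travels as its own argument */
        reiserfs_warning(inode->i_sb, "vs-5665",
                         "reiserfs_cut_from_item failed");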
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5dbafb739401..972250c62896 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -183,9 +183,9 @@ static int finish_unfinished(struct super_block *s)
183 if (REISERFS_SB(s)->s_qf_names[i]) { 183 if (REISERFS_SB(s)->s_qf_names[i]) {
184 int ret = reiserfs_quota_on_mount(s, i); 184 int ret = reiserfs_quota_on_mount(s, i);
185 if (ret < 0) 185 if (ret < 0)
186 reiserfs_warning(s, 186 reiserfs_warning(s, "reiserfs-2500",
187 "reiserfs: cannot turn on journaled quota: error %d", 187 "cannot turn on journaled "
188 ret); 188 "quota: error %d", ret);
189 } 189 }
190 } 190 }
191 #endif 191 #endif
@@ -195,17 +195,16 @@ static int finish_unfinished(struct super_block *s)
195 while (!retval) { 195 while (!retval) {
196 retval = search_item(s, &max_cpu_key, &path); 196 retval = search_item(s, &max_cpu_key, &path);
197 if (retval != ITEM_NOT_FOUND) { 197 if (retval != ITEM_NOT_FOUND) {
198 reiserfs_warning(s, 198 reiserfs_error(s, "vs-2140",
199 "vs-2140: finish_unfinished: search_by_key returned %d", 199 "search_by_key returned %d", retval);
200 retval);
201 break; 200 break;
202 } 201 }
203 202
204 bh = get_last_bh(&path); 203 bh = get_last_bh(&path);
205 item_pos = get_item_pos(&path); 204 item_pos = get_item_pos(&path);
206 if (item_pos != B_NR_ITEMS(bh)) { 205 if (item_pos != B_NR_ITEMS(bh)) {
207 reiserfs_warning(s, 206 reiserfs_warning(s, "vs-2060",
208 "vs-2060: finish_unfinished: wrong position found"); 207 "wrong position found");
209 break; 208 break;
210 } 209 }
211 item_pos--; 210 item_pos--;
@@ -235,8 +234,7 @@ static int finish_unfinished(struct super_block *s)
235 if (!inode) { 234 if (!inode) {
236 /* the unlink almost completed, it just did not manage to remove 235 /* the unlink almost completed, it just did not manage to remove
237 "save" link and release objectid */ 236 "save" link and release objectid */
238 reiserfs_warning(s, 237 reiserfs_warning(s, "vs-2180", "iget failed for %K",
239 "vs-2180: finish_unfinished: iget failed for %K",
240 &obj_key); 238 &obj_key);
241 retval = remove_save_link_only(s, &save_link_key, 1); 239 retval = remove_save_link_only(s, &save_link_key, 1);
242 continue; 240 continue;
@@ -244,8 +242,8 @@ static int finish_unfinished(struct super_block *s)
244 242
245 if (!truncate && inode->i_nlink) { 243 if (!truncate && inode->i_nlink) {
246 /* file is not unlinked */ 244 /* file is not unlinked */
247 reiserfs_warning(s, 245 reiserfs_warning(s, "vs-2185",
248 "vs-2185: finish_unfinished: file %K is not unlinked", 246 "file %K is not unlinked",
249 &obj_key); 247 &obj_key);
250 retval = remove_save_link_only(s, &save_link_key, 0); 248 retval = remove_save_link_only(s, &save_link_key, 0);
251 continue; 249 continue;
@@ -257,8 +255,9 @@ static int finish_unfinished(struct super_block *s)
257 The only imaginable way is to execute unfinished truncate request 255 The only imaginable way is to execute unfinished truncate request
258 then boot into old kernel, remove the file and create dir with 256 then boot into old kernel, remove the file and create dir with
259 the same key. */ 257 the same key. */
260 reiserfs_warning(s, 258 reiserfs_warning(s, "green-2101",
261 "green-2101: impossible truncate on a directory %k. Please report", 259 "impossible truncate on a "
260 "directory %k. Please report",
262 INODE_PKEY(inode)); 261 INODE_PKEY(inode));
263 retval = remove_save_link_only(s, &save_link_key, 0); 262 retval = remove_save_link_only(s, &save_link_key, 0);
264 truncate = 0; 263 truncate = 0;
@@ -288,9 +287,10 @@ static int finish_unfinished(struct super_block *s)
288 /* removal gets completed in iput */ 287 /* removal gets completed in iput */
289 retval = 0; 288 retval = 0;
290 } else { 289 } else {
291 reiserfs_warning(s, "Dead loop in " 290 reiserfs_warning(s, "super-2189", "Dead loop "
292 "finish_unfinished detected, " 291 "in finish_unfinished "
293 "just remove save link\n"); 292 "detected, just remove "
293 "save link\n");
294 retval = remove_save_link_only(s, 294 retval = remove_save_link_only(s,
295 &save_link_key, 0); 295 &save_link_key, 0);
296 } 296 }
@@ -360,8 +360,9 @@ void add_save_link(struct reiserfs_transaction_handle *th,
360 } else { 360 } else {
361 /* truncate */ 361 /* truncate */
362 if (S_ISDIR(inode->i_mode)) 362 if (S_ISDIR(inode->i_mode))
363 reiserfs_warning(inode->i_sb, 363 reiserfs_warning(inode->i_sb, "green-2102",
364 "green-2102: Adding a truncate savelink for a directory %k! Please report", 364 "Adding a truncate savelink for "
365 "a directory %k! Please report",
365 INODE_PKEY(inode)); 366 INODE_PKEY(inode));
366 set_cpu_key_k_offset(&key, 1); 367 set_cpu_key_k_offset(&key, 1);
367 set_cpu_key_k_type(&key, TYPE_INDIRECT); 368 set_cpu_key_k_type(&key, TYPE_INDIRECT);
@@ -376,9 +377,9 @@ void add_save_link(struct reiserfs_transaction_handle *th,
376 retval = search_item(inode->i_sb, &key, &path); 377 retval = search_item(inode->i_sb, &key, &path);
377 if (retval != ITEM_NOT_FOUND) { 378 if (retval != ITEM_NOT_FOUND) {
378 if (retval != -ENOSPC) 379 if (retval != -ENOSPC)
379 reiserfs_warning(inode->i_sb, "vs-2100: add_save_link:" 380 reiserfs_error(inode->i_sb, "vs-2100",
380 "search_by_key (%K) returned %d", &key, 381 "search_by_key (%K) returned %d", &key,
381 retval); 382 retval);
382 pathrelse(&path); 383 pathrelse(&path);
383 return; 384 return;
384 } 385 }
@@ -391,9 +392,8 @@ void add_save_link(struct reiserfs_transaction_handle *th,
391 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); 392 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
392 if (retval) { 393 if (retval) {
393 if (retval != -ENOSPC) 394 if (retval != -ENOSPC)
394 reiserfs_warning(inode->i_sb, 395 reiserfs_error(inode->i_sb, "vs-2120",
395 "vs-2120: add_save_link: insert_item returned %d", 396 "insert_item returned %d", retval);
396 retval);
397 } else { 397 } else {
398 if (truncate) 398 if (truncate)
399 REISERFS_I(inode)->i_flags |= 399 REISERFS_I(inode)->i_flags |=
@@ -492,8 +492,7 @@ static void reiserfs_put_super(struct super_block *s)
492 print_statistics(s); 492 print_statistics(s);
493 493
494 if (REISERFS_SB(s)->reserved_blocks != 0) { 494 if (REISERFS_SB(s)->reserved_blocks != 0) {
495 reiserfs_warning(s, 495 reiserfs_warning(s, "green-2005", "reserved blocks left %d",
496 "green-2005: reiserfs_put_super: reserved blocks left %d",
497 REISERFS_SB(s)->reserved_blocks); 496 REISERFS_SB(s)->reserved_blocks);
498 } 497 }
499 498
@@ -559,8 +558,8 @@ static void reiserfs_dirty_inode(struct inode *inode)
559 558
560 int err = 0; 559 int err = 0;
561 if (inode->i_sb->s_flags & MS_RDONLY) { 560 if (inode->i_sb->s_flags & MS_RDONLY) {
562 reiserfs_warning(inode->i_sb, 561 reiserfs_warning(inode->i_sb, "clm-6006",
563 "clm-6006: writing inode %lu on readonly FS", 562 "writing inode %lu on readonly FS",
564 inode->i_ino); 563 inode->i_ino);
565 return; 564 return;
566 } 565 }
@@ -757,7 +756,7 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
757 char **opt_arg, unsigned long *bit_flags) 756 char **opt_arg, unsigned long *bit_flags)
758 { 757 {
759 char *p; 758 char *p;
760 /* foo=bar, 759 /* foo=bar,
761 ^ ^ ^ 760 ^ ^ ^
762 | | +-- option_end 761 | | +-- option_end
763 | +-- arg_start 762 | +-- arg_start
@@ -792,13 +791,15 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
792 if (bit_flags) { 791 if (bit_flags) {
793 if (opt->clrmask == 792 if (opt->clrmask ==
794 (1 << REISERFS_UNSUPPORTED_OPT)) 793 (1 << REISERFS_UNSUPPORTED_OPT))
795 reiserfs_warning(s, "%s not supported.", 794 reiserfs_warning(s, "super-6500",
795 "%s not supported.\n",
796 p); 796 p);
797 else 797 else
798 *bit_flags &= ~opt->clrmask; 798 *bit_flags &= ~opt->clrmask;
799 if (opt->setmask == 799 if (opt->setmask ==
800 (1 << REISERFS_UNSUPPORTED_OPT)) 800 (1 << REISERFS_UNSUPPORTED_OPT))
801 reiserfs_warning(s, "%s not supported.", 801 reiserfs_warning(s, "super-6501",
802 "%s not supported.\n",
802 p); 803 p);
803 else 804 else
804 *bit_flags |= opt->setmask; 805 *bit_flags |= opt->setmask;
@@ -807,7 +808,8 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
807 } 808 }
808 } 809 }
809 if (!opt->option_name) { 810 if (!opt->option_name) {
810 reiserfs_warning(s, "unknown mount option \"%s\"", p); 811 reiserfs_warning(s, "super-6502",
812 "unknown mount option \"%s\"", p);
811 return -1; 813 return -1;
812 } 814 }
813 815
@@ -815,8 +817,9 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
815 switch (*p) { 817 switch (*p) {
816 case '=': 818 case '=':
817 if (!opt->arg_required) { 819 if (!opt->arg_required) {
818 reiserfs_warning(s, 820 reiserfs_warning(s, "super-6503",
819 "the option \"%s\" does not require an argument", 821 "the option \"%s\" does not "
822 "require an argument\n",
820 opt->option_name); 823 opt->option_name);
821 return -1; 824 return -1;
822 } 825 }
@@ -824,14 +827,15 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
824 827
825 case 0: 828 case 0:
826 if (opt->arg_required) { 829 if (opt->arg_required) {
827 reiserfs_warning(s, 830 reiserfs_warning(s, "super-6504",
828 "the option \"%s\" requires an argument", 831 "the option \"%s\" requires an "
829 opt->option_name); 832 "argument\n", opt->option_name);
830 return -1; 833 return -1;
831 } 834 }
832 break; 835 break;
833 default: 836 default:
834 reiserfs_warning(s, "head of option \"%s\" is only correct", 837 reiserfs_warning(s, "super-6505",
838 "head of option \"%s\" is only correct\n",
835 opt->option_name); 839 opt->option_name);
836 return -1; 840 return -1;
837 } 841 }
@@ -843,7 +847,8 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
843 && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY)) 847 && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY))
844 && !strlen(p)) { 848 && !strlen(p)) {
845 /* this catches "option=," if not allowed */ 849 /* this catches "option=," if not allowed */
846 reiserfs_warning(s, "empty argument for \"%s\"", 850 reiserfs_warning(s, "super-6506",
851 "empty argument for \"%s\"\n",
847 opt->option_name); 852 opt->option_name);
848 return -1; 853 return -1;
849 } 854 }
@@ -865,7 +870,8 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
865 } 870 }
866 } 871 }
867 872
868 reiserfs_warning(s, "bad value \"%s\" for option \"%s\"", p, 873 reiserfs_warning(s, "super-6506",
874 "bad value \"%s\" for option \"%s\"\n", p,
869 opt->option_name); 875 opt->option_name);
870 return -1; 876 return -1;
871 } 877 }
@@ -955,9 +961,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
955 *blocks = simple_strtoul(arg, &p, 0); 961 *blocks = simple_strtoul(arg, &p, 0);
956 if (*p != '\0') { 962 if (*p != '\0') {
957 /* NNN does not look like a number */ 963 /* NNN does not look like a number */
958 reiserfs_warning(s, 964 reiserfs_warning(s, "super-6507",
959 "reiserfs_parse_options: bad value %s", 965 "bad value %s for "
960 arg); 966 "-oresize\n", arg);
961 return 0; 967 return 0;
962 } 968 }
963 } 969 }
@@ -968,8 +974,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
968 unsigned long val = simple_strtoul(arg, &p, 0); 974 unsigned long val = simple_strtoul(arg, &p, 0);
969 /* commit=NNN (time in seconds) */ 975 /* commit=NNN (time in seconds) */
970 if (*p != '\0' || val >= (unsigned int)-1) { 976 if (*p != '\0' || val >= (unsigned int)-1) {
971 reiserfs_warning(s, 977 reiserfs_warning(s, "super-6508",
972 "reiserfs_parse_options: bad value %s", 978 "bad value %s for -ocommit\n",
973 arg); 979 arg);
974 return 0; 980 return 0;
975 } 981 }
@@ -977,16 +983,18 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
977 } 983 }
978 984
979 if (c == 'w') { 985 if (c == 'w') {
980 reiserfs_warning(s, "reiserfs: nolargeio option is no longer supported"); 986 reiserfs_warning(s, "super-6509", "nolargeio option "
987 "is no longer supported");
981 return 0; 988 return 0;
982 } 989 }
983 990
984 if (c == 'j') { 991 if (c == 'j') {
985 if (arg && *arg && jdev_name) { 992 if (arg && *arg && jdev_name) {
986 if (*jdev_name) { //Hm, already assigned? 993 if (*jdev_name) { //Hm, already assigned?
987 reiserfs_warning(s, 994 reiserfs_warning(s, "super-6510",
988 "reiserfs_parse_options: journal device was already specified to be %s", 995 "journal device was "
989 *jdev_name); 996 "already specified to "
997 "be %s", *jdev_name);
990 return 0; 998 return 0;
991 } 999 }
992 *jdev_name = arg; 1000 *jdev_name = arg;
@@ -998,29 +1006,35 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
998 1006
999 if (sb_any_quota_loaded(s) && 1007 if (sb_any_quota_loaded(s) &&
1000 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { 1008 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
1001 reiserfs_warning(s, 1009 reiserfs_warning(s, "super-6511",
1002 "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); 1010 "cannot change journaled "
1011 "quota options when quota "
1012 "turned on.");
1003 return 0; 1013 return 0;
1004 } 1014 }
1005 if (*arg) { /* Some filename specified? */ 1015 if (*arg) { /* Some filename specified? */
1006 if (REISERFS_SB(s)->s_qf_names[qtype] 1016 if (REISERFS_SB(s)->s_qf_names[qtype]
1007 && strcmp(REISERFS_SB(s)->s_qf_names[qtype], 1017 && strcmp(REISERFS_SB(s)->s_qf_names[qtype],
1008 arg)) { 1018 arg)) {
1009 reiserfs_warning(s, 1019 reiserfs_warning(s, "super-6512",
1010 "reiserfs_parse_options: %s quota file already specified.", 1020 "%s quota file "
1021 "already specified.",
1011 QTYPE2NAME(qtype)); 1022 QTYPE2NAME(qtype));
1012 return 0; 1023 return 0;
1013 } 1024 }
1014 if (strchr(arg, '/')) { 1025 if (strchr(arg, '/')) {
1015 reiserfs_warning(s, 1026 reiserfs_warning(s, "super-6513",
1016 "reiserfs_parse_options: quotafile must be on filesystem root."); 1027 "quotafile must be "
1028 "on filesystem root.");
1017 return 0; 1029 return 0;
1018 } 1030 }
1019 qf_names[qtype] = 1031 qf_names[qtype] =
1020 kmalloc(strlen(arg) + 1, GFP_KERNEL); 1032 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1021 if (!qf_names[qtype]) { 1033 if (!qf_names[qtype]) {
1022 reiserfs_warning(s, 1034 reiserfs_warning(s, "reiserfs-2502",
1023 "reiserfs_parse_options: not enough memory for storing quotafile name."); 1035 "not enough memory "
1036 "for storing "
1037 "quotafile name.");
1024 return 0; 1038 return 0;
1025 } 1039 }
1026 strcpy(qf_names[qtype], arg); 1040 strcpy(qf_names[qtype], arg);
@@ -1038,21 +1052,24 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1038 else if (!strcmp(arg, "vfsv0")) 1052 else if (!strcmp(arg, "vfsv0"))
1039 *qfmt = QFMT_VFS_V0; 1053 *qfmt = QFMT_VFS_V0;
1040 else { 1054 else {
1041 reiserfs_warning(s, 1055 reiserfs_warning(s, "super-6514",
1042 "reiserfs_parse_options: unknown quota format specified."); 1056 "unknown quota format "
1057 "specified.");
1043 return 0; 1058 return 0;
1044 } 1059 }
1045 if (sb_any_quota_loaded(s) && 1060 if (sb_any_quota_loaded(s) &&
1046 *qfmt != REISERFS_SB(s)->s_jquota_fmt) { 1061 *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
1047 reiserfs_warning(s, 1062 reiserfs_warning(s, "super-6515",
1048 "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); 1063 "cannot change journaled "
1064 "quota options when quota "
1065 "turned on.");
1049 return 0; 1066 return 0;
1050 } 1067 }
1051 } 1068 }
1052 #else 1069 #else
1053 if (c == 'u' || c == 'g' || c == 'f') { 1070 if (c == 'u' || c == 'g' || c == 'f') {
1054 reiserfs_warning(s, 1071 reiserfs_warning(s, "reiserfs-2503", "journaled "
1055 "reiserfs_parse_options: journaled quota options not supported."); 1072 "quota options not supported.");
1056 return 0; 1073 return 0;
1057 } 1074 }
1058 #endif 1075 #endif
@@ -1061,15 +1078,15 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1061 #ifdef CONFIG_QUOTA 1078 #ifdef CONFIG_QUOTA
1062 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt 1079 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
1063 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) { 1080 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
1064 reiserfs_warning(s, 1081 reiserfs_warning(s, "super-6515",
1065 "reiserfs_parse_options: journaled quota format not specified."); 1082 "journaled quota format not specified.");
1066 return 0; 1083 return 0;
1067 } 1084 }
1068 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1085 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
1069 if (!(*mount_options & (1 << REISERFS_QUOTA)) 1086 if (!(*mount_options & (1 << REISERFS_QUOTA))
1070 && sb_any_quota_loaded(s)) { 1087 && sb_any_quota_loaded(s)) {
1071 reiserfs_warning(s, 1088 reiserfs_warning(s, "super-6516", "quota options must "
1072 "reiserfs_parse_options: quota options must be present when quota is turned on."); 1089 "be present when quota is turned on.");
1073 return 0; 1090 return 0;
1074 } 1091 }
1075 #endif 1092 #endif
@@ -1129,14 +1146,15 @@ static void handle_attrs(struct super_block *s)
1129 1146
1130 if (reiserfs_attrs(s)) { 1147 if (reiserfs_attrs(s)) {
1131 if (old_format_only(s)) { 1148 if (old_format_only(s)) {
1132 reiserfs_warning(s, 1149 reiserfs_warning(s, "super-6517", "cannot support "
1133 "reiserfs: cannot support attributes on 3.5.x disk format"); 1150 "attributes on 3.5.x disk format");
1134 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1151 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1135 return; 1152 return;
1136 } 1153 }
1137 if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) { 1154 if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) {
1138 reiserfs_warning(s, 1155 reiserfs_warning(s, "super-6518", "cannot support "
1139 "reiserfs: cannot support attributes until flag is set in super-block"); 1156 "attributes until flag is set in "
1157 "super-block");
1140 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1158 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1141 } 1159 }
1142 } 1160 }
@@ -1278,6 +1296,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1278 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); 1296 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1279 s->s_flags &= ~MS_RDONLY; 1297 s->s_flags &= ~MS_RDONLY;
1280 set_sb_umount_state(rs, REISERFS_ERROR_FS); 1298 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1299 if (!old_format_only(s))
1300 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
1281 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ 1301 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
1282 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 1302 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1283 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; 1303 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
@@ -1312,7 +1332,7 @@ static int read_super_block(struct super_block *s, int offset)
1312 1332
1313 bh = sb_bread(s, offset / s->s_blocksize); 1333 bh = sb_bread(s, offset / s->s_blocksize);
1314 if (!bh) { 1334 if (!bh) {
1315 reiserfs_warning(s, "sh-2006: read_super_block: " 1335 reiserfs_warning(s, "sh-2006",
1316 "bread failed (dev %s, block %lu, size %lu)", 1336 "bread failed (dev %s, block %lu, size %lu)",
1317 reiserfs_bdevname(s), offset / s->s_blocksize, 1337 reiserfs_bdevname(s), offset / s->s_blocksize,
1318 s->s_blocksize); 1338 s->s_blocksize);
@@ -1326,15 +1346,15 @@ static int read_super_block(struct super_block *s, int offset)
1326 } 1346 }
1327 // 1347 //
1328 // ok, reiserfs signature (old or new) found in at the given offset 1348 // ok, reiserfs signature (old or new) found in at the given offset
1329 // 1349 //
1330 fs_blocksize = sb_blocksize(rs); 1350 fs_blocksize = sb_blocksize(rs);
1331 brelse(bh); 1351 brelse(bh);
1332 sb_set_blocksize(s, fs_blocksize); 1352 sb_set_blocksize(s, fs_blocksize);
1333 1353
1334 bh = sb_bread(s, offset / s->s_blocksize); 1354 bh = sb_bread(s, offset / s->s_blocksize);
1335 if (!bh) { 1355 if (!bh) {
1336 reiserfs_warning(s, "sh-2007: read_super_block: " 1356 reiserfs_warning(s, "sh-2007",
1337 "bread failed (dev %s, block %lu, size %lu)\n", 1357 "bread failed (dev %s, block %lu, size %lu)",
1338 reiserfs_bdevname(s), offset / s->s_blocksize, 1358 reiserfs_bdevname(s), offset / s->s_blocksize,
1339 s->s_blocksize); 1359 s->s_blocksize);
1340 return 1; 1360 return 1;
@@ -1342,8 +1362,8 @@ static int read_super_block(struct super_block *s, int offset)
1342 1362
1343 rs = (struct reiserfs_super_block *)bh->b_data; 1363 rs = (struct reiserfs_super_block *)bh->b_data;
1344 if (sb_blocksize(rs) != s->s_blocksize) { 1364 if (sb_blocksize(rs) != s->s_blocksize) {
1345 reiserfs_warning(s, "sh-2011: read_super_block: " 1365 reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
1346 "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n", 1366 "filesystem on (dev %s, block %Lu, size %lu)",
1347 reiserfs_bdevname(s), 1367 reiserfs_bdevname(s),
1348 (unsigned long long)bh->b_blocknr, 1368 (unsigned long long)bh->b_blocknr,
1349 s->s_blocksize); 1369 s->s_blocksize);
@@ -1353,9 +1373,10 @@ static int read_super_block(struct super_block *s, int offset)
1353 1373
1354 if (rs->s_v1.s_root_block == cpu_to_le32(-1)) { 1374 if (rs->s_v1.s_root_block == cpu_to_le32(-1)) {
1355 brelse(bh); 1375 brelse(bh);
1356 reiserfs_warning(s, 1376 reiserfs_warning(s, "super-6519", "Unfinished reiserfsck "
1357 "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" 1377 "--rebuild-tree run detected. Please run\n"
1358 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n" 1378 "reiserfsck --rebuild-tree and wait for a "
1379 "completion. If that fails\n"
1359 "get newer reiserfsprogs package"); 1380 "get newer reiserfsprogs package");
1360 return 1; 1381 return 1;
1361 } 1382 }
@@ -1367,18 +1388,15 @@ static int read_super_block(struct super_block *s, int offset)
1367 /* magic is of non-standard journal filesystem, look at s_version to 1388 /* magic is of non-standard journal filesystem, look at s_version to
1368 find which format is in use */ 1389 find which format is in use */
1369 if (sb_version(rs) == REISERFS_VERSION_2) 1390 if (sb_version(rs) == REISERFS_VERSION_2)
1370 reiserfs_warning(s, 1391 reiserfs_info(s, "found reiserfs format \"3.6\""
1371 "read_super_block: found reiserfs format \"3.6\"" 1392 " with non-standard journal\n");
1372 " with non-standard journal");
1373 else if (sb_version(rs) == REISERFS_VERSION_1) 1393 else if (sb_version(rs) == REISERFS_VERSION_1)
1374 reiserfs_warning(s, 1394 reiserfs_info(s, "found reiserfs format \"3.5\""
1375 "read_super_block: found reiserfs format \"3.5\"" 1395 " with non-standard journal\n");
1376 " with non-standard journal");
1377 else { 1396 else {
1378 reiserfs_warning(s, 1397 reiserfs_warning(s, "sh-2012", "found unknown "
1379 "sh-2012: read_super_block: found unknown " 1398 "format \"%u\" of reiserfs with "
1380 "format \"%u\" of reiserfs with non-standard magic", 1399 "non-standard magic", sb_version(rs));
1381 sb_version(rs));
1382 return 1; 1400 return 1;
1383 } 1401 }
1384 } else 1402 } else
@@ -1408,8 +1426,7 @@ static int reread_meta_blocks(struct super_block *s)
1408 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1426 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1409 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1427 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1410 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1428 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1411 reiserfs_warning(s, 1429 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1412 "reread_meta_blocks, error reading the super");
1413 return 1; 1430 return 1;
1414 } 1431 }
1415 1432
@@ -1452,8 +1469,8 @@ static __u32 find_hash_out(struct super_block *s)
1452 if (reiserfs_rupasov_hash(s)) { 1469 if (reiserfs_rupasov_hash(s)) {
1453 hash = YURA_HASH; 1470 hash = YURA_HASH;
1454 } 1471 }
1455 reiserfs_warning(s, "FS seems to be empty, autodetect " 1472 reiserfs_info(s, "FS seems to be empty, autodetect "
1456 "is using the default hash"); 1473 "is using the default hash\n");
1457 break; 1474 break;
1458 } 1475 }
1459 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)); 1476 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
@@ -1473,10 +1490,10 @@ static __u32 find_hash_out(struct super_block *s)
1473 && (yurahash == 1490 && (yurahash ==
1474 GET_HASH_VALUE(deh_offset 1491 GET_HASH_VALUE(deh_offset
1475 (&(de.de_deh[de.de_entry_num])))))) { 1492 (&(de.de_deh[de.de_entry_num])))))) {
1476 reiserfs_warning(s, 1493 reiserfs_warning(s, "reiserfs-2506", "Unable to "
1477 "Unable to automatically detect hash function. " 1494 "automatically detect hash function. "
1478 "Please mount with -o hash={tea,rupasov,r5}", 1495 "Please mount with -o "
1479 reiserfs_bdevname(s)); 1496 "hash={tea,rupasov,r5}");
1480 hash = UNSET_HASH; 1497 hash = UNSET_HASH;
1481 break; 1498 break;
1482 } 1499 }
@@ -1490,7 +1507,8 @@ static __u32 find_hash_out(struct super_block *s)
1490 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) 1507 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash)
1491 hash = R5_HASH; 1508 hash = R5_HASH;
1492 else { 1509 else {
1493 reiserfs_warning(s, "Unrecognised hash function"); 1510 reiserfs_warning(s, "reiserfs-2506",
1511 "Unrecognised hash function");
1494 hash = UNSET_HASH; 1512 hash = UNSET_HASH;
1495 } 1513 }
1496 } while (0); 1514 } while (0);
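find_hash_out() decides which directory hash the filesystem was created with by hashing the name of a real directory entry with each candidate function and comparing the result against the hash value stored in the entry's on-disk offset. A simplified sketch of that comparison, assuming the candidate helpers keyed_hash(), yura_hash() and r5_hash() from fs/reiserfs/hashes.c (the code above additionally rejects ambiguous matches between candidates):

        __u32 stored =
            GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])));

        if (GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)) == stored)
                hash = R5_HASH;
        else if (GET_HASH_VALUE(keyed_hash(de.de_name,
                                           de.de_namelen)) == stored)
                hash = TEA_HASH;
        else if (GET_HASH_VALUE(yura_hash(de.de_name,
                                          de.de_namelen)) == stored)
                hash = YURA_HASH;
        else
                hash = UNSET_HASH; /* unrecognised; the user must choose */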
@@ -1514,21 +1532,24 @@ static int what_hash(struct super_block *s)
1514 code = find_hash_out(s); 1532 code = find_hash_out(s);
1515 1533
1516 if (code != UNSET_HASH && reiserfs_hash_detect(s)) { 1534 if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
1517 /* detection has found the hash, and we must check against the 1535 /* detection has found the hash, and we must check against the
1518 ** mount options 1536 ** mount options
1519 */ 1537 */
1520 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { 1538 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
1521 reiserfs_warning(s, "Error, %s hash detected, " 1539 reiserfs_warning(s, "reiserfs-2507",
1540 "Error, %s hash detected, "
1522 "unable to force rupasov hash", 1541 "unable to force rupasov hash",
1523 reiserfs_hashname(code)); 1542 reiserfs_hashname(code));
1524 code = UNSET_HASH; 1543 code = UNSET_HASH;
1525 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { 1544 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) {
1526 reiserfs_warning(s, "Error, %s hash detected, " 1545 reiserfs_warning(s, "reiserfs-2508",
1546 "Error, %s hash detected, "
1527 "unable to force tea hash", 1547 "unable to force tea hash",
1528 reiserfs_hashname(code)); 1548 reiserfs_hashname(code));
1529 code = UNSET_HASH; 1549 code = UNSET_HASH;
1530 } else if (reiserfs_r5_hash(s) && code != R5_HASH) { 1550 } else if (reiserfs_r5_hash(s) && code != R5_HASH) {
1531 reiserfs_warning(s, "Error, %s hash detected, " 1551 reiserfs_warning(s, "reiserfs-2509",
1552 "Error, %s hash detected, "
1532 "unable to force r5 hash", 1553 "unable to force r5 hash",
1533 reiserfs_hashname(code)); 1554 reiserfs_hashname(code));
1534 code = UNSET_HASH; 1555 code = UNSET_HASH;
@@ -1544,7 +1565,7 @@ static int what_hash(struct super_block *s)
1544 } 1565 }
1545 } 1566 }
1546 1567
1547 /* if we are mounted RW, and we have a new valid hash code, update 1568 /* if we are mounted RW, and we have a new valid hash code, update
1548 ** the super 1569 ** the super
1549 */ 1570 */
1550 if (code != UNSET_HASH && 1571 if (code != UNSET_HASH &&
@@ -1587,9 +1608,9 @@ static int function2code(hashf_t func)
1587 return 0; 1608 return 0;
1588 } 1609 }
1589 1610
1590 #define SWARN(silent, s, ...) \ 1611 #define SWARN(silent, s, id, ...) \
1591 if (!(silent)) \ 1612 if (!(silent)) \
1592 reiserfs_warning (s, __VA_ARGS__) 1613 reiserfs_warning(s, id, __VA_ARGS__)
1593 1614
1594 static int reiserfs_fill_super(struct super_block *s, void *data, int silent) 1615 static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1595 { 1616 {
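The SWARN() wrapper changes in step: it gains an id parameter that is forwarded to reiserfs_warning() unchanged, and it still suppresses the message entirely on a silent mount. Call sites that have no stable message ID pass an empty string, as later hunks show:

        SWARN(silent, s, "jmacd-8", "unable to read bitmap");
        SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");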
@@ -1623,10 +1644,6 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1623 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1644 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
1624 /* Preallocate by 16 blocks (17-1) at once */ 1645 /* Preallocate by 16 blocks (17-1) at once */
1625 REISERFS_SB(s)->s_alloc_options.preallocsize = 17; 1646 REISERFS_SB(s)->s_alloc_options.preallocsize = 17;
1626 #ifdef CONFIG_REISERFS_FS_XATTR
1627 /* Initialize the rwsem for xattr dir */
1628 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem);
1629 #endif
1630 /* setup default block allocator options */ 1647 /* setup default block allocator options */
1631 reiserfs_init_alloc_options(s); 1648 reiserfs_init_alloc_options(s);
1632 1649
@@ -1641,8 +1658,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1641#endif 1658#endif
1642 1659
1643 if (blocks) { 1660 if (blocks) {
1644 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1661 SWARN(silent, s, "jmacd-7", "resize option for remount only");
1645 "for remount only");
1646 goto error; 1662 goto error;
1647 } 1663 }
1648 1664
@@ -1651,8 +1667,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1651 old_format = 1; 1667 old_format = 1;
1652 /* try new format (64-th 1k block), which can contain reiserfs super block */ 1668 /* try new format (64-th 1k block), which can contain reiserfs super block */
1653 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1669 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1654 SWARN(silent, s, 1670 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
1655 "sh-2021: reiserfs_fill_super: can not find reiserfs on %s",
1656 reiserfs_bdevname(s)); 1671 reiserfs_bdevname(s));
1657 goto error; 1672 goto error;
1658 } 1673 }
@@ -1664,13 +1679,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1664 if (s->s_bdev && s->s_bdev->bd_inode 1679 if (s->s_bdev && s->s_bdev->bd_inode
1665 && i_size_read(s->s_bdev->bd_inode) < 1680 && i_size_read(s->s_bdev->bd_inode) <
1666 sb_block_count(rs) * sb_blocksize(rs)) { 1681 sb_block_count(rs) * sb_blocksize(rs)) {
1667 SWARN(silent, s, 1682 SWARN(silent, s, "", "Filesystem cannot be "
1668 "Filesystem on %s cannot be mounted because it is bigger than the device", 1683 "mounted because it is bigger than the device");
1669 reiserfs_bdevname(s)); 1684 SWARN(silent, s, "", "You may need to run fsck "
1670 SWARN(silent, s, 1685 "or increase size of your LVM partition");
1671 "You may need to run fsck or increase size of your LVM partition"); 1686 SWARN(silent, s, "", "Or may be you forgot to "
1672 SWARN(silent, s, 1687 "reboot after fdisk when it told you to");
1673 "Or may be you forgot to reboot after fdisk when it told you to");
1674 goto error; 1688 goto error;
1675 } 1689 }
1676 1690
@@ -1678,14 +1692,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1678 sbi->s_mount_state = REISERFS_VALID_FS; 1692 sbi->s_mount_state = REISERFS_VALID_FS;
1679 1693
1680 if ((errval = reiserfs_init_bitmap_cache(s))) { 1694 if ((errval = reiserfs_init_bitmap_cache(s))) {
1681 SWARN(silent, s, 1695 SWARN(silent, s, "jmacd-8", "unable to read bitmap");
1682 "jmacd-8: reiserfs_fill_super: unable to read bitmap");
1683 goto error; 1696 goto error;
1684 } 1697 }
1685 errval = -EINVAL; 1698 errval = -EINVAL;
1686 #ifdef CONFIG_REISERFS_CHECK 1699 #ifdef CONFIG_REISERFS_CHECK
1687 SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON"); 1700 SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");
1688 SWARN(silent, s, "- it is slow mode for debugging."); 1701 SWARN(silent, s, "", "- it is slow mode for debugging.");
1689 #endif 1702 #endif
1690 1703
1691 /* make data=ordered the default */ 1704 /* make data=ordered the default */
@@ -1706,8 +1719,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1706 } 1719 }
1707 // set_device_ro(s->s_dev, 1) ; 1720 // set_device_ro(s->s_dev, 1) ;
1708 if (journal_init(s, jdev_name, old_format, commit_max_age)) { 1721 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1709 SWARN(silent, s, 1722 SWARN(silent, s, "sh-2022",
1710 "sh-2022: reiserfs_fill_super: unable to initialize journal space"); 1723 "unable to initialize journal space");
1711 goto error; 1724 goto error;
1712 } else { 1725 } else {
1713 jinit_done = 1; /* once this is set, journal_release must be called 1726 jinit_done = 1; /* once this is set, journal_release must be called
@@ -1715,8 +1728,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1715 */ 1728 */
1716 } 1729 }
1717 if (reread_meta_blocks(s)) { 1730 if (reread_meta_blocks(s)) {
1718 SWARN(silent, s, 1731 SWARN(silent, s, "jmacd-9",
1719 "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init"); 1732 "unable to reread meta blocks after journal init");
1720 goto error; 1733 goto error;
1721 } 1734 }
1722 1735
@@ -1724,8 +1737,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1724 goto error; 1737 goto error;
1725 1738
1726 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { 1739 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
1727 SWARN(silent, s, 1740 SWARN(silent, s, "clm-7000",
1728 "clm-7000: Detected readonly device, marking FS readonly"); 1741 "Detected readonly device, marking FS readonly");
1729 s->s_flags |= MS_RDONLY; 1742 s->s_flags |= MS_RDONLY;
1730 } 1743 }
1731 args.objectid = REISERFS_ROOT_OBJECTID; 1744 args.objectid = REISERFS_ROOT_OBJECTID;
@@ -1734,8 +1747,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1734 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, 1747 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
1735 reiserfs_init_locked_inode, (void *)(&args)); 1748 reiserfs_init_locked_inode, (void *)(&args));
1736 if (!root_inode) { 1749 if (!root_inode) {
1737 SWARN(silent, s, 1750 SWARN(silent, s, "jmacd-10", "get root inode failed");
1738 "jmacd-10: reiserfs_fill_super: get root inode failed");
1739 goto error; 1751 goto error;
1740 } 1752 }
1741 1753
@@ -1784,7 +1796,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1784 * avoiding corruption. -jeffm */ 1796 * avoiding corruption. -jeffm */
1785 if (bmap_would_wrap(reiserfs_bmap_count(s)) && 1797 if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
1786 sb_bmap_nr(rs) != 0) { 1798 sb_bmap_nr(rs) != 0) {
1787 reiserfs_warning(s, "super-2030: This file system " 1799 reiserfs_warning(s, "super-2030", "This file system "
1788 "claims to use %u bitmap blocks in " 1800 "claims to use %u bitmap blocks in "
1789 "its super block, but requires %u. " 1801 "its super block, but requires %u. "
1790 "Clearing to zero.", sb_bmap_nr(rs), 1802 "Clearing to zero.", sb_bmap_nr(rs),
@@ -1817,7 +1829,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1817 } else if (!silent) { 1829 } else if (!silent) {
1818 reiserfs_info(s, "using 3.5.x disk format\n"); 1830 reiserfs_info(s, "using 3.5.x disk format\n");
1819 } 1831 }
1820 } 1832 } else
1833 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
1834
1821 1835
1822 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 1836 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1823 errval = journal_end(&th, s, 1); 1837 errval = journal_end(&th, s, 1);
@@ -2031,8 +2045,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2031 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { 2045 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2032 err = reiserfs_unpack(inode, NULL); 2046 err = reiserfs_unpack(inode, NULL);
2033 if (err) { 2047 if (err) {
2034 reiserfs_warning(sb, 2048 reiserfs_warning(sb, "super-6520",
2035 "reiserfs: Unpacking tail of quota file failed" 2049 "Unpacking tail of quota file failed"
2036 " (%d). Cannot turn on quotas.", err); 2050 " (%d). Cannot turn on quotas.", err);
2037 err = -EINVAL; 2051 err = -EINVAL;
2038 goto out; 2052 goto out;
@@ -2043,8 +2057,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2043 if (REISERFS_SB(sb)->s_qf_names[type]) { 2057 if (REISERFS_SB(sb)->s_qf_names[type]) {
2044 /* Quotafile not of fs root? */ 2058 /* Quotafile not of fs root? */
2045 if (path.dentry->d_parent != sb->s_root) 2059 if (path.dentry->d_parent != sb->s_root)
2046 reiserfs_warning(sb, 2060 reiserfs_warning(sb, "super-6521",
2047 "reiserfs: Quota file not on filesystem root. " 2061 "Quota file not on filesystem root. "
2048 "Journalled quota will not work."); 2062 "Journalled quota will not work.");
2049 } 2063 }
2050 2064
@@ -2195,9 +2209,6 @@ static int __init init_reiserfs_fs(void)
2195 return ret; 2209 return ret;
2196 } 2210 }
2197 2211
2198 if ((ret = reiserfs_xattr_register_handlers()))
2199 goto failed_reiserfs_xattr_register_handlers;
2200
2201 reiserfs_proc_info_global_init(); 2212 reiserfs_proc_info_global_init();
2202 reiserfs_proc_register_global("version", 2213 reiserfs_proc_register_global("version",
2203 reiserfs_global_version_in_proc); 2214 reiserfs_global_version_in_proc);
@@ -2208,9 +2219,6 @@ static int __init init_reiserfs_fs(void)
2208 return 0; 2219 return 0;
2209 } 2220 }
2210 2221
2211 reiserfs_xattr_unregister_handlers();
2212
2213 failed_reiserfs_xattr_register_handlers:
2214 reiserfs_proc_unregister_global("version"); 2222 reiserfs_proc_unregister_global("version");
2215 reiserfs_proc_info_global_done(); 2223 reiserfs_proc_info_global_done();
2216 destroy_inodecache(); 2224 destroy_inodecache();
@@ -2220,7 +2228,6 @@ static int __init init_reiserfs_fs(void)
2220 2228
2221 static void __exit exit_reiserfs_fs(void) 2229 static void __exit exit_reiserfs_fs(void)
2222 { 2230 {
2223 reiserfs_xattr_unregister_handlers();
2224 reiserfs_proc_unregister_global("version"); 2231 reiserfs_proc_unregister_global("version");
2225 reiserfs_proc_info_global_done(); 2232 reiserfs_proc_info_global_done();
2226 unregister_filesystem(&reiserfs_fs_type); 2233 unregister_filesystem(&reiserfs_fs_type);
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index f8121a1147e8..d7f6e51bef2a 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -26,7 +26,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
26 converted item. */ 26 converted item. */
27 struct item_head ind_ih; /* new indirect item to be inserted or 27 struct item_head ind_ih; /* new indirect item to be inserted or
28 key of unfm pointer to be pasted */ 28 key of unfm pointer to be pasted */
29 int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */ 29 int blk_size, retval; /* returned value for reiserfs_insert_item and clones */
30 unp_t unfm_ptr; /* Handle on an unformatted node 30 unp_t unfm_ptr; /* Handle on an unformatted node
31 that will be inserted in the 31 that will be inserted in the
32 tree. */ 32 tree. */
@@ -35,7 +35,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
35 35
36 REISERFS_SB(sb)->s_direct2indirect++; 36 REISERFS_SB(sb)->s_direct2indirect++;
37 37
38 n_blk_size = sb->s_blocksize; 38 blk_size = sb->s_blocksize;
39 39
40 /* and key to search for append or insert pointer to the new 40 /* and key to search for append or insert pointer to the new
41 unformatted node. */ 41 unformatted node. */
@@ -46,11 +46,11 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
46 /* Set the key to search for the place for new unfm pointer */ 46 /* Set the key to search for the place for new unfm pointer */
47 make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4); 47 make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4);
48 48
49 // FIXME: we could avoid this 49 /* FIXME: we could avoid this */
50 if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) { 50 if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) {
51 reiserfs_warning(sb, "PAP-14030: direct2indirect: " 51 reiserfs_error(sb, "PAP-14030",
52 "pasted or inserted byte exists in the tree %K. " 52 "pasted or inserted byte exists in "
53 "Use fsck to repair.", &end_key); 53 "the tree %K. Use fsck to repair.", &end_key);
54 pathrelse(path); 54 pathrelse(path);
55 return -EIO; 55 return -EIO;
56 } 56 }
@@ -64,17 +64,17 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
64 set_ih_free_space(&ind_ih, 0); /* delete at nearest future */ 64 set_ih_free_space(&ind_ih, 0); /* delete at nearest future */
65 put_ih_item_len(&ind_ih, UNFM_P_SIZE); 65 put_ih_item_len(&ind_ih, UNFM_P_SIZE);
66 PATH_LAST_POSITION(path)++; 66 PATH_LAST_POSITION(path)++;
67 n_retval = 67 retval =
68 reiserfs_insert_item(th, path, &end_key, &ind_ih, inode, 68 reiserfs_insert_item(th, path, &end_key, &ind_ih, inode,
69 (char *)&unfm_ptr); 69 (char *)&unfm_ptr);
70 } else { 70 } else {
71 /* Paste into last indirect item of an object. */ 71 /* Paste into last indirect item of an object. */
72 n_retval = reiserfs_paste_into_item(th, path, &end_key, inode, 72 retval = reiserfs_paste_into_item(th, path, &end_key, inode,
73 (char *)&unfm_ptr, 73 (char *)&unfm_ptr,
74 UNFM_P_SIZE); 74 UNFM_P_SIZE);
75 } 75 }
76 if (n_retval) { 76 if (retval) {
77 return n_retval; 77 return retval;
78 } 78 }
79 // note: from here there are two keys which have matching first 79 // note: from here there are two keys which have matching first
80 // three key components. They only differ by the fourth one. 80 // three key components. They only differ by the fourth one.
@@ -92,14 +92,13 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
92 last item of the file */ 92 last item of the file */
93 if (search_for_position_by_key(sb, &end_key, path) == 93 if (search_for_position_by_key(sb, &end_key, path) ==
94 POSITION_FOUND) 94 POSITION_FOUND)
95 reiserfs_panic(sb, 95 reiserfs_panic(sb, "PAP-14050",
96 "PAP-14050: direct2indirect: "
97 "direct item (%K) not found", &end_key); 96 "direct item (%K) not found", &end_key);
98 p_le_ih = PATH_PITEM_HEAD(path); 97 p_le_ih = PATH_PITEM_HEAD(path);
99 RFALSE(!is_direct_le_ih(p_le_ih), 98 RFALSE(!is_direct_le_ih(p_le_ih),
100 "vs-14055: direct item expected(%K), found %h", 99 "vs-14055: direct item expected(%K), found %h",
101 &end_key, p_le_ih); 100 &end_key, p_le_ih);
102 tail_size = (le_ih_k_offset(p_le_ih) & (n_blk_size - 1)) 101 tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1))
103 + ih_item_len(p_le_ih) - 1; 102 + ih_item_len(p_le_ih) - 1;
104 103
105 /* we only send the unbh pointer if the buffer is not up to date. 104 /* we only send the unbh pointer if the buffer is not up to date.
@@ -114,11 +113,11 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
114 } else { 113 } else {
115 up_to_date_bh = unbh; 114 up_to_date_bh = unbh;
116 } 115 }
117 n_retval = reiserfs_delete_item(th, path, &end_key, inode, 116 retval = reiserfs_delete_item(th, path, &end_key, inode,
118 up_to_date_bh); 117 up_to_date_bh);
119 118
120 total_tail += n_retval; 119 total_tail += retval;
121 if (tail_size == n_retval) 120 if (tail_size == retval)
122 // done: file does not have direct items anymore 121 // done: file does not have direct items anymore
123 break; 122 break;
124 123
@@ -130,7 +129,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
130 unsigned pgoff = 129 unsigned pgoff =
131 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 130 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
132 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0); 131 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0);
133 memset(kaddr + pgoff, 0, n_blk_size - total_tail); 132 memset(kaddr + pgoff, 0, blk_size - total_tail);
134 kunmap_atomic(kaddr, KM_USER0); 133 kunmap_atomic(kaddr, KM_USER0);
135 } 134 }
136 135
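Both the tail_size computation and the final memset above rely on s_blocksize being a power of two, so masking with (blk_size - 1) is the same as taking the offset modulo the block size. A standalone check of that identity, with 4096 standing in for the real block size:

/* Power-of-two masking as used in the tail_size and memset lines above. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
    unsigned long blk_size = 4096;  /* must be a power of two */
    unsigned long offsets[] = { 0, 1, 4095, 4096, 123456789 };

    for (unsigned i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
        unsigned long off = offsets[i];
        /* mask form used by the kernel code */
        unsigned long masked = off & (blk_size - 1);
        assert(masked == off % blk_size);
        printf("%lu %% %lu = %lu\n", off, blk_size, masked);
    }
    return 0;
}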
@@ -171,14 +170,18 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
171 what we expect from it (number of cut bytes). But when tail remains 170 what we expect from it (number of cut bytes). But when tail remains
172 in the unformatted node, we set mode to SKIP_BALANCING and unlock 171 in the unformatted node, we set mode to SKIP_BALANCING and unlock
173 inode */ 172 inode */
174int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct treepath *p_s_path, /* path to the indirect item. */ 173int indirect2direct(struct reiserfs_transaction_handle *th,
175 const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ 174 struct inode *inode, struct page *page,
175 struct treepath *path, /* path to the indirect item. */
176 const struct cpu_key *item_key, /* Key to look for
177 * unformatted node
178 * pointer to be cut. */
176 loff_t n_new_file_size, /* New file size. */ 179 loff_t n_new_file_size, /* New file size. */
177 char *p_c_mode) 180 char *mode)
178{ 181{
179 struct super_block *p_s_sb = p_s_inode->i_sb; 182 struct super_block *sb = inode->i_sb;
180 struct item_head s_ih; 183 struct item_head s_ih;
181 unsigned long n_block_size = p_s_sb->s_blocksize; 184 unsigned long block_size = sb->s_blocksize;
182 char *tail; 185 char *tail;
183 int tail_len, round_tail_len; 186 int tail_len, round_tail_len;
184 loff_t pos, pos1; /* position of first byte of the tail */ 187 loff_t pos, pos1; /* position of first byte of the tail */
@@ -186,22 +189,22 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
186 189
187 BUG_ON(!th->t_trans_id); 190 BUG_ON(!th->t_trans_id);
188 191
189 REISERFS_SB(p_s_sb)->s_indirect2direct++; 192 REISERFS_SB(sb)->s_indirect2direct++;
190 193
191 *p_c_mode = M_SKIP_BALANCING; 194 *mode = M_SKIP_BALANCING;
192 195
193 /* store item head path points to. */ 196 /* store item head path points to. */
194 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 197 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
195 198
196 tail_len = (n_new_file_size & (n_block_size - 1)); 199 tail_len = (n_new_file_size & (block_size - 1));
197 if (get_inode_sd_version(p_s_inode) == STAT_DATA_V2) 200 if (get_inode_sd_version(inode) == STAT_DATA_V2)
198 round_tail_len = ROUND_UP(tail_len); 201 round_tail_len = ROUND_UP(tail_len);
199 else 202 else
200 round_tail_len = tail_len; 203 round_tail_len = tail_len;
201 204
202 pos = 205 pos =
203 le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - 206 le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE -
204 1) * p_s_sb->s_blocksize; 207 1) * sb->s_blocksize;
205 pos1 = pos; 208 pos1 = pos;
206 209
207 // we are protected by i_mutex. The tail can not disappear, not 210 // we are protected by i_mutex. The tail can not disappear, not
207 // we are protected by i_mutex. The tail can not disappear, not 210 // we are protected by i_mutex. The tail can not disappear, not
@@ -210,27 +213,26 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
210 213
211 tail = (char *)kmap(page); /* this can schedule */ 214 tail = (char *)kmap(page); /* this can schedule */
212 215
213 if (path_changed(&s_ih, p_s_path)) { 216 if (path_changed(&s_ih, path)) {
214 /* re-search indirect item */ 217 /* re-search indirect item */
215 if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) 218 if (search_for_position_by_key(sb, item_key, path)
216 == POSITION_NOT_FOUND) 219 == POSITION_NOT_FOUND)
217 reiserfs_panic(p_s_sb, 220 reiserfs_panic(sb, "PAP-5520",
218 "PAP-5520: indirect2direct: "
219 "item to be converted %K does not exist", 221 "item to be converted %K does not exist",
220 p_s_item_key); 222 item_key);
221 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 223 copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
222#ifdef CONFIG_REISERFS_CHECK 224#ifdef CONFIG_REISERFS_CHECK
223 pos = le_ih_k_offset(&s_ih) - 1 + 225 pos = le_ih_k_offset(&s_ih) - 1 +
224 (ih_item_len(&s_ih) / UNFM_P_SIZE - 226 (ih_item_len(&s_ih) / UNFM_P_SIZE -
225 1) * p_s_sb->s_blocksize; 227 1) * sb->s_blocksize;
226 if (pos != pos1) 228 if (pos != pos1)
227 reiserfs_panic(p_s_sb, "vs-5530: indirect2direct: " 229 reiserfs_panic(sb, "vs-5530", "tail position "
228 "tail position changed while we were reading it"); 230 "changed while we were reading it");
229#endif 231#endif
230 } 232 }
231 233
232 /* Set direct item header to insert. */ 234 /* Set direct item header to insert. */
233 make_le_item_head(&s_ih, NULL, get_inode_item_key_version(p_s_inode), 235 make_le_item_head(&s_ih, NULL, get_inode_item_key_version(inode),
234 pos1 + 1, TYPE_DIRECT, round_tail_len, 236 pos1 + 1, TYPE_DIRECT, round_tail_len,
235 0xffff /*ih_free_space */ ); 237 0xffff /*ih_free_space */ );
236 238
@@ -240,13 +242,13 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
240 */ 242 */
241 tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); 243 tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
242 244
243 PATH_LAST_POSITION(p_s_path)++; 245 PATH_LAST_POSITION(path)++;
244 246
245 key = *p_s_item_key; 247 key = *item_key;
246 set_cpu_key_k_type(&key, TYPE_DIRECT); 248 set_cpu_key_k_type(&key, TYPE_DIRECT);
247 key.key_length = 4; 249 key.key_length = 4;
248 /* Insert tail as new direct item in the tree */ 250 /* Insert tail as new direct item in the tree */
249 if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode, 251 if (reiserfs_insert_item(th, path, &key, &s_ih, inode,
250 tail ? tail : NULL) < 0) { 252 tail ? tail : NULL) < 0) {
251 /* No disk memory. So we can not convert last unformatted node 253 /* No disk memory. So we can not convert last unformatted node
252 to the direct item. In this case we used to adjust 254 to the direct item. In this case we used to adjust
@@ -255,12 +257,12 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
255 unformatted node. For now i_size is considered as guard for 257 unformatted node. For now i_size is considered as guard for
256 going out of file size */ 258 going out of file size */
257 kunmap(page); 259 kunmap(page);
258 return n_block_size - round_tail_len; 260 return block_size - round_tail_len;
259 } 261 }
260 kunmap(page); 262 kunmap(page);
261 263
262 /* make sure to get the i_blocks changes from reiserfs_insert_item */ 264 /* make sure to get the i_blocks changes from reiserfs_insert_item */
263 reiserfs_update_sd(th, p_s_inode); 265 reiserfs_update_sd(th, inode);
264 266
265 // note: we have now the same as in above direct2indirect 267 // note: we have now the same as in above direct2indirect
266 // conversion: there are two keys which have matching first three 268 // conversion: there are two keys which have matching first three
@@ -268,11 +270,11 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in
268 270
269 /* We have inserted new direct item and must remove last 271 /* We have inserted new direct item and must remove last
270 unformatted node. */ 272 unformatted node. */
271 *p_c_mode = M_CUT; 273 *mode = M_CUT;
272 274
273 /* we store position of first direct item in the in-core inode */ 275 /* we store position of first direct item in the in-core inode */
274 //mark_file_with_tail (p_s_inode, pos1 + 1); 276 /* mark_file_with_tail (inode, pos1 + 1); */
275 REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; 277 REISERFS_I(inode)->i_first_direct_byte = pos1 + 1;
276 278
277 return n_block_size - round_tail_len; 279 return block_size - round_tail_len;
278} 280}
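The pos computation in indirect2direct() above derives the byte offset of the last unformatted block from the indirect item's key offset and length. A small worked sketch with made-up values, assuming the on-disk block pointer (unp_t) is 4 bytes, which is what UNFM_P_SIZE denotes:

/* Sketch of the last-block position math in indirect2direct:
 * pos = le_ih_k_offset(&s_ih) - 1
 *       + (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * blocksize */
#include <stdio.h>

#define UNFM_P_SIZE 4   /* size of one on-disk block pointer */

int main(void)
{
    unsigned long long ih_offset = 1;         /* 1-based key offset of the item's first byte */
    unsigned long item_len = 3 * UNFM_P_SIZE; /* indirect item holding 3 block pointers */
    unsigned long blocksize = 4096;

    /* 0-based offset of the first byte covered by the last pointer */
    unsigned long long pos = ih_offset - 1 +
            (item_len / UNFM_P_SIZE - 1) * (unsigned long long)blocksize;

    printf("last unformatted block starts at byte %llu\n", pos); /* 8192 */
    return 0;
}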
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ae881ccd2f03..f83f52bae390 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -27,6 +27,10 @@
27 * these are special cases for filesystem ACLs, they are interpreted by the 27 * these are special cases for filesystem ACLs, they are interpreted by the
28 * kernel, in addition, they are negatively and positively cached and attached 28 * kernel, in addition, they are negatively and positively cached and attached
29 * to the inode so that unnecessary lookups are avoided. 29 * to the inode so that unnecessary lookups are avoided.
30 *
31 * Locking works like so:
 32 * Directory components (xattr root, xattr dir) are protected by their i_mutex.
33 * The xattrs themselves are protected by the xattr_sem.
30 */ 34 */
31 35
32#include <linux/reiserfs_fs.h> 36#include <linux/reiserfs_fs.h>
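The new comment above describes a two-level scheme: i_mutex serializes the directory components while i_xattr_sem protects the attribute bodies. A userspace sketch of the reader/writer half of that scheme, with a POSIX rwlock standing in for the kernel's rw_semaphore and a plain int standing in for the xattr body:

/* Readers (the get path) take the lock shared; writers (the set path)
 * take it exclusive, mirroring down_read()/down_write() on i_xattr_sem. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t xattr_sem = PTHREAD_RWLOCK_INITIALIZER;
static int xattr_value;

static int xattr_get(void)
{
    int v;
    pthread_rwlock_rdlock(&xattr_sem);  /* down_read() */
    v = xattr_value;
    pthread_rwlock_unlock(&xattr_sem);  /* up_read() */
    return v;
}

static void xattr_set(int v)
{
    pthread_rwlock_wrlock(&xattr_sem);  /* down_write() */
    xattr_value = v;
    pthread_rwlock_unlock(&xattr_sem);  /* up_write() */
}

int main(void)
{
    xattr_set(42);
    printf("xattr = %d\n", xattr_get());
    return 0;
}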
@@ -44,328 +48,334 @@
44#include <net/checksum.h> 48#include <net/checksum.h>
45#include <linux/smp_lock.h> 49#include <linux/smp_lock.h>
46#include <linux/stat.h> 50#include <linux/stat.h>
51#include <linux/quotaops.h>
47 52
48#define FL_READONLY 128
49#define FL_DIR_SEM_HELD 256
50#define PRIVROOT_NAME ".reiserfs_priv" 53#define PRIVROOT_NAME ".reiserfs_priv"
51#define XAROOT_NAME "xattrs" 54#define XAROOT_NAME "xattrs"
52 55
53static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
54 *prefix);
55 56
56/* Returns the dentry referring to the root of the extended attribute 57/* Helpers for inode ops. We do this so that we don't have all the VFS
57 * directory tree. If it has already been retrieved, it is used. If it 58 * overhead and also for proper i_mutex annotation.
58 * hasn't been created and the flags indicate creation is allowed, we 59 * dir->i_mutex must be held for all of them. */
59 * attempt to create it. On error, we return a pointer-encoded error. 60#ifdef CONFIG_REISERFS_FS_XATTR
60 */ 61static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
61static struct dentry *get_xa_root(struct super_block *sb, int flags)
62{ 62{
63 struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); 63 BUG_ON(!mutex_is_locked(&dir->i_mutex));
64 struct dentry *xaroot; 64 vfs_dq_init(dir);
65 return dir->i_op->create(dir, dentry, mode, NULL);
66}
67#endif
65 68
66 /* This needs to be created at mount-time */ 69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
67 if (!privroot) 70{
68 return ERR_PTR(-ENODATA); 71 BUG_ON(!mutex_is_locked(&dir->i_mutex));
72 vfs_dq_init(dir);
73 return dir->i_op->mkdir(dir, dentry, mode);
74}
69 75
70 mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); 76/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
 71 if (REISERFS_SB(sb)->xattr_root) { 77 * mutation ops aren't called during rename or splice, which are the
72 xaroot = dget(REISERFS_SB(sb)->xattr_root); 78 * only other users of I_MUTEX_CHILD. It violates the ordering, but that's
73 goto out; 79 * better than allocating another subclass just for this code. */
74 } 80static int xattr_unlink(struct inode *dir, struct dentry *dentry)
81{
82 int error;
83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
84 vfs_dq_init(dir);
75 85
76 xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); 86 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
77 if (IS_ERR(xaroot)) { 87 error = dir->i_op->unlink(dir, dentry);
78 goto out; 88 mutex_unlock(&dentry->d_inode->i_mutex);
79 } else if (!xaroot->d_inode) { 89
90 if (!error)
91 d_delete(dentry);
92 return error;
93}
94
95static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
96{
97 int error;
98 BUG_ON(!mutex_is_locked(&dir->i_mutex));
99 vfs_dq_init(dir);
100
101 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
102 dentry_unhash(dentry);
103 error = dir->i_op->rmdir(dir, dentry);
104 if (!error)
105 dentry->d_inode->i_flags |= S_DEAD;
106 mutex_unlock(&dentry->d_inode->i_mutex);
107 if (!error)
108 d_delete(dentry);
109 dput(dentry);
110
111 return error;
112}
113
114#define xattr_may_create(flags) (!flags || flags & XATTR_CREATE)
115
116/* Returns and possibly creates the xattr dir. */
117static struct dentry *lookup_or_create_dir(struct dentry *parent,
118 const char *name, int flags)
119{
120 struct dentry *dentry;
121 BUG_ON(!parent);
122
123 dentry = lookup_one_len(name, parent, strlen(name));
124 if (IS_ERR(dentry))
125 return dentry;
126 else if (!dentry->d_inode) {
80 int err = -ENODATA; 127 int err = -ENODATA;
81 if (flags == 0 || flags & XATTR_CREATE) 128
82 err = privroot->d_inode->i_op->mkdir(privroot->d_inode, 129 if (xattr_may_create(flags)) {
83 xaroot, 0700); 130 mutex_lock_nested(&parent->d_inode->i_mutex,
131 I_MUTEX_XATTR);
132 err = xattr_mkdir(parent->d_inode, dentry, 0700);
133 mutex_unlock(&parent->d_inode->i_mutex);
134 }
135
84 if (err) { 136 if (err) {
85 dput(xaroot); 137 dput(dentry);
86 xaroot = ERR_PTR(err); 138 dentry = ERR_PTR(err);
87 goto out;
88 } 139 }
89 } 140 }
90 REISERFS_SB(sb)->xattr_root = dget(xaroot);
91 141
92 out: 142 return dentry;
93 mutex_unlock(&privroot->d_inode->i_mutex); 143}
94 dput(privroot); 144
95 return xaroot; 145static struct dentry *open_xa_root(struct super_block *sb, int flags)
146{
147 struct dentry *privroot = REISERFS_SB(sb)->priv_root;
148 if (!privroot)
149 return ERR_PTR(-ENODATA);
150 return lookup_or_create_dir(privroot, XAROOT_NAME, flags);
96} 151}
97 152
98/* Opens the directory corresponding to the inode's extended attribute store.
99 * If flags allow, the tree to the directory may be created. If creation is
100 * prohibited, -ENODATA is returned. */
101static struct dentry *open_xa_dir(const struct inode *inode, int flags) 153static struct dentry *open_xa_dir(const struct inode *inode, int flags)
102{ 154{
103 struct dentry *xaroot, *xadir; 155 struct dentry *xaroot, *xadir;
104 char namebuf[17]; 156 char namebuf[17];
105 157
106 xaroot = get_xa_root(inode->i_sb, flags); 158 xaroot = open_xa_root(inode->i_sb, flags);
107 if (IS_ERR(xaroot)) 159 if (IS_ERR(xaroot))
108 return xaroot; 160 return xaroot;
109 161
110 /* ok, we have xaroot open */
111 snprintf(namebuf, sizeof(namebuf), "%X.%X", 162 snprintf(namebuf, sizeof(namebuf), "%X.%X",
112 le32_to_cpu(INODE_PKEY(inode)->k_objectid), 163 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
113 inode->i_generation); 164 inode->i_generation);
114 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
115 if (IS_ERR(xadir)) {
116 dput(xaroot);
117 return xadir;
118 }
119
120 if (!xadir->d_inode) {
121 int err;
122 if (flags == 0 || flags & XATTR_CREATE) {
123 /* Although there is nothing else trying to create this directory,
124 * another directory with the same hash may be created, so we need
125 * to protect against that */
126 err =
127 xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir,
128 0700);
129 if (err) {
130 dput(xaroot);
131 dput(xadir);
132 return ERR_PTR(err);
133 }
134 }
135 if (!xadir->d_inode) {
136 dput(xaroot);
137 dput(xadir);
138 return ERR_PTR(-ENODATA);
139 }
140 }
141 165
166 xadir = lookup_or_create_dir(xaroot, namebuf, flags);
142 dput(xaroot); 167 dput(xaroot);
143 return xadir; 168 return xadir;
169
144} 170}
145 171
146/* Returns a dentry corresponding to a specific extended attribute file 172/* The following are side effects of other operations that aren't explicitly
147 * for the inode. If flags allow, the file is created. Otherwise, a 173 * modifying extended attributes. This includes operations such as permissions
148 * valid or negative dentry, or an error is returned. */ 174 * or ownership changes, object deletions, etc. */
149static struct dentry *get_xa_file_dentry(const struct inode *inode, 175struct reiserfs_dentry_buf {
150 const char *name, int flags) 176 struct dentry *xadir;
151{ 177 int count;
152 struct dentry *xadir, *xafile; 178 struct dentry *dentries[8];
153 int err = 0; 179};
154 180
155 xadir = open_xa_dir(inode, flags); 181static int
156 if (IS_ERR(xadir)) { 182fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
157 return ERR_CAST(xadir); 183 u64 ino, unsigned int d_type)
158 } else if (!xadir->d_inode) { 184{
159 dput(xadir); 185 struct reiserfs_dentry_buf *dbuf = buf;
160 return ERR_PTR(-ENODATA); 186 struct dentry *dentry;
161 }
162 187
163 xafile = lookup_one_len(name, xadir, strlen(name)); 188 if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
164 if (IS_ERR(xafile)) { 189 return -ENOSPC;
165 dput(xadir);
166 return ERR_CAST(xafile);
167 }
168 190
169 if (xafile->d_inode) { /* file exists */ 191 if (name[0] == '.' && (name[1] == '\0' ||
170 if (flags & XATTR_CREATE) { 192 (name[1] == '.' && name[2] == '\0')))
171 err = -EEXIST; 193 return 0;
172 dput(xafile);
173 goto out;
174 }
175 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
176 goto out;
177 } else {
178 /* inode->i_mutex is down, so nothing else can try to create
179 * the same xattr */
180 err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
181 0700 | S_IFREG, NULL);
182 194
183 if (err) { 195 dentry = lookup_one_len(name, dbuf->xadir, namelen);
184 dput(xafile); 196 if (IS_ERR(dentry)) {
185 goto out; 197 return PTR_ERR(dentry);
186 } 198 } else if (!dentry->d_inode) {
199 /* A directory entry exists, but no file? */
200 reiserfs_error(dentry->d_sb, "xattr-20003",
201 "Corrupted directory: xattr %s listed but "
202 "not found for file %s.\n",
203 dentry->d_name.name, dbuf->xadir->d_name.name);
204 dput(dentry);
205 return -EIO;
187 } 206 }
188 207
189 out: 208 dbuf->dentries[dbuf->count++] = dentry;
190 dput(xadir); 209 return 0;
191 if (err)
192 xafile = ERR_PTR(err);
193 else if (!xafile->d_inode) {
194 dput(xafile);
195 xafile = ERR_PTR(-ENODATA);
196 }
197 return xafile;
198} 210}
199 211
200/* 212static void
201 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but 213cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
202 * we need to drop the path before calling the filldir struct. That
203 * would be a big performance hit to the non-xattr case, so I've copied
204 * the whole thing for now. --clm
205 *
206 * the big difference is that I go backwards through the directory,
207 * and don't mess with f->f_pos, but the idea is the same. Do some
208 * action on each and every entry in the directory.
209 *
210 * we're called with i_mutex held, so there are no worries about the directory
211 * changing underneath us.
212 */
213static int __xattr_readdir(struct inode *inode, void *dirent, filldir_t filldir)
214{ 214{
215 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 215 int i;
216 INITIALIZE_PATH(path_to_entry); 216 for (i = 0; i < buf->count; i++)
217 struct buffer_head *bh; 217 if (buf->dentries[i])
218 int entry_num; 218 dput(buf->dentries[i]);
219 struct item_head *ih, tmp_ih; 219}
220 int search_res; 220
221 char *local_buf; 221static int reiserfs_for_each_xattr(struct inode *inode,
222 loff_t next_pos; 222 int (*action)(struct dentry *, void *),
223 char small_buf[32]; /* avoid kmalloc if we can */ 223 void *data)
224 struct reiserfs_de_head *deh; 224{
225 int d_reclen; 225 struct dentry *dir;
226 char *d_name; 226 int i, err = 0;
227 off_t d_off; 227 loff_t pos = 0;
228 ino_t d_ino; 228 struct reiserfs_dentry_buf buf = {
229 struct reiserfs_dir_entry de; 229 .count = 0,
230 230 };
231 /* form key for search the next directory entry using f_pos field of
232 file structure */
233 next_pos = max_reiserfs_offset(inode);
234
235 while (1) {
236 research:
237 if (next_pos <= DOT_DOT_OFFSET)
238 break;
239 make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3);
240
241 search_res =
242 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
243 &de);
244 if (search_res == IO_ERROR) {
245 // FIXME: we could just skip part of directory which could
246 // not be read
247 pathrelse(&path_to_entry);
248 return -EIO;
249 }
250 231
251 if (search_res == NAME_NOT_FOUND) 232 /* Skip out, an xattr has no xattrs associated with it */
252 de.de_entry_num--; 233 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
234 return 0;
253 235
254 set_de_name_and_namelen(&de); 236 dir = open_xa_dir(inode, XATTR_REPLACE);
255 entry_num = de.de_entry_num; 237 if (IS_ERR(dir)) {
256 deh = &(de.de_deh[entry_num]); 238 err = PTR_ERR(dir);
239 goto out;
240 } else if (!dir->d_inode) {
241 err = 0;
242 goto out_dir;
243 }
257 244
258 bh = de.de_bh; 245 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
259 ih = de.de_ih; 246 buf.xadir = dir;
247 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
248 while ((err == 0 || err == -ENOSPC) && buf.count) {
249 err = 0;
260 250
261 if (!is_direntry_le_ih(ih)) { 251 for (i = 0; i < buf.count && buf.dentries[i]; i++) {
262 reiserfs_warning(inode->i_sb, "not direntry %h", ih); 252 int lerr = 0;
263 break; 253 struct dentry *dentry = buf.dentries[i];
264 }
265 copy_item_head(&tmp_ih, ih);
266 254
267 /* we must have found item, that is item of this directory, */ 255 if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode))
268 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), 256 lerr = action(dentry, data);
269 "vs-9000: found item %h does not match to dir we readdir %K",
270 ih, &pos_key);
271 257
272 if (deh_offset(deh) <= DOT_DOT_OFFSET) { 258 dput(dentry);
273 break; 259 buf.dentries[i] = NULL;
260 err = lerr ?: err;
274 } 261 }
262 buf.count = 0;
263 if (!err)
264 err = reiserfs_readdir_dentry(dir, &buf,
265 fill_with_dentries, &pos);
266 }
267 mutex_unlock(&dir->d_inode->i_mutex);
275 268
276 /* look for the previous entry in the directory */ 269 /* Clean up after a failed readdir */
277 next_pos = deh_offset(deh) - 1; 270 cleanup_dentry_buf(&buf);
278
279 if (!de_visible(deh))
280 /* it is hidden entry */
281 continue;
282 271
283 d_reclen = entry_length(bh, ih, entry_num); 272 if (!err) {
 285 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); 273 /* We start a transaction here to avoid an ABBA situation
285 d_off = deh_offset(deh); 274 * between the xattr root's i_mutex and the journal lock.
286 d_ino = deh_objectid(deh); 275 * This doesn't incur much additional overhead since the
276 * new transaction will just nest inside the
277 * outer transaction. */
278 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
279 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
280 struct reiserfs_transaction_handle th;
281 err = journal_begin(&th, inode->i_sb, blocks);
282 if (!err) {
283 int jerror;
284 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
285 I_MUTEX_XATTR);
286 err = action(dir, data);
287 jerror = journal_end(&th, inode->i_sb, blocks);
288 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
289 err = jerror ?: err;
290 }
291 }
292out_dir:
293 dput(dir);
294out:
295 /* -ENODATA isn't an error */
296 if (err == -ENODATA)
297 err = 0;
298 return err;
299}
287 300
288 if (!d_name[d_reclen - 1]) 301static int delete_one_xattr(struct dentry *dentry, void *data)
289 d_reclen = strlen(d_name); 302{
303 struct inode *dir = dentry->d_parent->d_inode;
290 304
291 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) { 305 /* This is the xattr dir, handle specially. */
292 /* too big to send back to VFS */ 306 if (S_ISDIR(dentry->d_inode->i_mode))
293 continue; 307 return xattr_rmdir(dir, dentry);
294 }
295 308
296 /* Ignore the .reiserfs_priv entry */ 309 return xattr_unlink(dir, dentry);
297 if (reiserfs_xattrs(inode->i_sb) && 310}
298 !old_format_only(inode->i_sb) &&
299 deh_objectid(deh) ==
300 le32_to_cpu(INODE_PKEY
301 (REISERFS_SB(inode->i_sb)->priv_root->d_inode)->
302 k_objectid))
303 continue;
304
305 if (d_reclen <= 32) {
306 local_buf = small_buf;
307 } else {
308 local_buf = kmalloc(d_reclen, GFP_NOFS);
309 if (!local_buf) {
310 pathrelse(&path_to_entry);
311 return -ENOMEM;
312 }
313 if (item_moved(&tmp_ih, &path_to_entry)) {
314 kfree(local_buf);
315 311
316 /* sigh, must retry. Do this same offset again */ 312static int chown_one_xattr(struct dentry *dentry, void *data)
317 next_pos = d_off; 313{
318 goto research; 314 struct iattr *attrs = data;
319 } 315 return reiserfs_setattr(dentry, attrs);
320 } 316}
321 317
322 // Note, that we copy name to user space via temporary 318/* No i_mutex, but the inode is unconnected. */
323 // buffer (local_buf) because filldir will block if 319int reiserfs_delete_xattrs(struct inode *inode)
324 // user space buffer is swapped out. At that time 320{
325 // entry can move to somewhere else 321 int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
326 memcpy(local_buf, d_name, d_reclen); 322 if (err)
327 323 reiserfs_warning(inode->i_sb, "jdm-20004",
328 /* the filldir function might need to start transactions, 324 "Couldn't delete all xattrs (%d)\n", err);
329 * or do who knows what. Release the path now that we've 325 return err;
330 * copied all the important stuff out of the deh 326}
331 */
332 pathrelse(&path_to_entry);
333
334 if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
335 DT_UNKNOWN) < 0) {
336 if (local_buf != small_buf) {
337 kfree(local_buf);
338 }
339 goto end;
340 }
341 if (local_buf != small_buf) {
342 kfree(local_buf);
343 }
344 } /* while */
345 327
346 end: 328/* inode->i_mutex: down */
347 pathrelse(&path_to_entry); 329int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
348 return 0; 330{
331 int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
332 if (err)
333 reiserfs_warning(inode->i_sb, "jdm-20007",
334 "Couldn't chown all xattrs (%d)\n", err);
335 return err;
349} 336}
350 337
351/* 338#ifdef CONFIG_REISERFS_FS_XATTR
352 * this could be done with dedicated readdir ops for the xattr files, 339/* Returns a dentry corresponding to a specific extended attribute file
353 * but I want to get something working asap 340 * for the inode. If flags allow, the file is created. Otherwise, a
354 * this is stolen from vfs_readdir 341 * valid or negative dentry, or an error is returned. */
355 * 342static struct dentry *xattr_lookup(struct inode *inode, const char *name,
356 */ 343 int flags)
357static
358int xattr_readdir(struct inode *inode, filldir_t filler, void *buf)
359{ 344{
360 int res = -ENOENT; 345 struct dentry *xadir, *xafile;
361 mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); 346 int err = 0;
362 if (!IS_DEADDIR(inode)) { 347
363 lock_kernel(); 348 xadir = open_xa_dir(inode, flags);
364 res = __xattr_readdir(inode, buf, filler); 349 if (IS_ERR(xadir))
365 unlock_kernel(); 350 return ERR_CAST(xadir);
351
352 xafile = lookup_one_len(name, xadir, strlen(name));
353 if (IS_ERR(xafile)) {
354 err = PTR_ERR(xafile);
355 goto out;
366 } 356 }
367 mutex_unlock(&inode->i_mutex); 357
368 return res; 358 if (xafile->d_inode && (flags & XATTR_CREATE))
359 err = -EEXIST;
360
361 if (!xafile->d_inode) {
362 err = -ENODATA;
363 if (xattr_may_create(flags)) {
364 mutex_lock_nested(&xadir->d_inode->i_mutex,
365 I_MUTEX_XATTR);
366 err = xattr_create(xadir->d_inode, xafile,
367 0700|S_IFREG);
368 mutex_unlock(&xadir->d_inode->i_mutex);
369 }
370 }
371
372 if (err)
373 dput(xafile);
374out:
375 dput(xadir);
376 if (err)
377 return ERR_PTR(err);
378 return xafile;
369} 379}
370 380
371/* Internal operations on file data */ 381/* Internal operations on file data */
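reiserfs_for_each_xattr() above drains the xattr directory in fixed-size batches: fill_with_dentries() returns -ENOSPC once its 8-slot buffer fills, the loop acts on and releases each entry, then refills until the readdir is exhausted. A self-contained sketch of that producer/consumer shape, with strings standing in for dentries and a fabricated directory listing:

#include <errno.h>
#include <stdio.h>

struct entry_buf {
    int count;
    const char *names[8];
};

/* illustrative listing, long enough to overflow the 8-slot buffer once */
static const char *fake_dir[] = {
    "user.a1", "user.a2", "user.a3", "user.a4", "user.a5",
    "user.a6", "user.a7", "user.a8", "user.a9", "user.a10",
    NULL
};

static int fill(struct entry_buf *buf, int *pos)
{
    while (fake_dir[*pos]) {
        if (buf->count == 8)
            return -ENOSPC; /* buffer full; caller drains and retries */
        buf->names[buf->count++] = fake_dir[(*pos)++];
    }
    return 0;
}

static int action(const char *name)
{
    printf("visiting %s\n", name);
    return 0;
}

int main(void)
{
    struct entry_buf buf = { .count = 0 };
    int pos = 0, err;

    err = fill(&buf, &pos);
    while ((err == 0 || err == -ENOSPC) && buf.count) {
        err = 0;
        for (int i = 0; i < buf.count; i++) {
            int lerr = action(buf.names[i]);
            err = lerr ? lerr : err; /* non-zero lerr overrides, like `lerr ?: err` */
        }
        buf.count = 0;
        if (!err)
            err = fill(&buf, &pos);
    }
    return err ? 1 : 0;
}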
@@ -375,14 +385,14 @@ static inline void reiserfs_put_page(struct page *page)
375 page_cache_release(page); 385 page_cache_release(page);
376} 386}
377 387
378static struct page *reiserfs_get_page(struct inode *dir, unsigned long n) 388static struct page *reiserfs_get_page(struct inode *dir, size_t n)
379{ 389{
380 struct address_space *mapping = dir->i_mapping; 390 struct address_space *mapping = dir->i_mapping;
381 struct page *page; 391 struct page *page;
382 /* We can deadlock if we try to free dentries, 392 /* We can deadlock if we try to free dentries,
 383 and an unlink/rmdir has just occurred - GFP_NOFS avoids this 393 and an unlink/rmdir has just occurred - GFP_NOFS avoids this
384 mapping_set_gfp_mask(mapping, GFP_NOFS); 394 mapping_set_gfp_mask(mapping, GFP_NOFS);
385 page = read_mapping_page(mapping, n, NULL); 395 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
386 if (!IS_ERR(page)) { 396 if (!IS_ERR(page)) {
387 kmap(page); 397 kmap(page);
388 if (PageError(page)) 398 if (PageError(page))
@@ -405,6 +415,45 @@ int reiserfs_commit_write(struct file *f, struct page *page,
405int reiserfs_prepare_write(struct file *f, struct page *page, 415int reiserfs_prepare_write(struct file *f, struct page *page,
406 unsigned from, unsigned to); 416 unsigned from, unsigned to);
407 417
418static void update_ctime(struct inode *inode)
419{
420 struct timespec now = current_fs_time(inode->i_sb);
421 if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink ||
422 timespec_equal(&inode->i_ctime, &now))
423 return;
424
425 inode->i_ctime = CURRENT_TIME_SEC;
426 mark_inode_dirty(inode);
427}
428
429static int lookup_and_delete_xattr(struct inode *inode, const char *name)
430{
431 int err = 0;
432 struct dentry *dentry, *xadir;
433
434 xadir = open_xa_dir(inode, XATTR_REPLACE);
435 if (IS_ERR(xadir))
436 return PTR_ERR(xadir);
437
438 dentry = lookup_one_len(name, xadir, strlen(name));
439 if (IS_ERR(dentry)) {
440 err = PTR_ERR(dentry);
441 goto out_dput;
442 }
443
444 if (dentry->d_inode) {
445 mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
446 err = xattr_unlink(xadir->d_inode, dentry);
447 mutex_unlock(&xadir->d_inode->i_mutex);
448 update_ctime(inode);
449 }
450
451 dput(dentry);
452out_dput:
453 dput(xadir);
454 return err;
455}
456
408 457
409/* Generic extended attribute operations that can be used by xa plugins */ 458/* Generic extended attribute operations that can be used by xa plugins */
410 459
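update_ctime() above only dirties the inode when the timestamp would actually change and the inode is still hashed and linked, so no-op updates never schedule writeback. A minimal model of the same guard, with a simplified stand-in inode structure:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct fake_inode {
    bool hashed;
    int nlink;
    time_t ctime;
    bool dirty;
};

static void update_ctime(struct fake_inode *inode, time_t now)
{
    /* same three early-outs as the kernel helper */
    if (!inode->hashed || !inode->nlink || inode->ctime == now)
        return;
    inode->ctime = now;
    inode->dirty = true;
}

int main(void)
{
    struct fake_inode in = { .hashed = true, .nlink = 1, .ctime = 0 };
    time_t now = time(NULL);

    update_ctime(&in, now);
    printf("dirty after change: %d\n", in.dirty); /* 1 */
    in.dirty = false;
    update_ctime(&in, now);                       /* ctime unchanged */
    printf("dirty after no-op:  %d\n", in.dirty); /* 0 */
    return 0;
}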
@@ -412,58 +461,32 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
412 * inode->i_mutex: down 461 * inode->i_mutex: down
413 */ 462 */
414int 463int
415reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, 464reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
416 size_t buffer_size, int flags) 465 struct inode *inode, const char *name,
466 const void *buffer, size_t buffer_size, int flags)
417{ 467{
418 int err = 0; 468 int err = 0;
419 struct dentry *dentry; 469 struct dentry *dentry;
420 struct page *page; 470 struct page *page;
421 char *data; 471 char *data;
422 struct address_space *mapping;
423 size_t file_pos = 0; 472 size_t file_pos = 0;
424 size_t buffer_pos = 0; 473 size_t buffer_pos = 0;
425 struct inode *xinode; 474 size_t new_size;
426 struct iattr newattrs;
427 __u32 xahash = 0; 475 __u32 xahash = 0;
428 476
429 if (get_inode_sd_version(inode) == STAT_DATA_V1) 477 if (get_inode_sd_version(inode) == STAT_DATA_V1)
430 return -EOPNOTSUPP; 478 return -EOPNOTSUPP;
431 479
432 /* Empty xattrs are ok, they're just empty files, no hash */ 480 if (!buffer)
433 if (buffer && buffer_size) 481 return lookup_and_delete_xattr(inode, name);
434 xahash = xattr_hash(buffer, buffer_size);
435 482
436 open_file: 483 dentry = xattr_lookup(inode, name, flags);
437 dentry = get_xa_file_dentry(inode, name, flags); 484 if (IS_ERR(dentry))
438 if (IS_ERR(dentry)) { 485 return PTR_ERR(dentry);
439 err = PTR_ERR(dentry);
440 goto out;
441 }
442
443 xinode = dentry->d_inode;
444 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
445 486
446 /* we need to copy it off.. */ 487 down_write(&REISERFS_I(inode)->i_xattr_sem);
447 if (xinode->i_nlink > 1) {
448 dput(dentry);
449 err = reiserfs_xattr_del(inode, name);
450 if (err < 0)
451 goto out;
452 /* We just killed the old one, we're not replacing anymore */
453 if (flags & XATTR_REPLACE)
454 flags &= ~XATTR_REPLACE;
455 goto open_file;
456 }
457 488
458 /* Resize it so we're ok to write there */ 489 xahash = xattr_hash(buffer, buffer_size);
459 newattrs.ia_size = buffer_size;
460 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
461 mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
462 err = notify_change(dentry, &newattrs);
463 if (err)
464 goto out_filp;
465
466 mapping = xinode->i_mapping;
467 while (buffer_pos < buffer_size || buffer_pos == 0) { 490 while (buffer_pos < buffer_size || buffer_pos == 0) {
468 size_t chunk; 491 size_t chunk;
469 size_t skip = 0; 492 size_t skip = 0;
@@ -473,10 +496,10 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
473 else 496 else
474 chunk = buffer_size - buffer_pos; 497 chunk = buffer_size - buffer_pos;
475 498
476 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT); 499 page = reiserfs_get_page(dentry->d_inode, file_pos);
477 if (IS_ERR(page)) { 500 if (IS_ERR(page)) {
478 err = PTR_ERR(page); 501 err = PTR_ERR(page);
479 goto out_filp; 502 goto out_unlock;
480 } 503 }
481 504
482 lock_page(page); 505 lock_page(page);
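The loop in reiserfs_xattr_set_handle() above stores the value in page-sized chunks, and the value sits behind a small reiserfs_xattr_header at the start of the file, so the first page's data chunk is shorter by the header size. A userspace walk-through of the same chunking arithmetic, using a deliberately tiny page size so every case shows up:

#include <stdio.h>

#define PAGE_SIZE 16    /* tiny page to make the chunking visible */

struct xattr_header { unsigned magic, hash; }; /* 8-byte stand-in header */

int main(void)
{
    const char buffer[] = "the quick brown fox jumps over the lazy dog";
    size_t buffer_size = sizeof(buffer) - 1;
    size_t file_pos = 0, buffer_pos = 0;

    while (buffer_pos < buffer_size || buffer_pos == 0) {
        size_t chunk = buffer_size - buffer_pos;
        size_t skip = 0;

        if (chunk > PAGE_SIZE)
            chunk = PAGE_SIZE;

        if (file_pos == 0) {
            /* first page: the header goes in front of the data */
            skip = sizeof(struct xattr_header);
            if (chunk + skip > PAGE_SIZE)
                chunk = PAGE_SIZE - skip;
        }

        printf("page %zu: header=%zu data bytes=%zu (\"%.*s\")\n",
               file_pos / PAGE_SIZE, skip, chunk,
               (int)chunk, buffer + buffer_pos);

        file_pos += chunk + skip;
        buffer_pos += chunk;
    }
    return 0;
}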
@@ -510,28 +533,61 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
510 break; 533 break;
511 } 534 }
512 535
513 /* We can't mark the inode dirty if it's not hashed. This is the case 536 new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
514 * when we're inheriting the default ACL. If we dirty it, the inode 537 if (!err && new_size < i_size_read(dentry->d_inode)) {
515 * gets marked dirty, but won't (ever) make it onto the dirty list until 538 struct iattr newattrs = {
516 * it's synced explicitly to clear I_DIRTY. This is bad. */ 539 .ia_ctime = current_fs_time(inode->i_sb),
517 if (!hlist_unhashed(&inode->i_hash)) { 540 .ia_size = buffer_size,
518 inode->i_ctime = CURRENT_TIME_SEC; 541 .ia_valid = ATTR_SIZE | ATTR_CTIME,
519 mark_inode_dirty(inode); 542 };
543 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
544 down_write(&dentry->d_inode->i_alloc_sem);
545 err = reiserfs_setattr(dentry, &newattrs);
546 up_write(&dentry->d_inode->i_alloc_sem);
547 mutex_unlock(&dentry->d_inode->i_mutex);
548 } else
549 update_ctime(inode);
550out_unlock:
551 up_write(&REISERFS_I(inode)->i_xattr_sem);
552 dput(dentry);
553 return err;
554}
555
556/* We need to start a transaction to maintain lock ordering */
557int reiserfs_xattr_set(struct inode *inode, const char *name,
558 const void *buffer, size_t buffer_size, int flags)
559{
560
561 struct reiserfs_transaction_handle th;
562 int error, error2;
563 size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size);
564
565 if (!(flags & XATTR_REPLACE))
566 jbegin_count += reiserfs_xattr_jcreate_nblocks(inode);
567
568 reiserfs_write_lock(inode->i_sb);
569 error = journal_begin(&th, inode->i_sb, jbegin_count);
570 if (error) {
571 reiserfs_write_unlock(inode->i_sb);
572 return error;
520 } 573 }
521 574
522 out_filp: 575 error = reiserfs_xattr_set_handle(&th, inode, name,
523 mutex_unlock(&xinode->i_mutex); 576 buffer, buffer_size, flags);
524 dput(dentry);
525 577
526 out: 578 error2 = journal_end(&th, inode->i_sb, jbegin_count);
527 return err; 579 if (error == 0)
580 error = error2;
581 reiserfs_write_unlock(inode->i_sb);
582
583 return error;
528} 584}
529 585
530/* 586/*
531 * inode->i_mutex: down 587 * inode->i_mutex: down
532 */ 588 */
533int 589int
534reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, 590reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
535 size_t buffer_size) 591 size_t buffer_size)
536{ 592{
537 ssize_t err = 0; 593 ssize_t err = 0;
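reiserfs_xattr_set() above wraps the real work in journal_begin()/journal_end() and reports the first failure: journal_end() must still run when the set fails, but its error only surfaces if nothing failed earlier. The shape of that pattern, with stub functions in place of the journal API:

#include <stdio.h>

static int journal_begin(void) { puts("begin txn"); return 0; }
static int journal_end(void)   { puts("end txn");   return 0; }
static int do_set(void)        { puts("set xattr"); return 0; }

static int xattr_set(void)
{
    int error, error2;

    error = journal_begin();
    if (error)
        return error;   /* nothing to unwind yet */

    error = do_set();

    /* journal_end must run even if do_set failed ... */
    error2 = journal_end();
    if (error == 0)
        error = error2; /* ... but the first error wins */

    return error;
}

int main(void)
{
    return xattr_set() ? 1 : 0;
}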
@@ -540,7 +596,6 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
540 size_t file_pos = 0; 596 size_t file_pos = 0;
541 size_t buffer_pos = 0; 597 size_t buffer_pos = 0;
542 struct page *page; 598 struct page *page;
543 struct inode *xinode;
544 __u32 hash = 0; 599 __u32 hash = 0;
545 600
546 if (name == NULL) 601 if (name == NULL)
@@ -551,25 +606,25 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
551 if (get_inode_sd_version(inode) == STAT_DATA_V1) 606 if (get_inode_sd_version(inode) == STAT_DATA_V1)
552 return -EOPNOTSUPP; 607 return -EOPNOTSUPP;
553 608
554 dentry = get_xa_file_dentry(inode, name, FL_READONLY); 609 dentry = xattr_lookup(inode, name, XATTR_REPLACE);
555 if (IS_ERR(dentry)) { 610 if (IS_ERR(dentry)) {
556 err = PTR_ERR(dentry); 611 err = PTR_ERR(dentry);
557 goto out; 612 goto out;
558 } 613 }
559 614
560 xinode = dentry->d_inode; 615 down_read(&REISERFS_I(inode)->i_xattr_sem);
561 isize = xinode->i_size; 616
562 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 617 isize = i_size_read(dentry->d_inode);
563 618
564 /* Just return the size needed */ 619 /* Just return the size needed */
565 if (buffer == NULL) { 620 if (buffer == NULL) {
566 err = isize - sizeof(struct reiserfs_xattr_header); 621 err = isize - sizeof(struct reiserfs_xattr_header);
567 goto out_dput; 622 goto out_unlock;
568 } 623 }
569 624
570 if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) { 625 if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
571 err = -ERANGE; 626 err = -ERANGE;
572 goto out_dput; 627 goto out_unlock;
573 } 628 }
574 629
575 while (file_pos < isize) { 630 while (file_pos < isize) {
@@ -581,10 +636,10 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
581 else 636 else
582 chunk = isize - file_pos; 637 chunk = isize - file_pos;
583 638
584 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT); 639 page = reiserfs_get_page(dentry->d_inode, file_pos);
585 if (IS_ERR(page)) { 640 if (IS_ERR(page)) {
586 err = PTR_ERR(page); 641 err = PTR_ERR(page);
587 goto out_dput; 642 goto out_unlock;
588 } 643 }
589 644
590 lock_page(page); 645 lock_page(page);
@@ -598,12 +653,12 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
598 if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) { 653 if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
599 unlock_page(page); 654 unlock_page(page);
600 reiserfs_put_page(page); 655 reiserfs_put_page(page);
601 reiserfs_warning(inode->i_sb, 656 reiserfs_warning(inode->i_sb, "jdm-20001",
602 "Invalid magic for xattr (%s) " 657 "Invalid magic for xattr (%s) "
603 "associated with %k", name, 658 "associated with %k", name,
604 INODE_PKEY(inode)); 659 INODE_PKEY(inode));
605 err = -EIO; 660 err = -EIO;
606 goto out_dput; 661 goto out_unlock;
607 } 662 }
608 hash = le32_to_cpu(rxh->h_hash); 663 hash = le32_to_cpu(rxh->h_hash);
609 } 664 }
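reiserfs_xattr_get() validates each attribute against the header stored at the front of the xattr file: a magic number plus a hash of the body, returning -EIO on either mismatch, as the two hunks above and below show. A compact sketch of that check; the magic value and the hash function are toy stand-ins for REISERFS_XATTR_MAGIC and xattr_hash():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XATTR_MAGIC 0x52465841u /* arbitrary demo value */

struct xattr_header { uint32_t magic; uint32_t hash; };

static uint32_t demo_hash(const unsigned char *p, size_t len)
{
    uint32_t h = 0;
    while (len--)
        h = h * 31 + *p++;
    return h;
}

int main(void)
{
    unsigned char blob[64];
    const char *value = "hello";
    struct xattr_header h = {
        XATTR_MAGIC, demo_hash((const unsigned char *)value, 5)
    };

    /* write side: header first, body after it */
    memcpy(blob, &h, sizeof(h));
    memcpy(blob + sizeof(h), value, 5);

    /* read side: reject on bad magic or mismatched hash, as in -EIO above */
    struct xattr_header rh;
    memcpy(&rh, blob, sizeof(rh));
    if (rh.magic != XATTR_MAGIC ||
        rh.hash != demo_hash(blob + sizeof(rh), 5)) {
        fprintf(stderr, "corrupt xattr\n");
        return 1;
    }
    puts("xattr verified");
    return 0;
}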
@@ -618,256 +673,83 @@ reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
618 673
619 if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) != 674 if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
620 hash) { 675 hash) {
621 reiserfs_warning(inode->i_sb, 676 reiserfs_warning(inode->i_sb, "jdm-20002",
622 "Invalid hash for xattr (%s) associated " 677 "Invalid hash for xattr (%s) associated "
623 "with %k", name, INODE_PKEY(inode)); 678 "with %k", name, INODE_PKEY(inode));
624 err = -EIO; 679 err = -EIO;
625 } 680 }
626 681
627 out_dput: 682out_unlock:
683 up_read(&REISERFS_I(inode)->i_xattr_sem);
628 dput(dentry); 684 dput(dentry);
629 685
630 out: 686out:
631 return err; 687 return err;
632} 688}
633 689
634static int 690/* Actual operations that are exported to VFS-land */
635__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen) 691struct xattr_handler *reiserfs_xattr_handlers[] = {
636{ 692 &reiserfs_xattr_user_handler,
637 struct dentry *dentry; 693 &reiserfs_xattr_trusted_handler,
638 struct inode *dir = xadir->d_inode; 694#ifdef CONFIG_REISERFS_FS_SECURITY
639 int err = 0; 695 &reiserfs_xattr_security_handler,
640 696#endif
641 dentry = lookup_one_len(name, xadir, namelen); 697#ifdef CONFIG_REISERFS_FS_POSIX_ACL
642 if (IS_ERR(dentry)) { 698 &reiserfs_posix_acl_access_handler,
643 err = PTR_ERR(dentry); 699 &reiserfs_posix_acl_default_handler,
644 goto out; 700#endif
645 } else if (!dentry->d_inode) { 701 NULL
646 err = -ENODATA;
647 goto out_file;
648 }
649
650 /* Skip directories.. */
651 if (S_ISDIR(dentry->d_inode->i_mode))
652 goto out_file;
653
654 if (!is_reiserfs_priv_object(dentry->d_inode)) {
655 reiserfs_warning(dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have "
656 "priv flag set [parent is %sset].",
657 le32_to_cpu(INODE_PKEY(dentry->d_inode)->
658 k_objectid), xadir->d_name.len,
659 xadir->d_name.name, namelen, name,
660 is_reiserfs_priv_object(xadir->
661 d_inode) ? "" :
662 "not ");
663 dput(dentry);
664 return -EIO;
665 }
666
667 err = dir->i_op->unlink(dir, dentry);
668 if (!err)
669 d_delete(dentry);
670
671 out_file:
672 dput(dentry);
673
674 out:
675 return err;
676}
677
678int reiserfs_xattr_del(struct inode *inode, const char *name)
679{
680 struct dentry *dir;
681 int err;
682
683 dir = open_xa_dir(inode, FL_READONLY);
684 if (IS_ERR(dir)) {
685 err = PTR_ERR(dir);
686 goto out;
687 }
688
689 err = __reiserfs_xattr_del(dir, name, strlen(name));
690 dput(dir);
691
692 if (!err) {
693 inode->i_ctime = CURRENT_TIME_SEC;
694 mark_inode_dirty(inode);
695 }
696
697 out:
698 return err;
699}
700
701/* The following are side effects of other operations that aren't explicitly
702 * modifying extended attributes. This includes operations such as permissions
703 * or ownership changes, object deletions, etc. */
704
705static int
706reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
707 loff_t offset, u64 ino, unsigned int d_type)
708{
709 struct dentry *xadir = (struct dentry *)buf;
710
711 return __reiserfs_xattr_del(xadir, name, namelen);
712
713}
714
715/* This is called w/ inode->i_mutex downed */
716int reiserfs_delete_xattrs(struct inode *inode)
717{
718 struct dentry *dir, *root;
719 int err = 0;
720
721 /* Skip out, an xattr has no xattrs associated with it */
722 if (is_reiserfs_priv_object(inode) ||
723 get_inode_sd_version(inode) == STAT_DATA_V1 ||
724 !reiserfs_xattrs(inode->i_sb)) {
725 return 0;
726 }
727 reiserfs_read_lock_xattrs(inode->i_sb);
728 dir = open_xa_dir(inode, FL_READONLY);
729 reiserfs_read_unlock_xattrs(inode->i_sb);
730 if (IS_ERR(dir)) {
731 err = PTR_ERR(dir);
732 goto out;
733 } else if (!dir->d_inode) {
734 dput(dir);
735 return 0;
736 }
737
738 lock_kernel();
739 err = xattr_readdir(dir->d_inode, reiserfs_delete_xattrs_filler, dir);
740 if (err) {
741 unlock_kernel();
742 goto out_dir;
743 }
744
745 /* Leftovers besides . and .. -- that's not good. */
746 if (dir->d_inode->i_nlink <= 2) {
747 root = get_xa_root(inode->i_sb, XATTR_REPLACE);
748 reiserfs_write_lock_xattrs(inode->i_sb);
749 err = vfs_rmdir(root->d_inode, dir);
750 reiserfs_write_unlock_xattrs(inode->i_sb);
751 dput(root);
752 } else {
753 reiserfs_warning(inode->i_sb,
754 "Couldn't remove all entries in directory");
755 }
756 unlock_kernel();
757
758 out_dir:
759 dput(dir);
760
761 out:
762 if (!err)
763 REISERFS_I(inode)->i_flags =
764 REISERFS_I(inode)->i_flags & ~i_has_xattr_dir;
765 return err;
766}
767
768struct reiserfs_chown_buf {
769 struct inode *inode;
770 struct dentry *xadir;
771 struct iattr *attrs;
772}; 702};
773 703
774/* XXX: If there is a better way to do this, I'd love to hear about it */ 704/*
775static int 705 * In order to implement different sets of xattr operations for each xattr
776reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen, 706 * prefix with the generic xattr API, a filesystem should create a
777 loff_t offset, u64 ino, unsigned int d_type) 707 * null-terminated array of struct xattr_handler (one for each prefix) and
778{ 708 * hang a pointer to it off of the s_xattr field of the superblock.
779 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; 709 *
780 struct dentry *xafile, *xadir = chown_buf->xadir; 710 * The generic_fooxattr() functions will use this list to dispatch xattr
781 struct iattr *attrs = chown_buf->attrs; 711 * operations to the correct xattr_handler.
782 int err = 0; 712 */
783 713#define for_each_xattr_handler(handlers, handler) \
784 xafile = lookup_one_len(name, xadir, namelen); 714 for ((handler) = *(handlers)++; \
785 if (IS_ERR(xafile)) 715 (handler) != NULL; \
786 return PTR_ERR(xafile); 716 (handler) = *(handlers)++)
787 else if (!xafile->d_inode) {
788 dput(xafile);
789 return -ENODATA;
790 }
791
792 if (!S_ISDIR(xafile->d_inode->i_mode))
793 err = notify_change(xafile, attrs);
794 dput(xafile);
795
796 return err;
797}
798 717
799int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) 718/* This is the implementation for the xattr plugin infrastructure */
719static inline struct xattr_handler *
720find_xattr_handler_prefix(struct xattr_handler **handlers,
721 const char *name)
800{ 722{
801 struct dentry *dir; 723 struct xattr_handler *xah;
802 int err = 0;
803 struct reiserfs_chown_buf buf;
804 unsigned int ia_valid = attrs->ia_valid;
805 724
806 /* Skip out, an xattr has no xattrs associated with it */ 725 if (!handlers)
807 if (is_reiserfs_priv_object(inode) || 726 return NULL;
808 get_inode_sd_version(inode) == STAT_DATA_V1 ||
809 !reiserfs_xattrs(inode->i_sb)) {
810 return 0;
811 }
812 reiserfs_read_lock_xattrs(inode->i_sb);
813 dir = open_xa_dir(inode, FL_READONLY);
814 reiserfs_read_unlock_xattrs(inode->i_sb);
815 if (IS_ERR(dir)) {
816 if (PTR_ERR(dir) != -ENODATA)
817 err = PTR_ERR(dir);
818 goto out;
819 } else if (!dir->d_inode) {
820 dput(dir);
821 goto out;
822 }
823 727
824 lock_kernel(); 728 for_each_xattr_handler(handlers, xah) {
825 729 if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
826 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME); 730 break;
827 buf.xadir = dir;
828 buf.attrs = attrs;
829 buf.inode = inode;
830
831 err = xattr_readdir(dir->d_inode, reiserfs_chown_xattrs_filler, &buf);
832 if (err) {
833 unlock_kernel();
834 goto out_dir;
835 } 731 }
836 732
837 err = notify_change(dir, attrs); 733 return xah;
838 unlock_kernel();
839
840 out_dir:
841 dput(dir);
842
843 out:
844 attrs->ia_valid = ia_valid;
845 return err;
846} 734}
847 735
848/* Actual operations that are exported to VFS-land */
849 736
850/* 737/*
851 * Inode operation getxattr() 738 * Inode operation getxattr()
852 * Preliminary locking: we down dentry->d_inode->i_mutex
853 */ 739 */
854ssize_t 740ssize_t
855reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, 741reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
856 size_t size) 742 size_t size)
857{ 743{
858 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); 744 struct inode *inode = dentry->d_inode;
859 int err; 745 struct xattr_handler *handler;
860 746
861 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 747 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
862 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 748
749 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
863 return -EOPNOTSUPP; 750 return -EOPNOTSUPP;
864 751
865 reiserfs_read_lock_xattr_i(dentry->d_inode); 752 return handler->get(inode, name, buffer, size);
866 reiserfs_read_lock_xattrs(dentry->d_sb);
867 err = xah->get(dentry->d_inode, name, buffer, size);
868 reiserfs_read_unlock_xattrs(dentry->d_sb);
869 reiserfs_read_unlock_xattr_i(dentry->d_inode);
870 return err;
871} 753}
872 754
873/* 755/*
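The dispatch above replaces the old registration list with a NULL-terminated handler table hung off s_xattr, walked by the for_each_xattr_handler() macro until a prefix matches. A runnable userspace reduction of the same lookup; the two handlers here are toy stand-ins:

#include <stdio.h>
#include <string.h>

struct xattr_handler {
    const char *prefix;
    int (*get)(const char *name);
};

static int user_get(const char *name)    { printf("user handler: %s\n", name);    return 0; }
static int trusted_get(const char *name) { printf("trusted handler: %s\n", name); return 0; }

static struct xattr_handler user_handler    = { "user.",    user_get };
static struct xattr_handler trusted_handler = { "trusted.", trusted_get };

static struct xattr_handler *handlers[] = {
    &user_handler, &trusted_handler, NULL
};

/* same shape as the kernel macro: advance through the table until NULL */
#define for_each_xattr_handler(handlers, handler)   \
    for ((handler) = *(handlers)++;                 \
         (handler) != NULL;                         \
         (handler) = *(handlers)++)

static struct xattr_handler *
find_xattr_handler_prefix(struct xattr_handler **hs, const char *name)
{
    struct xattr_handler *xah;

    if (!hs)
        return NULL;
    for_each_xattr_handler(hs, xah)
        if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
            break;
    return xah; /* NULL if the loop ran off the end of the table */
}

int main(void)
{
    struct xattr_handler *h =
        find_xattr_handler_prefix(handlers, "user.comment");

    if (h)
        h->get("user.comment");
    else
        puts("-EOPNOTSUPP"); /* no handler for this prefix */
    return 0;
}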
@@ -879,27 +761,15 @@ int
879reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, 761reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
880 size_t size, int flags) 762 size_t size, int flags)
881{ 763{
882 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); 764 struct inode *inode = dentry->d_inode;
883 int err; 765 struct xattr_handler *handler;
884 int lock;
885 766
886 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 767 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
887 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 768
769 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
888 return -EOPNOTSUPP; 770 return -EOPNOTSUPP;
889 771
890 reiserfs_write_lock_xattr_i(dentry->d_inode); 772 return handler->set(inode, name, value, size, flags);
891 lock = !has_xattr_dir(dentry->d_inode);
892 if (lock)
893 reiserfs_write_lock_xattrs(dentry->d_sb);
894 else
895 reiserfs_read_lock_xattrs(dentry->d_sb);
896 err = xah->set(dentry->d_inode, name, value, size, flags);
897 if (lock)
898 reiserfs_write_unlock_xattrs(dentry->d_sb);
899 else
900 reiserfs_read_unlock_xattrs(dentry->d_sb);
901 reiserfs_write_unlock_xattr_i(dentry->d_inode);
902 return err;
903} 773}
904 774
905/* 775/*
@@ -909,86 +779,66 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
909 */ 779 */
910int reiserfs_removexattr(struct dentry *dentry, const char *name) 780int reiserfs_removexattr(struct dentry *dentry, const char *name)
911{ 781{
912 int err; 782 struct inode *inode = dentry->d_inode;
913 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name); 783 struct xattr_handler *handler;
784 handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
914 785
915 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 786 if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
916 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
917 return -EOPNOTSUPP; 787 return -EOPNOTSUPP;
918 788
919 reiserfs_write_lock_xattr_i(dentry->d_inode); 789 return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
920 reiserfs_read_lock_xattrs(dentry->d_sb);
921
922 /* Deletion pre-operation */
923 if (xah->del) {
924 err = xah->del(dentry->d_inode, name);
925 if (err)
926 goto out;
927 }
928
929 err = reiserfs_xattr_del(dentry->d_inode, name);
930
931 dentry->d_inode->i_ctime = CURRENT_TIME_SEC;
932 mark_inode_dirty(dentry->d_inode);
933
934 out:
935 reiserfs_read_unlock_xattrs(dentry->d_sb);
936 reiserfs_write_unlock_xattr_i(dentry->d_inode);
937 return err;
938} 790}
939 791
940/* This is what filldir will use: 792struct listxattr_buf {
941 * r_pos will always contain the amount of space required for the entire 793 size_t size;
942 * list. If r_pos becomes larger than r_size, we need more space and we 794 size_t pos;
943 * return an error indicating this. If r_pos is less than r_size, then we've 795 char *buf;
944 * filled the buffer successfully and we return success */ 796 struct inode *inode;
945struct reiserfs_listxattr_buf {
946 int r_pos;
947 int r_size;
948 char *r_buf;
949 struct inode *r_inode;
950}; 797};
951 798
952static int 799static int listxattr_filler(void *buf, const char *name, int namelen,
953reiserfs_listxattr_filler(void *buf, const char *name, int namelen, 800 loff_t offset, u64 ino, unsigned int d_type)
954 loff_t offset, u64 ino, unsigned int d_type)
955{ 801{
956 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; 802 struct listxattr_buf *b = (struct listxattr_buf *)buf;
957 int len = 0; 803 size_t size;
958 if (name[0] != '.' 804 if (name[0] != '.' ||
959 || (namelen != 1 && (name[1] != '.' || namelen != 2))) { 805 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
960 struct reiserfs_xattr_handler *xah = 806 struct xattr_handler *handler;
961 find_xattr_handler_prefix(name); 807 handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr,
962 if (!xah) 808 name);
963 return 0; /* Unsupported xattr name, skip it */ 809 if (!handler) /* Unsupported xattr name */
964 810 return 0;
965 /* We call ->list() twice because the operation isn't required to just 811 if (b->buf) {
966 * return the name back - we want to make sure we have enough space */ 812 size = handler->list(b->inode, b->buf + b->pos,
967 len += xah->list(b->r_inode, name, namelen, NULL); 813 b->size, name, namelen);
968 814 if (size > b->size)
969 if (len) { 815 return -ERANGE;
970 if (b->r_pos + len + 1 <= b->r_size) { 816 } else {
971 char *p = b->r_buf + b->r_pos; 817 size = handler->list(b->inode, NULL, 0, name, namelen);
972 p += xah->list(b->r_inode, name, namelen, p);
973 *p++ = '\0';
974 }
975 b->r_pos += len + 1;
976 } 818 }
977 }
978 819
820 b->pos += size;
821 }
979 return 0; 822 return 0;
980} 823}
981 824
982/* 825/*
983 * Inode operation listxattr() 826 * Inode operation listxattr()
984 * 827 *
985 * Preliminary locking: we down dentry->d_inode->i_mutex 828 * We totally ignore the generic listxattr here because it would be stupid
829 * not to. Since the xattrs are organized in a directory, we can just
830 * readdir to find them.
986 */ 831 */
987ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) 832ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
988{ 833{
989 struct dentry *dir; 834 struct dentry *dir;
990 int err = 0; 835 int err = 0;
991 struct reiserfs_listxattr_buf buf; 836 loff_t pos = 0;
837 struct listxattr_buf buf = {
838 .inode = dentry->d_inode,
839 .buf = buffer,
840 .size = buffer ? size : 0,
841 };
992 842
993 if (!dentry->d_inode) 843 if (!dentry->d_inode)
994 return -EINVAL; 844 return -EINVAL;
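listxattr_filler() above implements the standard listxattr(2) sizing protocol: with a NULL buffer it only accumulates the space required, otherwise it packs each NUL-terminated name and fails with -ERANGE when the buffer is too small. A self-contained model of that two-pass protocol, with an illustrative name list:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static const char *names[] = { "user.mime_type", "user.charset" };

/* mirrors the filler: each name is emitted NUL-terminated */
static long demo_listxattr(char *buf, size_t size)
{
    size_t pos = 0;

    for (unsigned i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        size_t len = strlen(names[i]) + 1; /* include the NUL */

        if (buf) {
            if (pos + len > size)
                return -ERANGE;
            memcpy(buf + pos, names[i], len);
        }
        pos += len;
    }
    return (long)pos;
}

int main(void)
{
    long needed = demo_listxattr(NULL, 0); /* pass 1: size query */
    char buf[64];

    printf("need %ld bytes\n", needed);
    if (needed <= (long)sizeof(buf) && demo_listxattr(buf, sizeof(buf)) > 0) {
        /* names are packed back to back, each NUL-terminated */
        for (char *p = buf; p < buf + needed; p += strlen(p) + 1)
            printf("xattr: %s\n", p);
    }
    return 0;
}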
@@ -997,130 +847,104 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
 	    get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	reiserfs_read_lock_xattr_i(dentry->d_inode);
-	reiserfs_read_lock_xattrs(dentry->d_sb);
-	dir = open_xa_dir(dentry->d_inode, FL_READONLY);
-	reiserfs_read_unlock_xattrs(dentry->d_sb);
+	dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE);
 	if (IS_ERR(dir)) {
 		err = PTR_ERR(dir);
 		if (err == -ENODATA)
 			err = 0;  /* Not an error if there aren't any xattrs */
 		goto out;
 	}
 
-	buf.r_buf = buffer;
-	buf.r_size = buffer ? size : 0;
-	buf.r_pos = 0;
-	buf.r_inode = dentry->d_inode;
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+	err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos);
+	mutex_unlock(&dir->d_inode->i_mutex);
 
-	REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir;
-
-	err = xattr_readdir(dir->d_inode, reiserfs_listxattr_filler, &buf);
-	if (err)
-		goto out_dir;
-
-	if (buf.r_pos > buf.r_size && buffer != NULL)
-		err = -ERANGE;
-	else
-		err = buf.r_pos;
+	if (!err)
+		err = buf.pos;
 
-      out_dir:
 	dput(dir);
-
-      out:
-	reiserfs_read_unlock_xattr_i(dentry->d_inode);
+out:
 	return err;
 }
 
-/* This is the implementation for the xattr plugin infrastructure */
-static LIST_HEAD(xattr_handlers);
-static DEFINE_RWLOCK(handler_lock);
-
-static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
-								*prefix)
+static int reiserfs_check_acl(struct inode *inode, int mask)
 {
-	struct reiserfs_xattr_handler *xah = NULL;
-	struct list_head *p;
+	struct posix_acl *acl;
+	int error = -EAGAIN; /* do regular unix permission checks by default */
 
-	read_lock(&handler_lock);
-	list_for_each(p, &xattr_handlers) {
-		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
-		if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0)
-			break;
-		xah = NULL;
+	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (acl) {
+		if (!IS_ERR(acl)) {
+			error = posix_acl_permission(inode, acl, mask);
+			posix_acl_release(acl);
+		} else if (PTR_ERR(acl) != -ENODATA)
+			error = PTR_ERR(acl);
 	}
 
-	read_unlock(&handler_lock);
-	return xah;
+	return error;
 }
 
-static void __unregister_handlers(void)
+int reiserfs_permission(struct inode *inode, int mask)
 {
-	struct reiserfs_xattr_handler *xah;
-	struct list_head *p, *tmp;
-
-	list_for_each_safe(p, tmp, &xattr_handlers) {
-		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
-		if (xah->exit)
-			xah->exit();
-
-		list_del_init(p);
-	}
-	INIT_LIST_HEAD(&xattr_handlers);
+	/*
+	 * We don't do permission checks on the internal objects.
+	 * Permissions are determined by the "owning" object.
+	 */
+	if (IS_PRIVATE(inode))
+		return 0;
+	/*
+	 * Stat data v1 doesn't support ACLs.
+	 */
+	if (get_inode_sd_version(inode) == STAT_DATA_V1)
+		return generic_permission(inode, mask, NULL);
+	else
+		return generic_permission(inode, mask, reiserfs_check_acl);
 }
 
-int __init reiserfs_xattr_register_handlers(void)
+static int create_privroot(struct dentry *dentry)
 {
-	int err = 0;
-	struct reiserfs_xattr_handler *xah;
-	struct list_head *p;
-
-	write_lock(&handler_lock);
-
-	/* If we're already initialized, nothing to do */
-	if (!list_empty(&xattr_handlers)) {
-		write_unlock(&handler_lock);
-		return 0;
-	}
-
-	/* Add the handlers */
-	list_add_tail(&user_handler.handlers, &xattr_handlers);
-	list_add_tail(&trusted_handler.handlers, &xattr_handlers);
-#ifdef CONFIG_REISERFS_FS_SECURITY
-	list_add_tail(&security_handler.handlers, &xattr_handlers);
-#endif
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-	list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers);
-	list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers);
-#endif
-
-	/* Run initializers, if available */
-	list_for_each(p, &xattr_handlers) {
-		xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
-		if (xah->init) {
-			err = xah->init();
-			if (err) {
-				list_del_init(p);
-				break;
-			}
-		}
+	int err;
+	struct inode *inode = dentry->d_parent->d_inode;
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
+	err = xattr_mkdir(inode, dentry, 0700);
+	mutex_unlock(&inode->i_mutex);
+	if (err) {
+		dput(dentry);
+		dentry = NULL;
 	}
 
-	/* Clean up other handlers, if any failed */
-	if (err)
-		__unregister_handlers();
+	if (dentry && dentry->d_inode)
+		reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
+			      "storage.\n", PRIVROOT_NAME);
 
-	write_unlock(&handler_lock);
 	return err;
 }
 
-void reiserfs_xattr_unregister_handlers(void)
+static int xattr_mount_check(struct super_block *s)
 {
-	write_lock(&handler_lock);
-	__unregister_handlers();
-	write_unlock(&handler_lock);
+	/* We need generation numbers to ensure that the oid mapping is correct
+	 * v3.5 filesystems don't have them. */
+	if (old_format_only(s)) {
+		if (reiserfs_xattrs_optional(s)) {
+			/* Old format filesystem, but optional xattrs have
+			 * been enabled. Error out. */
+			reiserfs_warning(s, "jdm-2005",
+					 "xattrs/ACLs not supported "
+					 "on pre-v3.6 format filesystems. "
+					 "Failing mount.");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
 }
 
+#else
+int __init reiserfs_xattr_register_handlers(void) { return 0; }
+void reiserfs_xattr_unregister_handlers(void) {}
+#endif
+
 /* This will catch lookups from the fs root to .reiserfs_priv */
 static int
 xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
@@ -1147,48 +971,23 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 {
 	int err = 0;
 
-	/* We need generation numbers to ensure that the oid mapping is correct
-	 * v3.5 filesystems don't have them. */
-	if (!old_format_only(s)) {
-		set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
-	} else if (reiserfs_xattrs_optional(s)) {
-		/* Old format filesystem, but optional xattrs have been enabled
-		 * at mount time. Error out. */
-		reiserfs_warning(s, "xattrs/ACLs not supported on pre v3.6 "
-				 "format filesystem. Failing mount.");
-		err = -EOPNOTSUPP;
+#ifdef CONFIG_REISERFS_FS_XATTR
+	err = xattr_mount_check(s);
+	if (err)
 		goto error;
-	} else {
-		/* Old format filesystem, but no optional xattrs have been enabled. This
-		 * means we silently disable xattrs on the filesystem. */
-		clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
-	}
+#endif
 
 	/* If we don't have the privroot located yet - go find it */
-	if (reiserfs_xattrs(s) && !REISERFS_SB(s)->priv_root) {
+	if (!REISERFS_SB(s)->priv_root) {
 		struct dentry *dentry;
 		dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
 					strlen(PRIVROOT_NAME));
 		if (!IS_ERR(dentry)) {
-			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
-				struct inode *inode = dentry->d_parent->d_inode;
-				mutex_lock_nested(&inode->i_mutex,
-						  I_MUTEX_XATTR);
-				err = inode->i_op->mkdir(inode, dentry, 0700);
-				mutex_unlock(&inode->i_mutex);
-				if (err) {
-					dput(dentry);
-					dentry = NULL;
-				}
-
-				if (dentry && dentry->d_inode)
-					reiserfs_warning(s,
-							 "Created %s on %s - reserved for "
-							 "xattr storage.",
-							 PRIVROOT_NAME,
-							 reiserfs_bdevname
-							 (inode->i_sb));
-			} else if (!dentry->d_inode) {
+#ifdef CONFIG_REISERFS_FS_XATTR
+			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode)
+				err = create_privroot(dentry);
+#endif
+			if (!dentry->d_inode) {
 				dput(dentry);
 				dentry = NULL;
 			}
@@ -1197,73 +996,41 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 
 		if (!err && dentry) {
 			s->s_root->d_op = &xattr_lookup_poison_ops;
-			reiserfs_mark_inode_private(dentry->d_inode);
+			dentry->d_inode->i_flags |= S_PRIVATE;
 			REISERFS_SB(s)->priv_root = dentry;
-		} else if (!(mount_flags & MS_RDONLY)) {	/* xattrs are unavailable */
-			/* If we're read-only it just means that the dir hasn't been
-			 * created. Not an error -- just no xattrs on the fs. We'll
-			 * check again if we go read-write */
-			reiserfs_warning(s, "xattrs/ACLs enabled and couldn't "
-					 "find/create .reiserfs_priv. Failing mount.");
+#ifdef CONFIG_REISERFS_FS_XATTR
+		/* xattrs are unavailable */
+		} else if (!(mount_flags & MS_RDONLY)) {
+			/* If we're read-only it just means that the dir
+			 * hasn't been created. Not an error -- just no
+			 * xattrs on the fs. We'll check again if we
+			 * go read-write */
+			reiserfs_warning(s, "jdm-20006",
+					 "xattrs/ACLs enabled and couldn't "
+					 "find/create .reiserfs_priv. "
+					 "Failing mount.");
 			err = -EOPNOTSUPP;
+#endif
 		}
 	}
 
-      error:
-	/* This is only nonzero if there was an error initializing the xattr
-	 * directory or if there is a condition where we don't support them. */
+#ifdef CONFIG_REISERFS_FS_XATTR
+	if (!err)
+		s->s_xattr = reiserfs_xattr_handlers;
+
+error:
 	if (err) {
-		clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
 		clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
 		clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
 	}
+#endif
 
 	/* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
 	s->s_flags = s->s_flags & ~MS_POSIXACL;
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 	if (reiserfs_posixacl(s))
 		s->s_flags |= MS_POSIXACL;
+#endif
 
 	return err;
 }
-
-static int reiserfs_check_acl(struct inode *inode, int mask)
-{
-	struct posix_acl *acl;
-	int error = -EAGAIN; /* do regular unix permission checks by default */
-
-	reiserfs_read_lock_xattr_i(inode);
-	reiserfs_read_lock_xattrs(inode->i_sb);
-
-	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
-
-	reiserfs_read_unlock_xattrs(inode->i_sb);
-	reiserfs_read_unlock_xattr_i(inode);
-
-	if (acl) {
-		if (!IS_ERR(acl)) {
-			error = posix_acl_permission(inode, acl, mask);
-			posix_acl_release(acl);
-		} else if (PTR_ERR(acl) != -ENODATA)
-			error = PTR_ERR(acl);
-	}
-
-	return error;
-}
-
-int reiserfs_permission(struct inode *inode, int mask)
-{
-	/*
-	 * We don't do permission checks on the internal objects.
-	 * Permissions are determined by the "owning" object.
-	 */
-	if (is_reiserfs_priv_object(inode))
-		return 0;
-
-	/*
-	 * Stat data v1 doesn't support ACLs.
-	 */
-	if (get_inode_sd_version(inode) == STAT_DATA_V1)
-		return generic_permission(inode, mask, NULL);
-	else
-		return generic_permission(inode, mask, reiserfs_check_acl);
-}
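
The filler above relies on the generic xattr_handler ->list() convention: called with a NULL buffer it reports how much space a name needs; called with a buffer it copies the nul-terminated name and returns the bytes written. A minimal sketch of a conforming callback, mirroring the handlers later in this diff (the demo_list name is illustrative, not part of this patch):

	static size_t demo_list(struct inode *inode, char *list, size_t list_size,
				const char *name, size_t name_len)
	{
		const size_t len = name_len + 1;	/* name plus trailing '\0' */

		if (list && len <= list_size) {		/* copy mode */
			memcpy(list, name, name_len);
			list[name_len] = '\0';
		}
		return len;	/* space needed, whether or not anything was copied */
	}

listxattr_filler accumulates these return values in buf.pos, so a single readdir pass answers both the size query (NULL user buffer) and the actual copy.
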
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index b7e4fa4539de..d423416d93d1 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -10,15 +10,17 @@
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
 
-static int reiserfs_set_acl(struct inode *inode, int type,
+static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
+			    struct inode *inode, int type,
 			    struct posix_acl *acl);
 
 static int
 xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 {
 	struct posix_acl *acl;
-	int error;
-
+	int error, error2;
+	struct reiserfs_transaction_handle th;
+	size_t jcreate_blocks;
 	if (!reiserfs_posixacl(inode->i_sb))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -36,7 +38,21 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 	} else
 		acl = NULL;
 
-	error = reiserfs_set_acl(inode, type, acl);
+	/* Pessimism: We can't assume that anything from the xattr root up
+	 * has been created. */
+
+	jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+			 reiserfs_xattr_nblocks(inode, size) * 2;
+
+	reiserfs_write_lock(inode->i_sb);
+	error = journal_begin(&th, inode->i_sb, jcreate_blocks);
+	if (error == 0) {
+		error = reiserfs_set_acl(&th, inode, type, acl);
+		error2 = journal_end(&th, inode->i_sb, jcreate_blocks);
+		if (error2)
+			error = error2;
+	}
+	reiserfs_write_unlock(inode->i_sb);
 
       release_and_out:
 	posix_acl_release(acl);
@@ -172,6 +188,29 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
+			    struct posix_acl *acl)
+{
+	spin_lock(&inode->i_lock);
+	if (*i_acl != ERR_PTR(-ENODATA))
+		posix_acl_release(*i_acl);
+	*i_acl = posix_acl_dup(acl);
+	spin_unlock(&inode->i_lock);
+}
+
+static inline struct posix_acl *iget_acl(struct inode *inode,
+					 struct posix_acl **i_acl)
+{
+	struct posix_acl *acl = ERR_PTR(-ENODATA);
+
+	spin_lock(&inode->i_lock);
+	if (*i_acl != ERR_PTR(-ENODATA))
+		acl = posix_acl_dup(*i_acl);
+	spin_unlock(&inode->i_lock);
+
+	return acl;
+}
+
 /*
  * Inode operation get_posix_acl().
  *
@@ -199,11 +238,11 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (IS_ERR(*p_acl)) {
-		if (PTR_ERR(*p_acl) == -ENODATA)
-			return NULL;
-	} else if (*p_acl != NULL)
-		return posix_acl_dup(*p_acl);
+	acl = iget_acl(inode, p_acl);
+	if (acl && !IS_ERR(acl))
+		return acl;
+	else if (PTR_ERR(acl) == -ENODATA)
+		return NULL;
 
 	size = reiserfs_xattr_get(inode, name, NULL, 0);
 	if (size < 0) {
@@ -229,7 +268,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
 	} else {
 		acl = posix_acl_from_disk(value, retval);
 		if (!IS_ERR(acl))
-			*p_acl = posix_acl_dup(acl);
+			iset_acl(inode, p_acl, acl);
 	}
 
 	kfree(value);
@@ -243,12 +282,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
  * BKL held [before 2.5.x]
  */
 static int
-reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
+		 int type, struct posix_acl *acl)
 {
 	char *name;
 	void *value = NULL;
 	struct posix_acl **p_acl;
-	size_t size;
+	size_t size = 0;
 	int error;
 	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
 
@@ -285,31 +325,28 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		value = posix_acl_to_disk(acl, &size);
 		if (IS_ERR(value))
 			return (int)PTR_ERR(value);
-		error = reiserfs_xattr_set(inode, name, value, size, 0);
-	} else {
-		error = reiserfs_xattr_del(inode, name);
-		if (error == -ENODATA) {
-			/* This may seem odd here, but it means that the ACL was set
-			 * with a value representable with mode bits. If there was
-			 * an ACL before, reiserfs_xattr_del already dirtied the inode.
-			 */
+	}
+
+	error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0);
+
+	/*
+	 * Ensure that the inode gets dirtied if we're only using
+	 * the mode bits and an old ACL didn't exist. We don't need
+	 * to check if the inode is hashed here since we won't get
+	 * called by reiserfs_inherit_default_acl().
+	 */
+	if (error == -ENODATA) {
+		error = 0;
+		if (type == ACL_TYPE_ACCESS) {
+			inode->i_ctime = CURRENT_TIME_SEC;
 			mark_inode_dirty(inode);
-			error = 0;
 		}
 	}
 
 	kfree(value);
 
-	if (!error) {
-		/* Release the old one */
-		if (!IS_ERR(*p_acl) && *p_acl)
-			posix_acl_release(*p_acl);
-
-		if (acl == NULL)
-			*p_acl = ERR_PTR(-ENODATA);
-		else
-			*p_acl = posix_acl_dup(acl);
-	}
+	if (!error)
+		iset_acl(inode, p_acl, acl);
 
 	return error;
 }
@@ -317,7 +354,8 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 /* dir->i_mutex: locked,
  * inode is new and not released into the wild yet */
 int
-reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
+reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
+			     struct inode *dir, struct dentry *dentry,
 			     struct inode *inode)
 {
 	struct posix_acl *acl;
@@ -335,8 +373,8 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 	/* Don't apply ACLs to objects in the .reiserfs_priv tree.. This
 	 * would be useless since permissions are ignored, and a pain because
 	 * it introduces locking cycles */
-	if (is_reiserfs_priv_object(dir)) {
-		reiserfs_mark_inode_private(inode);
+	if (IS_PRIVATE(dir)) {
+		inode->i_flags |= S_PRIVATE;
 		goto apply_umask;
 	}
 
@@ -354,7 +392,8 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 
 	/* Copy the default ACL to the default ACL of a new directory */
 	if (S_ISDIR(inode->i_mode)) {
-		err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+		err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT,
+				       acl);
 		if (err)
 			goto cleanup;
 	}
@@ -375,9 +414,9 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 
 	/* If we need an ACL.. */
 	if (need_acl > 0) {
-		err =
-		    reiserfs_set_acl(inode, ACL_TYPE_ACCESS,
-				     acl_copy);
+		err = reiserfs_set_acl(th, inode,
+				       ACL_TYPE_ACCESS,
+				       acl_copy);
 		if (err)
 			goto cleanup_copy;
 	}
@@ -395,25 +434,45 @@ reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-/* Looks up and caches the result of the default ACL.
- * We do this so that we don't need to carry the xattr_sem into
- * reiserfs_new_inode if we don't need to */
+/* This is used to cache the default acl before a new object is created.
+ * The biggest reason for this is to get an idea of how many blocks will
+ * actually be required for the create operation if we must inherit an ACL.
+ * An ACL write can add up to 3 object creations and an additional file write
+ * so we'd prefer not to reserve that many blocks in the journal if we can.
+ * It also has the advantage of not loading the ACL with a transaction open,
+ * this may seem silly, but if the owner of the directory is doing the
+ * creation, the ACL may not be loaded since the permissions wouldn't require
+ * it.
+ * We return the number of blocks required for the transaction.
+ */
 int reiserfs_cache_default_acl(struct inode *inode)
 {
-	int ret = 0;
-	if (reiserfs_posixacl(inode->i_sb) && !is_reiserfs_priv_object(inode)) {
-		struct posix_acl *acl;
-		reiserfs_read_lock_xattr_i(inode);
-		reiserfs_read_lock_xattrs(inode->i_sb);
-		acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
-		reiserfs_read_unlock_xattrs(inode->i_sb);
-		reiserfs_read_unlock_xattr_i(inode);
-		ret = (acl && !IS_ERR(acl));
-		if (ret)
-			posix_acl_release(acl);
+	struct posix_acl *acl;
+	int nblocks = 0;
+
+	if (IS_PRIVATE(inode))
+		return 0;
+
+	acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
+
+	if (acl && !IS_ERR(acl)) {
+		int size = reiserfs_acl_size(acl->a_count);
+
+		/* Other xattrs can be created during inode creation. We don't
+		 * want to claim too many blocks, so we check to see if we
+		 * need to create the tree to the xattrs, and then we
+		 * just want two files. */
+		nblocks = reiserfs_xattr_jcreate_nblocks(inode);
+		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
+
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+
+		/* We need to account for writes + bitmaps for two files */
+		nblocks += reiserfs_xattr_nblocks(inode, size) * 4;
+		posix_acl_release(acl);
 	}
 
-	return ret;
+	return nblocks;
 }
 
 int reiserfs_acl_chmod(struct inode *inode)
@@ -429,9 +488,7 @@ int reiserfs_acl_chmod(struct inode *inode)
 		return 0;
 	}
 
-	reiserfs_read_lock_xattrs(inode->i_sb);
 	acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
-	reiserfs_read_unlock_xattrs(inode->i_sb);
 	if (!acl)
 		return 0;
 	if (IS_ERR(acl))
@@ -442,18 +499,20 @@ int reiserfs_acl_chmod(struct inode *inode)
 		return -ENOMEM;
 	error = posix_acl_chmod_masq(clone, inode->i_mode);
 	if (!error) {
-		int lock = !has_xattr_dir(inode);
-		reiserfs_write_lock_xattr_i(inode);
-		if (lock)
-			reiserfs_write_lock_xattrs(inode->i_sb);
-		else
-			reiserfs_read_lock_xattrs(inode->i_sb);
-		error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
-		if (lock)
-			reiserfs_write_unlock_xattrs(inode->i_sb);
-		else
-			reiserfs_read_unlock_xattrs(inode->i_sb);
-		reiserfs_write_unlock_xattr_i(inode);
+		struct reiserfs_transaction_handle th;
+		size_t size = reiserfs_xattr_nblocks(inode,
+				reiserfs_acl_size(clone->a_count));
+		reiserfs_write_lock(inode->i_sb);
+		error = journal_begin(&th, inode->i_sb, size * 2);
+		if (!error) {
+			int error2;
+			error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS,
+						 clone);
+			error2 = journal_end(&th, inode->i_sb, size * 2);
+			if (error2)
+				error = error2;
+		}
+		reiserfs_write_unlock(inode->i_sb);
 	}
 	posix_acl_release(clone);
 	return error;
@@ -477,38 +536,22 @@ posix_acl_access_set(struct inode *inode, const char *name,
 	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
 
-static int posix_acl_access_del(struct inode *inode, const char *name)
+static size_t posix_acl_access_list(struct inode *inode, char *list,
+				    size_t list_size, const char *name,
+				    size_t name_len)
 {
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
-	struct posix_acl **acl = &reiserfs_i->i_acl_access;
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
-		return -EINVAL;
-	if (!IS_ERR(*acl) && *acl) {
-		posix_acl_release(*acl);
-		*acl = ERR_PTR(-ENODATA);
-	}
-
-	return 0;
-}
-
-static int
-posix_acl_access_list(struct inode *inode, const char *name, int namelen,
-		      char *out)
-{
-	int len = namelen;
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 	if (!reiserfs_posixacl(inode->i_sb))
 		return 0;
-	if (out)
-		memcpy(out, name, len);
-
-	return len;
+	if (list && size <= list_size)
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
+	return size;
 }
 
-struct reiserfs_xattr_handler posix_acl_access_handler = {
+struct xattr_handler reiserfs_posix_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.get = posix_acl_access_get,
 	.set = posix_acl_access_set,
-	.del = posix_acl_access_del,
 	.list = posix_acl_access_list,
 };
 
@@ -530,37 +573,21 @@ posix_acl_default_set(struct inode *inode, const char *name,
 	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
 
-static int posix_acl_default_del(struct inode *inode, const char *name)
+static size_t posix_acl_default_list(struct inode *inode, char *list,
+				     size_t list_size, const char *name,
+				     size_t name_len)
 {
-	struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
-	struct posix_acl **acl = &reiserfs_i->i_acl_default;
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
-		return -EINVAL;
-	if (!IS_ERR(*acl) && *acl) {
-		posix_acl_release(*acl);
-		*acl = ERR_PTR(-ENODATA);
-	}
-
-	return 0;
-}
-
-static int
-posix_acl_default_list(struct inode *inode, const char *name, int namelen,
-		       char *out)
-{
-	int len = namelen;
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 	if (!reiserfs_posixacl(inode->i_sb))
 		return 0;
-	if (out)
-		memcpy(out, name, len);
-
-	return len;
+	if (list && size <= list_size)
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
+	return size;
 }
 
-struct reiserfs_xattr_handler posix_acl_default_handler = {
+struct xattr_handler reiserfs_posix_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.get = posix_acl_default_get,
 	.set = posix_acl_default_set,
-	.del = posix_acl_default_del,
 	.list = posix_acl_default_list,
 };
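
Both xattr_set_acl() and reiserfs_acl_chmod() above now follow the same journaling shape: estimate the blocks, journal_begin(), perform the ACL write through the handle, and let a failing journal_end() override a successful write. A condensed sketch of that shape, with an illustrative callback standing in for the ACL operation (the with_transaction name is not part of this patch):

	static int with_transaction(struct super_block *sb, int nblocks,
				    int (*op)(struct reiserfs_transaction_handle *th))
	{
		struct reiserfs_transaction_handle th;
		int error, error2;

		reiserfs_write_lock(sb);
		error = journal_begin(&th, sb, nblocks);
		if (!error) {
			error = op(&th);		/* e.g. the reiserfs_set_acl() call */
			error2 = journal_end(&th, sb, nblocks);
			if (error2)			/* a failed commit wins */
				error = error2;
		}
		reiserfs_write_unlock(sb);
		return error;
	}
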
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 056008db1377..4d3c20e787c3 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -4,6 +4,7 @@
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include <linux/reiserfs_xattr.h>
+#include <linux/security.h>
 #include <asm/uaccess.h>
 
 static int
@@ -12,7 +13,7 @@ security_get(struct inode *inode, const char *name, void *buffer, size_t size)
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (is_reiserfs_priv_object(inode))
+	if (IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_get(inode, name, buffer, size);
@@ -25,41 +26,84 @@ security_set(struct inode *inode, const char *name, const void *buffer,
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (is_reiserfs_priv_object(inode))
+	if (IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
-static int security_del(struct inode *inode, const char *name)
+static size_t security_list(struct inode *inode, char *list, size_t list_len,
+			    const char *name, size_t namelen)
 {
-	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
-		return -EINVAL;
+	const size_t len = namelen + 1;
 
-	if (is_reiserfs_priv_object(inode))
-		return -EPERM;
+	if (IS_PRIVATE(inode))
+		return 0;
+
+	if (list && len <= list_len) {
+		memcpy(list, name, namelen);
+		list[namelen] = '\0';
+	}
 
-	return 0;
+	return len;
 }
 
-static int
-security_list(struct inode *inode, const char *name, int namelen, char *out)
+/* Initializes the security context for a new inode and returns the number
+ * of blocks needed for the transaction. If successful, reiserfs_security
+ * must be released using reiserfs_security_free when the caller is done. */
+int reiserfs_security_init(struct inode *dir, struct inode *inode,
+			   struct reiserfs_security_handle *sec)
 {
-	int len = namelen;
+	int blocks = 0;
+	int error = security_inode_init_security(inode, dir, &sec->name,
+						 &sec->value, &sec->length);
+	if (error) {
+		if (error == -EOPNOTSUPP)
+			error = 0;
 
-	if (is_reiserfs_priv_object(inode))
-		return 0;
+		sec->name = NULL;
+		sec->value = NULL;
+		sec->length = 0;
+		return error;
+	}
 
-	if (out)
-		memcpy(out, name, len);
+	if (sec->length) {
+		blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+			 reiserfs_xattr_nblocks(inode, sec->length);
+		/* We don't want to count the directories twice if we have
+		 * a default ACL. */
+		REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
+	}
+	return blocks;
+}
 
-	return len;
+int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+			    struct inode *inode,
+			    struct reiserfs_security_handle *sec)
+{
+	int error;
+	if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX))
+		return -EINVAL;
+
+	error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value,
+					  sec->length, XATTR_CREATE);
+	if (error == -ENODATA || error == -EOPNOTSUPP)
+		error = 0;
+
+	return error;
+}
+
+void reiserfs_security_free(struct reiserfs_security_handle *sec)
+{
+	kfree(sec->name);
+	kfree(sec->value);
+	sec->name = NULL;
+	sec->value = NULL;
 }
 
-struct reiserfs_xattr_handler security_handler = {
+struct xattr_handler reiserfs_xattr_security_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
 	.get = security_get,
 	.set = security_set,
-	.del = security_del,
 	.list = security_list,
 };
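
The three new helpers form a lifecycle: reiserfs_security_init() asks the LSM for a context and returns the journal blocks that storing it will take, reiserfs_security_write() writes it under an open transaction handle, and reiserfs_security_free() releases the name/value pair. A hedged sketch of the intended caller sequence, with the surrounding transaction setup purely illustrative:

	struct reiserfs_security_handle sec;
	int blocks = reiserfs_security_init(dir, inode, &sec);

	if (blocks < 0)
		return blocks;			/* LSM refused */
	/* ... journal_begin(&th, sb, base_blocks + blocks); create the inode ... */
	err = reiserfs_security_write(&th, inode, &sec);
	/* ... journal_end(&th, sb, base_blocks + blocks); ... */
	reiserfs_security_free(&sec);
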
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 60abe2bb1f98..a865042f75e2 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -13,10 +13,7 @@ trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!reiserfs_xattrs(inode->i_sb))
-		return -EOPNOTSUPP;
-
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_get(inode, name, buffer, size);
@@ -29,50 +26,30 @@ trusted_set(struct inode *inode, const char *name, const void *buffer,
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!reiserfs_xattrs(inode->i_sb))
-		return -EOPNOTSUPP;
-
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
 		return -EPERM;
 
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
-static int trusted_del(struct inode *inode, const char *name)
+static size_t trusted_list(struct inode *inode, char *list, size_t list_size,
+			   const char *name, size_t name_len)
 {
-	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
-		return -EINVAL;
+	const size_t len = name_len + 1;
 
-	if (!reiserfs_xattrs(inode->i_sb))
-		return -EOPNOTSUPP;
-
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
-		return -EPERM;
-
-	return 0;
-}
-
-static int
-trusted_list(struct inode *inode, const char *name, int namelen, char *out)
-{
-	int len = namelen;
-
-	if (!reiserfs_xattrs(inode->i_sb))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
 		return 0;
 
-	if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
-		return 0;
-
-	if (out)
-		memcpy(out, name, len);
-
+	if (list && len <= list_size) {
+		memcpy(list, name, name_len);
+		list[name_len] = '\0';
+	}
 	return len;
 }
 
-struct reiserfs_xattr_handler trusted_handler = {
+struct xattr_handler reiserfs_xattr_trusted_handler = {
 	.prefix = XATTR_TRUSTED_PREFIX,
 	.get = trusted_get,
 	.set = trusted_set,
-	.del = trusted_del,
 	.list = trusted_list,
 };
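
One detail worth noting in the hunks above: the permission test is inverted, not merely renamed. The equivalence (by De Morgan) is:

	/* Old test:  !(capable(CAP_SYS_ADMIN) || priv)         == !capable && !priv
	 * New test:  !capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)
	 * so the new form additionally refuses private inodes even to
	 * CAP_SYS_ADMIN holders, matching the S_PRIVATE semantics used
	 * elsewhere in this series. */
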
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 1384efcb938e..e3238dc4f3db 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -6,10 +6,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <asm/uaccess.h>
 
-#ifdef CONFIG_REISERFS_FS_POSIX_ACL
-# include <linux/reiserfs_acl.h>
-#endif
-
 static int
 user_get(struct inode *inode, const char *name, void *buffer, size_t size)
 {
@@ -25,7 +21,6 @@ static int
 user_set(struct inode *inode, const char *name, const void *buffer,
 	 size_t size, int flags)
 {
-
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
 
@@ -34,33 +29,23 @@ user_set(struct inode *inode, const char *name, const void *buffer,
 	return reiserfs_xattr_set(inode, name, buffer, size, flags);
 }
 
-static int user_del(struct inode *inode, const char *name)
+static size_t user_list(struct inode *inode, char *list, size_t list_size,
+			const char *name, size_t name_len)
 {
-	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
-		return -EINVAL;
-
-	if (!reiserfs_xattrs_user(inode->i_sb))
-		return -EOPNOTSUPP;
-	return 0;
-}
+	const size_t len = name_len + 1;
 
-static int
-user_list(struct inode *inode, const char *name, int namelen, char *out)
-{
-	int len = namelen;
 	if (!reiserfs_xattrs_user(inode->i_sb))
 		return 0;
-
-	if (out)
-		memcpy(out, name, len);
-
+	if (list && len <= list_size) {
+		memcpy(list, name, name_len);
+		list[name_len] = '\0';
+	}
 	return len;
 }
 
-struct reiserfs_xattr_handler user_handler = {
+struct xattr_handler reiserfs_xattr_user_handler = {
 	.prefix = XATTR_USER_PREFIX,
 	.get = user_get,
 	.set = user_set,
-	.del = user_del,
 	.list = user_list,
 };
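
With the per-handler ->del() callbacks gone, deletion is expressed as a ->set() with a NULL value through the generic xattr layer, and the handlers are wired up via s->s_xattr (see the `s->s_xattr = reiserfs_xattr_handlers` assignment in the xattr.c hunk above). The array itself is defined outside this diff; given the handler names renamed here, it plausibly looks like:

	struct xattr_handler *reiserfs_xattr_handlers[] = {
		&reiserfs_xattr_user_handler,
		&reiserfs_xattr_trusted_handler,
	#ifdef CONFIG_REISERFS_FS_SECURITY
		&reiserfs_xattr_security_handler,
	#endif
	#ifdef CONFIG_REISERFS_FS_POSIX_ACL
		&reiserfs_posix_acl_access_handler,
		&reiserfs_posix_acl_default_handler,
	#endif
		NULL	/* NULL-terminated, as the generic layer expects */
	};
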
diff --git a/fs/seq_file.c b/fs/seq_file.c
index a1a4cfe19210..7f40f30c55c5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -513,7 +513,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
 }
 EXPORT_SYMBOL(seq_bitmap);
 
-int seq_bitmap_list(struct seq_file *m, unsigned long *bits,
+int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
 		    unsigned int nr_bits)
 {
 	if (m->count < m->size) {
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 07703d3ff4a1..93e0c0281d45 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -234,7 +234,7 @@ static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return ret;
 }
 
-static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
 	struct bin_buffer *bb = file->private_data;
@@ -242,15 +242,15 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	int ret;
 
 	if (!bb->vm_ops)
-		return -EINVAL;
+		return VM_FAULT_SIGBUS;
 
 	if (!bb->vm_ops->page_mkwrite)
 		return 0;
 
 	if (!sysfs_get_active_two(attr_sd))
-		return -EINVAL;
+		return VM_FAULT_SIGBUS;
 
-	ret = bb->vm_ops->page_mkwrite(vma, page);
+	ret = bb->vm_ops->page_mkwrite(vma, vmf);
 
 	sysfs_put_active_two(attr_sd);
 	return ret;
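
The sysfs, UBIFS, and XFS hunks in this merge all track the same VM API change: ->page_mkwrite() now receives the struct vm_fault instead of the page, and callers expect VM_FAULT_* codes rather than -errno. A minimal conforming callback under the new signature (the budget_for_page() predicate is hypothetical, standing in for whatever per-filesystem check applies):

	static int demo_page_mkwrite(struct vm_area_struct *vma,
				     struct vm_fault *vmf)
	{
		struct page *page = vmf->page;	/* formerly passed directly */

		if (!budget_for_page(vma->vm_file, page))	/* hypothetical */
			return VM_FAULT_SIGBUS;		/* not -EINVAL/-EROFS */

		lock_page(page);
		/* ... make the page writable / reserve space ... */
		unlock_page(page);
		return 0;
	}
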
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 93b6de51f261..0ff89fe71e51 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
  * mmap()d file has taken write protection fault and is being made
  * writable. UBIFS must ensure page is budgeted for.
  */
-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 	struct timespec now = ubifs_current_time(inode);
@@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
 
 	if (unlikely(c->ro_media))
-		return -EROFS;
+		return VM_FAULT_SIGBUS; /* -EROFS */
 
 	/*
 	 * We have not locked @page so far so we may budget for changing the
@@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 		if (err == -ENOSPC)
 			ubifs_warn("out of space for mmapped file "
 				   "(inode number %lu)", inode->i_ino);
-		return err;
+		return VM_FAULT_SIGBUS;
 	}
 
 	lock_page(page);
@@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 out_unlock:
 	unlock_page(page);
 	ubifs_release_budget(c, &req);
+	if (err)
+		err = VM_FAULT_SIGBUS;
 	return err;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index e14c4e3aea0c..f4e255441574 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -234,9 +234,9 @@ xfs_file_mmap(
 STATIC int
 xfs_vm_page_mkwrite(
 	struct vm_area_struct	*vma,
-	struct page		*page)
+	struct vm_fault		*vmf)
 {
-	return block_page_mkwrite(vma, page, xfs_get_blocks);
+	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
 
 const struct file_operations xfs_file_operations = {
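
As the UBIFS hunk shows with its `if (err) err = VM_FAULT_SIGBUS;`, filesystems that still compute -errno internally translate it at the boundary of the new contract; a reusable form of that idiom might look like:

	static inline int mkwrite_status(int err)
	{
		/* new ->page_mkwrite() contract: VM_FAULT_* codes only */
		return err ? VM_FAULT_SIGBUS : 0;
	}
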