aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2007-07-11 09:55:48 -0400
committerDavid Woodhouse <dwmw2@infradead.org>2007-07-11 09:55:48 -0400
commitdb1b39d8b860e3716620c225bc86e0ec41764e34 (patch)
tree8739074db733ef767400ea92cfbfed9352ddb92d /fs
parenta6bc432e296dfa1f05d4b586ca5ca3085a2d42d7 (diff)
parent4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/bad_inode.c7
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/binfmt_elf.c7
-rw-r--r--fs/bio.c2
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/cifs/cifsfs.c8
-rw-r--r--fs/coda/file.c11
-rw-r--r--fs/dlm/Kconfig2
-rw-r--r--fs/dlm/Makefile1
-rw-r--r--fs/dlm/config.c25
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/debug_fs.c186
-rw-r--r--fs/dlm/dlm_internal.h17
-rw-r--r--fs/dlm/lock.c470
-rw-r--r--fs/dlm/lock.h13
-rw-r--r--fs/dlm/lockspace.c86
-rw-r--r--fs/dlm/lowcomms.c23
-rw-r--r--fs/dlm/main.c11
-rw-r--r--fs/dlm/member.c11
-rw-r--r--fs/dlm/netlink.c153
-rw-r--r--fs/dlm/rcom.c13
-rw-r--r--fs/dlm/recoverd.c4
-rw-r--r--fs/dlm/user.c129
-rw-r--r--fs/ecryptfs/file.c15
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext3/file.c1
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/bmap.c23
-rw-r--r--fs/gfs2/daemon.c11
-rw-r--r--fs/gfs2/dir.c69
-rw-r--r--fs/gfs2/dir.h9
-rw-r--r--fs/gfs2/eattr.c14
-rw-r--r--fs/gfs2/glock.c123
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/incore.h81
-rw-r--r--fs/gfs2/inode.c288
-rw-r--r--fs/gfs2/inode.h30
-rw-r--r--fs/gfs2/locking/dlm/lock.c11
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h2
-rw-r--r--fs/gfs2/locking/dlm/mount.c2
-rw-r--r--fs/gfs2/locking/dlm/plock.c8
-rw-r--r--fs/gfs2/locking/dlm/thread.c11
-rw-r--r--fs/gfs2/log.c129
-rw-r--r--fs/gfs2/lops.c49
-rw-r--r--fs/gfs2/lops.h23
-rw-r--r--fs/gfs2/meta_io.c8
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/mount.c25
-rw-r--r--fs/gfs2/ondisk.c251
-rw-r--r--fs/gfs2/ops_address.c69
-rw-r--r--fs/gfs2/ops_address.h2
-rw-r--r--fs/gfs2/ops_dentry.c24
-rw-r--r--fs/gfs2/ops_export.c65
-rw-r--r--fs/gfs2/ops_export.h22
-rw-r--r--fs/gfs2/ops_file.c5
-rw-r--r--fs/gfs2/ops_fstype.c33
-rw-r--r--fs/gfs2/ops_fstype.h1
-rw-r--r--fs/gfs2/ops_inode.c30
-rw-r--r--fs/gfs2/ops_super.c8
-rw-r--r--fs/gfs2/ops_vm.c2
-rw-r--r--fs/gfs2/quota.c57
-rw-r--r--fs/gfs2/recovery.c22
-rw-r--r--fs/gfs2/rgrp.c377
-rw-r--r--fs/gfs2/rgrp.h1
-rw-r--r--fs/gfs2/super.c79
-rw-r--r--fs/gfs2/super.h2
-rw-r--r--fs/gfs2/util.c6
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jfs/endian24.h2
-rw-r--r--fs/jfs/file.c1
-rw-r--r--fs/jfs/jfs_debug.c28
-rw-r--r--fs/jfs/jfs_debug.h2
-rw-r--r--fs/jfs/jfs_dinode.h42
-rw-r--r--fs/jfs/jfs_dmap.c419
-rw-r--r--fs/jfs/jfs_dmap.h118
-rw-r--r--fs/jfs/jfs_dtree.c105
-rw-r--r--fs/jfs/jfs_dtree.h2
-rw-r--r--fs/jfs/jfs_extent.c102
-rw-r--r--fs/jfs/jfs_filsys.h13
-rw-r--r--fs/jfs/jfs_imap.c296
-rw-r--r--fs/jfs/jfs_imap.h98
-rw-r--r--fs/jfs/jfs_incore.h4
-rw-r--r--fs/jfs/jfs_logmgr.c90
-rw-r--r--fs/jfs/jfs_logmgr.h26
-rw-r--r--fs/jfs/jfs_metapage.c3
-rw-r--r--fs/jfs/jfs_mount.c6
-rw-r--r--fs/jfs/jfs_txnmgr.c302
-rw-r--r--fs/jfs/jfs_txnmgr.h2
-rw-r--r--fs/jfs/jfs_types.h20
-rw-r--r--fs/jfs/jfs_umount.c2
-rw-r--r--fs/jfs/jfs_xtree.c428
-rw-r--r--fs/jfs/jfs_xtree.h48
-rw-r--r--fs/jfs/namei.c26
-rw-r--r--fs/jfs/resize.c48
-rw-r--r--fs/jfs/xattr.c9
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfsd/vfs.c47
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ocfs2/file.c18
-rw-r--r--fs/partitions/ibm.c167
-rw-r--r--fs/pipe.c70
-rw-r--r--fs/proc/array.c59
-rw-r--r--fs/proc/base.c71
-rw-r--r--fs/qnx4/file.c2
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/read_write.c20
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/seq_file.c34
-rw-r--r--fs/smbfs/file.c9
-rw-r--r--fs/splice.c413
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/utimes.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c26
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c44
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h6
-rw-r--r--fs/xfs/xfs_vnodeops.c3
133 files changed, 4005 insertions, 2476 deletions
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a2855923..36e381c6a99a 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
33 .fsync = file_fsync, 33 .fsync = file_fsync,
34 .write = do_sync_write, 34 .write = do_sync_write,
35 .aio_write = generic_file_aio_write, 35 .aio_write = generic_file_aio_write,
36 .sendfile = generic_file_sendfile, 36 .splice_read = generic_file_splice_read,
37}; 37};
38 38
39const struct inode_operations adfs_file_inode_operations = { 39const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c8796906f584..c314a35f0918 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
35 .open = affs_file_open, 35 .open = affs_file_open,
36 .release = affs_file_release, 36 .release = affs_file_release,
37 .fsync = file_fsync, 37 .fsync = file_fsync,
38 .sendfile = generic_file_sendfile, 38 .splice_read = generic_file_splice_read,
39}; 39};
40 40
41const struct inode_operations affs_file_inode_operations = { 41const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721d9fc2..aede7eb66dd4 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
32 .aio_read = generic_file_aio_read, 32 .aio_read = generic_file_aio_read,
33 .aio_write = afs_file_write, 33 .aio_write = afs_file_write,
34 .mmap = generic_file_readonly_mmap, 34 .mmap = generic_file_readonly_mmap,
35 .sendfile = generic_file_sendfile, 35 .splice_read = generic_file_splice_read,
36 .fsync = afs_fsync, 36 .fsync = afs_fsync,
37}; 37};
38 38
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee473eede..521ff7caadbd 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
114 return -EIO; 114 return -EIO;
115} 115}
116 116
117static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
118 size_t count, read_actor_t actor, void *target)
119{
120 return -EIO;
121}
122
123static ssize_t bad_file_sendpage(struct file *file, struct page *page, 117static ssize_t bad_file_sendpage(struct file *file, struct page *page,
124 int off, size_t len, loff_t *pos, int more) 118 int off, size_t len, loff_t *pos, int more)
125{ 119{
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
182 .aio_fsync = bad_file_aio_fsync, 176 .aio_fsync = bad_file_aio_fsync,
183 .fasync = bad_file_fasync, 177 .fasync = bad_file_fasync,
184 .lock = bad_file_lock, 178 .lock = bad_file_lock,
185 .sendfile = bad_file_sendfile,
186 .sendpage = bad_file_sendpage, 179 .sendpage = bad_file_sendpage,
187 .get_unmapped_area = bad_file_get_unmapped_area, 180 .get_unmapped_area = bad_file_get_unmapped_area,
188 .check_flags = bad_file_check_flags, 181 .check_flags = bad_file_check_flags,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa04e65..24310e9ee05a 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
24 .write = do_sync_write, 24 .write = do_sync_write,
25 .aio_write = generic_file_aio_write, 25 .aio_write = generic_file_aio_write,
26 .mmap = generic_file_mmap, 26 .mmap = generic_file_mmap,
27 .sendfile = generic_file_sendfile, 27 .splice_read = generic_file_splice_read,
28}; 28};
29 29
30static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb) 30static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa8ea33ab0be..08e4414b8374 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1499,6 +1499,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1499#endif 1499#endif
1500 int thread_status_size = 0; 1500 int thread_status_size = 0;
1501 elf_addr_t *auxv; 1501 elf_addr_t *auxv;
1502#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1503 int extra_notes_size;
1504#endif
1502 1505
1503 /* 1506 /*
1504 * We no longer stop all VM operations. 1507 * We no longer stop all VM operations.
@@ -1628,7 +1631,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1628 sz += thread_status_size; 1631 sz += thread_status_size;
1629 1632
1630#ifdef ELF_CORE_WRITE_EXTRA_NOTES 1633#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1631 sz += ELF_CORE_EXTRA_NOTES_SIZE; 1634 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1635 sz += extra_notes_size;
1632#endif 1636#endif
1633 1637
1634 fill_elf_note_phdr(&phdr, sz, offset); 1638 fill_elf_note_phdr(&phdr, sz, offset);
@@ -1674,6 +1678,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1674 1678
1675#ifdef ELF_CORE_WRITE_EXTRA_NOTES 1679#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1676 ELF_CORE_WRITE_EXTRA_NOTES; 1680 ELF_CORE_WRITE_EXTRA_NOTES;
1681 foffset += extra_notes_size;
1677#endif 1682#endif
1678 1683
1679 /* write out the thread status notes section */ 1684 /* write out the thread status notes section */
diff --git a/fs/bio.c b/fs/bio.c
index 093345f00128..33e46340a766 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1223,8 +1223,6 @@ EXPORT_SYMBOL(bio_hw_segments);
1223EXPORT_SYMBOL(bio_add_page); 1223EXPORT_SYMBOL(bio_add_page);
1224EXPORT_SYMBOL(bio_add_pc_page); 1224EXPORT_SYMBOL(bio_add_pc_page);
1225EXPORT_SYMBOL(bio_get_nr_vecs); 1225EXPORT_SYMBOL(bio_get_nr_vecs);
1226EXPORT_SYMBOL(bio_map_user);
1227EXPORT_SYMBOL(bio_unmap_user);
1228EXPORT_SYMBOL(bio_map_kern); 1226EXPORT_SYMBOL(bio_map_kern);
1229EXPORT_SYMBOL(bio_pair_release); 1227EXPORT_SYMBOL(bio_pair_release);
1230EXPORT_SYMBOL(bio_split); 1228EXPORT_SYMBOL(bio_split);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a16f51..b3e9bfa748cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
1346#ifdef CONFIG_COMPAT 1346#ifdef CONFIG_COMPAT
1347 .compat_ioctl = compat_blkdev_ioctl, 1347 .compat_ioctl = compat_blkdev_ioctl,
1348#endif 1348#endif
1349 .sendfile = generic_file_sendfile,
1350 .splice_read = generic_file_splice_read, 1349 .splice_read = generic_file_splice_read,
1351 .splice_write = generic_file_splice_write, 1350 .splice_write = generic_file_splice_write,
1352}; 1351};
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7c04752b76cb..8b0cbf4a4ad0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
616 .fsync = cifs_fsync, 616 .fsync = cifs_fsync,
617 .flush = cifs_flush, 617 .flush = cifs_flush,
618 .mmap = cifs_file_mmap, 618 .mmap = cifs_file_mmap,
619 .sendfile = generic_file_sendfile, 619 .splice_read = generic_file_splice_read,
620 .llseek = cifs_llseek, 620 .llseek = cifs_llseek,
621#ifdef CONFIG_CIFS_POSIX 621#ifdef CONFIG_CIFS_POSIX
622 .ioctl = cifs_ioctl, 622 .ioctl = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
637 .lock = cifs_lock, 637 .lock = cifs_lock,
638 .fsync = cifs_fsync, 638 .fsync = cifs_fsync,
639 .flush = cifs_flush, 639 .flush = cifs_flush,
640 .sendfile = generic_file_sendfile, /* BB removeme BB */ 640 .splice_read = generic_file_splice_read,
641#ifdef CONFIG_CIFS_POSIX 641#ifdef CONFIG_CIFS_POSIX
642 .ioctl = cifs_ioctl, 642 .ioctl = cifs_ioctl,
643#endif /* CONFIG_CIFS_POSIX */ 643#endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
656 .fsync = cifs_fsync, 656 .fsync = cifs_fsync,
657 .flush = cifs_flush, 657 .flush = cifs_flush,
658 .mmap = cifs_file_mmap, 658 .mmap = cifs_file_mmap,
659 .sendfile = generic_file_sendfile, 659 .splice_read = generic_file_splice_read,
660 .llseek = cifs_llseek, 660 .llseek = cifs_llseek,
661#ifdef CONFIG_CIFS_POSIX 661#ifdef CONFIG_CIFS_POSIX
662 .ioctl = cifs_ioctl, 662 .ioctl = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
676 .release = cifs_close, 676 .release = cifs_close,
677 .fsync = cifs_fsync, 677 .fsync = cifs_fsync,
678 .flush = cifs_flush, 678 .flush = cifs_flush,
679 .sendfile = generic_file_sendfile, /* BB removeme BB */ 679 .splice_read = generic_file_splice_read,
680#ifdef CONFIG_CIFS_POSIX 680#ifdef CONFIG_CIFS_POSIX
681 .ioctl = cifs_ioctl, 681 .ioctl = cifs_ioctl,
682#endif /* CONFIG_CIFS_POSIX */ 682#endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b609ec7d..99dbe866816d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
47} 47}
48 48
49static ssize_t 49static ssize_t
50coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count, 50coda_file_splice_read(struct file *coda_file, loff_t *ppos,
51 read_actor_t actor, void *target) 51 struct pipe_inode_info *pipe, size_t count,
52 unsigned int flags)
52{ 53{
53 struct coda_file_info *cfi; 54 struct coda_file_info *cfi;
54 struct file *host_file; 55 struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
57 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 58 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
58 host_file = cfi->cfi_container; 59 host_file = cfi->cfi_container;
59 60
60 if (!host_file->f_op || !host_file->f_op->sendfile) 61 if (!host_file->f_op || !host_file->f_op->splice_read)
61 return -EINVAL; 62 return -EINVAL;
62 63
63 return host_file->f_op->sendfile(host_file, ppos, count, actor, target); 64 return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
64} 65}
65 66
66static ssize_t 67static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
295 .flush = coda_flush, 296 .flush = coda_flush,
296 .release = coda_release, 297 .release = coda_release,
297 .fsync = coda_fsync, 298 .fsync = coda_fsync,
298 .sendfile = coda_file_sendfile, 299 .splice_read = coda_file_splice_read,
299}; 300};
300 301
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e493..54bcc00ec8df 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
3 3
4config DLM 4config DLM
5 tristate "Distributed Lock Manager (DLM)" 5 tristate "Distributed Lock Manager (DLM)"
6 depends on IPV6 || IPV6=n 6 depends on SYSFS && (IPV6 || IPV6=n)
7 select CONFIGFS_FS 7 select CONFIGFS_FS
8 select IP_SCTP 8 select IP_SCTP
9 help 9 help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f39..d248e60951ba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y := ast.o \
8 member.o \ 8 member.o \
9 memory.o \ 9 memory.o \
10 midcomms.o \ 10 midcomms.o \
11 netlink.o \
11 lowcomms.o \ 12 lowcomms.o \
12 rcom.o \ 13 rcom.o \
13 recover.o \ 14 recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd1434..5069b2cb5a1f 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
90 unsigned int cl_scan_secs; 90 unsigned int cl_scan_secs;
91 unsigned int cl_log_debug; 91 unsigned int cl_log_debug;
92 unsigned int cl_protocol; 92 unsigned int cl_protocol;
93 unsigned int cl_timewarn_cs;
93}; 94};
94 95
95enum { 96enum {
@@ -103,6 +104,7 @@ enum {
103 CLUSTER_ATTR_SCAN_SECS, 104 CLUSTER_ATTR_SCAN_SECS,
104 CLUSTER_ATTR_LOG_DEBUG, 105 CLUSTER_ATTR_LOG_DEBUG,
105 CLUSTER_ATTR_PROTOCOL, 106 CLUSTER_ATTR_PROTOCOL,
107 CLUSTER_ATTR_TIMEWARN_CS,
106}; 108};
107 109
108struct cluster_attribute { 110struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
162CLUSTER_ATTR(scan_secs, 1); 164CLUSTER_ATTR(scan_secs, 1);
163CLUSTER_ATTR(log_debug, 0); 165CLUSTER_ATTR(log_debug, 0);
164CLUSTER_ATTR(protocol, 0); 166CLUSTER_ATTR(protocol, 0);
167CLUSTER_ATTR(timewarn_cs, 1);
165 168
166static struct configfs_attribute *cluster_attrs[] = { 169static struct configfs_attribute *cluster_attrs[] = {
167 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 170 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
174 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr, 177 [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
175 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 178 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
176 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 179 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
180 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
177 NULL, 181 NULL,
178}; 182};
179 183
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
429 cl->cl_toss_secs = dlm_config.ci_toss_secs; 433 cl->cl_toss_secs = dlm_config.ci_toss_secs;
430 cl->cl_scan_secs = dlm_config.ci_scan_secs; 434 cl->cl_scan_secs = dlm_config.ci_scan_secs;
431 cl->cl_log_debug = dlm_config.ci_log_debug; 435 cl->cl_log_debug = dlm_config.ci_log_debug;
436 cl->cl_protocol = dlm_config.ci_protocol;
437 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
432 438
433 space_list = &sps->ss_group; 439 space_list = &sps->ss_group;
434 comm_list = &cms->cs_group; 440 comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
748 754
749static struct space *get_space(char *name) 755static struct space *get_space(char *name)
750{ 756{
757 struct config_item *i;
758
751 if (!space_list) 759 if (!space_list)
752 return NULL; 760 return NULL;
753 return to_space(config_group_find_obj(space_list, name)); 761
762 down(&space_list->cg_subsys->su_sem);
763 i = config_group_find_obj(space_list, name);
764 up(&space_list->cg_subsys->su_sem);
765
766 return to_space(i);
754} 767}
755 768
756static void put_space(struct space *sp) 769static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
776 if (cm->nodeid != nodeid) 789 if (cm->nodeid != nodeid)
777 continue; 790 continue;
778 found = 1; 791 found = 1;
792 config_item_get(i);
779 break; 793 break;
780 } else { 794 } else {
781 if (!cm->addr_count || 795 if (!cm->addr_count ||
782 memcmp(cm->addr[0], addr, sizeof(*addr))) 796 memcmp(cm->addr[0], addr, sizeof(*addr)))
783 continue; 797 continue;
784 found = 1; 798 found = 1;
799 config_item_get(i);
785 break; 800 break;
786 } 801 }
787 } 802 }
788 up(&clusters_root.subsys.su_sem); 803 up(&clusters_root.subsys.su_sem);
789 804
790 if (found) 805 if (!found)
791 config_item_get(i);
792 else
793 cm = NULL; 806 cm = NULL;
794 return cm; 807 return cm;
795} 808}
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
909#define DEFAULT_SCAN_SECS 5 922#define DEFAULT_SCAN_SECS 5
910#define DEFAULT_LOG_DEBUG 0 923#define DEFAULT_LOG_DEBUG 0
911#define DEFAULT_PROTOCOL 0 924#define DEFAULT_PROTOCOL 0
925#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
912 926
913struct dlm_config_info dlm_config = { 927struct dlm_config_info dlm_config = {
914 .ci_tcp_port = DEFAULT_TCP_PORT, 928 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
920 .ci_toss_secs = DEFAULT_TOSS_SECS, 934 .ci_toss_secs = DEFAULT_TOSS_SECS,
921 .ci_scan_secs = DEFAULT_SCAN_SECS, 935 .ci_scan_secs = DEFAULT_SCAN_SECS,
922 .ci_log_debug = DEFAULT_LOG_DEBUG, 936 .ci_log_debug = DEFAULT_LOG_DEBUG,
923 .ci_protocol = DEFAULT_PROTOCOL 937 .ci_protocol = DEFAULT_PROTOCOL,
938 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
924}; 939};
925 940
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5e..a3170fe22090 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
27 int ci_scan_secs; 27 int ci_scan_secs;
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs;
30}; 31};
31 32
32extern struct dlm_config_info dlm_config; 33extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e02..12c3bfd5e660 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
17#include <linux/debugfs.h> 17#include <linux/debugfs.h>
18 18
19#include "dlm_internal.h" 19#include "dlm_internal.h"
20#include "lock.h"
20 21
21#define DLM_DEBUG_BUF_LEN 4096 22#define DLM_DEBUG_BUF_LEN 4096
22static char debug_buf[DLM_DEBUG_BUF_LEN]; 23static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
26 27
27struct rsb_iter { 28struct rsb_iter {
28 int entry; 29 int entry;
30 int locks;
31 int header;
29 struct dlm_ls *ls; 32 struct dlm_ls *ls;
30 struct list_head *next; 33 struct list_head *next;
31 struct dlm_rsb *rsb; 34 struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
57 } 60 }
58} 61}
59 62
60static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, 63static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
61 struct dlm_rsb *res) 64 struct dlm_rsb *res)
62{ 65{
63 seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); 66 seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
64 67
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
85 struct dlm_lkb *lkb; 88 struct dlm_lkb *lkb;
86 int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list; 89 int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
87 90
91 lock_rsb(res);
92
88 seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length); 93 seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
89 for (i = 0; i < res->res_length; i++) { 94 for (i = 0; i < res->res_length; i++) {
90 if (isprint(res->res_name[i])) 95 if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
129 /* Print the locks attached to this resource */ 134 /* Print the locks attached to this resource */
130 seq_printf(s, "Granted Queue\n"); 135 seq_printf(s, "Granted Queue\n");
131 list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) 136 list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
132 print_lock(s, lkb, res); 137 print_resource_lock(s, lkb, res);
133 138
134 seq_printf(s, "Conversion Queue\n"); 139 seq_printf(s, "Conversion Queue\n");
135 list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) 140 list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
136 print_lock(s, lkb, res); 141 print_resource_lock(s, lkb, res);
137 142
138 seq_printf(s, "Waiting Queue\n"); 143 seq_printf(s, "Waiting Queue\n");
139 list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) 144 list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
140 print_lock(s, lkb, res); 145 print_resource_lock(s, lkb, res);
141 146
142 if (list_empty(&res->res_lookup)) 147 if (list_empty(&res->res_lookup))
143 goto out; 148 goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
151 seq_printf(s, "\n"); 156 seq_printf(s, "\n");
152 } 157 }
153 out: 158 out:
159 unlock_rsb(res);
160 return 0;
161}
162
163static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
164{
165 struct dlm_user_args *ua;
166 unsigned int waiting = 0;
167 uint64_t xid = 0;
168
169 if (lkb->lkb_flags & DLM_IFL_USER) {
170 ua = (struct dlm_user_args *) lkb->lkb_astparam;
171 if (ua)
172 xid = ua->xid;
173 }
174
175 if (lkb->lkb_timestamp)
176 waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
177
178 /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
179 r_nodeid r_len r_name */
180
181 seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
182 lkb->lkb_id,
183 lkb->lkb_nodeid,
184 lkb->lkb_remid,
185 lkb->lkb_ownpid,
186 (unsigned long long)xid,
187 lkb->lkb_exflags,
188 lkb->lkb_flags,
189 lkb->lkb_status,
190 lkb->lkb_grmode,
191 lkb->lkb_rqmode,
192 waiting,
193 r->res_nodeid,
194 r->res_length,
195 r->res_name);
196}
197
198static int print_locks(struct dlm_rsb *r, struct seq_file *s)
199{
200 struct dlm_lkb *lkb;
201
202 lock_rsb(r);
203
204 list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
205 print_lock(s, lkb, r);
206
207 list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
208 print_lock(s, lkb, r);
209
210 list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
211 print_lock(s, lkb, r);
212
213 unlock_rsb(r);
154 return 0; 214 return 0;
155} 215}
156 216
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
166 read_lock(&ls->ls_rsbtbl[i].lock); 226 read_lock(&ls->ls_rsbtbl[i].lock);
167 if (!list_empty(&ls->ls_rsbtbl[i].list)) { 227 if (!list_empty(&ls->ls_rsbtbl[i].list)) {
168 ri->next = ls->ls_rsbtbl[i].list.next; 228 ri->next = ls->ls_rsbtbl[i].list.next;
229 ri->rsb = list_entry(ri->next, struct dlm_rsb,
230 res_hashchain);
231 dlm_hold_rsb(ri->rsb);
169 read_unlock(&ls->ls_rsbtbl[i].lock); 232 read_unlock(&ls->ls_rsbtbl[i].lock);
170 break; 233 break;
171 } 234 }
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
176 if (ri->entry >= ls->ls_rsbtbl_size) 239 if (ri->entry >= ls->ls_rsbtbl_size)
177 return 1; 240 return 1;
178 } else { 241 } else {
242 struct dlm_rsb *old = ri->rsb;
179 i = ri->entry; 243 i = ri->entry;
180 read_lock(&ls->ls_rsbtbl[i].lock); 244 read_lock(&ls->ls_rsbtbl[i].lock);
181 ri->next = ri->next->next; 245 ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
184 ri->next = NULL; 248 ri->next = NULL;
185 ri->entry++; 249 ri->entry++;
186 read_unlock(&ls->ls_rsbtbl[i].lock); 250 read_unlock(&ls->ls_rsbtbl[i].lock);
251 dlm_put_rsb(old);
187 goto top; 252 goto top;
188 } 253 }
254 ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
255 dlm_hold_rsb(ri->rsb);
189 read_unlock(&ls->ls_rsbtbl[i].lock); 256 read_unlock(&ls->ls_rsbtbl[i].lock);
257 dlm_put_rsb(old);
190 } 258 }
191 ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
192 259
193 return 0; 260 return 0;
194} 261}
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
202{ 269{
203 struct rsb_iter *ri; 270 struct rsb_iter *ri;
204 271
205 ri = kmalloc(sizeof *ri, GFP_KERNEL); 272 ri = kzalloc(sizeof *ri, GFP_KERNEL);
206 if (!ri) 273 if (!ri)
207 return NULL; 274 return NULL;
208 275
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
260{ 327{
261 struct rsb_iter *ri = iter_ptr; 328 struct rsb_iter *ri = iter_ptr;
262 329
263 print_resource(ri->rsb, file); 330 if (ri->locks) {
331 if (ri->header) {
332 seq_printf(file, "id nodeid remid pid xid exflags flags "
333 "sts grmode rqmode time_ms r_nodeid "
334 "r_len r_name\n");
335 ri->header = 0;
336 }
337 print_locks(ri->rsb, file);
338 } else {
339 print_resource(ri->rsb, file);
340 }
264 341
265 return 0; 342 return 0;
266} 343}
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
296}; 373};
297 374
298/* 375/*
376 * Dump state in compact per-lock listing
377 */
378
379static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
380{
381 struct rsb_iter *ri;
382
383 ri = kzalloc(sizeof *ri, GFP_KERNEL);
384 if (!ri)
385 return NULL;
386
387 ri->ls = ls;
388 ri->entry = 0;
389 ri->next = NULL;
390 ri->locks = 1;
391
392 if (*pos == 0)
393 ri->header = 1;
394
395 if (rsb_iter_next(ri)) {
396 rsb_iter_free(ri);
397 return NULL;
398 }
399
400 return ri;
401}
402
403static void *locks_seq_start(struct seq_file *file, loff_t *pos)
404{
405 struct rsb_iter *ri;
406 loff_t n = *pos;
407
408 ri = locks_iter_init(file->private, pos);
409 if (!ri)
410 return NULL;
411
412 while (n--) {
413 if (rsb_iter_next(ri)) {
414 rsb_iter_free(ri);
415 return NULL;
416 }
417 }
418
419 return ri;
420}
421
422static struct seq_operations locks_seq_ops = {
423 .start = locks_seq_start,
424 .next = rsb_seq_next,
425 .stop = rsb_seq_stop,
426 .show = rsb_seq_show,
427};
428
429static int locks_open(struct inode *inode, struct file *file)
430{
431 struct seq_file *seq;
432 int ret;
433
434 ret = seq_open(file, &locks_seq_ops);
435 if (ret)
436 return ret;
437
438 seq = file->private_data;
439 seq->private = inode->i_private;
440
441 return 0;
442}
443
444static const struct file_operations locks_fops = {
445 .owner = THIS_MODULE,
446 .open = locks_open,
447 .read = seq_read,
448 .llseek = seq_lseek,
449 .release = seq_release
450};
451
452/*
299 * dump lkb's on the ls_waiters list 453 * dump lkb's on the ls_waiters list
300 */ 454 */
301 455
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
362 return -ENOMEM; 516 return -ENOMEM;
363 } 517 }
364 518
519 memset(name, 0, sizeof(name));
520 snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
521
522 ls->ls_debug_locks_dentry = debugfs_create_file(name,
523 S_IFREG | S_IRUGO,
524 dlm_root,
525 ls,
526 &locks_fops);
527 if (!ls->ls_debug_locks_dentry) {
528 debugfs_remove(ls->ls_debug_waiters_dentry);
529 debugfs_remove(ls->ls_debug_rsb_dentry);
530 return -ENOMEM;
531 }
532
365 return 0; 533 return 0;
366} 534}
367 535
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
371 debugfs_remove(ls->ls_debug_rsb_dentry); 539 debugfs_remove(ls->ls_debug_rsb_dentry);
372 if (ls->ls_debug_waiters_dentry) 540 if (ls->ls_debug_waiters_dentry)
373 debugfs_remove(ls->ls_debug_waiters_dentry); 541 debugfs_remove(ls->ls_debug_waiters_dentry);
542 if (ls->ls_debug_locks_dentry)
543 debugfs_remove(ls->ls_debug_locks_dentry);
374} 544}
375 545
376int dlm_register_debugfs(void) 546int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a0..74901e981e10 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
151 void *bastaddr; 151 void *bastaddr;
152 int mode; 152 int mode;
153 struct dlm_lksb *lksb; 153 struct dlm_lksb *lksb;
154 unsigned long timeout;
154}; 155};
155 156
156 157
@@ -213,6 +214,9 @@ struct dlm_args {
213#define DLM_IFL_OVERLAP_UNLOCK 0x00080000 214#define DLM_IFL_OVERLAP_UNLOCK 0x00080000
214#define DLM_IFL_OVERLAP_CANCEL 0x00100000 215#define DLM_IFL_OVERLAP_CANCEL 0x00100000
215#define DLM_IFL_ENDOFLIFE 0x00200000 216#define DLM_IFL_ENDOFLIFE 0x00200000
217#define DLM_IFL_WATCH_TIMEWARN 0x00400000
218#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
219#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
216#define DLM_IFL_USER 0x00000001 220#define DLM_IFL_USER 0x00000001
217#define DLM_IFL_ORPHAN 0x00000002 221#define DLM_IFL_ORPHAN 0x00000002
218 222
@@ -243,6 +247,9 @@ struct dlm_lkb {
243 struct list_head lkb_wait_reply; /* waiting for remote reply */ 247 struct list_head lkb_wait_reply; /* waiting for remote reply */
244 struct list_head lkb_astqueue; /* need ast to be sent */ 248 struct list_head lkb_astqueue; /* need ast to be sent */
245 struct list_head lkb_ownqueue; /* list of locks for a process */ 249 struct list_head lkb_ownqueue; /* list of locks for a process */
250 struct list_head lkb_time_list;
251 unsigned long lkb_timestamp;
252 unsigned long lkb_timeout_cs;
246 253
247 char *lkb_lvbptr; 254 char *lkb_lvbptr;
248 struct dlm_lksb *lkb_lksb; /* caller's status block */ 255 struct dlm_lksb *lkb_lksb; /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
447 struct mutex ls_orphans_mutex; 454 struct mutex ls_orphans_mutex;
448 struct list_head ls_orphans; 455 struct list_head ls_orphans;
449 456
457 struct mutex ls_timeout_mutex;
458 struct list_head ls_timeout;
459
450 struct list_head ls_nodes; /* current nodes in ls */ 460 struct list_head ls_nodes; /* current nodes in ls */
451 struct list_head ls_nodes_gone; /* dead node list, recovery */ 461 struct list_head ls_nodes_gone; /* dead node list, recovery */
452 int ls_num_nodes; /* number of nodes in ls */ 462 int ls_num_nodes; /* number of nodes in ls */
453 int ls_low_nodeid; 463 int ls_low_nodeid;
454 int ls_total_weight; 464 int ls_total_weight;
455 int *ls_node_array; 465 int *ls_node_array;
466 gfp_t ls_allocation;
456 467
457 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 468 struct dlm_rsb ls_stub_rsb; /* for returning errors */
458 struct dlm_lkb ls_stub_lkb; /* for returning errors */ 469 struct dlm_lkb ls_stub_lkb; /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
460 471
461 struct dentry *ls_debug_rsb_dentry; /* debugfs */ 472 struct dentry *ls_debug_rsb_dentry; /* debugfs */
462 struct dentry *ls_debug_waiters_dentry; /* debugfs */ 473 struct dentry *ls_debug_waiters_dentry; /* debugfs */
474 struct dentry *ls_debug_locks_dentry; /* debugfs */
463 475
464 wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ 476 wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
465 int ls_uevent_result; 477 int ls_uevent_result;
478 struct completion ls_members_done;
479 int ls_members_result;
466 480
467 struct miscdevice ls_device; 481 struct miscdevice ls_device;
468 482
@@ -472,6 +486,7 @@ struct dlm_ls {
472 struct task_struct *ls_recoverd_task; 486 struct task_struct *ls_recoverd_task;
473 struct mutex ls_recoverd_active; 487 struct mutex ls_recoverd_active;
474 spinlock_t ls_recover_lock; 488 spinlock_t ls_recover_lock;
489 unsigned long ls_recover_begin; /* jiffies timestamp */
475 uint32_t ls_recover_status; /* DLM_RS_ */ 490 uint32_t ls_recover_status; /* DLM_RS_ */
476 uint64_t ls_recover_seq; 491 uint64_t ls_recover_seq;
477 struct dlm_recover *ls_recover_args; 492 struct dlm_recover *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
501#define LSFL_RCOM_READY 3 516#define LSFL_RCOM_READY 3
502#define LSFL_RCOM_WAIT 4 517#define LSFL_RCOM_WAIT 4
503#define LSFL_UEVENT_WAIT 5 518#define LSFL_UEVENT_WAIT 5
519#define LSFL_TIMEWARN 6
504 520
505/* much of this is just saving user space pointers associated with the 521/* much of this is just saving user space pointers associated with the
506 lock that we pass back to the user lib with an ast */ 522 lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
518 void __user *castaddr; 534 void __user *castaddr;
519 void __user *bastparam; 535 void __user *bastparam;
520 void __user *bastaddr; 536 void __user *bastaddr;
537 uint64_t xid;
521}; 538};
522 539
523#define DLM_PROC_FLAGS_CLOSING 1 540#define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96b..b455919c1998 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
82static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb); 82static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
83static int send_remove(struct dlm_rsb *r); 83static int send_remove(struct dlm_rsb *r);
84static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 84static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
85static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, 86static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
86 struct dlm_message *ms); 87 struct dlm_message *ms);
87static int receive_extralen(struct dlm_message *ms); 88static int receive_extralen(struct dlm_message *ms);
88static void do_purge(struct dlm_ls *ls, int nodeid, int pid); 89static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
90static void del_timeout(struct dlm_lkb *lkb);
91void dlm_timeout_warn(struct dlm_lkb *lkb);
89 92
90/* 93/*
91 * Lock compatibilty matrix - thanks Steve 94 * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
194 197
195/* Threads cannot use the lockspace while it's being recovered */ 198/* Threads cannot use the lockspace while it's being recovered */
196 199
197static inline void lock_recovery(struct dlm_ls *ls) 200static inline void dlm_lock_recovery(struct dlm_ls *ls)
198{ 201{
199 down_read(&ls->ls_in_recovery); 202 down_read(&ls->ls_in_recovery);
200} 203}
201 204
202static inline void unlock_recovery(struct dlm_ls *ls) 205void dlm_unlock_recovery(struct dlm_ls *ls)
203{ 206{
204 up_read(&ls->ls_in_recovery); 207 up_read(&ls->ls_in_recovery);
205} 208}
206 209
207static inline int lock_recovery_try(struct dlm_ls *ls) 210int dlm_lock_recovery_try(struct dlm_ls *ls)
208{ 211{
209 return down_read_trylock(&ls->ls_in_recovery); 212 return down_read_trylock(&ls->ls_in_recovery);
210} 213}
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
286 if (is_master_copy(lkb)) 289 if (is_master_copy(lkb))
287 return; 290 return;
288 291
292 del_timeout(lkb);
293
289 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb);); 294 DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
290 295
296 /* if the operation was a cancel, then return -DLM_ECANCEL, if a
297 timeout caused the cancel then return -ETIMEDOUT */
298 if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
299 lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
300 rv = -ETIMEDOUT;
301 }
302
303 if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
304 lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
305 rv = -EDEADLK;
306 }
307
291 lkb->lkb_lksb->sb_status = rv; 308 lkb->lkb_lksb->sb_status = rv;
292 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; 309 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
293 310
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
581 kref_init(&lkb->lkb_ref); 598 kref_init(&lkb->lkb_ref);
582 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 599 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
583 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); 600 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
601 INIT_LIST_HEAD(&lkb->lkb_time_list);
584 602
585 get_random_bytes(&bucket, sizeof(bucket)); 603 get_random_bytes(&bucket, sizeof(bucket));
586 bucket &= (ls->ls_lkbtbl_size - 1); 604 bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
985{ 1003{
986 int i; 1004 int i;
987 1005
988 if (dlm_locking_stopped(ls))
989 return;
990
991 for (i = 0; i < ls->ls_rsbtbl_size; i++) { 1006 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
992 shrink_bucket(ls, i); 1007 shrink_bucket(ls, i);
1008 if (dlm_locking_stopped(ls))
1009 break;
993 cond_resched(); 1010 cond_resched();
994 } 1011 }
995} 1012}
996 1013
1014static void add_timeout(struct dlm_lkb *lkb)
1015{
1016 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1017
1018 if (is_master_copy(lkb)) {
1019 lkb->lkb_timestamp = jiffies;
1020 return;
1021 }
1022
1023 if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
1024 !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1025 lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
1026 goto add_it;
1027 }
1028 if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
1029 goto add_it;
1030 return;
1031
1032 add_it:
1033 DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
1034 mutex_lock(&ls->ls_timeout_mutex);
1035 hold_lkb(lkb);
1036 lkb->lkb_timestamp = jiffies;
1037 list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
1038 mutex_unlock(&ls->ls_timeout_mutex);
1039}
1040
1041static void del_timeout(struct dlm_lkb *lkb)
1042{
1043 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1044
1045 mutex_lock(&ls->ls_timeout_mutex);
1046 if (!list_empty(&lkb->lkb_time_list)) {
1047 list_del_init(&lkb->lkb_time_list);
1048 unhold_lkb(lkb);
1049 }
1050 mutex_unlock(&ls->ls_timeout_mutex);
1051}
1052
1053/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
1054 lkb_lksb_timeout without lock_rsb? Note: we can't lock timeout_mutex
1055 and then lock rsb because of lock ordering in add_timeout. We may need
1056 to specify some special timeout-related bits in the lkb that are just to
1057 be accessed under the timeout_mutex. */
1058
1059void dlm_scan_timeout(struct dlm_ls *ls)
1060{
1061 struct dlm_rsb *r;
1062 struct dlm_lkb *lkb;
1063 int do_cancel, do_warn;
1064
1065 for (;;) {
1066 if (dlm_locking_stopped(ls))
1067 break;
1068
1069 do_cancel = 0;
1070 do_warn = 0;
1071 mutex_lock(&ls->ls_timeout_mutex);
1072 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
1073
1074 if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
1075 time_after_eq(jiffies, lkb->lkb_timestamp +
1076 lkb->lkb_timeout_cs * HZ/100))
1077 do_cancel = 1;
1078
1079 if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
1080 time_after_eq(jiffies, lkb->lkb_timestamp +
1081 dlm_config.ci_timewarn_cs * HZ/100))
1082 do_warn = 1;
1083
1084 if (!do_cancel && !do_warn)
1085 continue;
1086 hold_lkb(lkb);
1087 break;
1088 }
1089 mutex_unlock(&ls->ls_timeout_mutex);
1090
1091 if (!do_cancel && !do_warn)
1092 break;
1093
1094 r = lkb->lkb_resource;
1095 hold_rsb(r);
1096 lock_rsb(r);
1097
1098 if (do_warn) {
1099 /* clear flag so we only warn once */
1100 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1101 if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
1102 del_timeout(lkb);
1103 dlm_timeout_warn(lkb);
1104 }
1105
1106 if (do_cancel) {
1107 log_debug(ls, "timeout cancel %x node %d %s",
1108 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1109 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1110 lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
1111 del_timeout(lkb);
1112 _cancel_lock(r, lkb);
1113 }
1114
1115 unlock_rsb(r);
1116 unhold_rsb(r);
1117 dlm_put_lkb(lkb);
1118 }
1119}
1120
1121/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
1122 dlm_recoverd before checking/setting ls_recover_begin. */
1123
1124void dlm_adjust_timeouts(struct dlm_ls *ls)
1125{
1126 struct dlm_lkb *lkb;
1127 long adj = jiffies - ls->ls_recover_begin;
1128
1129 ls->ls_recover_begin = 0;
1130 mutex_lock(&ls->ls_timeout_mutex);
1131 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1132 lkb->lkb_timestamp += adj;
1133 mutex_unlock(&ls->ls_timeout_mutex);
1134}
1135
997/* lkb is master or local copy */ 1136/* lkb is master or local copy */
998 1137
999static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) 1138static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1275 * queue for one resource. The granted mode of each lock blocks the requested 1414 * queue for one resource. The granted mode of each lock blocks the requested
1276 * mode of the other lock." 1415 * mode of the other lock."
1277 * 1416 *
1278 * Part 2: if the granted mode of lkb is preventing the first lkb in the 1417 * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
1279 * convert queue from being granted, then demote lkb (set grmode to NL). 1418 * convert queue from being granted, then deadlk/demote lkb.
1280 * This second form requires that we check for conv-deadlk even when
1281 * now == 0 in _can_be_granted().
1282 * 1419 *
1283 * Example: 1420 * Example:
1284 * Granted Queue: empty 1421 * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
1287 * 1424 *
1288 * The first lock can't be granted because of the granted mode of the second 1425 * The first lock can't be granted because of the granted mode of the second
1289 * lock and the second lock can't be granted because it's not first in the 1426 * lock and the second lock can't be granted because it's not first in the
1290 * list. We demote the granted mode of the second lock (the lkb passed to this 1427 * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
1291 * function). 1428 * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
1429 * flag set and return DEMOTED in the lksb flags.
1430 *
1431 * Originally, this function detected conv-deadlk in a more limited scope:
1432 * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
1433 * - if lkb1 was the first entry in the queue (not just earlier), and was
1434 * blocked by the granted mode of lkb2, and there was nothing on the
1435 * granted queue preventing lkb1 from being granted immediately, i.e.
1436 * lkb2 was the only thing preventing lkb1 from being granted.
1437 *
1438 * That second condition meant we'd only say there was conv-deadlk if
1439 * resolving it (by demotion) would lead to the first lock on the convert
1440 * queue being granted right away. It allowed conversion deadlocks to exist
1441 * between locks on the convert queue while they couldn't be granted anyway.
1292 * 1442 *
1293 * After the resolution, the "grant pending" function needs to go back and try 1443 * Now, we detect and take action on conversion deadlocks immediately when
1294 * to grant locks on the convert queue again since the first lock can now be 1444 * they're created, even if they may not be immediately consequential. If
1295 * granted. 1445 * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
1446 * mode that would prevent lkb1's conversion from being granted, we do a
1447 * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
1448 * I think this means that the lkb_is_ahead condition below should always
1449 * be zero, i.e. there will never be conv-deadlk between two locks that are
1450 * both already on the convert queue.
1296 */ 1451 */
1297 1452
1298static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) 1453static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
1299{ 1454{
1300 struct dlm_lkb *this, *first = NULL, *self = NULL; 1455 struct dlm_lkb *lkb1;
1456 int lkb_is_ahead = 0;
1301 1457
1302 list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { 1458 list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
1303 if (!first) 1459 if (lkb1 == lkb2) {
1304 first = this; 1460 lkb_is_ahead = 1;
1305 if (this == lkb) {
1306 self = lkb;
1307 continue; 1461 continue;
1308 } 1462 }
1309 1463
1310 if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) 1464 if (!lkb_is_ahead) {
1311 return 1; 1465 if (!modes_compat(lkb2, lkb1))
1312 } 1466 return 1;
1313 1467 } else {
1314 /* if lkb is on the convert queue and is preventing the first 1468 if (!modes_compat(lkb2, lkb1) &&
1315 from being granted, then there's deadlock and we demote lkb. 1469 !modes_compat(lkb1, lkb2))
1316 multiple converting locks may need to do this before the first 1470 return 1;
1317 converting lock can be granted. */ 1471 }
1318
1319 if (self && self != first) {
1320 if (!modes_compat(lkb, first) &&
1321 !queue_conflict(&rsb->res_grantqueue, first))
1322 return 1;
1323 } 1472 }
1324
1325 return 0; 1473 return 0;
1326} 1474}
1327 1475
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1450 if (!now && !conv && list_empty(&r->res_convertqueue) && 1598 if (!now && !conv && list_empty(&r->res_convertqueue) &&
1451 first_in_list(lkb, &r->res_waitqueue)) 1599 first_in_list(lkb, &r->res_waitqueue))
1452 return 1; 1600 return 1;
1453
1454 out: 1601 out:
1455 /*
1456 * The following, enabled by CONVDEADLK, departs from VMS.
1457 */
1458
1459 if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
1460 conversion_deadlock_detect(r, lkb)) {
1461 lkb->lkb_grmode = DLM_LOCK_NL;
1462 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1463 }
1464
1465 return 0; 1602 return 0;
1466} 1603}
1467 1604
1468/* 1605static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
1469 * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a 1606 int *err)
1470 * simple way to provide a big optimization to applications that can use them.
1471 */
1472
1473static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1474{ 1607{
1475 uint32_t flags = lkb->lkb_exflags;
1476 int rv; 1608 int rv;
1477 int8_t alt = 0, rqmode = lkb->lkb_rqmode; 1609 int8_t alt = 0, rqmode = lkb->lkb_rqmode;
1610 int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
1611
1612 if (err)
1613 *err = 0;
1478 1614
1479 rv = _can_be_granted(r, lkb, now); 1615 rv = _can_be_granted(r, lkb, now);
1480 if (rv) 1616 if (rv)
1481 goto out; 1617 goto out;
1482 1618
1483 if (lkb->lkb_sbflags & DLM_SBF_DEMOTED) 1619 /*
1620 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
1621 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
1622 * cancels one of the locks.
1623 */
1624
1625 if (is_convert && can_be_queued(lkb) &&
1626 conversion_deadlock_detect(r, lkb)) {
1627 if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
1628 lkb->lkb_grmode = DLM_LOCK_NL;
1629 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
1630 } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1631 if (err)
1632 *err = -EDEADLK;
1633 else {
1634 log_print("can_be_granted deadlock %x now %d",
1635 lkb->lkb_id, now);
1636 dlm_dump_rsb(r);
1637 }
1638 }
1484 goto out; 1639 goto out;
1640 }
1485 1641
1486 if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR) 1642 /*
1643 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
1644 * to grant a request in a mode other than the normal rqmode. It's a
1645 * simple way to provide a big optimization to applications that can
1646 * use them.
1647 */
1648
1649 if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
1487 alt = DLM_LOCK_PR; 1650 alt = DLM_LOCK_PR;
1488 else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW) 1651 else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
1489 alt = DLM_LOCK_CW; 1652 alt = DLM_LOCK_CW;
1490 1653
1491 if (alt) { 1654 if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
1500 return rv; 1663 return rv;
1501} 1664}
1502 1665
1666/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
1667 for locks pending on the convert list. Once verified (watch for these
1668 log_prints), we should be able to just call _can_be_granted() and not
1669 bother with the demote/deadlk cases here (and there's no easy way to deal
1670 with a deadlk here, we'd have to generate something like grant_lock with
1671 the deadlk error.) */
1672
1673/* returns the highest requested mode of all blocked conversions */
1674
1503static int grant_pending_convert(struct dlm_rsb *r, int high) 1675static int grant_pending_convert(struct dlm_rsb *r, int high)
1504{ 1676{
1505 struct dlm_lkb *lkb, *s; 1677 struct dlm_lkb *lkb, *s;
1506 int hi, demoted, quit, grant_restart, demote_restart; 1678 int hi, demoted, quit, grant_restart, demote_restart;
1679 int deadlk;
1507 1680
1508 quit = 0; 1681 quit = 0;
1509 restart: 1682 restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
1513 1686
1514 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { 1687 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
1515 demoted = is_demoted(lkb); 1688 demoted = is_demoted(lkb);
1516 if (can_be_granted(r, lkb, 0)) { 1689 deadlk = 0;
1690
1691 if (can_be_granted(r, lkb, 0, &deadlk)) {
1517 grant_lock_pending(r, lkb); 1692 grant_lock_pending(r, lkb);
1518 grant_restart = 1; 1693 grant_restart = 1;
1519 } else { 1694 continue;
1520 hi = max_t(int, lkb->lkb_rqmode, hi);
1521 if (!demoted && is_demoted(lkb))
1522 demote_restart = 1;
1523 } 1695 }
1696
1697 if (!demoted && is_demoted(lkb)) {
1698 log_print("WARN: pending demoted %x node %d %s",
1699 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1700 demote_restart = 1;
1701 continue;
1702 }
1703
1704 if (deadlk) {
1705 log_print("WARN: pending deadlock %x node %d %s",
1706 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1707 dlm_dump_rsb(r);
1708 continue;
1709 }
1710
1711 hi = max_t(int, lkb->lkb_rqmode, hi);
1524 } 1712 }
1525 1713
1526 if (grant_restart) 1714 if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
1538 struct dlm_lkb *lkb, *s; 1726 struct dlm_lkb *lkb, *s;
1539 1727
1540 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { 1728 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
1541 if (can_be_granted(r, lkb, 0)) 1729 if (can_be_granted(r, lkb, 0, NULL))
1542 grant_lock_pending(r, lkb); 1730 grant_lock_pending(r, lkb);
1543 else 1731 else
1544 high = max_t(int, lkb->lkb_rqmode, high); 1732 high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
1733} 1921}
1734 1922
1735static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags, 1923static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1736 int namelen, uint32_t parent_lkid, void *ast, 1924 int namelen, unsigned long timeout_cs, void *ast,
1737 void *astarg, void *bast, struct dlm_args *args) 1925 void *astarg, void *bast, struct dlm_args *args)
1738{ 1926{
1739 int rv = -EINVAL; 1927 int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1776 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr) 1964 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
1777 goto out; 1965 goto out;
1778 1966
1779 /* parent/child locks not yet supported */
1780 if (parent_lkid)
1781 goto out;
1782
1783 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid) 1967 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
1784 goto out; 1968 goto out;
1785 1969
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
1791 args->astaddr = ast; 1975 args->astaddr = ast;
1792 args->astparam = (long) astarg; 1976 args->astparam = (long) astarg;
1793 args->bastaddr = bast; 1977 args->bastaddr = bast;
1978 args->timeout = timeout_cs;
1794 args->mode = mode; 1979 args->mode = mode;
1795 args->lksb = lksb; 1980 args->lksb = lksb;
1796 rv = 0; 1981 rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
1845 lkb->lkb_lksb = args->lksb; 2030 lkb->lkb_lksb = args->lksb;
1846 lkb->lkb_lvbptr = args->lksb->sb_lvbptr; 2031 lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
1847 lkb->lkb_ownpid = (int) current->pid; 2032 lkb->lkb_ownpid = (int) current->pid;
2033 lkb->lkb_timeout_cs = args->timeout;
1848 rv = 0; 2034 rv = 0;
1849 out: 2035 out:
1850 return rv; 2036 return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1903 if (is_overlap(lkb)) 2089 if (is_overlap(lkb))
1904 goto out; 2090 goto out;
1905 2091
2092 /* don't let scand try to do a cancel */
2093 del_timeout(lkb);
2094
1906 if (lkb->lkb_flags & DLM_IFL_RESEND) { 2095 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1907 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL; 2096 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
1908 rv = -EBUSY; 2097 rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
1934 if (is_overlap_unlock(lkb)) 2123 if (is_overlap_unlock(lkb))
1935 goto out; 2124 goto out;
1936 2125
2126 /* don't let scand try to do a cancel */
2127 del_timeout(lkb);
2128
1937 if (lkb->lkb_flags & DLM_IFL_RESEND) { 2129 if (lkb->lkb_flags & DLM_IFL_RESEND) {
1938 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK; 2130 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
1939 rv = -EBUSY; 2131 rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1984{ 2176{
1985 int error = 0; 2177 int error = 0;
1986 2178
1987 if (can_be_granted(r, lkb, 1)) { 2179 if (can_be_granted(r, lkb, 1, NULL)) {
1988 grant_lock(r, lkb); 2180 grant_lock(r, lkb);
1989 queue_cast(r, lkb, 0); 2181 queue_cast(r, lkb, 0);
1990 goto out; 2182 goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
1994 error = -EINPROGRESS; 2186 error = -EINPROGRESS;
1995 add_lkb(r, lkb, DLM_LKSTS_WAITING); 2187 add_lkb(r, lkb, DLM_LKSTS_WAITING);
1996 send_blocking_asts(r, lkb); 2188 send_blocking_asts(r, lkb);
2189 add_timeout(lkb);
1997 goto out; 2190 goto out;
1998 } 2191 }
1999 2192
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
2009static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) 2202static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2010{ 2203{
2011 int error = 0; 2204 int error = 0;
2205 int deadlk = 0;
2012 2206
2013 /* changing an existing lock may allow others to be granted */ 2207 /* changing an existing lock may allow others to be granted */
2014 2208
2015 if (can_be_granted(r, lkb, 1)) { 2209 if (can_be_granted(r, lkb, 1, &deadlk)) {
2016 grant_lock(r, lkb); 2210 grant_lock(r, lkb);
2017 queue_cast(r, lkb, 0); 2211 queue_cast(r, lkb, 0);
2018 grant_pending_locks(r); 2212 grant_pending_locks(r);
2019 goto out; 2213 goto out;
2020 } 2214 }
2021 2215
2216 /* can_be_granted() detected that this lock would block in a conversion
2217 deadlock, so we leave it on the granted queue and return EDEADLK in
2218 the ast for the convert. */
2219
2220 if (deadlk) {
2221 /* it's left on the granted queue */
2222 log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
2223 lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
2224 lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
2225 revert_lock(r, lkb);
2226 queue_cast(r, lkb, -EDEADLK);
2227 error = -EDEADLK;
2228 goto out;
2229 }
2230
2022 /* is_demoted() means the can_be_granted() above set the grmode 2231 /* is_demoted() means the can_be_granted() above set the grmode
2023 to NL, and left us on the granted queue. This auto-demotion 2232 to NL, and left us on the granted queue. This auto-demotion
2024 (due to CONVDEADLK) might mean other locks, and/or this lock, are 2233 (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2041 del_lkb(r, lkb); 2250 del_lkb(r, lkb);
2042 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 2251 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
2043 send_blocking_asts(r, lkb); 2252 send_blocking_asts(r, lkb);
2253 add_timeout(lkb);
2044 goto out; 2254 goto out;
2045 } 2255 }
2046 2256
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2274 if (!ls) 2484 if (!ls)
2275 return -EINVAL; 2485 return -EINVAL;
2276 2486
2277 lock_recovery(ls); 2487 dlm_lock_recovery(ls);
2278 2488
2279 if (convert) 2489 if (convert)
2280 error = find_lkb(ls, lksb->sb_lkid, &lkb); 2490 error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2284 if (error) 2494 if (error)
2285 goto out; 2495 goto out;
2286 2496
2287 error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast, 2497 error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
2288 astarg, bast, &args); 2498 astarg, bast, &args);
2289 if (error) 2499 if (error)
2290 goto out_put; 2500 goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
2299 out_put: 2509 out_put:
2300 if (convert || error) 2510 if (convert || error)
2301 __put_lkb(ls, lkb); 2511 __put_lkb(ls, lkb);
2302 if (error == -EAGAIN) 2512 if (error == -EAGAIN || error == -EDEADLK)
2303 error = 0; 2513 error = 0;
2304 out: 2514 out:
2305 unlock_recovery(ls); 2515 dlm_unlock_recovery(ls);
2306 dlm_put_lockspace(ls); 2516 dlm_put_lockspace(ls);
2307 return error; 2517 return error;
2308} 2518}
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2322 if (!ls) 2532 if (!ls)
2323 return -EINVAL; 2533 return -EINVAL;
2324 2534
2325 lock_recovery(ls); 2535 dlm_lock_recovery(ls);
2326 2536
2327 error = find_lkb(ls, lkid, &lkb); 2537 error = find_lkb(ls, lkid, &lkb);
2328 if (error) 2538 if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
2344 out_put: 2554 out_put:
2345 dlm_put_lkb(lkb); 2555 dlm_put_lkb(lkb);
2346 out: 2556 out:
2347 unlock_recovery(ls); 2557 dlm_unlock_recovery(ls);
2348 dlm_put_lockspace(ls); 2558 dlm_put_lockspace(ls);
2349 return error; 2559 return error;
2350} 2560}
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
2384 pass into lowcomms_commit and a message buffer (mb) that we 2594 pass into lowcomms_commit and a message buffer (mb) that we
2385 write our data into */ 2595 write our data into */
2386 2596
2387 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); 2597 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
2388 if (!mh) 2598 if (!mh)
2389 return -ENOBUFS; 2599 return -ENOBUFS;
2390 2600
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
3111 lkb->lkb_remid = ms->m_lkid; 3321 lkb->lkb_remid = ms->m_lkid;
3112 if (is_altmode(lkb)) 3322 if (is_altmode(lkb))
3113 munge_altmode(lkb, ms); 3323 munge_altmode(lkb, ms);
3114 if (result) 3324 if (result) {
3115 add_lkb(r, lkb, DLM_LKSTS_WAITING); 3325 add_lkb(r, lkb, DLM_LKSTS_WAITING);
3116 else { 3326 add_timeout(lkb);
3327 } else {
3117 grant_lock_pc(r, lkb, ms); 3328 grant_lock_pc(r, lkb, ms);
3118 queue_cast(r, lkb, 0); 3329 queue_cast(r, lkb, 0);
3119 } 3330 }
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3172 queue_cast(r, lkb, -EAGAIN); 3383 queue_cast(r, lkb, -EAGAIN);
3173 break; 3384 break;
3174 3385
3386 case -EDEADLK:
3387 receive_flags_reply(lkb, ms);
3388 revert_lock_pc(r, lkb);
3389 queue_cast(r, lkb, -EDEADLK);
3390 break;
3391
3175 case -EINPROGRESS: 3392 case -EINPROGRESS:
3176 /* convert was queued on remote master */ 3393 /* convert was queued on remote master */
3177 receive_flags_reply(lkb, ms); 3394 receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3179 munge_demoted(lkb, ms); 3396 munge_demoted(lkb, ms);
3180 del_lkb(r, lkb); 3397 del_lkb(r, lkb);
3181 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3398 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3399 add_timeout(lkb);
3182 break; 3400 break;
3183 3401
3184 case 0: 3402 case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3298 case -DLM_ECANCEL: 3516 case -DLM_ECANCEL:
3299 receive_flags_reply(lkb, ms); 3517 receive_flags_reply(lkb, ms);
3300 revert_lock_pc(r, lkb); 3518 revert_lock_pc(r, lkb);
3301 if (ms->m_result) 3519 queue_cast(r, lkb, -DLM_ECANCEL);
3302 queue_cast(r, lkb, -DLM_ECANCEL);
3303 break; 3520 break;
3304 case 0: 3521 case 0:
3305 break; 3522 break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3424 } 3641 }
3425 } 3642 }
3426 3643
3427 if (lock_recovery_try(ls)) 3644 if (dlm_lock_recovery_try(ls))
3428 break; 3645 break;
3429 schedule(); 3646 schedule();
3430 } 3647 }
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3503 log_error(ls, "unknown message type %d", ms->m_type); 3720 log_error(ls, "unknown message type %d", ms->m_type);
3504 } 3721 }
3505 3722
3506 unlock_recovery(ls); 3723 dlm_unlock_recovery(ls);
3507 out: 3724 out:
3508 dlm_put_lockspace(ls); 3725 dlm_put_lockspace(ls);
3509 dlm_astd_wake(); 3726 dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
4034 4251
4035int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, 4252int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4036 int mode, uint32_t flags, void *name, unsigned int namelen, 4253 int mode, uint32_t flags, void *name, unsigned int namelen,
4037 uint32_t parent_lkid) 4254 unsigned long timeout_cs)
4038{ 4255{
4039 struct dlm_lkb *lkb; 4256 struct dlm_lkb *lkb;
4040 struct dlm_args args; 4257 struct dlm_args args;
4041 int error; 4258 int error;
4042 4259
4043 lock_recovery(ls); 4260 dlm_lock_recovery(ls);
4044 4261
4045 error = create_lkb(ls, &lkb); 4262 error = create_lkb(ls, &lkb);
4046 if (error) { 4263 if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4062 When DLM_IFL_USER is set, the dlm knows that this is a userspace 4279 When DLM_IFL_USER is set, the dlm knows that this is a userspace
4063 lock and that lkb_astparam is the dlm_user_args structure. */ 4280 lock and that lkb_astparam is the dlm_user_args structure. */
4064 4281
4065 error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid, 4282 error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
4066 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args); 4283 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
4067 lkb->lkb_flags |= DLM_IFL_USER; 4284 lkb->lkb_flags |= DLM_IFL_USER;
4068 ua->old_mode = DLM_LOCK_IV; 4285 ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4094 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); 4311 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
4095 spin_unlock(&ua->proc->locks_spin); 4312 spin_unlock(&ua->proc->locks_spin);
4096 out: 4313 out:
4097 unlock_recovery(ls); 4314 dlm_unlock_recovery(ls);
4098 return error; 4315 return error;
4099} 4316}
4100 4317
4101int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 4318int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4102 int mode, uint32_t flags, uint32_t lkid, char *lvb_in) 4319 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
4320 unsigned long timeout_cs)
4103{ 4321{
4104 struct dlm_lkb *lkb; 4322 struct dlm_lkb *lkb;
4105 struct dlm_args args; 4323 struct dlm_args args;
4106 struct dlm_user_args *ua; 4324 struct dlm_user_args *ua;
4107 int error; 4325 int error;
4108 4326
4109 lock_recovery(ls); 4327 dlm_lock_recovery(ls);
4110 4328
4111 error = find_lkb(ls, lkid, &lkb); 4329 error = find_lkb(ls, lkid, &lkb);
4112 if (error) 4330 if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4127 if (lvb_in && ua->lksb.sb_lvbptr) 4345 if (lvb_in && ua->lksb.sb_lvbptr)
4128 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); 4346 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
4129 4347
4348 ua->xid = ua_tmp->xid;
4130 ua->castparam = ua_tmp->castparam; 4349 ua->castparam = ua_tmp->castparam;
4131 ua->castaddr = ua_tmp->castaddr; 4350 ua->castaddr = ua_tmp->castaddr;
4132 ua->bastparam = ua_tmp->bastparam; 4351 ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4134 ua->user_lksb = ua_tmp->user_lksb; 4353 ua->user_lksb = ua_tmp->user_lksb;
4135 ua->old_mode = lkb->lkb_grmode; 4354 ua->old_mode = lkb->lkb_grmode;
4136 4355
4137 error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST, 4356 error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
4138 ua, DLM_FAKE_USER_AST, &args); 4357 DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
4139 if (error) 4358 if (error)
4140 goto out_put; 4359 goto out_put;
4141 4360
4142 error = convert_lock(ls, lkb, &args); 4361 error = convert_lock(ls, lkb, &args);
4143 4362
4144 if (error == -EINPROGRESS || error == -EAGAIN) 4363 if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
4145 error = 0; 4364 error = 0;
4146 out_put: 4365 out_put:
4147 dlm_put_lkb(lkb); 4366 dlm_put_lkb(lkb);
4148 out: 4367 out:
4149 unlock_recovery(ls); 4368 dlm_unlock_recovery(ls);
4150 kfree(ua_tmp); 4369 kfree(ua_tmp);
4151 return error; 4370 return error;
4152} 4371}
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4159 struct dlm_user_args *ua; 4378 struct dlm_user_args *ua;
4160 int error; 4379 int error;
4161 4380
4162 lock_recovery(ls); 4381 dlm_lock_recovery(ls);
4163 4382
4164 error = find_lkb(ls, lkid, &lkb); 4383 error = find_lkb(ls, lkid, &lkb);
4165 if (error) 4384 if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4194 out_put: 4413 out_put:
4195 dlm_put_lkb(lkb); 4414 dlm_put_lkb(lkb);
4196 out: 4415 out:
4197 unlock_recovery(ls); 4416 dlm_unlock_recovery(ls);
4198 kfree(ua_tmp); 4417 kfree(ua_tmp);
4199 return error; 4418 return error;
4200} 4419}
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4207 struct dlm_user_args *ua; 4426 struct dlm_user_args *ua;
4208 int error; 4427 int error;
4209 4428
4210 lock_recovery(ls); 4429 dlm_lock_recovery(ls);
4211 4430
4212 error = find_lkb(ls, lkid, &lkb); 4431 error = find_lkb(ls, lkid, &lkb);
4213 if (error) 4432 if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4231 out_put: 4450 out_put:
4232 dlm_put_lkb(lkb); 4451 dlm_put_lkb(lkb);
4233 out: 4452 out:
4234 unlock_recovery(ls); 4453 dlm_unlock_recovery(ls);
4235 kfree(ua_tmp); 4454 kfree(ua_tmp);
4236 return error; 4455 return error;
4237} 4456}
4238 4457
4458int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
4459{
4460 struct dlm_lkb *lkb;
4461 struct dlm_args args;
4462 struct dlm_user_args *ua;
4463 struct dlm_rsb *r;
4464 int error;
4465
4466 dlm_lock_recovery(ls);
4467
4468 error = find_lkb(ls, lkid, &lkb);
4469 if (error)
4470 goto out;
4471
4472 ua = (struct dlm_user_args *)lkb->lkb_astparam;
4473
4474 error = set_unlock_args(flags, ua, &args);
4475 if (error)
4476 goto out_put;
4477
4478 /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
4479
4480 r = lkb->lkb_resource;
4481 hold_rsb(r);
4482 lock_rsb(r);
4483
4484 error = validate_unlock_args(lkb, &args);
4485 if (error)
4486 goto out_r;
4487 lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
4488
4489 error = _cancel_lock(r, lkb);
4490 out_r:
4491 unlock_rsb(r);
4492 put_rsb(r);
4493
4494 if (error == -DLM_ECANCEL)
4495 error = 0;
4496 /* from validate_unlock_args() */
4497 if (error == -EBUSY)
4498 error = 0;
4499 out_put:
4500 dlm_put_lkb(lkb);
4501 out:
4502 dlm_unlock_recovery(ls);
4503 return error;
4504}
4505
4239/* lkb's that are removed from the waiters list by revert are just left on the 4506/* lkb's that are removed from the waiters list by revert are just left on the
4240 orphans list with the granted orphan locks, to be freed by purge */ 4507 orphans list with the granted orphan locks, to be freed by purge */
4241 4508
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4314{ 4581{
4315 struct dlm_lkb *lkb, *safe; 4582 struct dlm_lkb *lkb, *safe;
4316 4583
4317 lock_recovery(ls); 4584 dlm_lock_recovery(ls);
4318 4585
4319 while (1) { 4586 while (1) {
4320 lkb = del_proc_lock(ls, proc); 4587 lkb = del_proc_lock(ls, proc);
4321 if (!lkb) 4588 if (!lkb)
4322 break; 4589 break;
4590 del_timeout(lkb);
4323 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) 4591 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
4324 orphan_proc_lock(ls, lkb); 4592 orphan_proc_lock(ls, lkb);
4325 else 4593 else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4347 } 4615 }
4348 4616
4349 mutex_unlock(&ls->ls_clear_proc_locks); 4617 mutex_unlock(&ls->ls_clear_proc_locks);
4350 unlock_recovery(ls); 4618 dlm_unlock_recovery(ls);
4351} 4619}
4352 4620
4353static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) 4621static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
4429 if (nodeid != dlm_our_nodeid()) { 4697 if (nodeid != dlm_our_nodeid()) {
4430 error = send_purge(ls, nodeid, pid); 4698 error = send_purge(ls, nodeid, pid);
4431 } else { 4699 } else {
4432 lock_recovery(ls); 4700 dlm_lock_recovery(ls);
4433 if (pid == current->pid) 4701 if (pid == current->pid)
4434 purge_proc_locks(ls, proc); 4702 purge_proc_locks(ls, proc);
4435 else 4703 else
4436 do_purge(ls, nodeid, pid); 4704 do_purge(ls, nodeid, pid);
4437 unlock_recovery(ls); 4705 dlm_unlock_recovery(ls);
4438 } 4706 }
4439 return error; 4707 return error;
4440} 4708}
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec40668..1720313c22df 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
24void dlm_hold_rsb(struct dlm_rsb *r); 24void dlm_hold_rsb(struct dlm_rsb *r);
25int dlm_put_lkb(struct dlm_lkb *lkb); 25int dlm_put_lkb(struct dlm_lkb *lkb);
26void dlm_scan_rsbs(struct dlm_ls *ls); 26void dlm_scan_rsbs(struct dlm_ls *ls);
27int dlm_lock_recovery_try(struct dlm_ls *ls);
28void dlm_unlock_recovery(struct dlm_ls *ls);
29void dlm_scan_timeout(struct dlm_ls *ls);
30void dlm_adjust_timeouts(struct dlm_ls *ls);
27 31
28int dlm_purge_locks(struct dlm_ls *ls); 32int dlm_purge_locks(struct dlm_ls *ls);
29void dlm_purge_mstcpy_locks(struct dlm_rsb *r); 33void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
34int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc); 38int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
35 39
36int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, 40int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
37 uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid); 41 uint32_t flags, void *name, unsigned int namelen,
42 unsigned long timeout_cs);
38int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 43int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
39 int mode, uint32_t flags, uint32_t lkid, char *lvb_in); 44 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
45 unsigned long timeout_cs);
40int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 46int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
41 uint32_t flags, uint32_t lkid, char *lvb_in); 47 uint32_t flags, uint32_t lkid, char *lvb_in);
42int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, 48int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
43 uint32_t flags, uint32_t lkid); 49 uint32_t flags, uint32_t lkid);
44int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, 50int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
45 int nodeid, int pid); 51 int nodeid, int pid);
52int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
46void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); 53void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
47 54
48static inline int is_master(struct dlm_rsb *r) 55static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed4..1dc72105ab12 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
197 else 197 else
198 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); 198 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
199 199
200 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
201
202 /* dlm_controld will see the uevent, do the necessary group management
203 and then write to sysfs to wake us */
204
200 error = wait_event_interruptible(ls->ls_uevent_wait, 205 error = wait_event_interruptible(ls->ls_uevent_wait,
201 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); 206 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
207
208 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
209
202 if (error) 210 if (error)
203 goto out; 211 goto out;
204 212
205 error = ls->ls_uevent_result; 213 error = ls->ls_uevent_result;
206 out: 214 out:
215 if (error)
216 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
217 error, ls->ls_uevent_result);
207 return error; 218 return error;
208} 219}
209 220
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
234 struct dlm_ls *ls; 245 struct dlm_ls *ls;
235 246
236 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
237 list_for_each_entry(ls, &lslist, ls_list) 248 list_for_each_entry(ls, &lslist, ls_list) {
238 dlm_scan_rsbs(ls); 249 if (dlm_lock_recovery_try(ls)) {
250 dlm_scan_rsbs(ls);
251 dlm_scan_timeout(ls);
252 dlm_unlock_recovery(ls);
253 }
254 }
239 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); 255 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
240 } 256 }
241 return 0; 257 return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
395{ 411{
396 struct dlm_ls *ls; 412 struct dlm_ls *ls;
397 int i, size, error = -ENOMEM; 413 int i, size, error = -ENOMEM;
414 int do_unreg = 0;
398 415
399 if (namelen > DLM_LOCKSPACE_LEN) 416 if (namelen > DLM_LOCKSPACE_LEN)
400 return -EINVAL; 417 return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
417 goto out; 434 goto out;
418 memcpy(ls->ls_name, name, namelen); 435 memcpy(ls->ls_name, name, namelen);
419 ls->ls_namelen = namelen; 436 ls->ls_namelen = namelen;
420 ls->ls_exflags = flags;
421 ls->ls_lvblen = lvblen; 437 ls->ls_lvblen = lvblen;
422 ls->ls_count = 0; 438 ls->ls_count = 0;
423 ls->ls_flags = 0; 439 ls->ls_flags = 0;
424 440
441 if (flags & DLM_LSFL_TIMEWARN)
442 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
443
444 if (flags & DLM_LSFL_FS)
445 ls->ls_allocation = GFP_NOFS;
446 else
447 ls->ls_allocation = GFP_KERNEL;
448
449 /* ls_exflags are forced to match among nodes, and we don't
450 need to require all nodes to have TIMEWARN or FS set */
451 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
452
425 size = dlm_config.ci_rsbtbl_size; 453 size = dlm_config.ci_rsbtbl_size;
426 ls->ls_rsbtbl_size = size; 454 ls->ls_rsbtbl_size = size;
427 455
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
461 mutex_init(&ls->ls_waiters_mutex); 489 mutex_init(&ls->ls_waiters_mutex);
462 INIT_LIST_HEAD(&ls->ls_orphans); 490 INIT_LIST_HEAD(&ls->ls_orphans);
463 mutex_init(&ls->ls_orphans_mutex); 491 mutex_init(&ls->ls_orphans_mutex);
492 INIT_LIST_HEAD(&ls->ls_timeout);
493 mutex_init(&ls->ls_timeout_mutex);
464 494
465 INIT_LIST_HEAD(&ls->ls_nodes); 495 INIT_LIST_HEAD(&ls->ls_nodes);
466 INIT_LIST_HEAD(&ls->ls_nodes_gone); 496 INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
477 507
478 init_waitqueue_head(&ls->ls_uevent_wait); 508 init_waitqueue_head(&ls->ls_uevent_wait);
479 ls->ls_uevent_result = 0; 509 ls->ls_uevent_result = 0;
510 init_completion(&ls->ls_members_done);
511 ls->ls_members_result = -1;
480 512
481 ls->ls_recoverd_task = NULL; 513 ls->ls_recoverd_task = NULL;
482 mutex_init(&ls->ls_recoverd_active); 514 mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
513 error = dlm_recoverd_start(ls); 545 error = dlm_recoverd_start(ls);
514 if (error) { 546 if (error) {
515 log_error(ls, "can't start dlm_recoverd %d", error); 547 log_error(ls, "can't start dlm_recoverd %d", error);
516 goto out_rcomfree; 548 goto out_delist;
517 } 549 }
518 550
519 dlm_create_debug_file(ls);
520
521 error = kobject_setup(ls); 551 error = kobject_setup(ls);
522 if (error) 552 if (error)
523 goto out_del; 553 goto out_stop;
524 554
525 error = kobject_register(&ls->ls_kobj); 555 error = kobject_register(&ls->ls_kobj);
526 if (error) 556 if (error)
527 goto out_del; 557 goto out_stop;
558
559 /* let kobject handle freeing of ls if there's an error */
560 do_unreg = 1;
561
562 /* This uevent triggers dlm_controld in userspace to add us to the
563 group of nodes that are members of this lockspace (managed by the
564 cluster infrastructure.) Once it's done that, it tells us who the
565 current lockspace members are (via configfs) and then tells the
566 lockspace to start running (via sysfs) in dlm_ls_start(). */
528 567
529 error = do_uevent(ls, 1); 568 error = do_uevent(ls, 1);
530 if (error) 569 if (error)
531 goto out_unreg; 570 goto out_stop;
571
572 wait_for_completion(&ls->ls_members_done);
573 error = ls->ls_members_result;
574 if (error)
575 goto out_members;
576
577 dlm_create_debug_file(ls);
578
579 log_debug(ls, "join complete");
532 580
533 *lockspace = ls; 581 *lockspace = ls;
534 return 0; 582 return 0;
535 583
536 out_unreg: 584 out_members:
537 kobject_unregister(&ls->ls_kobj); 585 do_uevent(ls, 0);
538 out_del: 586 dlm_clear_members(ls);
539 dlm_delete_debug_file(ls); 587 kfree(ls->ls_node_array);
588 out_stop:
540 dlm_recoverd_stop(ls); 589 dlm_recoverd_stop(ls);
541 out_rcomfree: 590 out_delist:
542 spin_lock(&lslist_lock); 591 spin_lock(&lslist_lock);
543 list_del(&ls->ls_list); 592 list_del(&ls->ls_list);
544 spin_unlock(&lslist_lock); 593 spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
550 out_rsbfree: 599 out_rsbfree:
551 kfree(ls->ls_rsbtbl); 600 kfree(ls->ls_rsbtbl);
552 out_lsfree: 601 out_lsfree:
553 kfree(ls); 602 if (do_unreg)
603 kobject_unregister(&ls->ls_kobj);
604 else
605 kfree(ls);
554 out: 606 out:
555 module_put(THIS_MODULE); 607 module_put(THIS_MODULE);
556 return error; 608 return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
570 error = new_lockspace(name, namelen, lockspace, flags, lvblen); 622 error = new_lockspace(name, namelen, lockspace, flags, lvblen);
571 if (!error) 623 if (!error)
572 ls_count++; 624 ls_count++;
625 else if (!ls_count)
626 threads_stop();
573 out: 627 out:
574 mutex_unlock(&ls_lock); 628 mutex_unlock(&ls_lock);
575 return error; 629 return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
696 dlm_clear_members_gone(ls); 750 dlm_clear_members_gone(ls);
697 kfree(ls->ls_node_array); 751 kfree(ls->ls_node_array);
698 kobject_unregister(&ls->ls_kobj); 752 kobject_unregister(&ls->ls_kobj);
699 /* The ls structure will be freed when the kobject is done with */ 753 /* The ls structure will be freed when the kobject is done with */
700 754
701 mutex_lock(&ls_lock); 755 mutex_lock(&ls_lock);
702 ls_count--; 756 ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29b..0553a6158dcb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
260static void lowcomms_data_ready(struct sock *sk, int count_unused) 260static void lowcomms_data_ready(struct sock *sk, int count_unused)
261{ 261{
262 struct connection *con = sock2con(sk); 262 struct connection *con = sock2con(sk);
263 if (!test_and_set_bit(CF_READ_PENDING, &con->flags)) 263 if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
264 queue_work(recv_workqueue, &con->rwork); 264 queue_work(recv_workqueue, &con->rwork);
265} 265}
266 266
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
268{ 268{
269 struct connection *con = sock2con(sk); 269 struct connection *con = sock2con(sk);
270 270
271 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 271 if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
272 queue_work(send_workqueue, &con->swork); 272 queue_work(send_workqueue, &con->swork);
273} 273}
274 274
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
720 INIT_WORK(&othercon->rwork, process_recv_sockets); 720 INIT_WORK(&othercon->rwork, process_recv_sockets);
721 set_bit(CF_IS_OTHERCON, &othercon->flags); 721 set_bit(CF_IS_OTHERCON, &othercon->flags);
722 newcon->othercon = othercon; 722 newcon->othercon = othercon;
723 othercon->sock = newsock;
724 newsock->sk->sk_user_data = othercon;
725 add_sock(newsock, othercon);
726 addcon = othercon;
727 }
728 else {
729 printk("Extra connection from node %d attempted\n", nodeid);
730 result = -EAGAIN;
731 mutex_unlock(&newcon->sock_mutex);
732 goto accept_err;
723 } 733 }
724 othercon->sock = newsock;
725 newsock->sk->sk_user_data = othercon;
726 add_sock(newsock, othercon);
727 addcon = othercon;
728 } 734 }
729 else { 735 else {
730 newsock->sk->sk_user_data = newcon; 736 newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
1400 down(&connections_lock); 1406 down(&connections_lock);
1401 for (i = 0; i <= max_nodeid; i++) { 1407 for (i = 0; i <= max_nodeid; i++) {
1402 con = __nodeid2con(i, 0); 1408 con = __nodeid2con(i, 0);
1403 if (con) 1409 if (con) {
1404 con->flags |= 0xFF; 1410 con->flags |= 0xFF;
1411 if (con->sock)
1412 con->sock->sk->sk_user_data = NULL;
1413 }
1405 } 1414 }
1406 up(&connections_lock); 1415 up(&connections_lock);
1407 1416
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe5..eca2907f2386 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
25static inline int dlm_register_debugfs(void) { return 0; } 25static inline int dlm_register_debugfs(void) { return 0; }
26static inline void dlm_unregister_debugfs(void) { } 26static inline void dlm_unregister_debugfs(void) { }
27#endif 27#endif
28int dlm_netlink_init(void);
29void dlm_netlink_exit(void);
28 30
29static int __init init_dlm(void) 31static int __init init_dlm(void)
30{ 32{
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
50 if (error) 52 if (error)
51 goto out_debug; 53 goto out_debug;
52 54
55 error = dlm_netlink_init();
56 if (error)
57 goto out_user;
58
53 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); 59 printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
54 60
55 return 0; 61 return 0;
56 62
63 out_user:
64 dlm_user_exit();
57 out_debug: 65 out_debug:
58 dlm_unregister_debugfs(); 66 dlm_unregister_debugfs();
59 out_config: 67 out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
68 76
69static void __exit exit_dlm(void) 77static void __exit exit_dlm(void)
70{ 78{
79 dlm_netlink_exit();
71 dlm_user_exit(); 80 dlm_user_exit();
72 dlm_config_exit(); 81 dlm_config_exit();
73 dlm_memory_exit(); 82 dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd740..073599dced2a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
233 *neg_out = neg; 233 *neg_out = neg;
234 234
235 error = ping_members(ls); 235 error = ping_members(ls);
236 if (!error || error == -EPROTO) {
237 /* new_lockspace() may be waiting to know if the config
238 is good or bad */
239 ls->ls_members_result = error;
240 complete(&ls->ls_members_done);
241 }
236 if (error) 242 if (error)
237 goto out; 243 goto out;
238 244
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
284 dlm_recoverd_suspend(ls); 290 dlm_recoverd_suspend(ls);
285 ls->ls_recover_status = 0; 291 ls->ls_recover_status = 0;
286 dlm_recoverd_resume(ls); 292 dlm_recoverd_resume(ls);
293
294 if (!ls->ls_recover_begin)
295 ls->ls_recover_begin = jiffies;
287 return 0; 296 return 0;
288} 297}
289 298
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 000000000000..863b87d0dc71
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
1/*
2 * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <net/genetlink.h>
10#include <linux/dlm.h>
11#include <linux/dlm_netlink.h>
12
13#include "dlm_internal.h"
14
15static uint32_t dlm_nl_seqnum;
16static uint32_t listener_nlpid;
17
18static struct genl_family family = {
19 .id = GENL_ID_GENERATE,
20 .name = DLM_GENL_NAME,
21 .version = DLM_GENL_VERSION,
22};
23
24static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
25{
26 struct sk_buff *skb;
27 void *data;
28
29 skb = genlmsg_new(size, GFP_KERNEL);
30 if (!skb)
31 return -ENOMEM;
32
33 /* add the message headers */
34 data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
35 if (!data) {
36 nlmsg_free(skb);
37 return -EINVAL;
38 }
39
40 *skbp = skb;
41 return 0;
42}
43
44static struct dlm_lock_data *mk_data(struct sk_buff *skb)
45{
46 struct nlattr *ret;
47
48 ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
49 if (!ret)
50 return NULL;
51 return nla_data(ret);
52}
53
54static int send_data(struct sk_buff *skb)
55{
56 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
57 void *data = genlmsg_data(genlhdr);
58 int rv;
59
60 rv = genlmsg_end(skb, data);
61 if (rv < 0) {
62 nlmsg_free(skb);
63 return rv;
64 }
65
66 return genlmsg_unicast(skb, listener_nlpid);
67}
68
69static int user_cmd(struct sk_buff *skb, struct genl_info *info)
70{
71 listener_nlpid = info->snd_pid;
72 printk("user_cmd nlpid %u\n", listener_nlpid);
73 return 0;
74}
75
76static struct genl_ops dlm_nl_ops = {
77 .cmd = DLM_CMD_HELLO,
78 .doit = user_cmd,
79};
80
81int dlm_netlink_init(void)
82{
83 int rv;
84
85 rv = genl_register_family(&family);
86 if (rv)
87 return rv;
88
89 rv = genl_register_ops(&family, &dlm_nl_ops);
90 if (rv < 0)
91 goto err;
92 return 0;
93 err:
94 genl_unregister_family(&family);
95 return rv;
96}
97
98void dlm_netlink_exit(void)
99{
100 genl_unregister_ops(&family, &dlm_nl_ops);
101 genl_unregister_family(&family);
102}
103
104static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
105{
106 struct dlm_rsb *r = lkb->lkb_resource;
107 struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
108
109 memset(data, 0, sizeof(struct dlm_lock_data));
110
111 data->version = DLM_LOCK_DATA_VERSION;
112 data->nodeid = lkb->lkb_nodeid;
113 data->ownpid = lkb->lkb_ownpid;
114 data->id = lkb->lkb_id;
115 data->remid = lkb->lkb_remid;
116 data->status = lkb->lkb_status;
117 data->grmode = lkb->lkb_grmode;
118 data->rqmode = lkb->lkb_rqmode;
119 data->timestamp = lkb->lkb_timestamp;
120 if (ua)
121 data->xid = ua->xid;
122 if (r) {
123 data->lockspace_id = r->res_ls->ls_global_id;
124 data->resource_namelen = r->res_length;
125 memcpy(data->resource_name, r->res_name, r->res_length);
126 }
127}
128
129void dlm_timeout_warn(struct dlm_lkb *lkb)
130{
131 struct dlm_lock_data *data;
132 struct sk_buff *send_skb;
133 size_t size;
134 int rv;
135
136 size = nla_total_size(sizeof(struct dlm_lock_data)) +
137 nla_total_size(0); /* why this? */
138
139 rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
140 if (rv < 0)
141 return;
142
143 data = mk_data(send_skb);
144 if (!data) {
145 nlmsg_free(send_skb);
146 return;
147 }
148
149 fill_data(data, lkb);
150
151 send_data(send_skb);
152}
153
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd6153809..e3a1527cbdbe 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
38 char *mb; 38 char *mb;
39 int mb_len = sizeof(struct dlm_rcom) + len; 39 int mb_len = sizeof(struct dlm_rcom) + len;
40 40
41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb); 41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
42 if (!mh) { 42 if (!mh) {
43 log_print("create_rcom to %d type %d len %d ENOBUFS", 43 log_print("create_rcom to %d type %d len %d ENOBUFS",
44 to_nodeid, type, len); 44 to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
90 log_error(ls, "version mismatch: %x nodeid %d: %x", 90 log_error(ls, "version mismatch: %x nodeid %d: %x",
91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, 91 DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
92 rc->rc_header.h_version); 92 rc->rc_header.h_version);
93 return -EINVAL; 93 return -EPROTO;
94 } 94 }
95 95
96 if (rf->rf_lvblen != ls->ls_lvblen || 96 if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", 98 log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
99 ls->ls_lvblen, ls->ls_exflags, 99 ls->ls_lvblen, ls->ls_exflags,
100 nodeid, rf->rf_lvblen, rf->rf_lsflags); 100 nodeid, rf->rf_lvblen, rf->rf_lsflags);
101 return -EINVAL; 101 return -EPROTO;
102 } 102 }
103 return 0; 103 return 0;
104} 104}
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
386 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
387} 387}
388 388
389static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
390 struct dlm_rcom *rc_in)
390{ 391{
391 struct dlm_rcom *rc; 392 struct dlm_rcom *rc;
392 struct rcom_config *rf; 393 struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
394 char *mb; 395 char *mb;
395 int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config); 396 int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
396 397
397 mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb); 398 mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
398 if (!mh) 399 if (!mh)
399 return -ENOBUFS; 400 return -ENOBUFS;
400 memset(mb, 0, mb_len); 401 memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
464 log_print("lockspace %x from %d type %x not found", 465 log_print("lockspace %x from %d type %x not found",
465 hd->h_lockspace, nodeid, rc->rc_type); 466 hd->h_lockspace, nodeid, rc->rc_type);
466 if (rc->rc_type == DLM_RCOM_STATUS) 467 if (rc->rc_type == DLM_RCOM_STATUS)
467 send_ls_not_ready(nodeid, rc); 468 send_ls_not_ready(ls, nodeid, rc);
468 return; 469 return;
469 } 470 }
470 471
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d60249..66575997861c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
190 190
191 dlm_clear_members_gone(ls); 191 dlm_clear_members_gone(ls);
192 192
193 dlm_adjust_timeouts(ls);
194
193 error = enable_locking(ls, rv->seq); 195 error = enable_locking(ls, rv->seq);
194 if (error) { 196 if (error) {
195 log_debug(ls, "enable_locking failed %d", error); 197 log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a7..6438941ab1f8 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
33struct dlm_lock_params32 { 33struct dlm_lock_params32 {
34 __u8 mode; 34 __u8 mode;
35 __u8 namelen; 35 __u8 namelen;
36 __u16 flags; 36 __u16 unused;
37 __u32 flags;
37 __u32 lkid; 38 __u32 lkid;
38 __u32 parent; 39 __u32 parent;
39 40 __u64 xid;
41 __u64 timeout;
40 __u32 castparam; 42 __u32 castparam;
41 __u32 castaddr; 43 __u32 castaddr;
42 __u32 bastparam; 44 __u32 bastparam;
43 __u32 bastaddr; 45 __u32 bastaddr;
44 __u32 lksb; 46 __u32 lksb;
45
46 char lvb[DLM_USER_LVB_LEN]; 47 char lvb[DLM_USER_LVB_LEN];
47 char name[0]; 48 char name[0];
48}; 49};
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
68}; 69};
69 70
70struct dlm_lock_result32 { 71struct dlm_lock_result32 {
72 __u32 version[3];
71 __u32 length; 73 __u32 length;
72 __u32 user_astaddr; 74 __u32 user_astaddr;
73 __u32 user_astparam; 75 __u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
102 kb->i.lock.flags = kb32->i.lock.flags; 104 kb->i.lock.flags = kb32->i.lock.flags;
103 kb->i.lock.lkid = kb32->i.lock.lkid; 105 kb->i.lock.lkid = kb32->i.lock.lkid;
104 kb->i.lock.parent = kb32->i.lock.parent; 106 kb->i.lock.parent = kb32->i.lock.parent;
107 kb->i.lock.xid = kb32->i.lock.xid;
108 kb->i.lock.timeout = kb32->i.lock.timeout;
105 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam; 109 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
106 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr; 110 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
107 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam; 111 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
115static void compat_output(struct dlm_lock_result *res, 119static void compat_output(struct dlm_lock_result *res,
116 struct dlm_lock_result32 *res32) 120 struct dlm_lock_result32 *res32)
117{ 121{
122 res32->version[0] = res->version[0];
123 res32->version[1] = res->version[1];
124 res32->version[2] = res->version[2];
125
118 res32->user_astaddr = (__u32)(long)res->user_astaddr; 126 res32->user_astaddr = (__u32)(long)res->user_astaddr;
119 res32->user_astparam = (__u32)(long)res->user_astparam; 127 res32->user_astparam = (__u32)(long)res->user_astparam;
120 res32->user_lksb = (__u32)(long)res->user_lksb; 128 res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
130} 138}
131#endif 139#endif
132 140
141/* Figure out if this lock is at the end of its life and no longer
142 available for the application to use. The lkb still exists until
143 the final ast is read. A lock becomes EOL in three situations:
144 1. a noqueue request fails with EAGAIN
145 2. an unlock completes with EUNLOCK
146 3. a cancel of a waiting request completes with ECANCEL/EDEADLK
147 An EOL lock needs to be removed from the process's list of locks.
148 And we can't allow any new operation on an EOL lock. This is
149 not related to the lifetime of the lkb struct which is managed
150 entirely by refcount. */
151
152static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
153{
154 switch (sb_status) {
155 case -DLM_EUNLOCK:
156 return 1;
157 case -DLM_ECANCEL:
158 case -ETIMEDOUT:
159 case -EDEADLK:
160 if (lkb->lkb_grmode == DLM_LOCK_IV)
161 return 1;
162 break;
163 case -EAGAIN:
164 if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
165 return 1;
166 break;
167 }
168 return 0;
169}
170
133/* we could possibly check if the cancel of an orphan has resulted in the lkb 171/* we could possibly check if the cancel of an orphan has resulted in the lkb
134 being removed and then remove that lkb from the orphans list and free it */ 172 being removed and then remove that lkb from the orphans list and free it */
135 173
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
176 log_debug(ls, "ast overlap %x status %x %x", 214 log_debug(ls, "ast overlap %x status %x %x",
177 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); 215 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
178 216
179 /* Figure out if this lock is at the end of its life and no longer 217 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
180 available for the application to use. The lkb still exists until
181 the final ast is read. A lock becomes EOL in three situations:
182 1. a noqueue request fails with EAGAIN
183 2. an unlock completes with EUNLOCK
184 3. a cancel of a waiting request completes with ECANCEL
185 An EOL lock needs to be removed from the process's list of locks.
186 And we can't allow any new operation on an EOL lock. This is
187 not related to the lifetime of the lkb struct which is managed
188 entirely by refcount. */
189
190 if (type == AST_COMP &&
191 lkb->lkb_grmode == DLM_LOCK_IV &&
192 ua->lksb.sb_status == -EAGAIN)
193 eol = 1;
194 else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
195 (ua->lksb.sb_status == -DLM_ECANCEL &&
196 lkb->lkb_grmode == DLM_LOCK_IV))
197 eol = 1;
198 if (eol) { 218 if (eol) {
199 lkb->lkb_ast_type &= ~AST_BAST; 219 lkb->lkb_ast_type &= ~AST_BAST;
200 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; 220 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
252 ua->castaddr = params->castaddr; 272 ua->castaddr = params->castaddr;
253 ua->bastparam = params->bastparam; 273 ua->bastparam = params->bastparam;
254 ua->bastaddr = params->bastaddr; 274 ua->bastaddr = params->bastaddr;
275 ua->xid = params->xid;
255 276
256 if (params->flags & DLM_LKF_CONVERT) 277 if (params->flags & DLM_LKF_CONVERT)
257 error = dlm_user_convert(ls, ua, 278 error = dlm_user_convert(ls, ua,
258 params->mode, params->flags, 279 params->mode, params->flags,
259 params->lkid, params->lvb); 280 params->lkid, params->lvb,
281 (unsigned long) params->timeout);
260 else { 282 else {
261 error = dlm_user_request(ls, ua, 283 error = dlm_user_request(ls, ua,
262 params->mode, params->flags, 284 params->mode, params->flags,
263 params->name, params->namelen, 285 params->name, params->namelen,
264 params->parent); 286 (unsigned long) params->timeout);
265 if (!error) 287 if (!error)
266 error = ua->lksb.sb_lkid; 288 error = ua->lksb.sb_lkid;
267 } 289 }
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
299 return error; 321 return error;
300} 322}
301 323
324static int device_user_deadlock(struct dlm_user_proc *proc,
325 struct dlm_lock_params *params)
326{
327 struct dlm_ls *ls;
328 int error;
329
330 ls = dlm_find_lockspace_local(proc->lockspace);
331 if (!ls)
332 return -ENOENT;
333
334 error = dlm_user_deadlock(ls, params->flags, params->lkid);
335
336 dlm_put_lockspace(ls);
337 return error;
338}
339
302static int create_misc_device(struct dlm_ls *ls, char *name) 340static int create_misc_device(struct dlm_ls *ls, char *name)
303{ 341{
304 int error, len; 342 int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
348 return -EPERM; 386 return -EPERM;
349 387
350 error = dlm_new_lockspace(params->name, strlen(params->name), 388 error = dlm_new_lockspace(params->name, strlen(params->name),
351 &lockspace, 0, DLM_USER_LVB_LEN); 389 &lockspace, params->flags, DLM_USER_LVB_LEN);
352 if (error) 390 if (error)
353 return error; 391 return error;
354 392
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
524 error = device_user_unlock(proc, &kbuf->i.lock); 562 error = device_user_unlock(proc, &kbuf->i.lock);
525 break; 563 break;
526 564
565 case DLM_USER_DEADLOCK:
566 if (!proc) {
567 log_print("no locking on control device");
568 goto out_sig;
569 }
570 error = device_user_deadlock(proc, &kbuf->i.lock);
571 break;
572
527 case DLM_USER_CREATE_LOCKSPACE: 573 case DLM_USER_CREATE_LOCKSPACE:
528 if (proc) { 574 if (proc) {
529 log_print("create/remove only on control device"); 575 log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
641 int struct_len; 687 int struct_len;
642 688
643 memset(&result, 0, sizeof(struct dlm_lock_result)); 689 memset(&result, 0, sizeof(struct dlm_lock_result));
690 result.version[0] = DLM_DEVICE_VERSION_MAJOR;
691 result.version[1] = DLM_DEVICE_VERSION_MINOR;
692 result.version[2] = DLM_DEVICE_VERSION_PATCH;
644 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); 693 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
645 result.user_lksb = ua->user_lksb; 694 result.user_lksb = ua->user_lksb;
646 695
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
699 return error; 748 return error;
700} 749}
701 750
751static int copy_version_to_user(char __user *buf, size_t count)
752{
753 struct dlm_device_version ver;
754
755 memset(&ver, 0, sizeof(struct dlm_device_version));
756 ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
757 ver.version[1] = DLM_DEVICE_VERSION_MINOR;
758 ver.version[2] = DLM_DEVICE_VERSION_PATCH;
759
760 if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
761 return -EFAULT;
762 return sizeof(struct dlm_device_version);
763}
764
702/* a read returns a single ast described in a struct dlm_lock_result */ 765/* a read returns a single ast described in a struct dlm_lock_result */
703 766
704static ssize_t device_read(struct file *file, char __user *buf, size_t count, 767static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
710 DECLARE_WAITQUEUE(wait, current); 773 DECLARE_WAITQUEUE(wait, current);
711 int error, type=0, bmode=0, removed = 0; 774 int error, type=0, bmode=0, removed = 0;
712 775
776 if (count == sizeof(struct dlm_device_version)) {
777 error = copy_version_to_user(buf, count);
778 return error;
779 }
780
781 if (!proc) {
782 log_print("non-version read from control device %zu", count);
783 return -EINVAL;
784 }
785
713#ifdef CONFIG_COMPAT 786#ifdef CONFIG_COMPAT
714 if (count < sizeof(struct dlm_lock_result32)) 787 if (count < sizeof(struct dlm_lock_result32))
715#else 788#else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
747 } 820 }
748 } 821 }
749 822
750 if (list_empty(&proc->asts)) {
751 spin_unlock(&proc->asts_spin);
752 return -EAGAIN;
753 }
754
755 /* there may be both completion and blocking asts to return for 823 /* there may be both completion and blocking asts to return for
756 the lkb, don't remove lkb from asts list unless no asts remain */ 824 the lkb, don't remove lkb from asts list unless no asts remain */
757 825
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
823static const struct file_operations ctl_device_fops = { 891static const struct file_operations ctl_device_fops = {
824 .open = ctl_device_open, 892 .open = ctl_device_open,
825 .release = ctl_device_close, 893 .release = ctl_device_close,
894 .read = device_read,
826 .write = device_write, 895 .write = device_write,
827 .owner = THIS_MODULE, 896 .owner = THIS_MODULE,
828}; 897};
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d817078..94f456fe4d9b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
338 return rc; 338 return rc;
339} 339}
340 340
341static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos, 341static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
342 size_t count, read_actor_t actor, void *target) 342 struct pipe_inode_info *pipe, size_t count,
343 unsigned int flags)
343{ 344{
344 struct file *lower_file = NULL; 345 struct file *lower_file = NULL;
345 int rc = -EINVAL; 346 int rc = -EINVAL;
346 347
347 lower_file = ecryptfs_file_to_lower(file); 348 lower_file = ecryptfs_file_to_lower(file);
348 if (lower_file->f_op && lower_file->f_op->sendfile) 349 if (lower_file->f_op && lower_file->f_op->splice_read)
349 rc = lower_file->f_op->sendfile(lower_file, ppos, count, 350 rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
350 actor, target); 351 count, flags);
351 352
352 return rc; 353 return rc;
353} 354}
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
364 .release = ecryptfs_release, 365 .release = ecryptfs_release,
365 .fsync = ecryptfs_fsync, 366 .fsync = ecryptfs_fsync,
366 .fasync = ecryptfs_fasync, 367 .fasync = ecryptfs_fasync,
367 .sendfile = ecryptfs_sendfile, 368 .splice_read = ecryptfs_splice_read,
368}; 369};
369 370
370const struct file_operations ecryptfs_main_fops = { 371const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
381 .release = ecryptfs_release, 382 .release = ecryptfs_release,
382 .fsync = ecryptfs_fsync, 383 .fsync = ecryptfs_fsync,
383 .fasync = ecryptfs_fasync, 384 .fasync = ecryptfs_fasync,
384 .sendfile = ecryptfs_sendfile, 385 .splice_read = ecryptfs_splice_read,
385}; 386};
386 387
387static int 388static int
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2d3852..04afeecaaef3 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
53 .open = generic_file_open, 53 .open = generic_file_open,
54 .release = ext2_release_file, 54 .release = ext2_release_file,
55 .fsync = ext2_sync_file, 55 .fsync = ext2_sync_file,
56 .sendfile = generic_file_sendfile,
57 .splice_read = generic_file_splice_read, 56 .splice_read = generic_file_splice_read,
58 .splice_write = generic_file_splice_write, 57 .splice_write = generic_file_splice_write,
59}; 58};
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
71 .open = generic_file_open, 70 .open = generic_file_open,
72 .release = ext2_release_file, 71 .release = ext2_release_file,
73 .fsync = ext2_sync_file, 72 .fsync = ext2_sync_file,
74 .sendfile = xip_file_sendfile,
75}; 73};
76#endif 74#endif
77 75
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f13864536..acc4913d3019 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
120 .open = generic_file_open, 120 .open = generic_file_open,
121 .release = ext3_release_file, 121 .release = ext3_release_file,
122 .fsync = ext3_sync_file, 122 .fsync = ext3_sync_file,
123 .sendfile = generic_file_sendfile,
124 .splice_read = generic_file_splice_read, 123 .splice_read = generic_file_splice_read,
125 .splice_write = generic_file_splice_write, 124 .splice_write = generic_file_splice_write,
126}; 125};
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd2be90..d4c8186aed64 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
120 .open = generic_file_open, 120 .open = generic_file_open,
121 .release = ext4_release_file, 121 .release = ext4_release_file,
122 .fsync = ext4_sync_file, 122 .fsync = ext4_sync_file,
123 .sendfile = generic_file_sendfile,
124 .splice_read = generic_file_splice_read, 123 .splice_read = generic_file_splice_read,
125 .splice_write = generic_file_splice_write, 124 .splice_write = generic_file_splice_write,
126}; 125};
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c7461c5b..69a83b59dce8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
134 .release = fat_file_release, 134 .release = fat_file_release,
135 .ioctl = fat_generic_ioctl, 135 .ioctl = fat_generic_ioctl,
136 .fsync = file_fsync, 136 .fsync = file_fsync,
137 .sendfile = generic_file_sendfile, 137 .splice_read = generic_file_splice_read,
138}; 138};
139 139
140static int fat_cont_expand(struct inode *inode, loff_t size) 140static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995232b8..f79de7c8cdfa 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
802 .release = fuse_release, 802 .release = fuse_release,
803 .fsync = fuse_fsync, 803 .fsync = fuse_fsync,
804 .lock = fuse_file_lock, 804 .lock = fuse_file_lock,
805 .sendfile = generic_file_sendfile, 805 .splice_read = generic_file_splice_read,
806}; 806};
807 807
808static const struct file_operations fuse_direct_io_file_operations = { 808static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
814 .release = fuse_release, 814 .release = fuse_release,
815 .fsync = fuse_fsync, 815 .fsync = fuse_fsync,
816 .lock = fuse_file_lock, 816 .lock = fuse_file_lock,
817 /* no mmap and sendfile */ 817 /* no mmap and splice_read */
818}; 818};
819 819
820static const struct address_space_operations fuse_file_aops = { 820static const struct address_space_operations fuse_file_aops = {
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index e3f1ada643ac..04ad0caebedb 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o 1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ 2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ 3 glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
4 mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ 4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ 5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o 6 recovery.o rgrp.o super.o sys.o trans.o util.o
7 7
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c53a5d2d0590..cd805a66880d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
718 for (x = 0; x < rlist.rl_rgrps; x++) { 718 for (x = 0; x < rlist.rl_rgrps; x++) {
719 struct gfs2_rgrpd *rgd; 719 struct gfs2_rgrpd *rgd;
720 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 720 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
721 rg_blocks += rgd->rd_ri.ri_length; 721 rg_blocks += rgd->rd_length;
722 } 722 }
723 723
724 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 724 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
772 gfs2_free_data(ip, bstart, blen); 772 gfs2_free_data(ip, bstart, blen);
773 } 773 }
774 774
775 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 775 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
776 776
777 gfs2_dinode_out(ip, dibh->b_data); 777 gfs2_dinode_out(ip, dibh->b_data);
778 778
@@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
824 goto out_gunlock_q; 824 goto out_gunlock_q;
825 825
826 error = gfs2_trans_begin(sdp, 826 error = gfs2_trans_begin(sdp,
827 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length + 827 sdp->sd_max_height + al->al_rgd->rd_length +
828 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); 828 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
829 if (error) 829 if (error)
830 goto out_ipres; 830 goto out_ipres;
@@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
847 } 847 }
848 848
849 ip->i_di.di_size = size; 849 ip->i_di.di_size = size;
850 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 850 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
851 851
852 error = gfs2_meta_inode_buffer(ip, &dibh); 852 error = gfs2_meta_inode_buffer(ip, &dibh);
853 if (error) 853 if (error)
@@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
885 unsigned blocksize, iblock, length, pos; 885 unsigned blocksize, iblock, length, pos;
886 struct buffer_head *bh; 886 struct buffer_head *bh;
887 struct page *page; 887 struct page *page;
888 void *kaddr;
889 int err; 888 int err;
890 889
891 page = grab_cache_page(mapping, index); 890 page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
928 /* Uhhuh. Read error. Complain and punt. */ 927 /* Uhhuh. Read error. Complain and punt. */
929 if (!buffer_uptodate(bh)) 928 if (!buffer_uptodate(bh))
930 goto unlock; 929 goto unlock;
930 err = 0;
931 } 931 }
932 932
933 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) 933 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
934 gfs2_trans_add_bh(ip->i_gl, bh, 0); 934 gfs2_trans_add_bh(ip->i_gl, bh, 0);
935 935
936 kaddr = kmap_atomic(page, KM_USER0); 936 zero_user_page(page, offset, length, KM_USER0);
937 memset(kaddr + offset, 0, length);
938 flush_dcache_page(page);
939 kunmap_atomic(kaddr, KM_USER0);
940 937
941unlock: 938unlock:
942 unlock_page(page); 939 unlock_page(page);
@@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
962 959
963 if (gfs2_is_stuffed(ip)) { 960 if (gfs2_is_stuffed(ip)) {
964 ip->i_di.di_size = size; 961 ip->i_di.di_size = size;
965 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 962 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
966 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 963 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
967 gfs2_dinode_out(ip, dibh->b_data); 964 gfs2_dinode_out(ip, dibh->b_data);
968 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); 965 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
974 971
975 if (!error) { 972 if (!error) {
976 ip->i_di.di_size = size; 973 ip->i_di.di_size = size;
977 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 974 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
978 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; 975 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
979 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 976 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
980 gfs2_dinode_out(ip, dibh->b_data); 977 gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
1044 ip->i_di.di_height = 0; 1041 ip->i_di.di_height = 0;
1045 ip->i_di.di_goal_meta = 1042 ip->i_di.di_goal_meta =
1046 ip->i_di.di_goal_data = 1043 ip->i_di.di_goal_data =
1047 ip->i_num.no_addr; 1044 ip->i_no_addr;
1048 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1045 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1049 } 1046 }
1050 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1047 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1051 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; 1048 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
1052 1049
1053 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 683cb5bda870..3548d9f31e0d 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -16,6 +16,7 @@
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/gfs2_ondisk.h> 17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h> 18#include <linux/lm_interface.h>
19#include <linux/freezer.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -49,6 +50,8 @@ int gfs2_scand(void *data)
49 while (!kthread_should_stop()) { 50 while (!kthread_should_stop()) {
50 gfs2_scand_internal(sdp); 51 gfs2_scand_internal(sdp);
51 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; 52 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
53 if (freezing(current))
54 refrigerator();
52 schedule_timeout_interruptible(t); 55 schedule_timeout_interruptible(t);
53 } 56 }
54 57
@@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
74 wait_event_interruptible(sdp->sd_reclaim_wq, 77 wait_event_interruptible(sdp->sd_reclaim_wq,
75 (atomic_read(&sdp->sd_reclaim_count) || 78 (atomic_read(&sdp->sd_reclaim_count) ||
76 kthread_should_stop())); 79 kthread_should_stop()));
80 if (freezing(current))
81 refrigerator();
77 } 82 }
78 83
79 return 0; 84 return 0;
@@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
93 while (!kthread_should_stop()) { 98 while (!kthread_should_stop()) {
94 gfs2_check_journals(sdp); 99 gfs2_check_journals(sdp);
95 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; 100 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
101 if (freezing(current))
102 refrigerator();
96 schedule_timeout_interruptible(t); 103 schedule_timeout_interruptible(t);
97 } 104 }
98 105
@@ -141,6 +148,8 @@ int gfs2_logd(void *data)
141 } 148 }
142 149
143 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 150 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
151 if (freezing(current))
152 refrigerator();
144 schedule_timeout_interruptible(t); 153 schedule_timeout_interruptible(t);
145 } 154 }
146 155
@@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
191 gfs2_quota_scan(sdp); 200 gfs2_quota_scan(sdp);
192 201
193 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ; 202 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
203 if (freezing(current))
204 refrigerator();
194 schedule_timeout_interruptible(t); 205 schedule_timeout_interruptible(t);
195 } 206 }
196 207
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a96fa07b3f3b..2beb2f401aa2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
130 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 130 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
131 if (ip->i_di.di_size < offset + size) 131 if (ip->i_di.di_size < offset + size)
132 ip->i_di.di_size = offset + size; 132 ip->i_di.di_size = offset + size;
133 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 133 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
134 gfs2_dinode_out(ip, dibh->b_data); 134 gfs2_dinode_out(ip, dibh->b_data);
135 135
136 brelse(dibh); 136 brelse(dibh);
@@ -228,7 +228,7 @@ out:
228 228
229 if (ip->i_di.di_size < offset + copied) 229 if (ip->i_di.di_size < offset + copied)
230 ip->i_di.di_size = offset + copied; 230 ip->i_di.di_size = offset + copied;
231 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 231 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
232 232
233 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 233 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
234 gfs2_dinode_out(ip, dibh->b_data); 234 gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1456 if (dip->i_di.di_entries != g.offset) { 1456 if (dip->i_di.di_entries != g.offset) {
1457 fs_warn(sdp, "Number of entries corrupt in dir %llu, " 1457 fs_warn(sdp, "Number of entries corrupt in dir %llu, "
1458 "ip->i_di.di_entries (%u) != g.offset (%u)\n", 1458 "ip->i_di.di_entries (%u) != g.offset (%u)\n",
1459 (unsigned long long)dip->i_num.no_addr, 1459 (unsigned long long)dip->i_no_addr,
1460 dip->i_di.di_entries, 1460 dip->i_di.di_entries,
1461 g.offset); 1461 g.offset);
1462 error = -EIO; 1462 error = -EIO;
@@ -1488,24 +1488,55 @@ out:
1488 * Returns: errno 1488 * Returns: errno
1489 */ 1489 */
1490 1490
1491int gfs2_dir_search(struct inode *dir, const struct qstr *name, 1491struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1492 struct gfs2_inum_host *inum, unsigned int *type)
1493{ 1492{
1494 struct buffer_head *bh; 1493 struct buffer_head *bh;
1495 struct gfs2_dirent *dent; 1494 struct gfs2_dirent *dent;
1495 struct inode *inode;
1496
1497 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1498 if (dent) {
1499 if (IS_ERR(dent))
1500 return ERR_PTR(PTR_ERR(dent));
1501 inode = gfs2_inode_lookup(dir->i_sb,
1502 be16_to_cpu(dent->de_type),
1503 be64_to_cpu(dent->de_inum.no_addr),
1504 be64_to_cpu(dent->de_inum.no_formal_ino));
1505 brelse(bh);
1506 return inode;
1507 }
1508 return ERR_PTR(-ENOENT);
1509}
1510
1511int gfs2_dir_check(struct inode *dir, const struct qstr *name,
1512 const struct gfs2_inode *ip)
1513{
1514 struct buffer_head *bh;
1515 struct gfs2_dirent *dent;
1516 int ret = -ENOENT;
1496 1517
1497 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1518 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1498 if (dent) { 1519 if (dent) {
1499 if (IS_ERR(dent)) 1520 if (IS_ERR(dent))
1500 return PTR_ERR(dent); 1521 return PTR_ERR(dent);
1501 if (inum) 1522 if (ip) {
1502 gfs2_inum_in(inum, (char *)&dent->de_inum); 1523 if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
1503 if (type) 1524 goto out;
1504 *type = be16_to_cpu(dent->de_type); 1525 if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
1526 ip->i_no_formal_ino)
1527 goto out;
1528 if (unlikely(IF2DT(ip->i_inode.i_mode) !=
1529 be16_to_cpu(dent->de_type))) {
1530 gfs2_consist_inode(GFS2_I(dir));
1531 ret = -EIO;
1532 goto out;
1533 }
1534 }
1535 ret = 0;
1536out:
1505 brelse(bh); 1537 brelse(bh);
1506 return 0;
1507 } 1538 }
1508 return -ENOENT; 1539 return ret;
1509} 1540}
1510 1541
1511static int dir_new_leaf(struct inode *inode, const struct qstr *name) 1542static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1565 */ 1596 */
1566 1597
1567int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1598int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1568 const struct gfs2_inum_host *inum, unsigned type) 1599 const struct gfs2_inode *nip, unsigned type)
1569{ 1600{
1570 struct gfs2_inode *ip = GFS2_I(inode); 1601 struct gfs2_inode *ip = GFS2_I(inode);
1571 struct buffer_head *bh; 1602 struct buffer_head *bh;
@@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1580 if (IS_ERR(dent)) 1611 if (IS_ERR(dent))
1581 return PTR_ERR(dent); 1612 return PTR_ERR(dent);
1582 dent = gfs2_init_dirent(inode, dent, name, bh); 1613 dent = gfs2_init_dirent(inode, dent, name, bh);
1583 gfs2_inum_out(inum, (char *)&dent->de_inum); 1614 gfs2_inum_out(nip, dent);
1584 dent->de_type = cpu_to_be16(type); 1615 dent->de_type = cpu_to_be16(type);
1585 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { 1616 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1586 leaf = (struct gfs2_leaf *)bh->b_data; 1617 leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1592 break; 1623 break;
1593 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1624 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1594 ip->i_di.di_entries++; 1625 ip->i_di.di_entries++;
1595 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1626 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1596 gfs2_dinode_out(ip, bh->b_data); 1627 gfs2_dinode_out(ip, bh->b_data);
1597 brelse(bh); 1628 brelse(bh);
1598 error = 0; 1629 error = 0;
@@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1678 gfs2_consist_inode(dip); 1709 gfs2_consist_inode(dip);
1679 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1710 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1680 dip->i_di.di_entries--; 1711 dip->i_di.di_entries--;
1681 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; 1712 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1682 gfs2_dinode_out(dip, bh->b_data); 1713 gfs2_dinode_out(dip, bh->b_data);
1683 brelse(bh); 1714 brelse(bh);
1684 mark_inode_dirty(&dip->i_inode); 1715 mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1700 */ 1731 */
1701 1732
1702int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 1733int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1703 struct gfs2_inum_host *inum, unsigned int new_type) 1734 const struct gfs2_inode *nip, unsigned int new_type)
1704{ 1735{
1705 struct buffer_head *bh; 1736 struct buffer_head *bh;
1706 struct gfs2_dirent *dent; 1737 struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1715 return PTR_ERR(dent); 1746 return PTR_ERR(dent);
1716 1747
1717 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1748 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1718 gfs2_inum_out(inum, (char *)&dent->de_inum); 1749 gfs2_inum_out(nip, dent);
1719 dent->de_type = cpu_to_be16(new_type); 1750 dent->de_type = cpu_to_be16(new_type);
1720 1751
1721 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) { 1752 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1726 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1757 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1727 } 1758 }
1728 1759
1729 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; 1760 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1730 gfs2_dinode_out(dip, bh->b_data); 1761 gfs2_dinode_out(dip, bh->b_data);
1731 brelse(bh); 1762 brelse(bh);
1732 return 0; 1763 return 0;
@@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1867 for (x = 0; x < rlist.rl_rgrps; x++) { 1898 for (x = 0; x < rlist.rl_rgrps; x++) {
1868 struct gfs2_rgrpd *rgd; 1899 struct gfs2_rgrpd *rgd;
1869 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1900 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1870 rg_blocks += rgd->rd_ri.ri_length; 1901 rg_blocks += rgd->rd_length;
1871 } 1902 }
1872 1903
1873 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1904 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 48fe89046bba..8a468cac9328 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,15 +16,16 @@ struct inode;
16struct gfs2_inode; 16struct gfs2_inode;
17struct gfs2_inum; 17struct gfs2_inum;
18 18
19int gfs2_dir_search(struct inode *dir, const struct qstr *filename, 19struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
20 struct gfs2_inum_host *inum, unsigned int *type); 20int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
21 const struct gfs2_inode *ip);
21int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 22int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
22 const struct gfs2_inum_host *inum, unsigned int type); 23 const struct gfs2_inode *ip, unsigned int type);
23int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 24int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
24int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 25int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
25 filldir_t filldir); 26 filldir_t filldir);
26int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 27int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
27 struct gfs2_inum_host *new_inum, unsigned int new_type); 28 const struct gfs2_inode *nip, unsigned int new_type);
28 29
29int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); 30int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
30 31
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5b83ca6acab1..2a7435b5c4dc 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
254 if (error) 254 if (error)
255 return error; 255 return error;
256 256
257 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE + 257 error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
258 RES_EATTR + RES_STATFS + RES_QUOTA, blks); 258 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
259 if (error) 259 if (error)
260 goto out_gunlock; 260 goto out_gunlock;
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
300 300
301 error = gfs2_meta_inode_buffer(ip, &dibh); 301 error = gfs2_meta_inode_buffer(ip, &dibh);
302 if (!error) { 302 if (!error) {
303 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 303 ip->i_inode.i_ctime = CURRENT_TIME;
304 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 304 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
305 gfs2_dinode_out(ip, dibh->b_data); 305 gfs2_dinode_out(ip, dibh->b_data);
306 brelse(dibh); 306 brelse(dibh);
@@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
700 goto out_gunlock_q; 700 goto out_gunlock_q;
701 701
702 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), 702 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
703 blks + al->al_rgd->rd_ri.ri_length + 703 blks + al->al_rgd->rd_length +
704 RES_DINODE + RES_STATFS + RES_QUOTA, 0); 704 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
705 if (error) 705 if (error)
706 goto out_ipres; 706 goto out_ipres;
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
717 (er->er_mode & S_IFMT)); 717 (er->er_mode & S_IFMT));
718 ip->i_inode.i_mode = er->er_mode; 718 ip->i_inode.i_mode = er->er_mode;
719 } 719 }
720 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 720 ip->i_inode.i_ctime = CURRENT_TIME;
721 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 721 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
722 gfs2_dinode_out(ip, dibh->b_data); 722 gfs2_dinode_out(ip, dibh->b_data);
723 brelse(dibh); 723 brelse(dibh);
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
852 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); 852 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
853 ip->i_inode.i_mode = er->er_mode; 853 ip->i_inode.i_mode = er->er_mode;
854 } 854 }
855 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 855 ip->i_inode.i_ctime = CURRENT_TIME;
856 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 856 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
857 gfs2_dinode_out(ip, dibh->b_data); 857 gfs2_dinode_out(ip, dibh->b_data);
858 brelse(dibh); 858 brelse(dibh);
@@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1133 1133
1134 error = gfs2_meta_inode_buffer(ip, &dibh); 1134 error = gfs2_meta_inode_buffer(ip, &dibh);
1135 if (!error) { 1135 if (!error) {
1136 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1136 ip->i_inode.i_ctime = CURRENT_TIME;
1137 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1137 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1138 gfs2_dinode_out(ip, dibh->b_data); 1138 gfs2_dinode_out(ip, dibh->b_data);
1139 brelse(dibh); 1139 brelse(dibh);
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 for (x = 0; x < rlist.rl_rgrps; x++) { 1352 for (x = 0; x < rlist.rl_rgrps; x++) {
1353 struct gfs2_rgrpd *rgd; 1353 struct gfs2_rgrpd *rgd;
1354 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1354 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1355 rg_blocks += rgd->rd_ri.ri_length; 1355 rg_blocks += rgd->rd_length;
1356 } 1356 }
1357 1357
1358 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1358 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1815429a2978..3f0974e1afef 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
422static void gfs2_holder_wake(struct gfs2_holder *gh) 422static void gfs2_holder_wake(struct gfs2_holder *gh)
423{ 423{
424 clear_bit(HIF_WAIT, &gh->gh_iflags); 424 clear_bit(HIF_WAIT, &gh->gh_iflags);
425 smp_mb(); 425 smp_mb__after_clear_bit();
426 wake_up_bit(&gh->gh_iflags, HIF_WAIT); 426 wake_up_bit(&gh->gh_iflags, HIF_WAIT);
427} 427}
428 428
429static int holder_wait(void *word) 429static int just_schedule(void *word)
430{ 430{
431 schedule(); 431 schedule();
432 return 0; 432 return 0;
@@ -435,7 +435,20 @@ static int holder_wait(void *word)
435static void wait_on_holder(struct gfs2_holder *gh) 435static void wait_on_holder(struct gfs2_holder *gh)
436{ 436{
437 might_sleep(); 437 might_sleep();
438 wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE); 438 wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
439}
440
441static void gfs2_demote_wake(struct gfs2_glock *gl)
442{
443 clear_bit(GLF_DEMOTE, &gl->gl_flags);
444 smp_mb__after_clear_bit();
445 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
446}
447
448static void wait_on_demote(struct gfs2_glock *gl)
449{
450 might_sleep();
451 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
439} 452}
440 453
441/** 454/**
@@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)
528 541
529 if (gl->gl_state == gl->gl_demote_state || 542 if (gl->gl_state == gl->gl_demote_state ||
530 gl->gl_state == LM_ST_UNLOCKED) { 543 gl->gl_state == LM_ST_UNLOCKED) {
531 clear_bit(GLF_DEMOTE, &gl->gl_flags); 544 gfs2_demote_wake(gl);
532 return 0; 545 return 0;
533 } 546 }
534 set_bit(GLF_LOCK, &gl->gl_flags); 547 set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
666 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 679 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
667 */ 680 */
668 681
669static void handle_callback(struct gfs2_glock *gl, unsigned int state) 682static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
670{ 683{
671 spin_lock(&gl->gl_spin); 684 spin_lock(&gl->gl_spin);
672 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { 685 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
673 gl->gl_demote_state = state; 686 gl->gl_demote_state = state;
674 gl->gl_demote_time = jiffies; 687 gl->gl_demote_time = jiffies;
688 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
689 gl->gl_object) {
690 struct inode *inode = igrab(gl->gl_object);
691 spin_unlock(&gl->gl_spin);
692 if (inode) {
693 d_prune_aliases(inode);
694 iput(inode);
695 }
696 return;
697 }
675 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { 698 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
676 gl->gl_demote_state = state; 699 gl->gl_demote_state = state;
677 } 700 }
@@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
740 if (ret & LM_OUT_CANCELED) 763 if (ret & LM_OUT_CANCELED)
741 op_done = 0; 764 op_done = 0;
742 else 765 else
743 clear_bit(GLF_DEMOTE, &gl->gl_flags); 766 gfs2_demote_wake(gl);
744 } else { 767 } else {
745 spin_lock(&gl->gl_spin); 768 spin_lock(&gl->gl_spin);
746 list_del_init(&gh->gh_list); 769 list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
848 gfs2_assert_warn(sdp, !ret); 871 gfs2_assert_warn(sdp, !ret);
849 872
850 state_change(gl, LM_ST_UNLOCKED); 873 state_change(gl, LM_ST_UNLOCKED);
851 clear_bit(GLF_DEMOTE, &gl->gl_flags); 874 gfs2_demote_wake(gl);
852 875
853 if (glops->go_inval) 876 if (glops->go_inval)
854 glops->go_inval(gl, DIO_METADATA); 877 glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1174 const struct gfs2_glock_operations *glops = gl->gl_ops; 1197 const struct gfs2_glock_operations *glops = gl->gl_ops;
1175 1198
1176 if (gh->gh_flags & GL_NOCACHE) 1199 if (gh->gh_flags & GL_NOCACHE)
1177 handle_callback(gl, LM_ST_UNLOCKED); 1200 handle_callback(gl, LM_ST_UNLOCKED, 0);
1178 1201
1179 gfs2_glmutex_lock(gl); 1202 gfs2_glmutex_lock(gl);
1180 1203
@@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1196 spin_unlock(&gl->gl_spin); 1219 spin_unlock(&gl->gl_spin);
1197} 1220}
1198 1221
1222void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1223{
1224 struct gfs2_glock *gl = gh->gh_gl;
1225 gfs2_glock_dq(gh);
1226 wait_on_demote(gl);
1227}
1228
1199/** 1229/**
1200 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it 1230 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1201 * @gh: the holder structure 1231 * @gh: the holder structure
@@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1297 * @num_gh: the number of structures 1327 * @num_gh: the number of structures
1298 * @ghs: an array of struct gfs2_holder structures 1328 * @ghs: an array of struct gfs2_holder structures
1299 * 1329 *
1300 * Figure out how big an impact this function has. Either:
1301 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1302 * 2) Forget async stuff and just call nq_m_sync()
1303 * 3) Leave it like it is
1304 * 1330 *
1305 * Returns: 0 on success (all glocks acquired), 1331 * Returns: 0 on success (all glocks acquired),
1306 * errno on failure (no glocks acquired) 1332 * errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1308 1334
1309int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1335int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1310{ 1336{
1311 int *e; 1337 struct gfs2_holder *tmp[4];
1312 unsigned int x; 1338 struct gfs2_holder **pph = tmp;
1313 int borked = 0, serious = 0;
1314 int error = 0; 1339 int error = 0;
1315 1340
1316 if (!num_gh) 1341 switch(num_gh) {
1342 case 0:
1317 return 0; 1343 return 0;
1318 1344 case 1:
1319 if (num_gh == 1) {
1320 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); 1345 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1321 return gfs2_glock_nq(ghs); 1346 return gfs2_glock_nq(ghs);
1322 } 1347 default:
1323 1348 if (num_gh <= 4)
1324 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1325 if (!e)
1326 return -ENOMEM;
1327
1328 for (x = 0; x < num_gh; x++) {
1329 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1330 error = gfs2_glock_nq(&ghs[x]);
1331 if (error) {
1332 borked = 1;
1333 serious = error;
1334 num_gh = x;
1335 break; 1349 break;
1336 } 1350 pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
1337 } 1351 if (!pph)
1338 1352 return -ENOMEM;
1339 for (x = 0; x < num_gh; x++) {
1340 error = e[x] = glock_wait_internal(&ghs[x]);
1341 if (error) {
1342 borked = 1;
1343 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1344 serious = error;
1345 }
1346 } 1353 }
1347 1354
1348 if (!borked) { 1355 error = nq_m_sync(num_gh, ghs, pph);
1349 kfree(e);
1350 return 0;
1351 }
1352
1353 for (x = 0; x < num_gh; x++)
1354 if (!e[x])
1355 gfs2_glock_dq(&ghs[x]);
1356
1357 if (serious)
1358 error = serious;
1359 else {
1360 for (x = 0; x < num_gh; x++)
1361 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1362 &ghs[x]);
1363 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1364 }
1365 1356
1366 kfree(e); 1357 if (pph != tmp)
1358 kfree(pph);
1367 1359
1368 return error; 1360 return error;
1369} 1361}
@@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1456 if (!gl) 1448 if (!gl)
1457 return; 1449 return;
1458 1450
1459 handle_callback(gl, state); 1451 handle_callback(gl, state, 1);
1460 1452
1461 spin_lock(&gl->gl_spin); 1453 spin_lock(&gl->gl_spin);
1462 run_queue(gl); 1454 run_queue(gl);
@@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1596 if (gfs2_glmutex_trylock(gl)) { 1588 if (gfs2_glmutex_trylock(gl)) {
1597 if (list_empty(&gl->gl_holders) && 1589 if (list_empty(&gl->gl_holders) &&
1598 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1590 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1599 handle_callback(gl, LM_ST_UNLOCKED); 1591 handle_callback(gl, LM_ST_UNLOCKED, 0);
1600 gfs2_glmutex_unlock(gl); 1592 gfs2_glmutex_unlock(gl);
1601 } 1593 }
1602 1594
@@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
1709 if (gfs2_glmutex_trylock(gl)) { 1701 if (gfs2_glmutex_trylock(gl)) {
1710 if (list_empty(&gl->gl_holders) && 1702 if (list_empty(&gl->gl_holders) &&
1711 gl->gl_state != LM_ST_UNLOCKED) 1703 gl->gl_state != LM_ST_UNLOCKED)
1712 handle_callback(gl, LM_ST_UNLOCKED); 1704 handle_callback(gl, LM_ST_UNLOCKED, 0);
1713 gfs2_glmutex_unlock(gl); 1705 gfs2_glmutex_unlock(gl);
1714 } 1706 }
1715} 1707}
@@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
1823 1815
1824 print_dbg(gi, " Inode:\n"); 1816 print_dbg(gi, " Inode:\n");
1825 print_dbg(gi, " num = %llu/%llu\n", 1817 print_dbg(gi, " num = %llu/%llu\n",
1826 ip->i_num.no_formal_ino, ip->i_num.no_addr); 1818 (unsigned long long)ip->i_no_formal_ino,
1819 (unsigned long long)ip->i_no_addr);
1827 print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); 1820 print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode));
1828 print_dbg(gi, " i_flags ="); 1821 print_dbg(gi, " i_flags =");
1829 for (x = 0; x < 32; x++) 1822 for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
1909 } 1902 }
1910 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { 1903 if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
1911 print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", 1904 print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n",
1912 gl->gl_demote_state, 1905 gl->gl_demote_state, (unsigned long long)
1913 (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); 1906 (jiffies - gl->gl_demote_time)*(1000000/HZ));
1914 } 1907 }
1915 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { 1908 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
1916 if (!test_bit(GLF_LOCK, &gl->gl_flags) && 1909 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b3e152db70c8..7721ca3fff9e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
87int gfs2_glock_poll(struct gfs2_holder *gh); 87int gfs2_glock_poll(struct gfs2_holder *gh);
88int gfs2_glock_wait(struct gfs2_holder *gh); 88int gfs2_glock_wait(struct gfs2_holder *gh);
89void gfs2_glock_dq(struct gfs2_holder *gh); 89void gfs2_glock_dq(struct gfs2_holder *gh);
90void gfs2_glock_dq_wait(struct gfs2_holder *gh);
90 91
91void gfs2_glock_dq_uninit(struct gfs2_holder *gh); 92void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
92int gfs2_glock_nq_num(struct gfs2_sbd *sdp, 93int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7b82657a9910..777ca46010e8 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 ip = NULL; 156 ip = NULL;
157 157
158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
159 gfs2_log_flush(gl->gl_sbd, gl);
160 if (ip) 159 if (ip)
161 filemap_fdatawrite(ip->i_inode.i_mapping); 160 filemap_fdatawrite(ip->i_inode.i_mapping);
161 gfs2_log_flush(gl->gl_sbd, gl);
162 gfs2_meta_sync(gl); 162 gfs2_meta_sync(gl);
163 if (ip) { 163 if (ip) {
164 struct address_space *mapping = ip->i_inode.i_mapping; 164 struct address_space *mapping = ip->i_inode.i_mapping;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d995441373ab..170ba93829c0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -28,6 +28,14 @@ struct gfs2_sbd;
28 28
29typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); 29typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
30 30
31struct gfs2_log_header_host {
32 u64 lh_sequence; /* Sequence number of this transaction */
33 u32 lh_flags; /* GFS2_LOG_HEAD_... */
34 u32 lh_tail; /* Block number of log tail */
35 u32 lh_blkno;
36 u32 lh_hash;
37};
38
31/* 39/*
32 * Structure of operations that are associated with each 40 * Structure of operations that are associated with each
33 * type of element in the log. 41 * type of element in the log.
@@ -60,12 +68,23 @@ struct gfs2_bitmap {
60 u32 bi_len; 68 u32 bi_len;
61}; 69};
62 70
71struct gfs2_rgrp_host {
72 u32 rg_flags;
73 u32 rg_free;
74 u32 rg_dinodes;
75 u64 rg_igeneration;
76};
77
63struct gfs2_rgrpd { 78struct gfs2_rgrpd {
64 struct list_head rd_list; /* Link with superblock */ 79 struct list_head rd_list; /* Link with superblock */
65 struct list_head rd_list_mru; 80 struct list_head rd_list_mru;
66 struct list_head rd_recent; /* Recently used rgrps */ 81 struct list_head rd_recent; /* Recently used rgrps */
67 struct gfs2_glock *rd_gl; /* Glock for this rgrp */ 82 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
68 struct gfs2_rindex_host rd_ri; 83 u64 rd_addr; /* grp block disk address */
84 u64 rd_data0; /* first data location */
85 u32 rd_length; /* length of rgrp header in fs blocks */
86 u32 rd_data; /* num of data blocks in rgrp */
87 u32 rd_bitbytes; /* number of bytes in data bitmaps */
69 struct gfs2_rgrp_host rd_rg; 88 struct gfs2_rgrp_host rd_rg;
70 u64 rd_rg_vn; 89 u64 rd_rg_vn;
71 struct gfs2_bitmap *rd_bits; 90 struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@ struct gfs2_rgrpd {
76 u32 rd_last_alloc_data; 95 u32 rd_last_alloc_data;
77 u32 rd_last_alloc_meta; 96 u32 rd_last_alloc_meta;
78 struct gfs2_sbd *rd_sbd; 97 struct gfs2_sbd *rd_sbd;
98 unsigned long rd_flags;
99#define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */
79}; 100};
80 101
81enum gfs2_state_bits { 102enum gfs2_state_bits {
@@ -211,10 +232,24 @@ enum {
211 GIF_SW_PAGED = 3, 232 GIF_SW_PAGED = 3,
212}; 233};
213 234
235struct gfs2_dinode_host {
236 u64 di_size; /* number of bytes in file */
237 u64 di_blocks; /* number of blocks in file */
238 u64 di_goal_meta; /* rgrp to alloc from next */
239 u64 di_goal_data; /* data block goal */
240 u64 di_generation; /* generation number for NFS */
241 u32 di_flags; /* GFS2_DIF_... */
242 u16 di_height; /* height of metadata */
243 /* These only apply to directories */
244 u16 di_depth; /* Number of bits in the table */
245 u32 di_entries; /* The number of entries in the directory */
246 u64 di_eattr; /* extended attribute block number */
247};
248
214struct gfs2_inode { 249struct gfs2_inode {
215 struct inode i_inode; 250 struct inode i_inode;
216 struct gfs2_inum_host i_num; 251 u64 i_no_addr;
217 252 u64 i_no_formal_ino;
218 unsigned long i_flags; /* GIF_... */ 253 unsigned long i_flags; /* GIF_... */
219 254
220 struct gfs2_dinode_host i_di; /* To be replaced by ref to block */ 255 struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@ enum {
275 QDF_LOCKED = 2, 310 QDF_LOCKED = 2,
276}; 311};
277 312
278struct gfs2_quota_lvb {
279 __be32 qb_magic;
280 u32 __pad;
281 __be64 qb_limit; /* Hard limit of # blocks to alloc */
282 __be64 qb_warn; /* Warn user when alloc is above this # */
283 __be64 qb_value; /* Current # blocks allocated */
284};
285
286struct gfs2_quota_data { 313struct gfs2_quota_data {
287 struct list_head qd_list; 314 struct list_head qd_list;
288 unsigned int qd_count; 315 unsigned int qd_count;
@@ -327,7 +354,9 @@ struct gfs2_trans {
327 354
328 unsigned int tr_num_buf; 355 unsigned int tr_num_buf;
329 unsigned int tr_num_buf_new; 356 unsigned int tr_num_buf_new;
357 unsigned int tr_num_databuf_new;
330 unsigned int tr_num_buf_rm; 358 unsigned int tr_num_buf_rm;
359 unsigned int tr_num_databuf_rm;
331 struct list_head tr_list_buf; 360 struct list_head tr_list_buf;
332 361
333 unsigned int tr_num_revoke; 362 unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@ struct gfs2_jdesc {
354 unsigned int jd_blocks; 383 unsigned int jd_blocks;
355}; 384};
356 385
386struct gfs2_statfs_change_host {
387 s64 sc_total;
388 s64 sc_free;
389 s64 sc_dinodes;
390};
391
357#define GFS2_GLOCKD_DEFAULT 1 392#define GFS2_GLOCKD_DEFAULT 1
358#define GFS2_GLOCKD_MAX 16 393#define GFS2_GLOCKD_MAX 16
359 394
@@ -426,6 +461,28 @@ enum {
426 461
427#define GFS2_FSNAME_LEN 256 462#define GFS2_FSNAME_LEN 256
428 463
464struct gfs2_inum_host {
465 u64 no_formal_ino;
466 u64 no_addr;
467};
468
469struct gfs2_sb_host {
470 u32 sb_magic;
471 u32 sb_type;
472 u32 sb_format;
473
474 u32 sb_fs_format;
475 u32 sb_multihost_format;
476 u32 sb_bsize;
477 u32 sb_bsize_shift;
478
479 struct gfs2_inum_host sb_master_dir;
480 struct gfs2_inum_host sb_root_dir;
481
482 char sb_lockproto[GFS2_LOCKNAME_LEN];
483 char sb_locktable[GFS2_LOCKNAME_LEN];
484};
485
429struct gfs2_sbd { 486struct gfs2_sbd {
430 struct super_block *sd_vfs; 487 struct super_block *sd_vfs;
431 struct super_block *sd_vfs_meta; 488 struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@ struct gfs2_sbd {
544 601
545 unsigned int sd_log_blks_reserved; 602 unsigned int sd_log_blks_reserved;
546 unsigned int sd_log_commited_buf; 603 unsigned int sd_log_commited_buf;
604 unsigned int sd_log_commited_databuf;
547 unsigned int sd_log_commited_revoke; 605 unsigned int sd_log_commited_revoke;
548 606
549 unsigned int sd_log_num_gl; 607 unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@ struct gfs2_sbd {
552 unsigned int sd_log_num_rg; 610 unsigned int sd_log_num_rg;
553 unsigned int sd_log_num_databuf; 611 unsigned int sd_log_num_databuf;
554 unsigned int sd_log_num_jdata; 612 unsigned int sd_log_num_jdata;
555 unsigned int sd_log_num_hdrs;
556 613
557 struct list_head sd_log_le_gl; 614 struct list_head sd_log_le_gl;
558 struct list_head sd_log_le_buf; 615 struct list_head sd_log_le_buf;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df0b8b3018b9..34f7bcdea1e9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,12 +38,17 @@
38#include "trans.h" 38#include "trans.h"
39#include "util.h" 39#include "util.h"
40 40
41struct gfs2_inum_range_host {
42 u64 ir_start;
43 u64 ir_length;
44};
45
41static int iget_test(struct inode *inode, void *opaque) 46static int iget_test(struct inode *inode, void *opaque)
42{ 47{
43 struct gfs2_inode *ip = GFS2_I(inode); 48 struct gfs2_inode *ip = GFS2_I(inode);
44 struct gfs2_inum_host *inum = opaque; 49 u64 *no_addr = opaque;
45 50
46 if (ip->i_num.no_addr == inum->no_addr && 51 if (ip->i_no_addr == *no_addr &&
47 inode->i_private != NULL) 52 inode->i_private != NULL)
48 return 1; 53 return 1;
49 54
@@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
53static int iget_set(struct inode *inode, void *opaque) 58static int iget_set(struct inode *inode, void *opaque)
54{ 59{
55 struct gfs2_inode *ip = GFS2_I(inode); 60 struct gfs2_inode *ip = GFS2_I(inode);
56 struct gfs2_inum_host *inum = opaque; 61 u64 *no_addr = opaque;
57 62
58 ip->i_num = *inum; 63 inode->i_ino = (unsigned long)*no_addr;
59 inode->i_ino = inum->no_addr; 64 ip->i_no_addr = *no_addr;
60 return 0; 65 return 0;
61} 66}
62 67
63struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum) 68struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
69{
70 unsigned long hash = (unsigned long)no_addr;
71 return ilookup5(sb, hash, iget_test, &no_addr);
72}
73
74static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
64{ 75{
65 return ilookup5(sb, (unsigned long)inum->no_addr, 76 unsigned long hash = (unsigned long)no_addr;
66 iget_test, inum); 77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
67} 78}
68 79
69static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum) 80/**
81 * GFS2 lookup code fills in vfs inode contents based on info obtained
82 * from directory entry inside gfs2_inode_lookup(). This has caused issues
83 * with NFS code path since its get_dentry routine doesn't have the relevant
84 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
85 * segment inside gfs2_inode_lookup code needs to get moved around.
86 *
87 * Clean up I_LOCK and I_NEW as well.
88 **/
89
90void gfs2_set_iop(struct inode *inode)
70{ 91{
71 return iget5_locked(sb, (unsigned long)inum->no_addr, 92 umode_t mode = inode->i_mode;
72 iget_test, iget_set, inum); 93
94 if (S_ISREG(mode)) {
95 inode->i_op = &gfs2_file_iops;
96 inode->i_fop = &gfs2_file_fops;
97 inode->i_mapping->a_ops = &gfs2_file_aops;
98 } else if (S_ISDIR(mode)) {
99 inode->i_op = &gfs2_dir_iops;
100 inode->i_fop = &gfs2_dir_fops;
101 } else if (S_ISLNK(mode)) {
102 inode->i_op = &gfs2_symlink_iops;
103 } else {
104 inode->i_op = &gfs2_dev_iops;
105 }
106
107 unlock_new_inode(inode);
73} 108}
74 109
75/** 110/**
76 * gfs2_inode_lookup - Lookup an inode 111 * gfs2_inode_lookup - Lookup an inode
77 * @sb: The super block 112 * @sb: The super block
78 * @inum: The inode number 113 * @no_addr: The inode number
79 * @type: The type of the inode 114 * @type: The type of the inode
80 * 115 *
81 * Returns: A VFS inode, or an error 116 * Returns: A VFS inode, or an error
82 */ 117 */
83 118
84struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type) 119struct inode *gfs2_inode_lookup(struct super_block *sb,
120 unsigned int type,
121 u64 no_addr,
122 u64 no_formal_ino)
85{ 123{
86 struct inode *inode = gfs2_iget(sb, inum); 124 struct inode *inode = gfs2_iget(sb, no_addr);
87 struct gfs2_inode *ip = GFS2_I(inode); 125 struct gfs2_inode *ip = GFS2_I(inode);
88 struct gfs2_glock *io_gl; 126 struct gfs2_glock *io_gl;
89 int error; 127 int error;
@@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
93 131
94 if (inode->i_state & I_NEW) { 132 if (inode->i_state & I_NEW) {
95 struct gfs2_sbd *sdp = GFS2_SB(inode); 133 struct gfs2_sbd *sdp = GFS2_SB(inode);
96 umode_t mode = DT2IF(type);
97 inode->i_private = ip; 134 inode->i_private = ip;
98 inode->i_mode = mode; 135 ip->i_no_formal_ino = no_formal_ino;
99
100 if (S_ISREG(mode)) {
101 inode->i_op = &gfs2_file_iops;
102 inode->i_fop = &gfs2_file_fops;
103 inode->i_mapping->a_ops = &gfs2_file_aops;
104 } else if (S_ISDIR(mode)) {
105 inode->i_op = &gfs2_dir_iops;
106 inode->i_fop = &gfs2_dir_fops;
107 } else if (S_ISLNK(mode)) {
108 inode->i_op = &gfs2_symlink_iops;
109 } else {
110 inode->i_op = &gfs2_dev_iops;
111 }
112 136
113 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 137 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
114 if (unlikely(error)) 138 if (unlikely(error))
115 goto fail; 139 goto fail;
116 ip->i_gl->gl_object = ip; 140 ip->i_gl->gl_object = ip;
117 141
118 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 142 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
119 if (unlikely(error)) 143 if (unlikely(error))
120 goto fail_put; 144 goto fail_put;
121 145
@@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
123 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 147 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
124 if (unlikely(error)) 148 if (unlikely(error))
125 goto fail_iopen; 149 goto fail_iopen;
150 ip->i_iopen_gh.gh_gl->gl_object = ip;
126 151
127 gfs2_glock_put(io_gl); 152 gfs2_glock_put(io_gl);
128 unlock_new_inode(inode); 153
154 if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
155 goto gfs2_nfsbypass;
156
157 inode->i_mode = DT2IF(type);
158
159 /*
160 * We must read the inode in order to work out its type in
161 * this case. Note that this doesn't happen often as we normally
162 * know the type beforehand. This code path only occurs during
163 * unlinked inode recovery (where it is safe to do this glock,
164 * which is not true in the general case).
165 */
166 if (type == DT_UNKNOWN) {
167 struct gfs2_holder gh;
168 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
169 if (unlikely(error))
170 goto fail_glock;
171 /* Inode is now uptodate */
172 gfs2_glock_dq_uninit(&gh);
173 }
174
175 gfs2_set_iop(inode);
129 } 176 }
130 177
178gfs2_nfsbypass:
131 return inode; 179 return inode;
180fail_glock:
181 gfs2_glock_dq(&ip->i_iopen_gh);
132fail_iopen: 182fail_iopen:
133 gfs2_glock_put(io_gl); 183 gfs2_glock_put(io_gl);
134fail_put: 184fail_put:
@@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
144 struct gfs2_dinode_host *di = &ip->i_di; 194 struct gfs2_dinode_host *di = &ip->i_di;
145 const struct gfs2_dinode *str = buf; 195 const struct gfs2_dinode *str = buf;
146 196
147 if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) { 197 if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
148 if (gfs2_consist_inode(ip)) 198 if (gfs2_consist_inode(ip))
149 gfs2_dinode_print(ip); 199 gfs2_dinode_print(ip);
150 return -EIO; 200 return -EIO;
151 } 201 }
152 if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino)) 202 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
153 return -ESTALE;
154
155 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 203 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
156 ip->i_inode.i_rdev = 0; 204 ip->i_inode.i_rdev = 0;
157 switch (ip->i_inode.i_mode & S_IFMT) { 205 switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
175 di->di_blocks = be64_to_cpu(str->di_blocks); 223 di->di_blocks = be64_to_cpu(str->di_blocks);
176 gfs2_set_inode_blocks(&ip->i_inode); 224 gfs2_set_inode_blocks(&ip->i_inode);
177 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); 225 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
178 ip->i_inode.i_atime.tv_nsec = 0; 226 ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
179 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 227 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
180 ip->i_inode.i_mtime.tv_nsec = 0; 228 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
181 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 229 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
182 ip->i_inode.i_ctime.tv_nsec = 0; 230 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
183 231
184 di->di_goal_meta = be64_to_cpu(str->di_goal_meta); 232 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
185 di->di_goal_data = be64_to_cpu(str->di_goal_data); 233 di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
247 if (error) 295 if (error)
248 goto out_qs; 296 goto out_qs;
249 297
250 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); 298 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
251 if (!rgd) { 299 if (!rgd) {
252 gfs2_consist_inode(ip); 300 gfs2_consist_inode(ip);
253 error = -EIO; 301 error = -EIO;
@@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
314 else 362 else
315 drop_nlink(&ip->i_inode); 363 drop_nlink(&ip->i_inode);
316 364
317 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 365 ip->i_inode.i_ctime = CURRENT_TIME;
318 366
319 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 367 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
320 gfs2_dinode_out(ip, dibh->b_data); 368 gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
366 struct super_block *sb = dir->i_sb; 414 struct super_block *sb = dir->i_sb;
367 struct gfs2_inode *dip = GFS2_I(dir); 415 struct gfs2_inode *dip = GFS2_I(dir);
368 struct gfs2_holder d_gh; 416 struct gfs2_holder d_gh;
369 struct gfs2_inum_host inum; 417 int error = 0;
370 unsigned int type;
371 int error;
372 struct inode *inode = NULL; 418 struct inode *inode = NULL;
373 int unlock = 0; 419 int unlock = 0;
374 420
@@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
395 goto out; 441 goto out;
396 } 442 }
397 443
398 error = gfs2_dir_search(dir, name, &inum, &type); 444 inode = gfs2_dir_search(dir, name);
399 if (error) 445 if (IS_ERR(inode))
400 goto out; 446 error = PTR_ERR(inode);
401
402 inode = gfs2_inode_lookup(sb, &inum, type);
403
404out: 447out:
405 if (unlock) 448 if (unlock)
406 gfs2_glock_dq_uninit(&d_gh); 449 gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@ out:
409 return inode ? inode : ERR_PTR(error); 452 return inode ? inode : ERR_PTR(error);
410} 453}
411 454
455static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
456{
457 const struct gfs2_inum_range *str = buf;
458
459 ir->ir_start = be64_to_cpu(str->ir_start);
460 ir->ir_length = be64_to_cpu(str->ir_length);
461}
462
463static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
464{
465 struct gfs2_inum_range *str = buf;
466
467 str->ir_start = cpu_to_be64(ir->ir_start);
468 str->ir_length = cpu_to_be64(ir->ir_length);
469}
470
412static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 471static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
413{ 472{
414 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 473 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
548 if (!dip->i_inode.i_nlink) 607 if (!dip->i_inode.i_nlink)
549 return -EPERM; 608 return -EPERM;
550 609
551 error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL); 610 error = gfs2_dir_check(&dip->i_inode, name, NULL);
552 switch (error) { 611 switch (error) {
553 case -ENOENT: 612 case -ENOENT:
554 error = 0; 613 error = 0;
@@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
588 *gid = current->fsgid; 647 *gid = current->fsgid;
589} 648}
590 649
591static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum, 650static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
592 u64 *generation)
593{ 651{
594 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 652 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
595 int error; 653 int error;
@@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
605 if (error) 663 if (error)
606 goto out_ipreserv; 664 goto out_ipreserv;
607 665
608 inum->no_addr = gfs2_alloc_di(dip, generation); 666 *no_addr = gfs2_alloc_di(dip, generation);
609 667
610 gfs2_trans_end(sdp); 668 gfs2_trans_end(sdp);
611 669
@@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
635 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 693 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
636 struct gfs2_dinode *di; 694 struct gfs2_dinode *di;
637 struct buffer_head *dibh; 695 struct buffer_head *dibh;
696 struct timespec tv = CURRENT_TIME;
638 697
639 dibh = gfs2_meta_new(gl, inum->no_addr); 698 dibh = gfs2_meta_new(gl, inum->no_addr);
640 gfs2_trans_add_bh(gl, dibh, 1); 699 gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
650 di->di_nlink = 0; 709 di->di_nlink = 0;
651 di->di_size = 0; 710 di->di_size = 0;
652 di->di_blocks = cpu_to_be64(1); 711 di->di_blocks = cpu_to_be64(1);
653 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds()); 712 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
654 di->di_major = cpu_to_be32(MAJOR(dev)); 713 di->di_major = cpu_to_be32(MAJOR(dev));
655 di->di_minor = cpu_to_be32(MINOR(dev)); 714 di->di_minor = cpu_to_be32(MINOR(dev));
656 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 715 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
680 di->di_entries = 0; 739 di->di_entries = 0;
681 memset(&di->__pad4, 0, sizeof(di->__pad4)); 740 memset(&di->__pad4, 0, sizeof(di->__pad4));
682 di->di_eattr = 0; 741 di->di_eattr = 0;
742 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
743 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
744 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
683 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 745 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
684 746
685 brelse(dibh); 747 brelse(dibh);
@@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
749 goto fail_quota_locks; 811 goto fail_quota_locks;
750 812
751 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 813 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
752 al->al_rgd->rd_ri.ri_length + 814 al->al_rgd->rd_length +
753 2 * RES_DINODE + 815 2 * RES_DINODE +
754 RES_STATFS + RES_QUOTA, 0); 816 RES_STATFS + RES_QUOTA, 0);
755 if (error) 817 if (error)
@@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
760 goto fail_quota_locks; 822 goto fail_quota_locks;
761 } 823 }
762 824
763 error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode)); 825 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
764 if (error) 826 if (error)
765 goto fail_end_trans; 827 goto fail_end_trans;
766 828
@@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
840struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 902struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
841 unsigned int mode, dev_t dev) 903 unsigned int mode, dev_t dev)
842{ 904{
843 struct inode *inode; 905 struct inode *inode = NULL;
844 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 906 struct gfs2_inode *dip = ghs->gh_gl->gl_object;
845 struct inode *dir = &dip->i_inode; 907 struct inode *dir = &dip->i_inode;
846 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 908 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
847 struct gfs2_inum_host inum; 909 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
848 int error; 910 int error;
849 u64 generation; 911 u64 generation;
850 912
@@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
864 if (error) 926 if (error)
865 goto fail_gunlock; 927 goto fail_gunlock;
866 928
867 error = alloc_dinode(dip, &inum, &generation); 929 error = alloc_dinode(dip, &inum.no_addr, &generation);
868 if (error) 930 if (error)
869 goto fail_gunlock; 931 goto fail_gunlock;
870 932
@@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
877 if (error) 939 if (error)
878 goto fail_gunlock2; 940 goto fail_gunlock2;
879 941
880 inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode)); 942 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
943 inum.no_addr,
944 inum.no_formal_ino);
881 if (IS_ERR(inode)) 945 if (IS_ERR(inode))
882 goto fail_gunlock2; 946 goto fail_gunlock2;
883 947
884 error = gfs2_inode_refresh(GFS2_I(inode)); 948 error = gfs2_inode_refresh(GFS2_I(inode));
885 if (error) 949 if (error)
886 goto fail_iput; 950 goto fail_gunlock2;
887 951
888 error = gfs2_acl_create(dip, GFS2_I(inode)); 952 error = gfs2_acl_create(dip, GFS2_I(inode));
889 if (error) 953 if (error)
890 goto fail_iput; 954 goto fail_gunlock2;
891 955
892 error = gfs2_security_init(dip, GFS2_I(inode)); 956 error = gfs2_security_init(dip, GFS2_I(inode));
893 if (error) 957 if (error)
894 goto fail_iput; 958 goto fail_gunlock2;
895 959
896 error = link_dinode(dip, name, GFS2_I(inode)); 960 error = link_dinode(dip, name, GFS2_I(inode));
897 if (error) 961 if (error)
898 goto fail_iput; 962 goto fail_gunlock2;
899 963
900 if (!inode) 964 if (!inode)
901 return ERR_PTR(-ENOMEM); 965 return ERR_PTR(-ENOMEM);
902 return inode; 966 return inode;
903 967
904fail_iput:
905 iput(inode);
906fail_gunlock2: 968fail_gunlock2:
907 gfs2_glock_dq_uninit(ghs + 1); 969 gfs2_glock_dq_uninit(ghs + 1);
970 if (inode)
971 iput(inode);
908fail_gunlock: 972fail_gunlock:
909 gfs2_glock_dq(ghs); 973 gfs2_glock_dq(ghs);
910fail: 974fail:
@@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
976 */ 1040 */
977 1041
978int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1042int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
979 struct gfs2_inode *ip) 1043 const struct gfs2_inode *ip)
980{ 1044{
981 struct gfs2_inum_host inum;
982 unsigned int type;
983 int error; 1045 int error;
984 1046
985 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1047 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
997 if (error) 1059 if (error)
998 return error; 1060 return error;
999 1061
1000 error = gfs2_dir_search(&dip->i_inode, name, &inum, &type); 1062 error = gfs2_dir_check(&dip->i_inode, name, ip);
1001 if (error) 1063 if (error)
1002 return error; 1064 return error;
1003 1065
1004 if (!gfs2_inum_equal(&inum, &ip->i_num))
1005 return -ENOENT;
1006
1007 if (IF2DT(ip->i_inode.i_mode) != type) {
1008 gfs2_consist_inode(dip);
1009 return -EIO;
1010 }
1011
1012 return 0; 1066 return 0;
1013} 1067}
1014 1068
@@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1132 struct gfs2_glock *gl = gh->gh_gl; 1186 struct gfs2_glock *gl = gh->gh_gl;
1133 struct gfs2_sbd *sdp = gl->gl_sbd; 1187 struct gfs2_sbd *sdp = gl->gl_sbd;
1134 struct gfs2_inode *ip = gl->gl_object; 1188 struct gfs2_inode *ip = gl->gl_object;
1135 s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum); 1189 s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1136 unsigned int state; 1190 unsigned int state;
1137 int flags; 1191 int flags;
1138 int error; 1192 int error;
1193 struct timespec tv = CURRENT_TIME;
1139 1194
1140 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || 1195 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1141 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || 1196 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1153 (sdp->sd_vfs->s_flags & MS_RDONLY)) 1208 (sdp->sd_vfs->s_flags & MS_RDONLY))
1154 return 0; 1209 return 0;
1155 1210
1156 curtime = get_seconds(); 1211 if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
1157 if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
1158 gfs2_glock_dq(gh); 1212 gfs2_glock_dq(gh);
1159 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, 1213 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
1160 gh); 1214 gh);
@@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1165 /* Verify that atime hasn't been updated while we were 1219 /* Verify that atime hasn't been updated while we were
1166 trying to get exclusive lock. */ 1220 trying to get exclusive lock. */
1167 1221
1168 curtime = get_seconds(); 1222 tv = CURRENT_TIME;
1169 if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { 1223 if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
1170 struct buffer_head *dibh; 1224 struct buffer_head *dibh;
1171 struct gfs2_dinode *di; 1225 struct gfs2_dinode *di;
1172 1226
@@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1180 if (error) 1234 if (error)
1181 goto fail_end_trans; 1235 goto fail_end_trans;
1182 1236
1183 ip->i_inode.i_atime.tv_sec = curtime; 1237 ip->i_inode.i_atime = tv;
1184 1238
1185 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1239 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1186 di = (struct gfs2_dinode *)dibh->b_data; 1240 di = (struct gfs2_dinode *)dibh->b_data;
1187 di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1241 di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
1242 di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
1188 brelse(dibh); 1243 brelse(dibh);
1189 1244
1190 gfs2_trans_end(sdp); 1245 gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1252 return error; 1307 return error;
1253} 1308}
1254 1309
1310void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
1311{
1312 const struct gfs2_dinode_host *di = &ip->i_di;
1313 struct gfs2_dinode *str = buf;
1314
1315 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
1316 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
1317 str->di_header.__pad0 = 0;
1318 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
1319 str->di_header.__pad1 = 0;
1320 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
1321 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
1322 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
1323 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
1324 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
1325 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
1326 str->di_size = cpu_to_be64(di->di_size);
1327 str->di_blocks = cpu_to_be64(di->di_blocks);
1328 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
1329 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
1330 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
1331
1332 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
1333 str->di_goal_data = cpu_to_be64(di->di_goal_data);
1334 str->di_generation = cpu_to_be64(di->di_generation);
1335
1336 str->di_flags = cpu_to_be32(di->di_flags);
1337 str->di_height = cpu_to_be16(di->di_height);
1338 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
1339 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
1340 GFS2_FORMAT_DE : 0);
1341 str->di_depth = cpu_to_be16(di->di_depth);
1342 str->di_entries = cpu_to_be32(di->di_entries);
1343
1344 str->di_eattr = cpu_to_be64(di->di_eattr);
1345 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
1346 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
1347 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
1348}
1349
1350void gfs2_dinode_print(const struct gfs2_inode *ip)
1351{
1352 const struct gfs2_dinode_host *di = &ip->i_di;
1353
1354 printk(KERN_INFO " no_formal_ino = %llu\n",
1355 (unsigned long long)ip->i_no_formal_ino);
1356 printk(KERN_INFO " no_addr = %llu\n",
1357 (unsigned long long)ip->i_no_addr);
1358 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
1359 printk(KERN_INFO " di_blocks = %llu\n",
1360 (unsigned long long)di->di_blocks);
1361 printk(KERN_INFO " di_goal_meta = %llu\n",
1362 (unsigned long long)di->di_goal_meta);
1363 printk(KERN_INFO " di_goal_data = %llu\n",
1364 (unsigned long long)di->di_goal_data);
1365 printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags);
1366 printk(KERN_INFO " di_height = %u\n", di->di_height);
1367 printk(KERN_INFO " di_depth = %u\n", di->di_depth);
1368 printk(KERN_INFO " di_entries = %u\n", di->di_entries);
1369 printk(KERN_INFO " di_eattr = %llu\n",
1370 (unsigned long long)di->di_eattr);
1371}
1372
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b57f448b15bc..4517ac82c01c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,17 +10,17 @@
10#ifndef __INODE_DOT_H__ 10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__ 11#define __INODE_DOT_H__
12 12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip) 13static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
14{ 14{
15 return !ip->i_di.di_height; 15 return !ip->i_di.di_height;
16} 16}
17 17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip) 18static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
19{ 19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA; 20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21} 21}
22 22
23static inline int gfs2_is_dir(struct gfs2_inode *ip) 23static inline int gfs2_is_dir(const struct gfs2_inode *ip)
24{ 24{
25 return S_ISDIR(ip->i_inode.i_mode); 25 return S_ISDIR(ip->i_inode.i_mode);
26} 26}
@@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
32 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); 32 (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
33} 33}
34 34
35static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
36 u64 no_formal_ino)
37{
38 return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
39}
40
41static inline void gfs2_inum_out(const struct gfs2_inode *ip,
42 struct gfs2_dirent *dent)
43{
44 dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
45 dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
46}
47
48
35void gfs2_inode_attr_in(struct gfs2_inode *ip); 49void gfs2_inode_attr_in(struct gfs2_inode *ip);
36struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type); 50void gfs2_set_iop(struct inode *inode);
37struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum); 51struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
52 u64 no_addr, u64 no_formal_ino);
53struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
38 54
39int gfs2_inode_refresh(struct gfs2_inode *ip); 55int gfs2_inode_refresh(struct gfs2_inode *ip);
40 56
@@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
47int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 63int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
48 struct gfs2_inode *ip); 64 struct gfs2_inode *ip);
49int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 65int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
50 struct gfs2_inode *ip); 66 const struct gfs2_inode *ip);
51int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); 67int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
52int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); 68int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
53int gfs2_glock_nq_atime(struct gfs2_holder *gh); 69int gfs2_glock_nq_atime(struct gfs2_holder *gh);
54int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 70int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
55struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 71struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
72void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
73void gfs2_dinode_print(const struct gfs2_inode *ip);
56 74
57#endif /* __INODE_DOT_H__ */ 75#endif /* __INODE_DOT_H__ */
58 76
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index c305255bfe8a..542a797ac89a 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
174 lp->cur = DLM_LOCK_IV; 174 lp->cur = DLM_LOCK_IV;
175 lp->lvb = NULL; 175 lp->lvb = NULL;
176 lp->hold_null = NULL; 176 lp->hold_null = NULL;
177 init_completion(&lp->ast_wait);
178 INIT_LIST_HEAD(&lp->clist); 177 INIT_LIST_HEAD(&lp->clist);
179 INIT_LIST_HEAD(&lp->blist); 178 INIT_LIST_HEAD(&lp->blist);
180 INIT_LIST_HEAD(&lp->delay_list); 179 INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
399 lp->lksb.sb_lvbptr = NULL; 398 lp->lksb.sb_lvbptr = NULL;
400} 399}
401 400
401static int gdlm_ast_wait(void *word)
402{
403 schedule();
404 return 0;
405}
406
402/* This can do a synchronous dlm request (requiring a lock_dlm thread to get 407/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
403 the completion) because gfs won't call hold_lvb() during a callback (from 408 the completion) because gfs won't call hold_lvb() during a callback (from
404 the context of a lock_dlm thread). */ 409 the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
424 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE; 429 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
425 set_bit(LFL_NOBAST, &lpn->flags); 430 set_bit(LFL_NOBAST, &lpn->flags);
426 set_bit(LFL_INLOCK, &lpn->flags); 431 set_bit(LFL_INLOCK, &lpn->flags);
432 set_bit(LFL_AST_WAIT, &lpn->flags);
427 433
428 init_completion(&lpn->ast_wait);
429 gdlm_do_lock(lpn); 434 gdlm_do_lock(lpn);
430 wait_for_completion(&lpn->ast_wait); 435 wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
431 error = lpn->lksb.sb_status; 436 error = lpn->lksb.sb_status;
432 if (error) { 437 if (error) {
433 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n", 438 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index d074c6e6f9bf..24d70f73b651 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -101,6 +101,7 @@ enum {
101 LFL_NOBAST = 10, 101 LFL_NOBAST = 10,
102 LFL_HEADQUE = 11, 102 LFL_HEADQUE = 11,
103 LFL_UNLOCK_DELETE = 12, 103 LFL_UNLOCK_DELETE = 12,
104 LFL_AST_WAIT = 13,
104}; 105};
105 106
106struct gdlm_lock { 107struct gdlm_lock {
@@ -117,7 +118,6 @@ struct gdlm_lock {
117 unsigned long flags; /* lock_dlm flags LFL_ */ 118 unsigned long flags; /* lock_dlm flags LFL_ */
118 119
119 int bast_mode; /* protected by async_lock */ 120 int bast_mode; /* protected by async_lock */
120 struct completion ast_wait;
121 121
122 struct list_head clist; /* complete */ 122 struct list_head clist; /* complete */
123 struct list_head blist; /* blocking */ 123 struct list_head blist; /* blocking */
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 1d8faa3da8af..41c5b04caaba 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,
147 147
148 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), 148 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
149 &ls->dlm_lockspace, 149 &ls->dlm_lockspace,
150 nodir ? DLM_LSFL_NODIR : 0, 150 DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
151 GDLM_LVB_SIZE); 151 GDLM_LVB_SIZE);
152 if (error) { 152 if (error) {
153 log_error("dlm_new_lockspace error %d", error); 153 log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index f82495e18c2d..fba1f1d87e4f 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
242 op->info.number = name->ln_number; 242 op->info.number = name->ln_number;
243 op->info.start = fl->fl_start; 243 op->info.start = fl->fl_start;
244 op->info.end = fl->fl_end; 244 op->info.end = fl->fl_end;
245 245 op->info.owner = (__u64)(long) fl->fl_owner;
246 246
247 send_op(op); 247 send_op(op);
248 wait_event(recv_wq, (op->done != 0)); 248 wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
254 } 254 }
255 spin_unlock(&ops_lock); 255 spin_unlock(&ops_lock);
256 256
257 /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
258 -ENOENT if there are no locks on the file */
259
257 rv = op->info.rv; 260 rv = op->info.rv;
258 261
259 fl->fl_type = F_UNLCK; 262 fl->fl_type = F_UNLCK;
260 if (rv == -ENOENT) 263 if (rv == -ENOENT)
261 rv = 0; 264 rv = 0;
262 else if (rv == 0 && op->info.pid != fl->fl_pid) { 265 else if (rv > 0) {
263 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; 266 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
264 fl->fl_pid = op->info.pid; 267 fl->fl_pid = op->info.pid;
265 fl->fl_start = op->info.start; 268 fl->fl_start = op->info.start;
266 fl->fl_end = op->info.end; 269 fl->fl_end = op->info.end;
270 rv = 0;
267 } 271 }
268 272
269 kfree(op); 273 kfree(op);
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 9cf1f168eaf8..1aca51e45092 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
44 ls->fscb(ls->sdp, cb, &lp->lockname); 44 ls->fscb(ls->sdp, cb, &lp->lockname);
45} 45}
46 46
47static void wake_up_ast(struct gdlm_lock *lp)
48{
49 clear_bit(LFL_AST_WAIT, &lp->flags);
50 smp_mb__after_clear_bit();
51 wake_up_bit(&lp->flags, LFL_AST_WAIT);
52}
53
47static void process_complete(struct gdlm_lock *lp) 54static void process_complete(struct gdlm_lock *lp)
48{ 55{
49 struct gdlm_ls *ls = lp->ls; 56 struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
136 */ 143 */
137 144
138 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) { 145 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
139 complete(&lp->ast_wait); 146 wake_up_ast(lp);
140 return; 147 return;
141 } 148 }
142 149
@@ -214,7 +221,7 @@ out:
214 if (test_bit(LFL_INLOCK, &lp->flags)) { 221 if (test_bit(LFL_INLOCK, &lp->flags)) {
215 clear_bit(LFL_NOBLOCK, &lp->flags); 222 clear_bit(LFL_NOBLOCK, &lp->flags);
216 lp->cur = lp->req; 223 lp->cur = lp->req;
217 complete(&lp->ast_wait); 224 wake_up_ast(lp);
218 return; 225 return;
219 } 226 }
220 227
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 291415ddfe51..f49a12e24086 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
83 83
84 gfs2_assert(sdp, bd->bd_ail == ai); 84 gfs2_assert(sdp, bd->bd_ail == ai);
85 85
86 if (!bh){
87 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
88 continue;
89 }
90
86 if (!buffer_busy(bh)) { 91 if (!buffer_busy(bh)) {
87 if (!buffer_uptodate(bh)) { 92 if (!buffer_uptodate(bh)) {
88 gfs2_log_unlock(sdp); 93 gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
125 bd_ail_st_list) { 130 bd_ail_st_list) {
126 bh = bd->bd_bh; 131 bh = bd->bd_bh;
127 132
133 if (!bh){
134 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
135 continue;
136 }
137
128 gfs2_assert(sdp, bd->bd_ail == ai); 138 gfs2_assert(sdp, bd->bd_ail == ai);
129 139
130 if (buffer_busy(bh)) { 140 if (buffer_busy(bh)) {
@@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
262 * @sdp: The GFS2 superblock 272 * @sdp: The GFS2 superblock
263 * @blks: The number of blocks to reserve 273 * @blks: The number of blocks to reserve
264 * 274 *
265 * Note that we never give out the last 6 blocks of the journal. Thats 275 * Note that we never give out the last few blocks of the journal. Thats
266 * due to the fact that there is are a small number of header blocks 276 * due to the fact that there is a small number of header blocks
267 * associated with each log flush. The exact number can't be known until 277 * associated with each log flush. The exact number can't be known until
268 * flush time, so we ensure that we have just enough free blocks at all 278 * flush time, so we ensure that we have just enough free blocks at all
269 * times to avoid running out during a log flush. 279 * times to avoid running out during a log flush.
@@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
274int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) 284int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
275{ 285{
276 unsigned int try = 0; 286 unsigned int try = 0;
287 unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
277 288
278 if (gfs2_assert_warn(sdp, blks) || 289 if (gfs2_assert_warn(sdp, blks) ||
279 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) 290 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
281 292
282 mutex_lock(&sdp->sd_log_reserve_mutex); 293 mutex_lock(&sdp->sd_log_reserve_mutex);
283 gfs2_log_lock(sdp); 294 gfs2_log_lock(sdp);
284 while(sdp->sd_log_blks_free <= (blks + 6)) { 295 while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
285 gfs2_log_unlock(sdp); 296 gfs2_log_unlock(sdp);
286 gfs2_ail1_empty(sdp, 0); 297 gfs2_ail1_empty(sdp, 0);
287 gfs2_log_flush(sdp, NULL); 298 gfs2_log_flush(sdp, NULL);
@@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
357 return dist; 368 return dist;
358} 369}
359 370
371/**
372 * calc_reserved - Calculate the number of blocks to reserve when
373 * refunding a transaction's unused buffers.
374 * @sdp: The GFS2 superblock
375 *
376 * This is complex. We need to reserve room for all our currently used
377 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
378 * all our journaled data buffers for journaled files (e.g. files in the
379 * meta_fs like rindex, or files for which chattr +j was done.)
380 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
381 * will count it as free space (sd_log_blks_free) and corruption will follow.
382 *
383 * We can have metadata bufs and jdata bufs in the same journal. So each
384 * type gets its own log header, for which we need to reserve a block.
385 * In fact, each type has the potential for needing more than one header
386 * in cases where we have more buffers than will fit on a journal page.
387 * Metadata journal entries take up half the space of journaled buffer entries.
388 * Thus, metadata entries have buf_limit (502) and journaled buffers have
389 * databuf_limit (251) before they cause a wrap around.
390 *
391 * Also, we need to reserve blocks for revoke journal entries and one for an
392 * overall header for the lot.
393 *
394 * Returns: the number of blocks reserved
395 */
396static unsigned int calc_reserved(struct gfs2_sbd *sdp)
397{
398 unsigned int reserved = 0;
399 unsigned int mbuf_limit, metabufhdrs_needed;
400 unsigned int dbuf_limit, databufhdrs_needed;
401 unsigned int revokes = 0;
402
403 mbuf_limit = buf_limit(sdp);
404 metabufhdrs_needed = (sdp->sd_log_commited_buf +
405 (mbuf_limit - 1)) / mbuf_limit;
406 dbuf_limit = databuf_limit(sdp);
407 databufhdrs_needed = (sdp->sd_log_commited_databuf +
408 (dbuf_limit - 1)) / dbuf_limit;
409
410 if (sdp->sd_log_commited_revoke)
411 revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
412 sizeof(u64));
413
414 reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
415 sdp->sd_log_commited_databuf + databufhdrs_needed +
416 revokes;
417 /* One for the overall header */
418 if (reserved)
419 reserved++;
420 return reserved;
421}
422
360static unsigned int current_tail(struct gfs2_sbd *sdp) 423static unsigned int current_tail(struct gfs2_sbd *sdp)
361{ 424{
362 struct gfs2_ail *ai; 425 struct gfs2_ail *ai;
@@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
447 return bh; 510 return bh;
448} 511}
449 512
450static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull) 513static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
451{ 514{
452 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); 515 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
453 516
454 ail2_empty(sdp, new_tail); 517 ail2_empty(sdp, new_tail);
455 518
456 gfs2_log_lock(sdp); 519 gfs2_log_lock(sdp);
457 sdp->sd_log_blks_free += dist - (pull ? 1 : 0); 520 sdp->sd_log_blks_free += dist;
458 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); 521 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
459 gfs2_log_unlock(sdp); 522 gfs2_log_unlock(sdp);
460 523
@@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
504 brelse(bh); 567 brelse(bh);
505 568
506 if (sdp->sd_log_tail != tail) 569 if (sdp->sd_log_tail != tail)
507 log_pull_tail(sdp, tail, pull); 570 log_pull_tail(sdp, tail);
508 else 571 else
509 gfs2_assert_withdraw(sdp, !pull); 572 gfs2_assert_withdraw(sdp, !pull);
510 573
@@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
517 struct list_head *head = &sdp->sd_log_flush_list; 580 struct list_head *head = &sdp->sd_log_flush_list;
518 struct gfs2_log_buf *lb; 581 struct gfs2_log_buf *lb;
519 struct buffer_head *bh; 582 struct buffer_head *bh;
583 int flushcount = 0;
520 584
521 while (!list_empty(head)) { 585 while (!list_empty(head)) {
522 lb = list_entry(head->next, struct gfs2_log_buf, lb_list); 586 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
533 } else 597 } else
534 brelse(bh); 598 brelse(bh);
535 kfree(lb); 599 kfree(lb);
600 flushcount++;
536 } 601 }
537 602
538 log_write_header(sdp, 0, 0); 603 /* If nothing was journaled, the header is unplanned and unwanted. */
604 if (flushcount) {
605 log_write_header(sdp, 0, 0);
606 } else {
607 unsigned int tail;
608 tail = current_tail(sdp);
609
610 gfs2_ail1_empty(sdp, 0);
611 if (sdp->sd_log_tail != tail)
612 log_pull_tail(sdp, tail);
613 }
539} 614}
540 615
541/** 616/**
@@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
565 INIT_LIST_HEAD(&ai->ai_ail1_list); 640 INIT_LIST_HEAD(&ai->ai_ail1_list);
566 INIT_LIST_HEAD(&ai->ai_ail2_list); 641 INIT_LIST_HEAD(&ai->ai_ail2_list);
567 642
568 gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf); 643 gfs2_assert_withdraw(sdp,
644 sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
645 sdp->sd_log_commited_buf +
646 sdp->sd_log_commited_databuf);
569 gfs2_assert_withdraw(sdp, 647 gfs2_assert_withdraw(sdp,
570 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); 648 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
571 649
@@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
576 lops_before_commit(sdp); 654 lops_before_commit(sdp);
577 if (!list_empty(&sdp->sd_log_flush_list)) 655 if (!list_empty(&sdp->sd_log_flush_list))
578 log_flush_commit(sdp); 656 log_flush_commit(sdp);
579 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) 657 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
658 gfs2_log_lock(sdp);
659 sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
660 gfs2_log_unlock(sdp);
580 log_write_header(sdp, 0, PULL); 661 log_write_header(sdp, 0, PULL);
662 }
581 lops_after_commit(sdp, ai); 663 lops_after_commit(sdp, ai);
582 664
583 gfs2_log_lock(sdp); 665 gfs2_log_lock(sdp);
584 sdp->sd_log_head = sdp->sd_log_flush_head; 666 sdp->sd_log_head = sdp->sd_log_flush_head;
585 sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
586 sdp->sd_log_blks_reserved = 0; 667 sdp->sd_log_blks_reserved = 0;
587 sdp->sd_log_commited_buf = 0; 668 sdp->sd_log_commited_buf = 0;
588 sdp->sd_log_num_hdrs = 0; 669 sdp->sd_log_commited_databuf = 0;
589 sdp->sd_log_commited_revoke = 0; 670 sdp->sd_log_commited_revoke = 0;
590 671
591 if (!list_empty(&ai->ai_ail1_list)) { 672 if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
602 683
603static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 684static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
604{ 685{
605 unsigned int reserved = 0; 686 unsigned int reserved;
606 unsigned int old; 687 unsigned int old;
607 688
608 gfs2_log_lock(sdp); 689 gfs2_log_lock(sdp);
609 690
610 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; 691 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
611 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0); 692 sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
693 tr->tr_num_databuf_rm;
694 gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
695 (((int)sdp->sd_log_commited_databuf) >= 0));
612 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; 696 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
613 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); 697 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
614 698 reserved = calc_reserved(sdp);
615 if (sdp->sd_log_commited_buf)
616 reserved += sdp->sd_log_commited_buf;
617 if (sdp->sd_log_commited_revoke)
618 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
619 sizeof(u64));
620 if (reserved)
621 reserved++;
622
623 old = sdp->sd_log_blks_free; 699 old = sdp->sd_log_blks_free;
624 sdp->sd_log_blks_free += tr->tr_reserved - 700 sdp->sd_log_blks_free += tr->tr_reserved -
625 (reserved - sdp->sd_log_blks_reserved); 701 (reserved - sdp->sd_log_blks_reserved);
626 702
627 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); 703 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
628 gfs2_assert_withdraw(sdp, 704 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
629 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks + 705 sdp->sd_jdesc->jd_blocks);
630 sdp->sd_log_num_hdrs);
631 706
632 sdp->sd_log_blks_reserved = reserved; 707 sdp->sd_log_blks_reserved = reserved;
633 708
@@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
673 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 748 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
674 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 749 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
675 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); 750 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
676 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
677 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); 751 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
678 752
679 sdp->sd_log_flush_head = sdp->sd_log_head; 753 sdp->sd_log_flush_head = sdp->sd_log_head;
680 sdp->sd_log_flush_wrapped = 0; 754 sdp->sd_log_flush_wrapped = 0;
681 755
682 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0); 756 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
757 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
683 758
684 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); 759 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
685 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); 760 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f82d84d05d23..aff70f0698fd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,6 +17,7 @@
17 17
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "inode.h"
20#include "glock.h" 21#include "glock.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
@@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
117 struct gfs2_log_descriptor *ld; 118 struct gfs2_log_descriptor *ld;
118 struct gfs2_bufdata *bd1 = NULL, *bd2; 119 struct gfs2_bufdata *bd1 = NULL, *bd2;
119 unsigned int total = sdp->sd_log_num_buf; 120 unsigned int total = sdp->sd_log_num_buf;
120 unsigned int offset = sizeof(struct gfs2_log_descriptor); 121 unsigned int offset = BUF_OFFSET;
121 unsigned int limit; 122 unsigned int limit;
122 unsigned int num; 123 unsigned int num;
123 unsigned n; 124 unsigned n;
124 __be64 *ptr; 125 __be64 *ptr;
125 126
126 offset += sizeof(__be64) - 1; 127 limit = buf_limit(sdp);
127 offset &= ~(sizeof(__be64) - 1);
128 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
129 /* for 4k blocks, limit = 503 */ 128 /* for 4k blocks, limit = 503 */
130 129
131 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); 130 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
134 if (total > limit) 133 if (total > limit)
135 num = limit; 134 num = limit;
136 bh = gfs2_log_get_buf(sdp); 135 bh = gfs2_log_get_buf(sdp);
137 sdp->sd_log_num_hdrs++;
138 ld = (struct gfs2_log_descriptor *)bh->b_data; 136 ld = (struct gfs2_log_descriptor *)bh->b_data;
139 ptr = (__be64 *)(bh->b_data + offset); 137 ptr = (__be64 *)(bh->b_data + offset);
140 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 138 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
469 struct gfs2_inode *ip = GFS2_I(mapping->host); 467 struct gfs2_inode *ip = GFS2_I(mapping->host);
470 468
471 gfs2_log_lock(sdp); 469 gfs2_log_lock(sdp);
470 if (!list_empty(&bd->bd_list_tr)) {
471 gfs2_log_unlock(sdp);
472 return;
473 }
472 tr->tr_touched = 1; 474 tr->tr_touched = 1;
473 if (list_empty(&bd->bd_list_tr) && 475 if (gfs2_is_jdata(ip)) {
474 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
475 tr->tr_num_buf++; 476 tr->tr_num_buf++;
476 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 477 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
477 gfs2_log_unlock(sdp);
478 gfs2_pin(sdp, bd->bd_bh);
479 tr->tr_num_buf_new++;
480 } else {
481 gfs2_log_unlock(sdp);
482 } 478 }
479 gfs2_log_unlock(sdp);
480 if (!list_empty(&le->le_list))
481 return;
482
483 gfs2_trans_add_gl(bd->bd_gl); 483 gfs2_trans_add_gl(bd->bd_gl);
484 gfs2_log_lock(sdp); 484 if (gfs2_is_jdata(ip)) {
485 if (list_empty(&le->le_list)) { 485 sdp->sd_log_num_jdata++;
486 if (ip->i_di.di_flags & GFS2_DIF_JDATA) 486 gfs2_pin(sdp, bd->bd_bh);
487 sdp->sd_log_num_jdata++; 487 tr->tr_num_databuf_new++;
488 sdp->sd_log_num_databuf++;
489 list_add(&le->le_list, &sdp->sd_log_le_databuf);
490 } 488 }
489 sdp->sd_log_num_databuf++;
490 gfs2_log_lock(sdp);
491 list_add(&le->le_list, &sdp->sd_log_le_databuf);
491 gfs2_log_unlock(sdp); 492 gfs2_log_unlock(sdp);
492} 493}
493 494
@@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
520 LIST_HEAD(started); 521 LIST_HEAD(started);
521 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; 522 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
522 struct buffer_head *bh = NULL,*bh1 = NULL; 523 struct buffer_head *bh = NULL,*bh1 = NULL;
523 unsigned int offset = sizeof(struct gfs2_log_descriptor);
524 struct gfs2_log_descriptor *ld; 524 struct gfs2_log_descriptor *ld;
525 unsigned int limit; 525 unsigned int limit;
526 unsigned int total_dbuf = sdp->sd_log_num_databuf; 526 unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
528 unsigned int num, n; 528 unsigned int num, n;
529 __be64 *ptr = NULL; 529 __be64 *ptr = NULL;
530 530
531 offset += 2*sizeof(__be64) - 1; 531 limit = databuf_limit(sdp);
532 offset &= ~(2*sizeof(__be64) - 1);
533 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
534 532
535 /* 533 /*
536 * Start writing ordered buffers, write journaled buffers 534 * Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
581 gfs2_log_unlock(sdp); 579 gfs2_log_unlock(sdp);
582 if (!bh) { 580 if (!bh) {
583 bh = gfs2_log_get_buf(sdp); 581 bh = gfs2_log_get_buf(sdp);
584 sdp->sd_log_num_hdrs++;
585 ld = (struct gfs2_log_descriptor *) 582 ld = (struct gfs2_log_descriptor *)
586 bh->b_data; 583 bh->b_data;
587 ptr = (__be64 *)(bh->b_data + offset); 584 ptr = (__be64 *)(bh->b_data +
585 DATABUF_OFFSET);
588 ld->ld_header.mh_magic = 586 ld->ld_header.mh_magic =
589 cpu_to_be32(GFS2_MAGIC); 587 cpu_to_be32(GFS2_MAGIC);
590 ld->ld_header.mh_type = 588 ld->ld_header.mh_type =
@@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
605 if (unlikely(magic != 0)) 603 if (unlikely(magic != 0))
606 set_buffer_escaped(bh1); 604 set_buffer_escaped(bh1);
607 gfs2_log_lock(sdp); 605 gfs2_log_lock(sdp);
608 if (n++ > num) 606 if (++n >= num)
609 break; 607 break;
610 } else if (!bh1) { 608 } else if (!bh1) {
611 total_dbuf--; 609 total_dbuf--;
@@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
622 } 620 }
623 gfs2_log_unlock(sdp); 621 gfs2_log_unlock(sdp);
624 if (bh) { 622 if (bh) {
623 set_buffer_mapped(bh);
625 set_buffer_dirty(bh); 624 set_buffer_dirty(bh);
626 ll_rw_block(WRITE, 1, &bh); 625 ll_rw_block(WRITE, 1, &bh);
627 bh = NULL; 626 bh = NULL;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 965bc65c7c64..41a00df75587 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -13,6 +13,13 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include "incore.h" 14#include "incore.h"
15 15
/*
 * Byte offsets, within a log descriptor block, at which the entries that
 * follow struct gfs2_log_descriptor start.  The descriptor size is rounded
 * up to a multiple of one __be64 for BUF_OFFSET and of two __be64s for
 * DATABUF_OFFSET.
 */
#define BUF_OFFSET \
	((sizeof(struct gfs2_log_descriptor) + (sizeof(__be64) - 1)) & \
	 ~(sizeof(__be64) - 1))
#define DATABUF_OFFSET \
	((sizeof(struct gfs2_log_descriptor) + ((2 * sizeof(__be64)) - 1)) & \
	 ~((2 * sizeof(__be64)) - 1))
22
16extern const struct gfs2_log_operations gfs2_glock_lops; 23extern const struct gfs2_log_operations gfs2_glock_lops;
17extern const struct gfs2_log_operations gfs2_buf_lops; 24extern const struct gfs2_log_operations gfs2_buf_lops;
18extern const struct gfs2_log_operations gfs2_revoke_lops; 25extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
21 28
22extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
23 30
31static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
32{
33 unsigned int limit;
34
35 limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
36 return limit;
37}
38
39static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
40{
41 unsigned int limit;
42
43 limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
44 return limit;
45}
46
24static inline void lops_init_le(struct gfs2_log_element *le, 47static inline void lops_init_le(struct gfs2_log_element *le,
25 const struct gfs2_log_operations *lops) 48 const struct gfs2_log_operations *lops)
26{ 49{
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e62d4f620c58..8da343b34ae7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
387 387
388 if (test_clear_buffer_pinned(bh)) { 388 if (test_clear_buffer_pinned(bh)) {
389 struct gfs2_trans *tr = current->journal_info; 389 struct gfs2_trans *tr = current->journal_info;
390 struct gfs2_inode *bh_ip =
391 GFS2_I(bh->b_page->mapping->host);
392
390 gfs2_log_lock(sdp); 393 gfs2_log_lock(sdp);
391 list_del_init(&bd->bd_le.le_list); 394 list_del_init(&bd->bd_le.le_list);
392 gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 395 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
393 sdp->sd_log_num_buf--; 396 sdp->sd_log_num_buf--;
394 gfs2_log_unlock(sdp); 397 gfs2_log_unlock(sdp);
395 tr->tr_num_buf_rm++; 398 if (bh_ip->i_inode.i_private != NULL)
399 tr->tr_num_databuf_rm++;
400 else
401 tr->tr_num_buf_rm++;
396 brelse(bh); 402 brelse(bh);
397 } 403 }
398 if (bd) { 404 if (bd) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index e037425bc042..527bf19d9690 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
63static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, 63static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
64 struct buffer_head **bhp) 64 struct buffer_head **bhp)
65{ 65{
66 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp); 66 return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
67} 67}
68 68
69struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); 69struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d4..6f006a804db3 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
82 char *options, *o, *v; 82 char *options, *o, *v;
83 int error = 0; 83 int error = 0;
84 84
85 if (!remount) { 85 /* If someone preloaded options, use those instead */
86 /* If someone preloaded options, use those instead */ 86 spin_lock(&gfs2_sys_margs_lock);
87 spin_lock(&gfs2_sys_margs_lock); 87 if (!remount && gfs2_sys_margs) {
88 if (gfs2_sys_margs) { 88 data = gfs2_sys_margs;
89 data = gfs2_sys_margs; 89 gfs2_sys_margs = NULL;
90 gfs2_sys_margs = NULL;
91 }
92 spin_unlock(&gfs2_sys_margs_lock);
93
94 /* Set some defaults */
95 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
96 args->ar_quota = GFS2_QUOTA_DEFAULT;
97 args->ar_data = GFS2_DATA_DEFAULT;
98 } 90 }
91 spin_unlock(&gfs2_sys_margs_lock);
92
93 /* Set some defaults */
94 memset(args, 0, sizeof(struct gfs2_args));
95 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
96 args->ar_quota = GFS2_QUOTA_DEFAULT;
97 args->ar_data = GFS2_DATA_DEFAULT;
99 98
100 /* Split the options into tokens with the "," character and 99 /* Split the options into tokens with the "," character and
101 process them */ 100 process them */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
deleted file mode 100644
index d9ecfd23a49e..000000000000
--- a/fs/gfs2/ondisk.c
+++ /dev/null
@@ -1,251 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14
15#include "gfs2.h"
16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include "incore.h"
19
/*
 * pv - print one member of a structure as " <member> = <value>"
 * @ptr: pointer to the structure
 * @member: name of the member; also used verbatim as the printed label
 * @fmt: printk conversion matching the member's type, e.g. "%u"
 *
 * Expands to a single printk() expression without a trailing semicolon, so
 * that "pv(...);" is exactly one statement and stays valid as the body of
 * an unbraced if/else.
 */
#define pv(ptr, member, fmt) \
	printk(KERN_INFO " " #member " = " fmt "\n", (ptr)->member)
22
23/*
24 * gfs2_xxx_in - read in an xxx struct
25 * first arg: the cpu-order structure
26 * buf: the disk-order buffer
27 *
28 * gfs2_xxx_out - write out an xxx struct
29 * first arg: the cpu-order structure
30 * buf: the disk-order buffer
31 *
32 * gfs2_xxx_print - print out an xxx struct
33 * first arg: the cpu-order structure
34 */
35
36void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
37{
38 const struct gfs2_inum *str = buf;
39
40 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
41 no->no_addr = be64_to_cpu(str->no_addr);
42}
43
44void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
45{
46 struct gfs2_inum *str = buf;
47
48 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
49 str->no_addr = cpu_to_be64(no->no_addr);
50}
51
52static void gfs2_inum_print(const struct gfs2_inum_host *no)
53{
54 printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
55 printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
56}
57
58static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
59{
60 const struct gfs2_meta_header *str = buf;
61
62 mh->mh_magic = be32_to_cpu(str->mh_magic);
63 mh->mh_type = be32_to_cpu(str->mh_type);
64 mh->mh_format = be32_to_cpu(str->mh_format);
65}
66
67void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
68{
69 const struct gfs2_sb *str = buf;
70
71 gfs2_meta_header_in(&sb->sb_header, buf);
72
73 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
74 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
75 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
76 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
77
78 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
79 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
80
81 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
82 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
83}
84
85void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
86{
87 const struct gfs2_rindex *str = buf;
88
89 ri->ri_addr = be64_to_cpu(str->ri_addr);
90 ri->ri_length = be32_to_cpu(str->ri_length);
91 ri->ri_data0 = be64_to_cpu(str->ri_data0);
92 ri->ri_data = be32_to_cpu(str->ri_data);
93 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
94
95}
96
97void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
98{
99 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
100 pv(ri, ri_length, "%u");
101
102 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
103 pv(ri, ri_data, "%u");
104
105 pv(ri, ri_bitbytes, "%u");
106}
107
108void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
109{
110 const struct gfs2_rgrp *str = buf;
111
112 rg->rg_flags = be32_to_cpu(str->rg_flags);
113 rg->rg_free = be32_to_cpu(str->rg_free);
114 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
115 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
116}
117
118void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
119{
120 struct gfs2_rgrp *str = buf;
121
122 str->rg_flags = cpu_to_be32(rg->rg_flags);
123 str->rg_free = cpu_to_be32(rg->rg_free);
124 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
125 str->__pad = cpu_to_be32(0);
126 str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
127 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
128}
129
130void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
131{
132 const struct gfs2_quota *str = buf;
133
134 qu->qu_limit = be64_to_cpu(str->qu_limit);
135 qu->qu_warn = be64_to_cpu(str->qu_warn);
136 qu->qu_value = be64_to_cpu(str->qu_value);
137}
138
139void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
140{
141 const struct gfs2_dinode_host *di = &ip->i_di;
142 struct gfs2_dinode *str = buf;
143
144 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
145 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
146 str->di_header.__pad0 = 0;
147 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
148 str->di_header.__pad1 = 0;
149
150 gfs2_inum_out(&ip->i_num, &str->di_num);
151
152 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
153 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
154 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
155 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
156 str->di_size = cpu_to_be64(di->di_size);
157 str->di_blocks = cpu_to_be64(di->di_blocks);
158 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
159 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
160 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
161
162 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
163 str->di_goal_data = cpu_to_be64(di->di_goal_data);
164 str->di_generation = cpu_to_be64(di->di_generation);
165
166 str->di_flags = cpu_to_be32(di->di_flags);
167 str->di_height = cpu_to_be16(di->di_height);
168 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
169 !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
170 GFS2_FORMAT_DE : 0);
171 str->di_depth = cpu_to_be16(di->di_depth);
172 str->di_entries = cpu_to_be32(di->di_entries);
173
174 str->di_eattr = cpu_to_be64(di->di_eattr);
175}
176
177void gfs2_dinode_print(const struct gfs2_inode *ip)
178{
179 const struct gfs2_dinode_host *di = &ip->i_di;
180
181 gfs2_inum_print(&ip->i_num);
182
183 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
184 printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
185 printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
186 printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
187
188 pv(di, di_flags, "0x%.8X");
189 pv(di, di_height, "%u");
190
191 pv(di, di_depth, "%u");
192 pv(di, di_entries, "%u");
193
194 printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
195}
196
197void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
198{
199 const struct gfs2_log_header *str = buf;
200
201 gfs2_meta_header_in(&lh->lh_header, buf);
202 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
203 lh->lh_flags = be32_to_cpu(str->lh_flags);
204 lh->lh_tail = be32_to_cpu(str->lh_tail);
205 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
206 lh->lh_hash = be32_to_cpu(str->lh_hash);
207}
208
209void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
210{
211 const struct gfs2_inum_range *str = buf;
212
213 ir->ir_start = be64_to_cpu(str->ir_start);
214 ir->ir_length = be64_to_cpu(str->ir_length);
215}
216
217void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
218{
219 struct gfs2_inum_range *str = buf;
220
221 str->ir_start = cpu_to_be64(ir->ir_start);
222 str->ir_length = cpu_to_be64(ir->ir_length);
223}
224
225void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
226{
227 const struct gfs2_statfs_change *str = buf;
228
229 sc->sc_total = be64_to_cpu(str->sc_total);
230 sc->sc_free = be64_to_cpu(str->sc_free);
231 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
232}
233
234void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
235{
236 struct gfs2_statfs_change *str = buf;
237
238 str->sc_total = cpu_to_be64(sc->sc_total);
239 str->sc_free = cpu_to_be64(sc->sc_free);
240 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
241}
242
243void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
244{
245 const struct gfs2_quota_change *str = buf;
246
247 qc->qc_change = be64_to_cpu(str->qc_change);
248 qc->qc_flags = be32_to_cpu(str->qc_flags);
249 qc->qc_id = be32_to_cpu(str->qc_id);
250}
251
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 30c15622174f..26c888890c24 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
32#include "trans.h" 32#include "trans.h"
33#include "rgrp.h" 33#include "rgrp.h"
34#include "ops_file.h" 34#include "ops_file.h"
35#include "super.h"
35#include "util.h" 36#include "util.h"
36#include "glops.h" 37#include "glops.h"
37 38
@@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
49 end = start + bsize; 50 end = start + bsize;
50 if (end <= from || start >= to) 51 if (end <= from || start >= to)
51 continue; 52 continue;
53 if (gfs2_is_jdata(ip))
54 set_buffer_uptodate(bh);
52 gfs2_trans_add_bh(ip->i_gl, bh, 0); 55 gfs2_trans_add_bh(ip->i_gl, bh, 0);
53 } 56 }
54} 57}
@@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
134 return 0; /* don't care */ 137 return 0; /* don't care */
135 } 138 }
136 139
137 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) { 140 if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
141 PageChecked(page)) {
142 ClearPageChecked(page);
138 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); 143 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
139 if (error) 144 if (error)
140 goto out_ignore; 145 goto out_ignore;
@@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
203 * so we need to supply one here. It doesn't happen often. 208 * so we need to supply one here. It doesn't happen often.
204 */ 209 */
205 if (unlikely(page->index)) { 210 if (unlikely(page->index)) {
206 kaddr = kmap_atomic(page, KM_USER0); 211 zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
207 memset(kaddr, 0, PAGE_CACHE_SIZE);
208 kunmap_atomic(kaddr, KM_USER0);
209 flush_dcache_page(page);
210 SetPageUptodate(page);
211 return 0; 212 return 0;
212 } 213 }
213 214
@@ -450,6 +451,31 @@ out_uninit:
450} 451}
451 452
452/** 453/**
454 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
455 * @inode: the rindex inode
456 */
457static void adjust_fs_space(struct inode *inode)
458{
459 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
460 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
461 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
462 u64 fs_total, new_free;
463
464 /* Total up the file system space, according to the latest rindex. */
465 fs_total = gfs2_ri_total(sdp);
466
467 spin_lock(&sdp->sd_statfs_spin); /* held while both sc_total values are read */
468 if (fs_total > (m_sc->sc_total + l_sc->sc_total)) /* rindex total exceeds master + local sc_total */
469 new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
470 else
471 new_free = 0; /* no growth: clamp to 0 instead of letting the u64 subtraction wrap */
472 spin_unlock(&sdp->sd_statfs_spin);
473 fs_warn(sdp, "File system extended by %llu blocks.\n", /* emitted unconditionally, even when new_free is 0 */
474 (unsigned long long)new_free);
475 gfs2_statfs_change(sdp, new_free, new_free, 0); /* NOTE(review): presumably (total, free, dinodes) deltas - confirm against gfs2_statfs_change() */
476}
477
478/**
453 * gfs2_commit_write - Commit write to a file 479 * gfs2_commit_write - Commit write to a file
454 * @file: The file to write to 480 * @file: The file to write to
455 * @page: The page containing the data 481 * @page: The page containing the data
@@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
511 di->di_size = cpu_to_be64(inode->i_size); 537 di->di_size = cpu_to_be64(inode->i_size);
512 } 538 }
513 539
540 if (inode == sdp->sd_rindex)
541 adjust_fs_space(inode);
542
514 brelse(dibh); 543 brelse(dibh);
515 gfs2_trans_end(sdp); 544 gfs2_trans_end(sdp);
516 if (al->al_requested) { 545 if (al->al_requested) {
@@ -543,6 +572,23 @@ fail_nounlock:
543} 572}
544 573
545/** 574/**
575 * gfs2_set_page_dirty - Page dirtying function
576 * @page: The page to dirty
577 *
578 * Returns: 1 if it dirtied the page, or 0 otherwise
579 */
580
581static int gfs2_set_page_dirty(struct page *page)
582{
583 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
584 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
585
586 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) /* same mode test gfs2_writepage() combines with PageChecked() */
587 SetPageChecked(page); /* gfs2_writepage() clears this flag before beginning its transaction */
588 return __set_page_dirty_buffers(page); /* result is passed through unchanged */
589}
590
591/**
546 * gfs2_bmap - Block map function 592 * gfs2_bmap - Block map function
547 * @mapping: Address space info 593 * @mapping: Address space info
548 * @lblock: The block to map 594 * @lblock: The block to map
@@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
578 if (bd) { 624 if (bd) {
579 bd->bd_bh = NULL; 625 bd->bd_bh = NULL;
580 bh->b_private = NULL; 626 bh->b_private = NULL;
627 if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
628 kmem_cache_free(gfs2_bufdata_cachep, bd);
581 } 629 }
582 gfs2_log_unlock(sdp); 630 gfs2_log_unlock(sdp);
583 631
@@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
598 unsigned int curr_off = 0; 646 unsigned int curr_off = 0;
599 647
600 BUG_ON(!PageLocked(page)); 648 BUG_ON(!PageLocked(page));
649 if (offset == 0)
650 ClearPageChecked(page);
601 if (!page_has_buffers(page)) 651 if (!page_has_buffers(page))
602 return; 652 return;
603 653
@@ -728,8 +778,8 @@ static unsigned limit = 0;
728 return; 778 return;
729 779
730 fs_warn(sdp, "ip = %llu %llu\n", 780 fs_warn(sdp, "ip = %llu %llu\n",
731 (unsigned long long)ip->i_num.no_formal_ino, 781 (unsigned long long)ip->i_no_formal_ino,
732 (unsigned long long)ip->i_num.no_addr); 782 (unsigned long long)ip->i_no_addr);
733 783
734 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) 784 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
735 fs_warn(sdp, "ip->i_cache[%u] = %s\n", 785 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
810 .sync_page = block_sync_page, 860 .sync_page = block_sync_page,
811 .prepare_write = gfs2_prepare_write, 861 .prepare_write = gfs2_prepare_write,
812 .commit_write = gfs2_commit_write, 862 .commit_write = gfs2_commit_write,
863 .set_page_dirty = gfs2_set_page_dirty,
813 .bmap = gfs2_bmap, 864 .bmap = gfs2_bmap,
814 .invalidatepage = gfs2_invalidatepage, 865 .invalidatepage = gfs2_invalidatepage,
815 .releasepage = gfs2_releasepage, 866 .releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index 35aaee4aa7e1..fa1b5b3d28b9 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index a6fdc52f554a..793e334d098e 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -21,6 +21,7 @@
21#include "glock.h" 21#include "glock.h"
22#include "ops_dentry.h" 22#include "ops_dentry.h"
23#include "util.h" 23#include "util.h"
24#include "inode.h"
24 25
25/** 26/**
26 * gfs2_drevalidate - Check directory lookup consistency 27 * gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
40 struct gfs2_inode *dip = GFS2_I(parent->d_inode); 41 struct gfs2_inode *dip = GFS2_I(parent->d_inode);
41 struct inode *inode = dentry->d_inode; 42 struct inode *inode = dentry->d_inode;
42 struct gfs2_holder d_gh; 43 struct gfs2_holder d_gh;
43 struct gfs2_inode *ip; 44 struct gfs2_inode *ip = NULL;
44 struct gfs2_inum_host inum;
45 unsigned int type;
46 int error; 45 int error;
47 int had_lock=0; 46 int had_lock=0;
48 47
49 if (inode && is_bad_inode(inode)) 48 if (inode) {
50 goto invalid; 49 if (is_bad_inode(inode))
50 goto invalid;
51 ip = GFS2_I(inode);
52 }
51 53
52 if (sdp->sd_args.ar_localcaching) 54 if (sdp->sd_args.ar_localcaching)
53 goto valid; 55 goto valid;
@@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
59 goto fail; 61 goto fail;
60 } 62 }
61 63
62 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type); 64 error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
63 switch (error) { 65 switch (error) {
64 case 0: 66 case 0:
65 if (!inode) 67 if (!inode)
@@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
73 goto fail_gunlock; 75 goto fail_gunlock;
74 } 76 }
75 77
76 ip = GFS2_I(inode);
77
78 if (!gfs2_inum_equal(&ip->i_num, &inum))
79 goto invalid_gunlock;
80
81 if (IF2DT(ip->i_inode.i_mode) != type) {
82 gfs2_consist_inode(dip);
83 goto fail_gunlock;
84 }
85
86valid_gunlock: 78valid_gunlock:
87 if (!had_lock) 79 if (!had_lock)
88 gfs2_glock_dq_uninit(&d_gh); 80 gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index aad918337a46..99ea5659bc2c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,10 +22,14 @@
22#include "glops.h" 22#include "glops.h"
23#include "inode.h" 23#include "inode.h"
24#include "ops_dentry.h" 24#include "ops_dentry.h"
25#include "ops_export.h" 25#include "ops_fstype.h"
26#include "rgrp.h" 26#include "rgrp.h"
27#include "util.h" 27#include "util.h"
28 28
29#define GFS2_SMALL_FH_SIZE 4
30#define GFS2_LARGE_FH_SIZE 8
31#define GFS2_OLD_FH_SIZE 10
32
29static struct dentry *gfs2_decode_fh(struct super_block *sb, 33static struct dentry *gfs2_decode_fh(struct super_block *sb,
30 __u32 *p, 34 __u32 *p,
31 int fh_len, 35 int fh_len,
@@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
35 void *context) 39 void *context)
36{ 40{
37 __be32 *fh = (__force __be32 *)p; 41 __be32 *fh = (__force __be32 *)p;
38 struct gfs2_fh_obj fh_obj; 42 struct gfs2_inum_host inum, parent;
39 struct gfs2_inum_host *this, parent;
40 43
41 this = &fh_obj.this;
42 fh_obj.imode = DT_UNKNOWN;
43 memset(&parent, 0, sizeof(struct gfs2_inum)); 44 memset(&parent, 0, sizeof(struct gfs2_inum));
44 45
45 switch (fh_len) { 46 switch (fh_len) {
46 case GFS2_LARGE_FH_SIZE: 47 case GFS2_LARGE_FH_SIZE:
48 case GFS2_OLD_FH_SIZE:
47 parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; 49 parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
48 parent.no_formal_ino |= be32_to_cpu(fh[5]); 50 parent.no_formal_ino |= be32_to_cpu(fh[5]);
49 parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; 51 parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
50 parent.no_addr |= be32_to_cpu(fh[7]); 52 parent.no_addr |= be32_to_cpu(fh[7]);
51 fh_obj.imode = be32_to_cpu(fh[8]);
52 case GFS2_SMALL_FH_SIZE: 53 case GFS2_SMALL_FH_SIZE:
53 this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; 54 inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
54 this->no_formal_ino |= be32_to_cpu(fh[1]); 55 inum.no_formal_ino |= be32_to_cpu(fh[1]);
55 this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32; 56 inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
56 this->no_addr |= be32_to_cpu(fh[3]); 57 inum.no_addr |= be32_to_cpu(fh[3]);
57 break; 58 break;
58 default: 59 default:
59 return NULL; 60 return NULL;
60 } 61 }
61 62
62 return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent, 63 return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
63 acceptable, context); 64 acceptable, context);
64} 65}
65 66
@@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
75 (connectable && *len < GFS2_LARGE_FH_SIZE)) 76 (connectable && *len < GFS2_LARGE_FH_SIZE))
76 return 255; 77 return 255;
77 78
78 fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); 79 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
79 fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); 80 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
80 fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32); 81 fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
81 fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); 82 fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
82 *len = GFS2_SMALL_FH_SIZE; 83 *len = GFS2_SMALL_FH_SIZE;
83 84
84 if (!connectable || inode == sb->s_root->d_inode) 85 if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
90 igrab(inode); 91 igrab(inode);
91 spin_unlock(&dentry->d_lock); 92 spin_unlock(&dentry->d_lock);
92 93
93 fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32); 94 fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
94 fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF); 95 fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
95 fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32); 96 fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
96 fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF); 97 fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
97
98 fh[8] = cpu_to_be32(inode->i_mode);
99 fh[9] = 0; /* pad to double word */
100 *len = GFS2_LARGE_FH_SIZE; 98 *len = GFS2_LARGE_FH_SIZE;
101 99
102 iput(inode); 100 iput(inode);
@@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
144 ip = GFS2_I(inode); 142 ip = GFS2_I(inode);
145 143
146 *name = 0; 144 *name = 0;
147 gnfd.inum = ip->i_num; 145 gnfd.inum.no_addr = ip->i_no_addr;
146 gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
148 gnfd.name = name; 147 gnfd.name = name;
149 148
150 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); 149 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
192static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) 191static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
193{ 192{
194 struct gfs2_sbd *sdp = sb->s_fs_info; 193 struct gfs2_sbd *sdp = sb->s_fs_info;
195 struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj; 194 struct gfs2_inum_host *inum = inum_obj;
196 struct gfs2_inum_host *inum = &fh_obj->this;
197 struct gfs2_holder i_gh, ri_gh, rgd_gh; 195 struct gfs2_holder i_gh, ri_gh, rgd_gh;
198 struct gfs2_rgrpd *rgd; 196 struct gfs2_rgrpd *rgd;
199 struct inode *inode; 197 struct inode *inode;
@@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
202 200
203 /* System files? */ 201 /* System files? */
204 202
205 inode = gfs2_ilookup(sb, inum); 203 inode = gfs2_ilookup(sb, inum->no_addr);
206 if (inode) { 204 if (inode) {
207 if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) { 205 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
208 iput(inode); 206 iput(inode);
209 return ERR_PTR(-ESTALE); 207 return ERR_PTR(-ESTALE);
210 } 208 }
@@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
236 gfs2_glock_dq_uninit(&rgd_gh); 234 gfs2_glock_dq_uninit(&rgd_gh);
237 gfs2_glock_dq_uninit(&ri_gh); 235 gfs2_glock_dq_uninit(&ri_gh);
238 236
239 inode = gfs2_inode_lookup(sb, inum, fh_obj->imode); 237 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
238 inum->no_addr,
239 0);
240 if (!inode) 240 if (!inode)
241 goto fail; 241 goto fail;
242 if (IS_ERR(inode)) { 242 if (IS_ERR(inode)) {
@@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
250 goto fail; 250 goto fail;
251 } 251 }
252 252
253 /* Pick up the works we bypass in gfs2_inode_lookup */
254 if (inode->i_state & I_NEW)
255 gfs2_set_iop(inode);
256
257 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
258 iput(inode);
259 goto fail;
260 }
261
253 error = -EIO; 262 error = -EIO;
254 if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) { 263 if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
255 iput(inode); 264 iput(inode);
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
deleted file mode 100644
index f925a955b3b8..000000000000
--- a/fs/gfs2/ops_export.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13#define GFS2_SMALL_FH_SIZE 4
14#define GFS2_LARGE_FH_SIZE 10
15
16extern struct export_operations gfs2_export_ops;
17struct gfs2_fh_obj {
18 struct gfs2_inum_host this;
19 __u32 imode;
20};
21
22#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df8804582..196d83266e34 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
502 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 502 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
503 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 503 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
504 struct lm_lockname name = 504 struct lm_lockname name =
505 { .ln_number = ip->i_num.no_addr, 505 { .ln_number = ip->i_no_addr,
506 .ln_type = LM_TYPE_PLOCK }; 506 .ln_type = LM_TYPE_PLOCK };
507 507
508 if (!(fl->fl_flags & FL_POSIX)) 508 if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
557 gfs2_glock_dq_uninit(fl_gh); 557 gfs2_glock_dq_uninit(fl_gh);
558 } else { 558 } else {
559 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 559 error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
560 ip->i_num.no_addr, &gfs2_flock_glops, 560 ip->i_no_addr, &gfs2_flock_glops,
561 CREATE, &gl); 561 CREATE, &gl);
562 if (error) 562 if (error)
563 goto out; 563 goto out;
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
635 .release = gfs2_close, 635 .release = gfs2_close,
636 .fsync = gfs2_fsync, 636 .fsync = gfs2_fsync,
637 .lock = gfs2_lock, 637 .lock = gfs2_lock,
638 .sendfile = generic_file_sendfile,
639 .flock = gfs2_flock, 638 .flock = gfs2_flock,
640 .splice_read = generic_file_splice_read, 639 .splice_read = generic_file_splice_read,
641 .splice_write = generic_file_splice_write, 640 .splice_write = generic_file_splice_write,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2c5f8e7def0d..cf5aa5050548 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -27,7 +27,6 @@
27#include "inode.h" 27#include "inode.h"
28#include "lm.h" 28#include "lm.h"
29#include "mount.h" 29#include "mount.h"
30#include "ops_export.h"
31#include "ops_fstype.h" 30#include "ops_fstype.h"
32#include "ops_super.h" 31#include "ops_super.h"
33#include "recovery.h" 32#include "recovery.h"
@@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
105 sb->s_magic = GFS2_MAGIC; 104 sb->s_magic = GFS2_MAGIC;
106 sb->s_op = &gfs2_super_ops; 105 sb->s_op = &gfs2_super_ops;
107 sb->s_export_op = &gfs2_export_ops; 106 sb->s_export_op = &gfs2_export_ops;
107 sb->s_time_gran = 1;
108 sb->s_maxbytes = MAX_LFS_FILESIZE; 108 sb->s_maxbytes = MAX_LFS_FILESIZE;
109 109
110 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) 110 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
116 116
117static int init_names(struct gfs2_sbd *sdp, int silent) 117static int init_names(struct gfs2_sbd *sdp, int silent)
118{ 118{
119 struct page *page;
120 char *proto, *table; 119 char *proto, *table;
121 int error = 0; 120 int error = 0;
122 121
@@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
126 /* Try to autodetect */ 125 /* Try to autodetect */
127 126
128 if (!proto[0] || !table[0]) { 127 if (!proto[0] || !table[0]) {
129 struct gfs2_sb *sb; 128 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
130 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 129 if (error)
131 if (!page) 130 return error;
132 return -ENOBUFS;
133 sb = kmap(page);
134 gfs2_sb_in(&sdp->sd_sb, sb);
135 kunmap(page);
136 __free_page(page);
137 131
138 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); 132 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
139 if (error) 133 if (error)
@@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
151 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); 145 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
152 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); 146 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
153 147
148 while ((table = strchr(sdp->sd_table_name, '/')))
149 *table = '_';
150
154out: 151out:
155 return error; 152 return error;
156} 153}
@@ -236,17 +233,17 @@ fail:
236 return error; 233 return error;
237} 234}
238 235
239static struct inode *gfs2_lookup_root(struct super_block *sb, 236static inline struct inode *gfs2_lookup_root(struct super_block *sb,
240 struct gfs2_inum_host *inum) 237 u64 no_addr)
241{ 238{
242 return gfs2_inode_lookup(sb, inum, DT_DIR); 239 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
243} 240}
244 241
245static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) 242static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
246{ 243{
247 struct super_block *sb = sdp->sd_vfs; 244 struct super_block *sb = sdp->sd_vfs;
248 struct gfs2_holder sb_gh; 245 struct gfs2_holder sb_gh;
249 struct gfs2_inum_host *inum; 246 u64 no_addr;
250 struct inode *inode; 247 struct inode *inode;
251 int error = 0; 248 int error = 0;
252 249
@@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
289 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); 286 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
290 287
291 /* Get the root inode */ 288 /* Get the root inode */
292 inum = &sdp->sd_sb.sb_root_dir; 289 no_addr = sdp->sd_sb.sb_root_dir.no_addr;
293 if (sb->s_type == &gfs2meta_fs_type) 290 if (sb->s_type == &gfs2meta_fs_type)
294 inum = &sdp->sd_sb.sb_master_dir; 291 no_addr = sdp->sd_sb.sb_master_dir.no_addr;
295 inode = gfs2_lookup_root(sb, inum); 292 inode = gfs2_lookup_root(sb, no_addr);
296 if (IS_ERR(inode)) { 293 if (IS_ERR(inode)) {
297 error = PTR_ERR(inode); 294 error = PTR_ERR(inode);
298 fs_err(sdp, "can't read in root inode: %d\n", error); 295 fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
449 if (undo) 446 if (undo)
450 goto fail_qinode; 447 goto fail_qinode;
451 448
452 inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir); 449 inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
453 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
454 error = PTR_ERR(inode); 451 error = PTR_ERR(inode);
455 fs_err(sdp, "can't read in master directory: %d\n", error); 452 fs_err(sdp, "can't read in master directory: %d\n", error);
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
index 7cc2c296271b..407029b3b2b3 100644
--- a/fs/gfs2/ops_fstype.h
+++ b/fs/gfs2/ops_fstype.h
@@ -14,5 +14,6 @@
14 14
15extern struct file_system_type gfs2_fs_type; 15extern struct file_system_type gfs2_fs_type;
16extern struct file_system_type gfs2meta_fs_type; 16extern struct file_system_type gfs2meta_fs_type;
17extern struct export_operations gfs2_export_ops;
17 18
18#endif /* __OPS_FSTYPE_DOT_H__ */ 19#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d85f6e05cb95..911c115b5c6c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
157 if (error) 157 if (error)
158 goto out_gunlock; 158 goto out_gunlock;
159 159
160 error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL); 160 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
161 switch (error) { 161 switch (error) {
162 case -ENOENT: 162 case -ENOENT:
163 break; 163 break;
@@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
206 goto out_gunlock_q; 206 goto out_gunlock_q;
207 207
208 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 208 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
209 al->al_rgd->rd_ri.ri_length + 209 al->al_rgd->rd_length +
210 2 * RES_DINODE + RES_STATFS + 210 2 * RES_DINODE + RES_STATFS +
211 RES_QUOTA, 0); 211 RES_QUOTA, 0);
212 if (error) 212 if (error)
@@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
217 goto out_ipres; 217 goto out_ipres;
218 } 218 }
219 219
220 error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num, 220 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
221 IF2DT(inode->i_mode));
222 if (error) 221 if (error)
223 goto out_end_trans; 222 goto out_end_trans;
224 223
@@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
275 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 274 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
276 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 275 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
277 276
278 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); 277 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
279 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
280 279
281 280
@@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
420 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); 419 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
421 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); 420 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
422 421
423 gfs2_inum_out(&dip->i_num, &dent->de_inum); 422 gfs2_inum_out(dip, dent);
424 dent->de_type = cpu_to_be16(DT_DIR); 423 dent->de_type = cpu_to_be16(DT_DIR);
425 424
426 gfs2_dinode_out(ip, di); 425 gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
472 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 471 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
473 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 472 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
474 473
475 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); 474 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
476 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 475 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
477 476
478 error = gfs2_glock_nq_m(3, ghs); 477 error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
614 * this is the case of the target file already existing 613 * this is the case of the target file already existing
615 * so we unlink before doing the rename 614 * so we unlink before doing the rename
616 */ 615 */
617 nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr); 616 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
618 if (nrgd) 617 if (nrgd)
619 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 618 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
620 } 619 }
@@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
653 if (error) 652 if (error)
654 goto out_gunlock; 653 goto out_gunlock;
655 654
656 error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL); 655 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
657 switch (error) { 656 switch (error) {
658 case -ENOENT: 657 case -ENOENT:
659 error = 0; 658 error = 0;
@@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
712 goto out_gunlock_q; 711 goto out_gunlock_q;
713 712
714 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 713 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
715 al->al_rgd->rd_ri.ri_length + 714 al->al_rgd->rd_length +
716 4 * RES_DINODE + 4 * RES_LEAF + 715 4 * RES_DINODE + 4 * RES_LEAF +
717 RES_STATFS + RES_QUOTA + 4, 0); 716 RES_STATFS + RES_QUOTA + 4, 0);
718 if (error) 717 if (error)
@@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
750 if (error) 749 if (error)
751 goto out_end_trans; 750 goto out_end_trans;
752 751
753 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR); 752 error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
754 if (error) 753 if (error)
755 goto out_end_trans; 754 goto out_end_trans;
756 } else { 755 } else {
@@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
758 error = gfs2_meta_inode_buffer(ip, &dibh); 757 error = gfs2_meta_inode_buffer(ip, &dibh);
759 if (error) 758 if (error)
760 goto out_end_trans; 759 goto out_end_trans;
761 ip->i_inode.i_ctime = CURRENT_TIME_SEC; 760 ip->i_inode.i_ctime = CURRENT_TIME;
762 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 761 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
763 gfs2_dinode_out(ip, dibh->b_data); 762 gfs2_dinode_out(ip, dibh->b_data);
764 brelse(dibh); 763 brelse(dibh);
@@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
768 if (error) 767 if (error)
769 goto out_end_trans; 768 goto out_end_trans;
770 769
771 error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num, 770 error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
772 IF2DT(ip->i_inode.i_mode));
773 if (error) 771 if (error)
774 goto out_end_trans; 772 goto out_end_trans;
775 773
@@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
905 } 903 }
906 904
907 error = gfs2_truncatei(ip, attr->ia_size); 905 error = gfs2_truncatei(ip, attr->ia_size);
908 if (error) 906 if (error && (inode->i_size != ip->i_di.di_size))
909 return error; 907 i_size_write(inode, ip->i_di.di_size);
910 908
911 return error; 909 return error;
912} 910}
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 485ce3d49923..603d940f1159 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
326 gfs2_glock_schedule_for_reclaim(ip->i_gl); 326 gfs2_glock_schedule_for_reclaim(ip->i_gl);
327 gfs2_glock_put(ip->i_gl); 327 gfs2_glock_put(ip->i_gl);
328 ip->i_gl = NULL; 328 ip->i_gl = NULL;
329 if (ip->i_iopen_gh.gh_gl) 329 if (ip->i_iopen_gh.gh_gl) {
330 ip->i_iopen_gh.gh_gl->gl_object = NULL;
330 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 331 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
332 }
331 } 333 }
332} 334}
333 335
@@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
422 if (!inode->i_private) 424 if (!inode->i_private)
423 goto out; 425 goto out;
424 426
425 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh); 427 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
426 if (unlikely(error)) { 428 if (unlikely(error)) {
427 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 429 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
428 goto out; 430 goto out;
429 } 431 }
430 432
431 gfs2_glock_dq(&ip->i_iopen_gh); 433 gfs2_glock_dq_wait(&ip->i_iopen_gh);
432 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 434 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
433 error = gfs2_glock_nq(&ip->i_iopen_gh); 435 error = gfs2_glock_nq(&ip->i_iopen_gh);
434 if (error) 436 if (error)
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index aa0dbd2aac1b..404b7cc9f8c4 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
66 if (error) 66 if (error)
67 goto out_gunlock_q; 67 goto out_gunlock_q;
68 68
69 error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length + 69 error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
70 ind_blocks + RES_DINODE + 70 ind_blocks + RES_DINODE +
71 RES_STATFS + RES_QUOTA, 0); 71 RES_STATFS + RES_QUOTA, 0);
72 if (error) 72 if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c186857e48a8..6e546ee8f3d4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -66,6 +66,18 @@
66#define QUOTA_USER 1 66#define QUOTA_USER 1
67#define QUOTA_GROUP 0 67#define QUOTA_GROUP 0
68 68
/*
 * CPU-byte-order copy of the qu_limit, qu_warn and qu_value fields of
 * struct gfs2_quota. Filled by gfs2_quota_in() and written back in
 * big-endian form by gfs2_quota_out().
 */
69struct gfs2_quota_host {
70 u64 qu_limit;
71 u64 qu_warn;
72 s64 qu_value; /* gfs2_adjust_quota() adds its 'change' argument to this field */
73};
74
/*
 * NOTE(review): presumably the CPU-byte-order counterpart of the quota
 * change record, by analogy with struct gfs2_quota_host - confirm against
 * the code that fills it in.
 */
75struct gfs2_quota_change_host {
76 u64 qc_change;
77 u32 qc_flags; /* GFS2_QCF_... */
78 u32 qc_id;
79};
80
69static u64 qd2offset(struct gfs2_quota_data *qd) 81static u64 qd2offset(struct gfs2_quota_data *qd)
70{ 82{
71 u64 offset; 83 u64 offset;
@@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
561 mutex_unlock(&sdp->sd_quota_mutex); 573 mutex_unlock(&sdp->sd_quota_mutex);
562} 574}
563 575
/**
 * gfs2_quota_in - Read a quota record into CPU byte order
 * @qu: destination; qu_limit, qu_warn and qu_value are overwritten
 * @buf: source buffer, interpreted as a big-endian struct gfs2_quota
 *
 * Only the three fields above are read; nothing else in @buf is examined.
 */
576static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
577{
578 const struct gfs2_quota *str = buf;
579
580 qu->qu_limit = be64_to_cpu(str->qu_limit);
581 qu->qu_warn = be64_to_cpu(str->qu_warn);
582 qu->qu_value = be64_to_cpu(str->qu_value); /* stored into the signed s64 qu_value */
583}
584
/**
 * gfs2_quota_out - Write a quota record in big-endian form
 * @qu: source values in CPU byte order
 * @buf: destination buffer, interpreted as a struct gfs2_quota
 *
 * Stores qu_limit, qu_warn and qu_value as big-endian and zeroes the
 * qu_reserved area of the destination.
 */
585static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
586{
587 struct gfs2_quota *str = buf;
588
589 str->qu_limit = cpu_to_be64(qu->qu_limit);
590 str->qu_warn = cpu_to_be64(qu->qu_warn);
591 str->qu_value = cpu_to_be64(qu->qu_value);
592 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); /* reserved bytes are always written as zero */
593}
594
564/** 595/**
565 * gfs2_adjust_quota 596 * gfs2_adjust_quota
566 * 597 *
@@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
573 struct inode *inode = &ip->i_inode; 604 struct inode *inode = &ip->i_inode;
574 struct address_space *mapping = inode->i_mapping; 605 struct address_space *mapping = inode->i_mapping;
575 unsigned long index = loc >> PAGE_CACHE_SHIFT; 606 unsigned long index = loc >> PAGE_CACHE_SHIFT;
576 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1); 607 unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
577 unsigned blocksize, iblock, pos; 608 unsigned blocksize, iblock, pos;
578 struct buffer_head *bh; 609 struct buffer_head *bh;
579 struct page *page; 610 struct page *page;
580 void *kaddr; 611 void *kaddr;
581 __be64 *ptr; 612 char *ptr;
613 struct gfs2_quota_host qp;
582 s64 value; 614 s64 value;
583 int err = -EIO; 615 int err = -EIO;
584 616
@@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
620 652
621 kaddr = kmap_atomic(page, KM_USER0); 653 kaddr = kmap_atomic(page, KM_USER0);
622 ptr = kaddr + offset; 654 ptr = kaddr + offset;
623 value = (s64)be64_to_cpu(*ptr) + change; 655 gfs2_quota_in(&qp, ptr);
624 *ptr = cpu_to_be64(value); 656 qp.qu_value += change;
657 value = qp.qu_value;
658 gfs2_quota_out(&qp, ptr);
625 flush_dcache_page(page); 659 flush_dcache_page(page);
626 kunmap_atomic(kaddr, KM_USER0); 660 kunmap_atomic(kaddr, KM_USER0);
627 err = 0; 661 err = 0;
628 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); 662 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
629 qd->qd_qb.qb_value = cpu_to_be64(value); 663 qd->qd_qb.qb_value = cpu_to_be64(value);
664 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
665 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
630unlock: 666unlock:
631 unlock_page(page); 667 unlock_page(page);
632 page_cache_release(page); 668 page_cache_release(page);
@@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
689 goto out_alloc; 725 goto out_alloc;
690 726
691 error = gfs2_trans_begin(sdp, 727 error = gfs2_trans_begin(sdp,
692 al->al_rgd->rd_ri.ri_length + 728 al->al_rgd->rd_length +
693 num_qd * data_blocks + 729 num_qd * data_blocks +
694 nalloc * ind_blocks + 730 nalloc * ind_blocks +
695 RES_DINODE + num_qd + 731 RES_DINODE + num_qd +
@@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
709 offset = qd2offset(qd); 745 offset = qd2offset(qd);
710 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, 746 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
711 (struct gfs2_quota_data *) 747 (struct gfs2_quota_data *)
712 qd->qd_gl->gl_lvb); 748 qd);
713 if (error) 749 if (error)
714 goto out_end_trans; 750 goto out_end_trans;
715 751
@@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1050 return error; 1086 return error;
1051} 1087}
1052 1088
1089static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
1090{
1091 const struct gfs2_quota_change *str = buf;
1092
1093 qc->qc_change = be64_to_cpu(str->qc_change);
1094 qc->qc_flags = be32_to_cpu(str->qc_flags);
1095 qc->qc_id = be32_to_cpu(str->qc_id);
1096}
1097
1053int gfs2_quota_init(struct gfs2_sbd *sdp) 1098int gfs2_quota_init(struct gfs2_sbd *sdp)
1054{ 1099{
1055 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); 1100 struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 8bc182c7e2ef..5ada38c99a2c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
116 } 116 }
117} 117}
118 118
119static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
120{
121 const struct gfs2_log_header *str = buf;
122
123 if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
124 str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
125 return 1;
126
127 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
128 lh->lh_flags = be32_to_cpu(str->lh_flags);
129 lh->lh_tail = be32_to_cpu(str->lh_tail);
130 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
131 lh->lh_hash = be32_to_cpu(str->lh_hash);
132 return 0;
133}
134
119/** 135/**
120 * get_log_header - read the log header for a given segment 136 * get_log_header - read the log header for a given segment
121 * @jd: the journal 137 * @jd: the journal
@@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
147 sizeof(u32)); 163 sizeof(u32));
148 hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing)); 164 hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
149 hash ^= (u32)~0; 165 hash ^= (u32)~0;
150 gfs2_log_header_in(&lh, bh->b_data); 166 error = gfs2_log_header_in(&lh, bh->b_data);
151 brelse(bh); 167 brelse(bh);
152 168
153 if (lh.lh_header.mh_magic != GFS2_MAGIC || 169 if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
154 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
155 lh.lh_blkno != blk || lh.lh_hash != hash)
156 return 1; 170 return 1;
157 171
158 *head = lh; 172 *head = lh;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1727f5012efe..e4e040625153 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
28#include "ops_file.h" 28#include "ops_file.h"
29#include "util.h" 29#include "util.h"
30#include "log.h" 30#include "log.h"
31#include "inode.h"
31 32
32#define BFITNOENT ((u32)~0) 33#define BFITNOENT ((u32)~0)
33 34
@@ -50,6 +51,9 @@ static const char valid_change[16] = {
50 1, 0, 0, 0 51 1, 0, 0, 0
51}; 52};
52 53
54static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
55 unsigned char old_state, unsigned char new_state);
56
53/** 57/**
54 * gfs2_setbit - Set a bit in the bitmaps 58 * gfs2_setbit - Set a bit in the bitmaps
55 * @buffer: the buffer that holds the bitmaps 59 * @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
204{ 208{
205 struct gfs2_sbd *sdp = rgd->rd_sbd; 209 struct gfs2_sbd *sdp = rgd->rd_sbd;
206 struct gfs2_bitmap *bi = NULL; 210 struct gfs2_bitmap *bi = NULL;
207 u32 length = rgd->rd_ri.ri_length; 211 u32 length = rgd->rd_length;
208 u32 count[4], tmp; 212 u32 count[4], tmp;
209 int buf, x; 213 int buf, x;
210 214
@@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
227 return; 231 return;
228 } 232 }
229 233
230 tmp = rgd->rd_ri.ri_data - 234 tmp = rgd->rd_data -
231 rgd->rd_rg.rg_free - 235 rgd->rd_rg.rg_free -
232 rgd->rd_rg.rg_dinodes; 236 rgd->rd_rg.rg_dinodes;
233 if (count[1] + count[2] != tmp) { 237 if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
253 257
254} 258}
255 259
256static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block) 260static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
257{ 261{
258 u64 first = ri->ri_data0; 262 u64 first = rgd->rd_data0;
259 u64 last = first + ri->ri_data; 263 u64 last = first + rgd->rd_data;
260 return first <= block && block < last; 264 return first <= block && block < last;
261} 265}
262 266
@@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
275 spin_lock(&sdp->sd_rindex_spin); 279 spin_lock(&sdp->sd_rindex_spin);
276 280
277 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { 281 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
278 if (rgrp_contains_block(&rgd->rd_ri, blk)) { 282 if (rgrp_contains_block(rgd, blk)) {
279 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); 283 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
280 spin_unlock(&sdp->sd_rindex_spin); 284 spin_unlock(&sdp->sd_rindex_spin);
281 return rgd; 285 return rgd;
@@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
354 mutex_unlock(&sdp->sd_rindex_mutex); 358 mutex_unlock(&sdp->sd_rindex_mutex);
355} 359}
356 360
361static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
362{
363 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
364 printk(KERN_INFO " ri_length = %u\n", rgd->rd_length);
365 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
366 printk(KERN_INFO " ri_data = %u\n", rgd->rd_data);
367 printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes);
368}
369
357/** 370/**
358 * gfs2_compute_bitstructs - Compute the bitmap sizes 371 * gfs2_compute_bitstructs - Compute the bitmap sizes
359 * @rgd: The resource group descriptor 372 * @rgd: The resource group descriptor
@@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
367{ 380{
368 struct gfs2_sbd *sdp = rgd->rd_sbd; 381 struct gfs2_sbd *sdp = rgd->rd_sbd;
369 struct gfs2_bitmap *bi; 382 struct gfs2_bitmap *bi;
370 u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */ 383 u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
371 u32 bytes_left, bytes; 384 u32 bytes_left, bytes;
372 int x; 385 int x;
373 386
@@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
378 if (!rgd->rd_bits) 391 if (!rgd->rd_bits)
379 return -ENOMEM; 392 return -ENOMEM;
380 393
381 bytes_left = rgd->rd_ri.ri_bitbytes; 394 bytes_left = rgd->rd_bitbytes;
382 395
383 for (x = 0; x < length; x++) { 396 for (x = 0; x < length; x++) {
384 bi = rgd->rd_bits + x; 397 bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
399 } else if (x + 1 == length) { 412 } else if (x + 1 == length) {
400 bytes = bytes_left; 413 bytes = bytes_left;
401 bi->bi_offset = sizeof(struct gfs2_meta_header); 414 bi->bi_offset = sizeof(struct gfs2_meta_header);
402 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; 415 bi->bi_start = rgd->rd_bitbytes - bytes_left;
403 bi->bi_len = bytes; 416 bi->bi_len = bytes;
404 /* other blocks */ 417 /* other blocks */
405 } else { 418 } else {
406 bytes = sdp->sd_sb.sb_bsize - 419 bytes = sdp->sd_sb.sb_bsize -
407 sizeof(struct gfs2_meta_header); 420 sizeof(struct gfs2_meta_header);
408 bi->bi_offset = sizeof(struct gfs2_meta_header); 421 bi->bi_offset = sizeof(struct gfs2_meta_header);
409 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; 422 bi->bi_start = rgd->rd_bitbytes - bytes_left;
410 bi->bi_len = bytes; 423 bi->bi_len = bytes;
411 } 424 }
412 425
@@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
418 return -EIO; 431 return -EIO;
419 } 432 }
420 bi = rgd->rd_bits + (length - 1); 433 bi = rgd->rd_bits + (length - 1);
421 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) { 434 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
422 if (gfs2_consist_rgrpd(rgd)) { 435 if (gfs2_consist_rgrpd(rgd)) {
423 gfs2_rindex_print(&rgd->rd_ri); 436 gfs2_rindex_print(rgd);
424 fs_err(sdp, "start=%u len=%u offset=%u\n", 437 fs_err(sdp, "start=%u len=%u offset=%u\n",
425 bi->bi_start, bi->bi_len, bi->bi_offset); 438 bi->bi_start, bi->bi_len, bi->bi_offset);
426 } 439 }
@@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
431} 444}
432 445
433/** 446/**
434 * gfs2_ri_update - Pull in a new resource index from the disk 447 * gfs2_ri_total - Total up the file system space, according to the rindex.
448 *
449 */
450u64 gfs2_ri_total(struct gfs2_sbd *sdp)
451{
452 u64 total_data = 0;
453 struct inode *inode = sdp->sd_rindex;
454 struct gfs2_inode *ip = GFS2_I(inode);
455 char buf[sizeof(struct gfs2_rindex)];
456 struct file_ra_state ra_state;
457 int error, rgrps;
458
459 mutex_lock(&sdp->sd_rindex_mutex);
460 file_ra_state_init(&ra_state, inode->i_mapping);
461 for (rgrps = 0;; rgrps++) {
462 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
463
464 if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
465 break;
466 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
467 sizeof(struct gfs2_rindex));
468 if (error != sizeof(struct gfs2_rindex))
469 break;
470 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
471 }
472 mutex_unlock(&sdp->sd_rindex_mutex);
473 return total_data;
474}
475
476static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
477{
478 const struct gfs2_rindex *str = buf;
479
480 rgd->rd_addr = be64_to_cpu(str->ri_addr);
481 rgd->rd_length = be32_to_cpu(str->ri_length);
482 rgd->rd_data0 = be64_to_cpu(str->ri_data0);
483 rgd->rd_data = be32_to_cpu(str->ri_data);
484 rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
485}
486
487/**
488 * read_rindex_entry - Pull in a new resource index entry from the disk
435 * @gl: The glock covering the rindex inode 489 * @gl: The glock covering the rindex inode
436 * 490 *
491 * Returns: 0 on success, error code otherwise
492 */
493
494static int read_rindex_entry(struct gfs2_inode *ip,
495 struct file_ra_state *ra_state)
496{
497 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
498 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
499 char buf[sizeof(struct gfs2_rindex)];
500 int error;
501 struct gfs2_rgrpd *rgd;
502
503 error = gfs2_internal_read(ip, ra_state, buf, &pos,
504 sizeof(struct gfs2_rindex));
505 if (!error)
506 return 0;
507 if (error != sizeof(struct gfs2_rindex)) {
508 if (error > 0)
509 error = -EIO;
510 return error;
511 }
512
513 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
514 error = -ENOMEM;
515 if (!rgd)
516 return error;
517
518 mutex_init(&rgd->rd_mutex);
519 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
520 rgd->rd_sbd = sdp;
521
522 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
523 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
524
525 gfs2_rindex_in(rgd, buf);
526 error = compute_bitstructs(rgd);
527 if (error)
528 return error;
529
530 error = gfs2_glock_get(sdp, rgd->rd_addr,
531 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
532 if (error)
533 return error;
534
535 rgd->rd_gl->gl_object = rgd;
536 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
537 rgd->rd_flags |= GFS2_RDF_CHECK;
538 return error;
539}
540
541/**
542 * gfs2_ri_update - Pull in a new resource index from the disk
543 * @ip: pointer to the rindex inode
544 *
437 * Returns: 0 on successful update, error code otherwise 545 * Returns: 0 on successful update, error code otherwise
438 */ 546 */
439 547
@@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
441{ 549{
442 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 550 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
443 struct inode *inode = &ip->i_inode; 551 struct inode *inode = &ip->i_inode;
444 struct gfs2_rgrpd *rgd;
445 char buf[sizeof(struct gfs2_rindex)];
446 struct file_ra_state ra_state; 552 struct file_ra_state ra_state;
447 u64 junk = ip->i_di.di_size; 553 u64 rgrp_count = ip->i_di.di_size;
448 int error; 554 int error;
449 555
450 if (do_div(junk, sizeof(struct gfs2_rindex))) { 556 if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
451 gfs2_consist_inode(ip); 557 gfs2_consist_inode(ip);
452 return -EIO; 558 return -EIO;
453 } 559 }
@@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
455 clear_rgrpdi(sdp); 561 clear_rgrpdi(sdp);
456 562
457 file_ra_state_init(&ra_state, inode->i_mapping); 563 file_ra_state_init(&ra_state, inode->i_mapping);
458 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { 564 for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
459 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 565 error = read_rindex_entry(ip, &ra_state);
460 error = gfs2_internal_read(ip, &ra_state, buf, &pos, 566 if (error) {
461 sizeof(struct gfs2_rindex)); 567 clear_rgrpdi(sdp);
462 if (!error) 568 return error;
463 break;
464 if (error != sizeof(struct gfs2_rindex)) {
465 if (error > 0)
466 error = -EIO;
467 goto fail;
468 } 569 }
570 }
469 571
470 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); 572 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
471 error = -ENOMEM; 573 return 0;
472 if (!rgd) 574}
473 goto fail;
474
475 mutex_init(&rgd->rd_mutex);
476 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
477 rgd->rd_sbd = sdp;
478
479 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
480 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
481
482 gfs2_rindex_in(&rgd->rd_ri, buf);
483 error = compute_bitstructs(rgd);
484 if (error)
485 goto fail;
486 575
487 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr, 576/**
488 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 577 * gfs2_ri_update_special - Pull in a new resource index from the disk
489 if (error) 578 *
490 goto fail; 579 * This is a special version that's safe to call from gfs2_inplace_reserve_i.
580 * In this case we know that we don't have any resource groups in memory yet.
581 *
582 * @ip: pointer to the rindex inode
583 *
584 * Returns: 0 on successful update, error code otherwise
585 */
586static int gfs2_ri_update_special(struct gfs2_inode *ip)
587{
588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
589 struct inode *inode = &ip->i_inode;
590 struct file_ra_state ra_state;
591 int error;
491 592
492 rgd->rd_gl->gl_object = rgd; 593 file_ra_state_init(&ra_state, inode->i_mapping);
493 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; 594 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
595 /* Ignore partials */
596 if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
597 ip->i_di.di_size)
598 break;
599 error = read_rindex_entry(ip, &ra_state);
600 if (error) {
601 clear_rgrpdi(sdp);
602 return error;
603 }
494 } 604 }
495 605
496 sdp->sd_rindex_vn = ip->i_gl->gl_vn; 606 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
497 return 0; 607 return 0;
498
499fail:
500 clear_rgrpdi(sdp);
501 return error;
502} 608}
503 609
504/** 610/**
@@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
543 return error; 649 return error;
544} 650}
545 651
652static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
653{
654 const struct gfs2_rgrp *str = buf;
655
656 rg->rg_flags = be32_to_cpu(str->rg_flags);
657 rg->rg_free = be32_to_cpu(str->rg_free);
658 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
659 rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
660}
661
662static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
663{
664 struct gfs2_rgrp *str = buf;
665
666 str->rg_flags = cpu_to_be32(rg->rg_flags);
667 str->rg_free = cpu_to_be32(rg->rg_free);
668 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
669 str->__pad = cpu_to_be32(0);
670 str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
671 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
672}
673
546/** 674/**
547 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps 675 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
548 * @rgd: the struct gfs2_rgrpd describing the RG to read in 676 * @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
557{ 685{
558 struct gfs2_sbd *sdp = rgd->rd_sbd; 686 struct gfs2_sbd *sdp = rgd->rd_sbd;
559 struct gfs2_glock *gl = rgd->rd_gl; 687 struct gfs2_glock *gl = rgd->rd_gl;
560 unsigned int length = rgd->rd_ri.ri_length; 688 unsigned int length = rgd->rd_length;
561 struct gfs2_bitmap *bi; 689 struct gfs2_bitmap *bi;
562 unsigned int x, y; 690 unsigned int x, y;
563 int error; 691 int error;
@@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
575 703
576 for (x = 0; x < length; x++) { 704 for (x = 0; x < length; x++) {
577 bi = rgd->rd_bits + x; 705 bi = rgd->rd_bits + x;
578 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh); 706 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
579 if (error) 707 if (error)
580 goto fail; 708 goto fail;
581 } 709 }
@@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
637void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) 765void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
638{ 766{
639 struct gfs2_sbd *sdp = rgd->rd_sbd; 767 struct gfs2_sbd *sdp = rgd->rd_sbd;
640 int x, length = rgd->rd_ri.ri_length; 768 int x, length = rgd->rd_length;
641 769
642 spin_lock(&sdp->sd_rindex_spin); 770 spin_lock(&sdp->sd_rindex_spin);
643 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); 771 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
660void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) 788void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
661{ 789{
662 struct gfs2_sbd *sdp = rgd->rd_sbd; 790 struct gfs2_sbd *sdp = rgd->rd_sbd;
663 unsigned int length = rgd->rd_ri.ri_length; 791 unsigned int length = rgd->rd_length;
664 unsigned int x; 792 unsigned int x;
665 793
666 for (x = 0; x < length; x++) { 794 for (x = 0; x < length; x++) {
@@ -722,6 +850,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
722} 850}
723 851
724/** 852/**
853 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
854 * @rgd: The rgrp
855 *
856 * Returns: The inode, if one has been found
857 */
858
859static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
860{
861 struct inode *inode;
862 u32 goal = 0;
863 u64 no_addr;
864
865 for(;;) {
866 goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
867 GFS2_BLKST_UNLINKED);
868 if (goal == 0)
869 return 0;
870 no_addr = goal + rgd->rd_data0;
871 if (no_addr <= *last_unlinked)
872 continue;
873 *last_unlinked = no_addr;
874 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
875 no_addr, -1);
876 if (!IS_ERR(inode))
877 return inode;
878 }
879
880 rgd->rd_flags &= ~GFS2_RDF_CHECK;
881 return NULL;
882}
883
884/**
725 * recent_rgrp_first - get first RG from "recent" list 885 * recent_rgrp_first - get first RG from "recent" list
726 * @sdp: The GFS2 superblock 886 * @sdp: The GFS2 superblock
727 * @rglast: address of the rgrp used last 887 * @rglast: address of the rgrp used last
@@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
743 goto first; 903 goto first;
744 904
745 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { 905 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
746 if (rgd->rd_ri.ri_addr == rglast) 906 if (rgd->rd_addr == rglast)
747 goto out; 907 goto out;
748 } 908 }
749 909
@@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
882 * Returns: errno 1042 * Returns: errno
883 */ 1043 */
884 1044
885static int get_local_rgrp(struct gfs2_inode *ip) 1045static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
886{ 1046{
1047 struct inode *inode = NULL;
887 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1048 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
888 struct gfs2_rgrpd *rgd, *begin = NULL; 1049 struct gfs2_rgrpd *rgd, *begin = NULL;
889 struct gfs2_alloc *al = &ip->i_alloc; 1050 struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
903 case 0: 1064 case 0:
904 if (try_rgrp_fit(rgd, al)) 1065 if (try_rgrp_fit(rgd, al))
905 goto out; 1066 goto out;
1067 if (rgd->rd_flags & GFS2_RDF_CHECK)
1068 inode = try_rgrp_unlink(rgd, last_unlinked);
906 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1069 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1070 if (inode)
1071 return inode;
907 rgd = recent_rgrp_next(rgd, 1); 1072 rgd = recent_rgrp_next(rgd, 1);
908 break; 1073 break;
909 1074
@@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
912 break; 1077 break;
913 1078
914 default: 1079 default:
915 return error; 1080 return ERR_PTR(error);
916 } 1081 }
917 } 1082 }
918 1083
@@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
927 case 0: 1092 case 0:
928 if (try_rgrp_fit(rgd, al)) 1093 if (try_rgrp_fit(rgd, al))
929 goto out; 1094 goto out;
1095 if (rgd->rd_flags & GFS2_RDF_CHECK)
1096 inode = try_rgrp_unlink(rgd, last_unlinked);
930 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1097 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1098 if (inode)
1099 return inode;
931 break; 1100 break;
932 1101
933 case GLR_TRYFAILED: 1102 case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
935 break; 1104 break;
936 1105
937 default: 1106 default:
938 return error; 1107 return ERR_PTR(error);
939 } 1108 }
940 1109
941 rgd = gfs2_rgrpd_get_next(rgd); 1110 rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
944 1113
945 if (rgd == begin) { 1114 if (rgd == begin) {
946 if (++loops >= 3) 1115 if (++loops >= 3)
947 return -ENOSPC; 1116 return ERR_PTR(-ENOSPC);
948 if (!skipped) 1117 if (!skipped)
949 loops++; 1118 loops++;
950 flags = 0; 1119 flags = 0;
@@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
954 } 1123 }
955 1124
956out: 1125out:
957 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr; 1126 ip->i_last_rg_alloc = rgd->rd_addr;
958 1127
959 if (begin) { 1128 if (begin) {
960 recent_rgrp_add(rgd); 1129 recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@ out:
964 forward_rgrp_set(sdp, rgd); 1133 forward_rgrp_set(sdp, rgd);
965 } 1134 }
966 1135
967 return 0; 1136 return NULL;
968} 1137}
969 1138
970/** 1139/**
@@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
978{ 1147{
979 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1148 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
980 struct gfs2_alloc *al = &ip->i_alloc; 1149 struct gfs2_alloc *al = &ip->i_alloc;
981 int error; 1150 struct inode *inode;
1151 int error = 0;
1152 u64 last_unlinked = 0;
982 1153
983 if (gfs2_assert_warn(sdp, al->al_requested)) 1154 if (gfs2_assert_warn(sdp, al->al_requested))
984 return -EINVAL; 1155 return -EINVAL;
985 1156
986 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 1157try_again:
1158 /* We need to hold the rindex unless the inode we're using is
1159 the rindex itself, in which case it's already held. */
1160 if (ip != GFS2_I(sdp->sd_rindex))
1161 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1162 else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
1163 error = gfs2_ri_update_special(ip);
1164
987 if (error) 1165 if (error)
988 return error; 1166 return error;
989 1167
990 error = get_local_rgrp(ip); 1168 inode = get_local_rgrp(ip, &last_unlinked);
991 if (error) { 1169 if (inode) {
992 gfs2_glock_dq_uninit(&al->al_ri_gh); 1170 if (ip != GFS2_I(sdp->sd_rindex))
993 return error; 1171 gfs2_glock_dq_uninit(&al->al_ri_gh);
1172 if (IS_ERR(inode))
1173 return PTR_ERR(inode);
1174 iput(inode);
1175 gfs2_log_flush(sdp, NULL);
1176 goto try_again;
994 } 1177 }
995 1178
996 al->al_file = file; 1179 al->al_file = file;
@@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1019 1202
1020 al->al_rgd = NULL; 1203 al->al_rgd = NULL;
1021 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1204 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1022 gfs2_glock_dq_uninit(&al->al_ri_gh); 1205 if (ip != GFS2_I(sdp->sd_rindex))
1206 gfs2_glock_dq_uninit(&al->al_ri_gh);
1023} 1207}
1024 1208
1025/** 1209/**
@@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1037 unsigned int buf; 1221 unsigned int buf;
1038 unsigned char type; 1222 unsigned char type;
1039 1223
1040 length = rgd->rd_ri.ri_length; 1224 length = rgd->rd_length;
1041 rgrp_block = block - rgd->rd_ri.ri_data0; 1225 rgrp_block = block - rgd->rd_data0;
1042 1226
1043 for (buf = 0; buf < length; buf++) { 1227 for (buf = 0; buf < length; buf++) {
1044 bi = rgd->rd_bits + buf; 1228 bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1077 */ 1261 */
1078 1262
1079static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1263static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1080 unsigned char old_state, unsigned char new_state) 1264 unsigned char old_state, unsigned char new_state)
1081{ 1265{
1082 struct gfs2_bitmap *bi = NULL; 1266 struct gfs2_bitmap *bi = NULL;
1083 u32 length = rgd->rd_ri.ri_length; 1267 u32 length = rgd->rd_length;
1084 u32 blk = 0; 1268 u32 blk = 0;
1085 unsigned int buf, x; 1269 unsigned int buf, x;
1086 1270
@@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1118 goal = 0; 1302 goal = 0;
1119 } 1303 }
1120 1304
1121 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length)) 1305 if (old_state != new_state) {
1122 blk = 0; 1306 gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
1123 1307
1124 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1308 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1125 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1309 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1126 bi->bi_len, blk, new_state);
1127 if (bi->bi_clone)
1128 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1129 bi->bi_len, blk, new_state); 1310 bi->bi_len, blk, new_state);
1311 if (bi->bi_clone)
1312 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1313 bi->bi_len, blk, new_state);
1314 }
1130 1315
1131 return bi->bi_start * GFS2_NBBY + blk; 1316 return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
1132} 1317}
1133 1318
1134/** 1319/**
@@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1156 return NULL; 1341 return NULL;
1157 } 1342 }
1158 1343
1159 length = rgd->rd_ri.ri_length; 1344 length = rgd->rd_length;
1160 1345
1161 rgrp_blk = bstart - rgd->rd_ri.ri_data0; 1346 rgrp_blk = bstart - rgd->rd_data0;
1162 1347
1163 while (blen--) { 1348 while (blen--) {
1164 for (buf = 0; buf < length; buf++) { 1349 for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
1202 u32 goal, blk; 1387 u32 goal, blk;
1203 u64 block; 1388 u64 block;
1204 1389
1205 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data)) 1390 if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
1206 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0; 1391 goal = ip->i_di.di_goal_data - rgd->rd_data0;
1207 else 1392 else
1208 goal = rgd->rd_last_alloc_data; 1393 goal = rgd->rd_last_alloc_data;
1209 1394
1210 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); 1395 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1211 rgd->rd_last_alloc_data = blk; 1396 rgd->rd_last_alloc_data = blk;
1212 1397
1213 block = rgd->rd_ri.ri_data0 + blk; 1398 block = rgd->rd_data0 + blk;
1214 ip->i_di.di_goal_data = block; 1399 ip->i_di.di_goal_data = block;
1215 1400
1216 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1401 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
1246 u32 goal, blk; 1431 u32 goal, blk;
1247 u64 block; 1432 u64 block;
1248 1433
1249 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta)) 1434 if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
1250 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0; 1435 goal = ip->i_di.di_goal_meta - rgd->rd_data0;
1251 else 1436 else
1252 goal = rgd->rd_last_alloc_meta; 1437 goal = rgd->rd_last_alloc_meta;
1253 1438
1254 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); 1439 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1255 rgd->rd_last_alloc_meta = blk; 1440 rgd->rd_last_alloc_meta = blk;
1256 1441
1257 block = rgd->rd_ri.ri_data0 + blk; 1442 block = rgd->rd_data0 + blk;
1258 ip->i_di.di_goal_meta = block; 1443 ip->i_di.di_goal_meta = block;
1259 1444
1260 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1445 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1296 1481
1297 rgd->rd_last_alloc_meta = blk; 1482 rgd->rd_last_alloc_meta = blk;
1298 1483
1299 block = rgd->rd_ri.ri_data0 + blk; 1484 block = rgd->rd_data0 + blk;
1300 1485
1301 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); 1486 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1302 rgd->rd_rg.rg_free--; 1487 rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
1379 struct gfs2_inode *ip = GFS2_I(inode); 1564 struct gfs2_inode *ip = GFS2_I(inode);
1380 struct gfs2_sbd *sdp = GFS2_SB(inode); 1565 struct gfs2_sbd *sdp = GFS2_SB(inode);
1381 struct gfs2_rgrpd *rgd; 1566 struct gfs2_rgrpd *rgd;
1382 u64 blkno = ip->i_num.no_addr; 1567 u64 blkno = ip->i_no_addr;
1383 1568
1384 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 1569 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
1385 if (!rgd) 1570 if (!rgd)
@@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1414 1599
1415void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1600void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1416{ 1601{
1417 gfs2_free_uninit_di(rgd, ip->i_num.no_addr); 1602 gfs2_free_uninit_di(rgd, ip->i_no_addr);
1418 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1603 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
1419 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); 1604 gfs2_meta_wipe(ip, ip->i_no_addr, 1);
1420} 1605}
1421 1606
1422/** 1607/**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b01e0cfc99b5..b4c6adfc6f2e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
65void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, 65void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
66 int flags); 66 int flags);
67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 67void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
68u64 gfs2_ri_total(struct gfs2_sbd *sdp);
68 69
69#endif /* __RGRP_DOT_H__ */ 70#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4fdda974dc83..f916b9740c75 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
95{ 95{
96 unsigned int x; 96 unsigned int x;
97 97
98 if (sb->sb_header.mh_magic != GFS2_MAGIC || 98 if (sb->sb_magic != GFS2_MAGIC ||
99 sb->sb_header.mh_type != GFS2_METATYPE_SB) { 99 sb->sb_type != GFS2_METATYPE_SB) {
100 if (!silent) 100 if (!silent)
101 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); 101 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
102 return -EINVAL; 102 return -EINVAL;
@@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
174 return 0; 174 return 0;
175} 175}
176 176
177static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
178{
179 const struct gfs2_sb *str = buf;
180
181 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
182 sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
183 sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
184 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
185 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
186 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
187 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
188 sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
189 sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
190 sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
191 sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
192
193 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
194 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
195}
196
177/** 197/**
178 * gfs2_read_super - Read the gfs2 super block from disk 198 * gfs2_read_super - Read the gfs2 super block from disk
179 * @sb: The VFS super block 199 * @sdp: The GFS2 super block
180 * @sector: The location of the super block 200 * @sector: The location of the super block
201 * @error: The error code to return
181 * 202 *
182 * This uses the bio functions to read the super block from disk 203 * This uses the bio functions to read the super block from disk
183 * because we want to be 100% sure that we never read cached data. 204 * because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
189 * the master directory (contains pointers to journals etc) and the 210 * the master directory (contains pointers to journals etc) and the
190 * root directory. 211 * root directory.
191 * 212 *
192 * Returns: A page containing the sb or NULL 213 * Returns: 0 on success or error
193 */ 214 */
194 215
195struct page *gfs2_read_super(struct super_block *sb, sector_t sector) 216int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
196{ 217{
218 struct super_block *sb = sdp->sd_vfs;
219 struct gfs2_sb *p;
197 struct page *page; 220 struct page *page;
198 struct bio *bio; 221 struct bio *bio;
199 222
200 page = alloc_page(GFP_KERNEL); 223 page = alloc_page(GFP_KERNEL);
201 if (unlikely(!page)) 224 if (unlikely(!page))
202 return NULL; 225 return -ENOBUFS;
203 226
204 ClearPageUptodate(page); 227 ClearPageUptodate(page);
205 ClearPageDirty(page); 228 ClearPageDirty(page);
@@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
208 bio = bio_alloc(GFP_KERNEL, 1); 231 bio = bio_alloc(GFP_KERNEL, 1);
209 if (unlikely(!bio)) { 232 if (unlikely(!bio)) {
210 __free_page(page); 233 __free_page(page);
211 return NULL; 234 return -ENOBUFS;
212 } 235 }
213 236
214 bio->bi_sector = sector * (sb->s_blocksize >> 9); 237 bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
222 bio_put(bio); 245 bio_put(bio);
223 if (!PageUptodate(page)) { 246 if (!PageUptodate(page)) {
224 __free_page(page); 247 __free_page(page);
225 return NULL; 248 return -EIO;
226 } 249 }
227 return page; 250 p = kmap(page);
251 gfs2_sb_in(&sdp->sd_sb, p);
252 kunmap(page);
253 __free_page(page);
254 return 0;
228} 255}
229 256
230/** 257/**
@@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
241 u32 tmp_blocks; 268 u32 tmp_blocks;
242 unsigned int x; 269 unsigned int x;
243 int error; 270 int error;
244 struct page *page;
245 char *sb;
246 271
247 page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 272 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
248 if (!page) { 273 if (error) {
249 if (!silent) 274 if (!silent)
250 fs_err(sdp, "can't read superblock\n"); 275 fs_err(sdp, "can't read superblock\n");
251 return -EIO; 276 return error;
252 } 277 }
253 sb = kmap(page);
254 gfs2_sb_in(&sdp->sd_sb, sb);
255 kunmap(page);
256 __free_page(page);
257 278
258 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); 279 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
259 if (error) 280 if (error)
@@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
360 name.len = sprintf(buf, "journal%u", sdp->sd_journals); 381 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
361 name.hash = gfs2_disk_hash(name.name, name.len); 382 name.hash = gfs2_disk_hash(name.name, name.len);
362 383
363 error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL); 384 error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
364 if (error == -ENOENT) { 385 if (error == -ENOENT) {
365 error = 0; 386 error = 0;
366 break; 387 break;
@@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
593 return error; 614 return error;
594} 615}
595 616
617static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
618{
619 const struct gfs2_statfs_change *str = buf;
620
621 sc->sc_total = be64_to_cpu(str->sc_total);
622 sc->sc_free = be64_to_cpu(str->sc_free);
623 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
624}
625
626static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
627{
628 struct gfs2_statfs_change *str = buf;
629
630 str->sc_total = cpu_to_be64(sc->sc_total);
631 str->sc_free = cpu_to_be64(sc->sc_free);
632 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
633}
634
596int gfs2_statfs_init(struct gfs2_sbd *sdp) 635int gfs2_statfs_init(struct gfs2_sbd *sdp)
597{ 636{
598 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 637 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
772 struct gfs2_statfs_change_host *sc) 811 struct gfs2_statfs_change_host *sc)
773{ 812{
774 gfs2_rgrp_verify(rgd); 813 gfs2_rgrp_verify(rgd);
775 sc->sc_total += rgd->rd_ri.ri_data; 814 sc->sc_total += rgd->rd_data;
776 sc->sc_free += rgd->rd_rg.rg_free; 815 sc->sc_free += rgd->rd_rg.rg_free;
777 sc->sc_dinodes += rgd->rd_rg.rg_dinodes; 816 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
778 return 0; 817 return 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index e590b2df11dc..60a870e430be 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
16 16
17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); 17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); 18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
19struct page *gfs2_read_super(struct super_block *sb, sector_t sector); 19int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
20 20
21static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) 21static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
22{ 22{
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 601eaa1b9ed6..424a0774eda8 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
115 "GFS2: fsid=%s: inode = %llu %llu\n" 115 "GFS2: fsid=%s: inode = %llu %llu\n"
116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
117 sdp->sd_fsname, 117 sdp->sd_fsname,
118 sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino, 118 sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
119 (unsigned long long)ip->i_num.no_addr, 119 (unsigned long long)ip->i_no_addr,
120 sdp->sd_fsname, function, file, line); 120 sdp->sd_fsname, function, file, line);
121 return rv; 121 return rv;
122} 122}
@@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
137 "GFS2: fsid=%s: RG = %llu\n" 137 "GFS2: fsid=%s: RG = %llu\n"
138 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 138 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
139 sdp->sd_fsname, 139 sdp->sd_fsname,
140 sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr, 140 sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
141 sdp->sd_fsname, function, file, line); 141 sdp->sd_fsname, function, file, line);
142 return rv; 142 return rv;
143} 143}
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db0bd8a..bc835f272a6e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
607 .write = do_sync_write, 607 .write = do_sync_write,
608 .aio_write = generic_file_aio_write, 608 .aio_write = generic_file_aio_write,
609 .mmap = generic_file_mmap, 609 .mmap = generic_file_mmap,
610 .sendfile = generic_file_sendfile, 610 .splice_read = generic_file_splice_read,
611 .fsync = file_fsync, 611 .fsync = file_fsync,
612 .open = hfs_file_open, 612 .open = hfs_file_open,
613 .release = hfs_file_release, 613 .release = hfs_file_release,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d6cc10..409ce5429c91 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
288 .write = do_sync_write, 288 .write = do_sync_write,
289 .aio_write = generic_file_aio_write, 289 .aio_write = generic_file_aio_write,
290 .mmap = generic_file_mmap, 290 .mmap = generic_file_mmap,
291 .sendfile = generic_file_sendfile, 291 .splice_read = generic_file_splice_read,
292 .fsync = file_fsync, 292 .fsync = file_fsync,
293 .open = hfsplus_file_open, 293 .open = hfsplus_file_open,
294 .release = hfsplus_file_release, 294 .release = hfsplus_file_release,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491dbf31..c77862032e84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
390static const struct file_operations hostfs_file_fops = { 390static const struct file_operations hostfs_file_fops = {
391 .llseek = generic_file_llseek, 391 .llseek = generic_file_llseek,
392 .read = do_sync_read, 392 .read = do_sync_read,
393 .sendfile = generic_file_sendfile, 393 .splice_read = generic_file_splice_read,
394 .aio_read = generic_file_aio_read, 394 .aio_read = generic_file_aio_read,
395 .aio_write = generic_file_aio_write, 395 .aio_write = generic_file_aio_write,
396 .write = do_sync_write, 396 .write = do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0f1e54..5b53e5c5d8df 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
129 .mmap = generic_file_mmap, 129 .mmap = generic_file_mmap,
130 .release = hpfs_file_release, 130 .release = hpfs_file_release,
131 .fsync = hpfs_file_fsync, 131 .fsync = hpfs_file_fsync,
132 .sendfile = generic_file_sendfile, 132 .splice_read = generic_file_splice_read,
133}; 133};
134 134
135const struct inode_operations hpfs_file_iops = 135const struct inode_operations hpfs_file_iops =
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 99871279a1ed..c2530197be0c 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
47 .ioctl = jffs2_ioctl, 47 .ioctl = jffs2_ioctl,
48 .mmap = generic_file_readonly_mmap, 48 .mmap = generic_file_readonly_mmap,
49 .fsync = jffs2_fsync, 49 .fsync = jffs2_fsync,
50 .sendfile = generic_file_sendfile 50 .splice_read = generic_file_splice_read,
51}; 51};
52 52
53/* jffs2_file_inode_operations */ 53/* jffs2_file_inode_operations */
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index 79494c4f2b10..fa92f7f1d0d0 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -29,7 +29,7 @@
29 __u32 __x = (x); \ 29 __u32 __x = (x); \
30 ((__u32)( \ 30 ((__u32)( \
31 ((__x & (__u32)0x000000ffUL) << 16) | \ 31 ((__x & (__u32)0x000000ffUL) << 16) | \
32 (__x & (__u32)0x0000ff00UL) | \ 32 (__x & (__u32)0x0000ff00UL) | \
33 ((__x & (__u32)0x00ff0000UL) >> 16) )); \ 33 ((__x & (__u32)0x00ff0000UL) >> 16) )); \
34}) 34})
35 35
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff19b7b..87eb93694af7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
108 .aio_read = generic_file_aio_read, 108 .aio_read = generic_file_aio_read,
109 .aio_write = generic_file_aio_write, 109 .aio_write = generic_file_aio_write,
110 .mmap = generic_file_mmap, 110 .mmap = generic_file_mmap,
111 .sendfile = generic_file_sendfile,
112 .splice_read = generic_file_splice_read, 111 .splice_read = generic_file_splice_read,
113 .splice_write = generic_file_splice_write, 112 .splice_write = generic_file_splice_write,
114 .fsync = jfs_fsync, 113 .fsync = jfs_fsync,
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 9c5d59632aac..887f5759e536 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -26,34 +26,6 @@
26#include "jfs_filsys.h" 26#include "jfs_filsys.h"
27#include "jfs_debug.h" 27#include "jfs_debug.h"
28 28
29#ifdef CONFIG_JFS_DEBUG
30void dump_mem(char *label, void *data, int length)
31{
32 int i, j;
33 int *intptr = data;
34 char *charptr = data;
35 char buf[10], line[80];
36
37 printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
38 data);
39 for (i = 0; i < length; i += 16) {
40 line[0] = 0;
41 for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
42 sprintf(buf, " %08x", intptr[i / 4 + j]);
43 strcat(line, buf);
44 }
45 buf[0] = ' ';
46 buf[2] = 0;
47 for (j = 0; (j < 16) && (i + j < length); j++) {
48 buf[1] =
49 isprint(charptr[i + j]) ? charptr[i + j] : '.';
50 strcat(line, buf);
51 }
52 printk("%s\n", line);
53 }
54}
55#endif
56
57#ifdef PROC_FS_JFS /* see jfs_debug.h */ 29#ifdef PROC_FS_JFS /* see jfs_debug.h */
58 30
59static struct proc_dir_entry *base; 31static struct proc_dir_entry *base;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 7378798f0b21..044c1e654cc0 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,6 @@ extern void jfs_proc_clean(void);
62 62
63extern int jfsloglevel; 63extern int jfsloglevel;
64 64
65extern void dump_mem(char *label, void *data, int length);
66extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); 65extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
67 66
68/* information message: e.g., configuration, major event */ 67/* information message: e.g., configuration, major event */
@@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
94 * --------- 93 * ---------
95 */ 94 */
96#else /* CONFIG_JFS_DEBUG */ 95#else /* CONFIG_JFS_DEBUG */
97#define dump_mem(label,data,length) do {} while (0)
98#define ASSERT(p) do {} while (0) 96#define ASSERT(p) do {} while (0)
99#define jfs_info(fmt, arg...) do {} while (0) 97#define jfs_info(fmt, arg...) do {} while (0)
100#define jfs_debug(fmt, arg...) do {} while (0) 98#define jfs_debug(fmt, arg...) do {} while (0)
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 40b20111383c..c387540d3425 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -19,23 +19,23 @@
19#define _H_JFS_DINODE 19#define _H_JFS_DINODE
20 20
21/* 21/*
22 * jfs_dinode.h: on-disk inode manager 22 * jfs_dinode.h: on-disk inode manager
23 */ 23 */
24 24
25#define INODESLOTSIZE 128 25#define INODESLOTSIZE 128
26#define L2INODESLOTSIZE 7 26#define L2INODESLOTSIZE 7
27#define log2INODESIZE 9 /* log2(bytes per dinode) */ 27#define log2INODESIZE 9 /* log2(bytes per dinode) */
28 28
29 29
30/* 30/*
31 * on-disk inode : 512 bytes 31 * on-disk inode : 512 bytes
32 * 32 *
33 * note: align 64-bit fields on 8-byte boundary. 33 * note: align 64-bit fields on 8-byte boundary.
34 */ 34 */
35struct dinode { 35struct dinode {
36 /* 36 /*
37 * I. base area (128 bytes) 37 * I. base area (128 bytes)
38 * ------------------------ 38 * ------------------------
39 * 39 *
40 * define generic/POSIX attributes 40 * define generic/POSIX attributes
41 */ 41 */
@@ -70,16 +70,16 @@ struct dinode {
70 __le32 di_acltype; /* 4: Type of ACL */ 70 __le32 di_acltype; /* 4: Type of ACL */
71 71
72 /* 72 /*
73 * Extension Areas. 73 * Extension Areas.
74 * 74 *
75 * Historically, the inode was partitioned into 4 128-byte areas, 75 * Historically, the inode was partitioned into 4 128-byte areas,
76 * the last 3 being defined as unions which could have multiple 76 * the last 3 being defined as unions which could have multiple
77 * uses. The first 96 bytes had been completely unused until 77 * uses. The first 96 bytes had been completely unused until
78 * an index table was added to the directory. It is now more 78 * an index table was added to the directory. It is now more
79 * useful to describe the last 3/4 of the inode as a single 79 * useful to describe the last 3/4 of the inode as a single
80 * union. We would probably be better off redesigning the 80 * union. We would probably be better off redesigning the
81 * entire structure from scratch, but we don't want to break 81 * entire structure from scratch, but we don't want to break
82 * commonality with OS/2's JFS at this time. 82 * commonality with OS/2's JFS at this time.
83 */ 83 */
84 union { 84 union {
85 struct { 85 struct {
@@ -95,7 +95,7 @@ struct dinode {
95 } _dir; /* (384) */ 95 } _dir; /* (384) */
96#define di_dirtable u._dir._table 96#define di_dirtable u._dir._table
97#define di_dtroot u._dir._dtroot 97#define di_dtroot u._dir._dtroot
98#define di_parent di_dtroot.header.idotdot 98#define di_parent di_dtroot.header.idotdot
99#define di_DASD di_dtroot.header.DASD 99#define di_DASD di_dtroot.header.DASD
100 100
101 struct { 101 struct {
@@ -127,14 +127,14 @@ struct dinode {
127#define di_inlinedata u._file._u2._special._u 127#define di_inlinedata u._file._u2._special._u
128#define di_rdev u._file._u2._special._u._rdev 128#define di_rdev u._file._u2._special._u._rdev
129#define di_fastsymlink u._file._u2._special._u._fastsymlink 129#define di_fastsymlink u._file._u2._special._u._fastsymlink
130#define di_inlineea u._file._u2._special._inlineea 130#define di_inlineea u._file._u2._special._inlineea
131 } u; 131 } u;
132}; 132};
133 133
134/* extended mode bits (on-disk inode di_mode) */ 134/* extended mode bits (on-disk inode di_mode) */
135#define IFJOURNAL 0x00010000 /* journalled file */ 135#define IFJOURNAL 0x00010000 /* journalled file */
136#define ISPARSE 0x00020000 /* sparse file enabled */ 136#define ISPARSE 0x00020000 /* sparse file enabled */
137#define INLINEEA 0x00040000 /* inline EA area free */ 137#define INLINEEA 0x00040000 /* inline EA area free */
138#define ISWAPFILE 0x00800000 /* file open for pager swap space */ 138#define ISWAPFILE 0x00800000 /* file open for pager swap space */
139 139
140/* more extended mode bits: attributes for OS/2 */ 140/* more extended mode bits: attributes for OS/2 */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f3b1ebb22280..e1985066b1c6 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -154,12 +154,12 @@ static const s8 budtab[256] = {
154 * the in-core descriptor is initialized from disk. 154 * the in-core descriptor is initialized from disk.
155 * 155 *
156 * PARAMETERS: 156 * PARAMETERS:
157 * ipbmap - pointer to in-core inode for the block map. 157 * ipbmap - pointer to in-core inode for the block map.
158 * 158 *
159 * RETURN VALUES: 159 * RETURN VALUES:
160 * 0 - success 160 * 0 - success
161 * -ENOMEM - insufficient memory 161 * -ENOMEM - insufficient memory
162 * -EIO - i/o error 162 * -EIO - i/o error
163 */ 163 */
164int dbMount(struct inode *ipbmap) 164int dbMount(struct inode *ipbmap)
165{ 165{
@@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap)
232 * the memory for this descriptor is freed. 232 * the memory for this descriptor is freed.
233 * 233 *
234 * PARAMETERS: 234 * PARAMETERS:
235 * ipbmap - pointer to in-core inode for the block map. 235 * ipbmap - pointer to in-core inode for the block map.
236 * 236 *
237 * RETURN VALUES: 237 * RETURN VALUES:
238 * 0 - success 238 * 0 - success
239 * -EIO - i/o error 239 * -EIO - i/o error
240 */ 240 */
241int dbUnmount(struct inode *ipbmap, int mounterror) 241int dbUnmount(struct inode *ipbmap, int mounterror)
242{ 242{
@@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap)
320 * at a time. 320 * at a time.
321 * 321 *
322 * PARAMETERS: 322 * PARAMETERS:
323 * ip - pointer to in-core inode; 323 * ip - pointer to in-core inode;
324 * blkno - starting block number to be freed. 324 * blkno - starting block number to be freed.
325 * nblocks - number of blocks to be freed. 325 * nblocks - number of blocks to be freed.
326 * 326 *
327 * RETURN VALUES: 327 * RETURN VALUES:
328 * 0 - success 328 * 0 - success
329 * -EIO - i/o error 329 * -EIO - i/o error
330 */ 330 */
331int dbFree(struct inode *ip, s64 blkno, s64 nblocks) 331int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
332{ 332{
@@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
395/* 395/*
396 * NAME: dbUpdatePMap() 396 * NAME: dbUpdatePMap()
397 * 397 *
398 * FUNCTION: update the allocation state (free or allocate) of the 398 * FUNCTION: update the allocation state (free or allocate) of the
399 * specified block range in the persistent block allocation map. 399 * specified block range in the persistent block allocation map.
400 * 400 *
401 * the blocks will be updated in the persistent map one 401 * the blocks will be updated in the persistent map one
402 * dmap at a time. 402 * dmap at a time.
403 * 403 *
404 * PARAMETERS: 404 * PARAMETERS:
405 * ipbmap - pointer to in-core inode for the block map. 405 * ipbmap - pointer to in-core inode for the block map.
406 * free - 'true' if block range is to be freed from the persistent 406 * free - 'true' if block range is to be freed from the persistent
407 * map; 'false' if it is to be allocated. 407 * map; 'false' if it is to be allocated.
408 * blkno - starting block number of the range. 408 * blkno - starting block number of the range.
409 * nblocks - number of contiguous blocks in the range. 409 * nblocks - number of contiguous blocks in the range.
410 * tblk - transaction block; 410 * tblk - transaction block;
411 * 411 *
412 * RETURN VALUES: 412 * RETURN VALUES:
413 * 0 - success 413 * 0 - success
414 * -EIO - i/o error 414 * -EIO - i/o error
415 */ 415 */
416int 416int
417dbUpdatePMap(struct inode *ipbmap, 417dbUpdatePMap(struct inode *ipbmap,
@@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap,
573/* 573/*
574 * NAME: dbNextAG() 574 * NAME: dbNextAG()
575 * 575 *
576 * FUNCTION: find the preferred allocation group for new allocations. 576 * FUNCTION: find the preferred allocation group for new allocations.
577 * 577 *
578 * Within the allocation groups, we maintain a preferred 578 * Within the allocation groups, we maintain a preferred
579 * allocation group which consists of a group with at least 579 * allocation group which consists of a group with at least
@@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap,
589 * empty ags around for large allocations. 589 * empty ags around for large allocations.
590 * 590 *
591 * PARAMETERS: 591 * PARAMETERS:
592 * ipbmap - pointer to in-core inode for the block map. 592 * ipbmap - pointer to in-core inode for the block map.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * the preferred allocation group number. 595 * the preferred allocation group number.
596 */ 596 */
597int dbNextAG(struct inode *ipbmap) 597int dbNextAG(struct inode *ipbmap)
598{ 598{
@@ -656,7 +656,7 @@ unlock:
656/* 656/*
657 * NAME: dbAlloc() 657 * NAME: dbAlloc()
658 * 658 *
659 * FUNCTION: attempt to allocate a specified number of contiguous free 659 * FUNCTION: attempt to allocate a specified number of contiguous free
660 * blocks from the working allocation block map. 660 * blocks from the working allocation block map.
661 * 661 *
662 * the block allocation policy uses hints and a multi-step 662 * the block allocation policy uses hints and a multi-step
@@ -680,16 +680,16 @@ unlock:
680 * size or requests that specify no hint value. 680 * size or requests that specify no hint value.
681 * 681 *
682 * PARAMETERS: 682 * PARAMETERS:
683 * ip - pointer to in-core inode; 683 * ip - pointer to in-core inode;
684 * hint - allocation hint. 684 * hint - allocation hint.
685 * nblocks - number of contiguous blocks in the range. 685 * nblocks - number of contiguous blocks in the range.
686 * results - on successful return, set to the starting block number 686 * results - on successful return, set to the starting block number
687 * of the newly allocated contiguous range. 687 * of the newly allocated contiguous range.
688 * 688 *
689 * RETURN VALUES: 689 * RETURN VALUES:
690 * 0 - success 690 * 0 - success
691 * -ENOSPC - insufficient disk resources 691 * -ENOSPC - insufficient disk resources
692 * -EIO - i/o error 692 * -EIO - i/o error
693 */ 693 */
694int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) 694int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
695{ 695{
@@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
706 /* assert that nblocks is valid */ 706 /* assert that nblocks is valid */
707 assert(nblocks > 0); 707 assert(nblocks > 0);
708 708
709#ifdef _STILL_TO_PORT
710 /* DASD limit check F226941 */
711 if (OVER_LIMIT(ip, nblocks))
712 return -ENOSPC;
713#endif /* _STILL_TO_PORT */
714
715 /* get the log2 number of blocks to be allocated. 709 /* get the log2 number of blocks to be allocated.
716 * if the number of blocks is not a log2 multiple, 710 * if the number of blocks is not a log2 multiple,
717 * it will be rounded up to the next log2 multiple. 711 * it will be rounded up to the next log2 multiple.
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
720 714
721 bmp = JFS_SBI(ip->i_sb)->bmap; 715 bmp = JFS_SBI(ip->i_sb)->bmap;
722 716
723//retry: /* serialize w.r.t.extendfs() */
724 mapSize = bmp->db_mapsize; 717 mapSize = bmp->db_mapsize;
725 718
726 /* the hint should be within the map */ 719 /* the hint should be within the map */
@@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
879/* 872/*
880 * NAME: dbAllocExact() 873 * NAME: dbAllocExact()
881 * 874 *
882 * FUNCTION: try to allocate the requested extent; 875 * FUNCTION: try to allocate the requested extent;
883 * 876 *
884 * PARAMETERS: 877 * PARAMETERS:
885 * ip - pointer to in-core inode; 878 * ip - pointer to in-core inode;
886 * blkno - extent address; 879 * blkno - extent address;
887 * nblocks - extent length; 880 * nblocks - extent length;
888 * 881 *
889 * RETURN VALUES: 882 * RETURN VALUES:
890 * 0 - success 883 * 0 - success
891 * -ENOSPC - insufficient disk resources 884 * -ENOSPC - insufficient disk resources
892 * -EIO - i/o error 885 * -EIO - i/o error
893 */ 886 */
894int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) 887int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
895{ 888{
@@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
946/* 939/*
947 * NAME: dbReAlloc() 940 * NAME: dbReAlloc()
948 * 941 *
949 * FUNCTION: attempt to extend a current allocation by a specified 942 * FUNCTION: attempt to extend a current allocation by a specified
950 * number of blocks. 943 * number of blocks.
951 * 944 *
952 * this routine attempts to satisfy the allocation request 945 * this routine attempts to satisfy the allocation request
@@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
959 * number of blocks required. 952 * number of blocks required.
960 * 953 *
961 * PARAMETERS: 954 * PARAMETERS:
962 * ip - pointer to in-core inode requiring allocation. 955 * ip - pointer to in-core inode requiring allocation.
963 * blkno - starting block of the current allocation. 956 * blkno - starting block of the current allocation.
964 * nblocks - number of contiguous blocks within the current 957 * nblocks - number of contiguous blocks within the current
965 * allocation. 958 * allocation.
966 * addnblocks - number of blocks to add to the allocation. 959 * addnblocks - number of blocks to add to the allocation.
967 * results - on successful return, set to the starting block number 960 * results - on successful return, set to the starting block number
968 * of the existing allocation if the existing allocation 961 * of the existing allocation if the existing allocation
969 * was extended in place or to a newly allocated contiguous 962 * was extended in place or to a newly allocated contiguous
970 * range if the existing allocation could not be extended 963 * range if the existing allocation could not be extended
971 * in place. 964 * in place.
972 * 965 *
973 * RETURN VALUES: 966 * RETURN VALUES:
974 * 0 - success 967 * 0 - success
975 * -ENOSPC - insufficient disk resources 968 * -ENOSPC - insufficient disk resources
976 * -EIO - i/o error 969 * -EIO - i/o error
977 */ 970 */
978int 971int
979dbReAlloc(struct inode *ip, 972dbReAlloc(struct inode *ip,
@@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip,
1004/* 997/*
1005 * NAME: dbExtend() 998 * NAME: dbExtend()
1006 * 999 *
1007 * FUNCTION: attempt to extend a current allocation by a specified 1000 * FUNCTION: attempt to extend a current allocation by a specified
1008 * number of blocks. 1001 * number of blocks.
1009 * 1002 *
1010 * this routine attempts to satisfy the allocation request 1003 * this routine attempts to satisfy the allocation request
@@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip,
1013 * immediately following the current allocation. 1006 * immediately following the current allocation.
1014 * 1007 *
1015 * PARAMETERS: 1008 * PARAMETERS:
1016 * ip - pointer to in-core inode requiring allocation. 1009 * ip - pointer to in-core inode requiring allocation.
1017 * blkno - starting block of the current allocation. 1010 * blkno - starting block of the current allocation.
1018 * nblocks - number of contiguous blocks within the current 1011 * nblocks - number of contiguous blocks within the current
1019 * allocation. 1012 * allocation.
1020 * addnblocks - number of blocks to add to the allocation. 1013 * addnblocks - number of blocks to add to the allocation.
1021 * 1014 *
1022 * RETURN VALUES: 1015 * RETURN VALUES:
1023 * 0 - success 1016 * 0 - success
1024 * -ENOSPC - insufficient disk resources 1017 * -ENOSPC - insufficient disk resources
1025 * -EIO - i/o error 1018 * -EIO - i/o error
1026 */ 1019 */
1027static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) 1020static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1028{ 1021{
@@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1109/* 1102/*
1110 * NAME: dbAllocNext() 1103 * NAME: dbAllocNext()
1111 * 1104 *
1112 * FUNCTION: attempt to allocate the blocks of the specified block 1105 * FUNCTION: attempt to allocate the blocks of the specified block
1113 * range within a dmap. 1106 * range within a dmap.
1114 * 1107 *
1115 * PARAMETERS: 1108 * PARAMETERS:
1116 * bmp - pointer to bmap descriptor 1109 * bmp - pointer to bmap descriptor
1117 * dp - pointer to dmap. 1110 * dp - pointer to dmap.
1118 * blkno - starting block number of the range. 1111 * blkno - starting block number of the range.
1119 * nblocks - number of contiguous free blocks of the range. 1112 * nblocks - number of contiguous free blocks of the range.
1120 * 1113 *
1121 * RETURN VALUES: 1114 * RETURN VALUES:
1122 * 0 - success 1115 * 0 - success
1123 * -ENOSPC - insufficient disk resources 1116 * -ENOSPC - insufficient disk resources
1124 * -EIO - i/o error 1117 * -EIO - i/o error
1125 * 1118 *
1126 * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 1119 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1127 */ 1120 */
@@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1233/* 1226/*
1234 * NAME: dbAllocNear() 1227 * NAME: dbAllocNear()
1235 * 1228 *
1236 * FUNCTION: attempt to allocate a number of contiguous free blocks near 1229 * FUNCTION: attempt to allocate a number of contiguous free blocks near
1237 * a specified block (hint) within a dmap. 1230 * a specified block (hint) within a dmap.
1238 * 1231 *
1239 * starting with the dmap leaf that covers the hint, we'll 1232 * starting with the dmap leaf that covers the hint, we'll
@@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1242 * the desired free space. 1235 * the desired free space.
1243 * 1236 *
1244 * PARAMETERS: 1237 * PARAMETERS:
1245 * bmp - pointer to bmap descriptor 1238 * bmp - pointer to bmap descriptor
1246 * dp - pointer to dmap. 1239 * dp - pointer to dmap.
1247 * blkno - block number to allocate near. 1240 * blkno - block number to allocate near.
1248 * nblocks - actual number of contiguous free blocks desired. 1241 * nblocks - actual number of contiguous free blocks desired.
1249 * l2nb - log2 number of contiguous free blocks desired. 1242 * l2nb - log2 number of contiguous free blocks desired.
1250 * results - on successful return, set to the starting block number 1243 * results - on successful return, set to the starting block number
1251 * of the newly allocated range. 1244 * of the newly allocated range.
1252 * 1245 *
1253 * RETURN VALUES: 1246 * RETURN VALUES:
1254 * 0 - success 1247 * 0 - success
1255 * -ENOSPC - insufficient disk resources 1248 * -ENOSPC - insufficient disk resources
1256 * -EIO - i/o error 1249 * -EIO - i/o error
1257 * 1250 *
1258 * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 1251 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1259 */ 1252 */
@@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp,
1316/* 1309/*
1317 * NAME: dbAllocAG() 1310 * NAME: dbAllocAG()
1318 * 1311 *
1319 * FUNCTION: attempt to allocate the specified number of contiguous 1312 * FUNCTION: attempt to allocate the specified number of contiguous
1320 * free blocks within the specified allocation group. 1313 * free blocks within the specified allocation group.
1321 * 1314 *
1322 * unless the allocation group size is equal to the number 1315 * unless the allocation group size is equal to the number
@@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp,
1353 * the allocation group. 1346 * the allocation group.
1354 * 1347 *
1355 * PARAMETERS: 1348 * PARAMETERS:
1356 * bmp - pointer to bmap descriptor 1349 * bmp - pointer to bmap descriptor
1357 * agno - allocation group number. 1350 * agno - allocation group number.
1358 * nblocks - actual number of contiguous free blocks desired. 1351 * nblocks - actual number of contiguous free blocks desired.
1359 * l2nb - log2 number of contiguous free blocks desired. 1352 * l2nb - log2 number of contiguous free blocks desired.
1360 * results - on successful return, set to the starting block number 1353 * results - on successful return, set to the starting block number
1361 * of the newly allocated range. 1354 * of the newly allocated range.
1362 * 1355 *
1363 * RETURN VALUES: 1356 * RETURN VALUES:
1364 * 0 - success 1357 * 0 - success
1365 * -ENOSPC - insufficient disk resources 1358 * -ENOSPC - insufficient disk resources
1366 * -EIO - i/o error 1359 * -EIO - i/o error
1367 * 1360 *
1368 * note: IWRITE_LOCK(ipbmap) held on entry/exit; 1361 * note: IWRITE_LOCK(ipbmap) held on entry/exit;
1369 */ 1362 */
@@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1546/* 1539/*
1547 * NAME: dbAllocAny() 1540 * NAME: dbAllocAny()
1548 * 1541 *
1549 * FUNCTION: attempt to allocate the specified number of contiguous 1542 * FUNCTION: attempt to allocate the specified number of contiguous
1550 * free blocks anywhere in the file system. 1543 * free blocks anywhere in the file system.
1551 * 1544 *
1552 * dbAllocAny() attempts to find the sufficient free space by 1545 * dbAllocAny() attempts to find the sufficient free space by
@@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1556 * desired free space is allocated. 1549 * desired free space is allocated.
1557 * 1550 *
1558 * PARAMETERS: 1551 * PARAMETERS:
1559 * bmp - pointer to bmap descriptor 1552 * bmp - pointer to bmap descriptor
1560 * nblocks - actual number of contiguous free blocks desired. 1553 * nblocks - actual number of contiguous free blocks desired.
1561 * l2nb - log2 number of contiguous free blocks desired. 1554 * l2nb - log2 number of contiguous free blocks desired.
1562 * results - on successful return, set to the starting block number 1555 * results - on successful return, set to the starting block number
1563 * of the newly allocated range. 1556 * of the newly allocated range.
1564 * 1557 *
1565 * RETURN VALUES: 1558 * RETURN VALUES:
1566 * 0 - success 1559 * 0 - success
1567 * -ENOSPC - insufficient disk resources 1560 * -ENOSPC - insufficient disk resources
1568 * -EIO - i/o error 1561 * -EIO - i/o error
1569 * 1562 *
1570 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1563 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1571 */ 1564 */
@@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1598/* 1591/*
1599 * NAME: dbFindCtl() 1592 * NAME: dbFindCtl()
1600 * 1593 *
1601 * FUNCTION: starting at a specified dmap control page level and block 1594 * FUNCTION: starting at a specified dmap control page level and block
1602 * number, search down the dmap control levels for a range of 1595 * number, search down the dmap control levels for a range of
1603 * contiguous free blocks large enough to satisfy an allocation 1596 * contiguous free blocks large enough to satisfy an allocation
1604 * request for the specified number of free blocks. 1597 * request for the specified number of free blocks.
1605 * 1598 *
1606 * if sufficient contiguous free blocks are found, this routine 1599 * if sufficient contiguous free blocks are found, this routine
@@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1609 * is sufficient in size. 1602 * is sufficient in size.
1610 * 1603 *
1611 * PARAMETERS: 1604 * PARAMETERS:
1612 * bmp - pointer to bmap descriptor 1605 * bmp - pointer to bmap descriptor
1613 * level - starting dmap control page level. 1606 * level - starting dmap control page level.
1614 * l2nb - log2 number of contiguous free blocks desired. 1607 * l2nb - log2 number of contiguous free blocks desired.
1615 * *blkno - on entry, starting block number for conducting the search. 1608 * *blkno - on entry, starting block number for conducting the search.
1616 * on successful return, the first block within a dmap page 1609 * on successful return, the first block within a dmap page
1617 * that contains or starts a range of contiguous free blocks. 1610 * that contains or starts a range of contiguous free blocks.
1618 * 1611 *
1619 * RETURN VALUES: 1612 * RETURN VALUES:
1620 * 0 - success 1613 * 0 - success
1621 * -ENOSPC - insufficient disk resources 1614 * -ENOSPC - insufficient disk resources
1622 * -EIO - i/o error 1615 * -EIO - i/o error
1623 * 1616 *
1624 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1617 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1625 */ 1618 */
@@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1699/* 1692/*
1700 * NAME: dbAllocCtl() 1693 * NAME: dbAllocCtl()
1701 * 1694 *
1702 * FUNCTION: attempt to allocate a specified number of contiguous 1695 * FUNCTION: attempt to allocate a specified number of contiguous
1703 * blocks starting within a specific dmap. 1696 * blocks starting within a specific dmap.
1704 * 1697 *
1705 * this routine is called by higher level routines that search 1698 * this routine is called by higher level routines that search
@@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1726 * first dmap (i.e. blkno). 1719 * first dmap (i.e. blkno).
1727 * 1720 *
1728 * PARAMETERS: 1721 * PARAMETERS:
1729 * bmp - pointer to bmap descriptor 1722 * bmp - pointer to bmap descriptor
1730 * nblocks - actual number of contiguous free blocks to allocate. 1723 * nblocks - actual number of contiguous free blocks to allocate.
1731 * l2nb - log2 number of contiguous free blocks to allocate. 1724 * l2nb - log2 number of contiguous free blocks to allocate.
1732 * blkno - starting block number of the dmap to start the allocation 1725 * blkno - starting block number of the dmap to start the allocation
1733 * from. 1726 * from.
1734 * results - on successful return, set to the starting block number 1727 * results - on successful return, set to the starting block number
1735 * of the newly allocated range. 1728 * of the newly allocated range.
1736 * 1729 *
1737 * RETURN VALUES: 1730 * RETURN VALUES:
1738 * 0 - success 1731 * 0 - success
1739 * -ENOSPC - insufficient disk resources 1732 * -ENOSPC - insufficient disk resources
1740 * -EIO - i/o error 1733 * -EIO - i/o error
1741 * 1734 *
1742 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 1735 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1743 */ 1736 */
@@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1870/* 1863/*
1871 * NAME: dbAllocDmapLev() 1864 * NAME: dbAllocDmapLev()
1872 * 1865 *
1873 * FUNCTION: attempt to allocate a specified number of contiguous blocks 1866 * FUNCTION: attempt to allocate a specified number of contiguous blocks
1874 * from a specified dmap. 1867 * from a specified dmap.
1875 * 1868 *
1876 * this routine checks if the contiguous blocks are available. 1869 * this routine checks if the contiguous blocks are available.
@@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1878 * returned. 1871 * returned.
1879 * 1872 *
1880 * PARAMETERS: 1873 * PARAMETERS:
1881 * bmp - pointer to bmap descriptor 1874 * bmp - pointer to bmap descriptor
1882 * dp - pointer to dmap to attempt to allocate blocks from. 1875 * dp - pointer to dmap to attempt to allocate blocks from.
1883 * l2nb - log2 number of contiguous blocks desired. 1876 * l2nb - log2 number of contiguous blocks desired.
1884 * nblocks - actual number of contiguous blocks desired. 1877 * nblocks - actual number of contiguous blocks desired.
1885 * results - on successful return, set to the starting block number 1878 * results - on successful return, set to the starting block number
1886 * of the newly allocated range. 1879 * of the newly allocated range.
1887 * 1880 *
1888 * RETURN VALUES: 1881 * RETURN VALUES:
1889 * 0 - success 1882 * 0 - success
1890 * -ENOSPC - insufficient disk resources 1883 * -ENOSPC - insufficient disk resources
1891 * -EIO - i/o error 1884 * -EIO - i/o error
1892 * 1885 *
1893 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or 1886 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
1894 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; 1887 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
@@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp,
1933/* 1926/*
1934 * NAME: dbAllocDmap() 1927 * NAME: dbAllocDmap()
1935 * 1928 *
1936 * FUNCTION: adjust the disk allocation map to reflect the allocation 1929 * FUNCTION: adjust the disk allocation map to reflect the allocation
1937 * of a specified block range within a dmap. 1930 * of a specified block range within a dmap.
1938 * 1931 *
1939 * this routine allocates the specified blocks from the dmap 1932 * this routine allocates the specified blocks from the dmap
@@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp,
1946 * covers this dmap. 1939 * covers this dmap.
1947 * 1940 *
1948 * PARAMETERS: 1941 * PARAMETERS:
1949 * bmp - pointer to bmap descriptor 1942 * bmp - pointer to bmap descriptor
1950 * dp - pointer to dmap to allocate the block range from. 1943 * dp - pointer to dmap to allocate the block range from.
1951 * blkno - starting block number of the block to be allocated. 1944 * blkno - starting block number of the block to be allocated.
1952 * nblocks - number of blocks to be allocated. 1945 * nblocks - number of blocks to be allocated.
1953 * 1946 *
1954 * RETURN VALUES: 1947 * RETURN VALUES:
1955 * 0 - success 1948 * 0 - success
1956 * -EIO - i/o error 1949 * -EIO - i/o error
1957 * 1950 *
1958 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 1951 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
1959 */ 1952 */
@@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
1989/* 1982/*
1990 * NAME: dbFreeDmap() 1983 * NAME: dbFreeDmap()
1991 * 1984 *
1992 * FUNCTION: adjust the disk allocation map to reflect the deallocation 1985 * FUNCTION: adjust the disk allocation map to reflect the deallocation
1993 * of a specified block range within a dmap. 1986 * of a specified block range within a dmap.
1994 * 1987 *
1995 * this routine frees the specified blocks from the dmap through 1988 * this routine frees the specified blocks from the dmap through
@@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
1997 * causes the maximum string of free blocks within the dmap to 1990 * causes the maximum string of free blocks within the dmap to
1998 * change (i.e. the value of the root of the dmap's dmtree), this 1991 * change (i.e. the value of the root of the dmap's dmtree), this
1999 * routine will cause this change to be reflected up through the 1992 * routine will cause this change to be reflected up through the
2000 * appropriate levels of the dmap control pages by a call to 1993 * appropriate levels of the dmap control pages by a call to
2001 * dbAdjCtl() for the L0 dmap control page that covers this dmap. 1994 * dbAdjCtl() for the L0 dmap control page that covers this dmap.
2002 * 1995 *
2003 * PARAMETERS: 1996 * PARAMETERS:
2004 * bmp - pointer to bmap descriptor 1997 * bmp - pointer to bmap descriptor
2005 * dp - pointer to dmap to free the block range from. 1998 * dp - pointer to dmap to free the block range from.
2006 * blkno - starting block number of the block to be freed. 1999 * blkno - starting block number of the block to be freed.
2007 * nblocks - number of blocks to be freed. 2000 * nblocks - number of blocks to be freed.
2008 * 2001 *
2009 * RETURN VALUES: 2002 * RETURN VALUES:
2010 * 0 - success 2003 * 0 - success
2011 * -EIO - i/o error 2004 * -EIO - i/o error
2012 * 2005 *
2013 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2006 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2014 */ 2007 */
@@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2055/* 2048/*
2056 * NAME: dbAllocBits() 2049 * NAME: dbAllocBits()
2057 * 2050 *
2058 * FUNCTION: allocate a specified block range from a dmap. 2051 * FUNCTION: allocate a specified block range from a dmap.
2059 * 2052 *
2060 * this routine updates the dmap to reflect the working 2053 * this routine updates the dmap to reflect the working
2061 * state allocation of the specified block range. it directly 2054 * state allocation of the specified block range. it directly
@@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2065 * dmap's dmtree, as a whole, to reflect the allocated range. 2058 * dmap's dmtree, as a whole, to reflect the allocated range.
2066 * 2059 *
2067 * PARAMETERS: 2060 * PARAMETERS:
2068 * bmp - pointer to bmap descriptor 2061 * bmp - pointer to bmap descriptor
2069 * dp - pointer to dmap to allocate bits from. 2062 * dp - pointer to dmap to allocate bits from.
2070 * blkno - starting block number of the bits to be allocated. 2063 * blkno - starting block number of the bits to be allocated.
2071 * nblocks - number of bits to be allocated. 2064 * nblocks - number of bits to be allocated.
2072 * 2065 *
2073 * RETURN VALUES: none 2066 * RETURN VALUES: none
2074 * 2067 *
@@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2149 * the allocated words. 2142 * the allocated words.
2150 */ 2143 */
2151 for (; nwords > 0; nwords -= nw) { 2144 for (; nwords > 0; nwords -= nw) {
2152 if (leaf[word] < BUDMIN) { 2145 if (leaf[word] < BUDMIN) {
2153 jfs_error(bmp->db_ipbmap->i_sb, 2146 jfs_error(bmp->db_ipbmap->i_sb,
2154 "dbAllocBits: leaf page " 2147 "dbAllocBits: leaf page "
2155 "corrupt"); 2148 "corrupt");
@@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2202/* 2195/*
2203 * NAME: dbFreeBits() 2196 * NAME: dbFreeBits()
2204 * 2197 *
2205 * FUNCTION: free a specified block range from a dmap. 2198 * FUNCTION: free a specified block range from a dmap.
2206 * 2199 *
2207 * this routine updates the dmap to reflect the working 2200 * this routine updates the dmap to reflect the working
2208 * state allocation of the specified block range. it directly 2201 * state allocation of the specified block range. it directly
@@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2212 * dmtree, as a whole, to reflect the deallocated range. 2205 * dmtree, as a whole, to reflect the deallocated range.
2213 * 2206 *
2214 * PARAMETERS: 2207 * PARAMETERS:
2215 * bmp - pointer to bmap descriptor 2208 * bmp - pointer to bmap descriptor
2216 * dp - pointer to dmap to free bits from. 2209 * dp - pointer to dmap to free bits from.
2217 * blkno - starting block number of the bits to be freed. 2210 * blkno - starting block number of the bits to be freed.
2218 * nblocks - number of bits to be freed. 2211 * nblocks - number of bits to be freed.
2219 * 2212 *
2220 * RETURN VALUES: 0 for success 2213 * RETURN VALUES: 0 for success
2221 * 2214 *
@@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2388 * the new root value and the next dmap control page level to 2381 * the new root value and the next dmap control page level to
2389 * be adjusted. 2382 * be adjusted.
2390 * PARAMETERS: 2383 * PARAMETERS:
2391 * bmp - pointer to bmap descriptor 2384 * bmp - pointer to bmap descriptor
2392 * blkno - the first block of a block range within a dmap. it is 2385 * blkno - the first block of a block range within a dmap. it is
2393 * the allocation or deallocation of this block range that 2386 * the allocation or deallocation of this block range that
2394 * requires the dmap control page to be adjusted. 2387 * requires the dmap control page to be adjusted.
2395 * newval - the new value of the lower level dmap or dmap control 2388 * newval - the new value of the lower level dmap or dmap control
2396 * page root. 2389 * page root.
2397 * alloc - 'true' if adjustment is due to an allocation. 2390 * alloc - 'true' if adjustment is due to an allocation.
2398 * level - current level of dmap control page (i.e. L0, L1, L2) to 2391 * level - current level of dmap control page (i.e. L0, L1, L2) to
2399 * be adjusted. 2392 * be adjusted.
2400 * 2393 *
2401 * RETURN VALUES: 2394 * RETURN VALUES:
2402 * 0 - success 2395 * 0 - success
2403 * -EIO - i/o error 2396 * -EIO - i/o error
2404 * 2397 *
2405 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2398 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2406 */ 2399 */
@@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2544/* 2537/*
2545 * NAME: dbSplit() 2538 * NAME: dbSplit()
2546 * 2539 *
2547 * FUNCTION: update the leaf of a dmtree with a new value, splitting 2540 * FUNCTION: update the leaf of a dmtree with a new value, splitting
2548 * the leaf from the binary buddy system of the dmtree's 2541 * the leaf from the binary buddy system of the dmtree's
2549 * leaves, as required. 2542 * leaves, as required.
2550 * 2543 *
2551 * PARAMETERS: 2544 * PARAMETERS:
2552 * tp - pointer to the tree containing the leaf. 2545 * tp - pointer to the tree containing the leaf.
2553 * leafno - the number of the leaf to be updated. 2546 * leafno - the number of the leaf to be updated.
2554 * splitsz - the size the binary buddy system starting at the leaf 2547 * splitsz - the size the binary buddy system starting at the leaf
2555 * must be split to, specified as the log2 number of blocks. 2548 * must be split to, specified as the log2 number of blocks.
2556 * newval - the new value for the leaf. 2549 * newval - the new value for the leaf.
2557 * 2550 *
2558 * RETURN VALUES: none 2551 * RETURN VALUES: none
2559 * 2552 *
@@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2600/* 2593/*
2601 * NAME: dbBackSplit() 2594 * NAME: dbBackSplit()
2602 * 2595 *
2603 * FUNCTION: back split the binary buddy system of dmtree leaves 2596 * FUNCTION: back split the binary buddy system of dmtree leaves
2604 * that hold a specified leaf until the specified leaf 2597 * that hold a specified leaf until the specified leaf
2605 * starts its own binary buddy system. 2598 * starts its own binary buddy system.
2606 * 2599 *
@@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2617 * in which a previous join operation must be backed out. 2610 * in which a previous join operation must be backed out.
2618 * 2611 *
2619 * PARAMETERS: 2612 * PARAMETERS:
2620 * tp - pointer to the tree containing the leaf. 2613 * tp - pointer to the tree containing the leaf.
2621 * leafno - the number of the leaf to be updated. 2614 * leafno - the number of the leaf to be updated.
2622 * 2615 *
2623 * RETURN VALUES: none 2616 * RETURN VALUES: none
2624 * 2617 *
@@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
2692/* 2685/*
2693 * NAME: dbJoin() 2686 * NAME: dbJoin()
2694 * 2687 *
2695 * FUNCTION: update the leaf of a dmtree with a new value, joining 2688 * FUNCTION: update the leaf of a dmtree with a new value, joining
2696 * the leaf with other leaves of the dmtree into a multi-leaf 2689 * the leaf with other leaves of the dmtree into a multi-leaf
2697 * binary buddy system, as required. 2690 * binary buddy system, as required.
2698 * 2691 *
2699 * PARAMETERS: 2692 * PARAMETERS:
2700 * tp - pointer to the tree containing the leaf. 2693 * tp - pointer to the tree containing the leaf.
2701 * leafno - the number of the leaf to be updated. 2694 * leafno - the number of the leaf to be updated.
2702 * newval - the new value for the leaf. 2695 * newval - the new value for the leaf.
2703 * 2696 *
2704 * RETURN VALUES: none 2697 * RETURN VALUES: none
2705 */ 2698 */
@@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
2785/* 2778/*
2786 * NAME: dbAdjTree() 2779 * NAME: dbAdjTree()
2787 * 2780 *
2788 * FUNCTION: update a leaf of a dmtree with a new value, adjusting 2781 * FUNCTION: update a leaf of a dmtree with a new value, adjusting
2789 * the dmtree, as required, to reflect the new leaf value. 2782 * the dmtree, as required, to reflect the new leaf value.
2790 * the combination of any buddies must already be done before 2783 * the combination of any buddies must already be done before
2791 * this is called. 2784 * this is called.
2792 * 2785 *
2793 * PARAMETERS: 2786 * PARAMETERS:
2794 * tp - pointer to the tree to be adjusted. 2787 * tp - pointer to the tree to be adjusted.
2795 * leafno - the number of the leaf to be updated. 2788 * leafno - the number of the leaf to be updated.
2796 * newval - the new value for the leaf. 2789 * newval - the new value for the leaf.
2797 * 2790 *
2798 * RETURN VALUES: none 2791 * RETURN VALUES: none
2799 */ 2792 */
@@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2852/* 2845/*
2853 * NAME: dbFindLeaf() 2846 * NAME: dbFindLeaf()
2854 * 2847 *
2855 * FUNCTION: search a dmtree_t for sufficient free blocks, returning 2848 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
2856 * the index of a leaf describing the free blocks if 2849 * the index of a leaf describing the free blocks if
2857 * sufficient free blocks are found. 2850 * sufficient free blocks are found.
2858 * 2851 *
@@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2861 * free space. 2854 * free space.
2862 * 2855 *
2863 * PARAMETERS: 2856 * PARAMETERS:
2864 * tp - pointer to the tree to be searched. 2857 * tp - pointer to the tree to be searched.
2865 * l2nb - log2 number of free blocks to search for. 2858 * l2nb - log2 number of free blocks to search for.
2866 * leafidx - return pointer to be set to the index of the leaf 2859 * leafidx - return pointer to be set to the index of the leaf
2867 * describing at least l2nb free blocks if sufficient 2860 * describing at least l2nb free blocks if sufficient
2868 * free blocks are found. 2861 * free blocks are found.
2869 * 2862 *
2870 * RETURN VALUES: 2863 * RETURN VALUES:
2871 * 0 - success 2864 * 0 - success
2872 * -ENOSPC - insufficient free blocks. 2865 * -ENOSPC - insufficient free blocks.
2873 */ 2866 */
2874static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) 2867static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2875{ 2868{
@@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2916/* 2909/*
2917 * NAME: dbFindBits() 2910 * NAME: dbFindBits()
2918 * 2911 *
2919 * FUNCTION: find a specified number of binary buddy free bits within a 2912 * FUNCTION: find a specified number of binary buddy free bits within a
2920 * dmap bitmap word value. 2913 * dmap bitmap word value.
2921 * 2914 *
2922 * this routine searches the bitmap value for (1 << l2nb) free 2915 * this routine searches the bitmap value for (1 << l2nb) free
2923 * bits at (1 << l2nb) alignments within the value. 2916 * bits at (1 << l2nb) alignments within the value.
2924 * 2917 *
2925 * PARAMETERS: 2918 * PARAMETERS:
2926 * word - dmap bitmap word value. 2919 * word - dmap bitmap word value.
2927 * l2nb - number of free bits specified as a log2 number. 2920 * l2nb - number of free bits specified as a log2 number.
2928 * 2921 *
2929 * RETURN VALUES: 2922 * RETURN VALUES:
2930 * starting bit number of free bits. 2923 * starting bit number of free bits.
2931 */ 2924 */
2932static int dbFindBits(u32 word, int l2nb) 2925static int dbFindBits(u32 word, int l2nb)
2933{ 2926{
@@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb)
2963/* 2956/*
2964 * NAME: dbMaxBud(u8 *cp) 2957 * NAME: dbMaxBud(u8 *cp)
2965 * 2958 *
2966 * FUNCTION: determine the largest binary buddy string of free 2959 * FUNCTION: determine the largest binary buddy string of free
2967 * bits within 32-bits of the map. 2960 * bits within 32-bits of the map.
2968 * 2961 *
2969 * PARAMETERS: 2962 * PARAMETERS:
2970 * cp - pointer to the 32-bit value. 2963 * cp - pointer to the 32-bit value.
2971 * 2964 *
2972 * RETURN VALUES: 2965 * RETURN VALUES:
2973 * largest binary buddy of free bits within a dmap word. 2966 * largest binary buddy of free bits within a dmap word.
2974 */ 2967 */
2975static int dbMaxBud(u8 * cp) 2968static int dbMaxBud(u8 * cp)
2976{ 2969{
@@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp)
3000/* 2993/*
3001 * NAME: cnttz(u32 word) 2994 * NAME: cnttz(u32 word)
3002 * 2995 *
3003 * FUNCTION: determine the number of trailing zeros within a 32-bit 2996 * FUNCTION: determine the number of trailing zeros within a 32-bit
3004 * value. 2997 * value.
3005 * 2998 *
3006 * PARAMETERS: 2999 * PARAMETERS:
3007 * word - 32-bit value to be examined. 3000 * word - 32-bit value to be examined.
3008 * 3001 *
3009 * RETURN VALUES: 3002 * RETURN VALUES:
3010 * count of trailing zeros 3003 * count of trailing zeros
3011 */ 3004 */
3012static int cnttz(u32 word) 3005static int cnttz(u32 word)
3013{ 3006{
@@ -3025,14 +3018,14 @@ static int cnttz(u32 word)
3025/* 3018/*
3026 * NAME: cntlz(u32 value) 3019 * NAME: cntlz(u32 value)
3027 * 3020 *
3028 * FUNCTION: determine the number of leading zeros within a 32-bit 3021 * FUNCTION: determine the number of leading zeros within a 32-bit
3029 * value. 3022 * value.
3030 * 3023 *
3031 * PARAMETERS: 3024 * PARAMETERS:
3032 * value - 32-bit value to be examined. 3025 * value - 32-bit value to be examined.
3033 * 3026 *
3034 * RETURN VALUES: 3027 * RETURN VALUES:
3035 * count of leading zeros 3028 * count of leading zeros
3036 */ 3029 */
3037static int cntlz(u32 value) 3030static int cntlz(u32 value)
3038{ 3031{
@@ -3050,14 +3043,14 @@ static int cntlz(u32 value)
3050 * NAME: blkstol2(s64 nb) 3043 * NAME: blkstol2(s64 nb)
3051 * 3044 *
3052 * FUNCTION: convert a block count to its log2 value. if the block 3045 * FUNCTION: convert a block count to its log2 value. if the block
3053 * count is not a l2 multiple, it is rounded up to the next 3046 * count is not a l2 multiple, it is rounded up to the next
3054 * larger l2 multiple. 3047 * larger l2 multiple.
3055 * 3048 *
3056 * PARAMETERS: 3049 * PARAMETERS:
3057 * nb - number of blocks 3050 * nb - number of blocks
3058 * 3051 *
3059 * RETURN VALUES: 3052 * RETURN VALUES:
3060 * log2 number of blocks 3053 * log2 number of blocks
3061 */ 3054 */
3062static int blkstol2(s64 nb) 3055static int blkstol2(s64 nb)
3063{ 3056{
@@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb)
3099 * at a time. 3092 * at a time.
3100 * 3093 *
3101 * PARAMETERS: 3094 * PARAMETERS:
3102 * ip - pointer to in-core inode; 3095 * ip - pointer to in-core inode;
3103 * blkno - starting block number to be freed. 3096 * blkno - starting block number to be freed.
3104 * nblocks - number of blocks to be freed. 3097 * nblocks - number of blocks to be freed.
3105 * 3098 *
3106 * RETURN VALUES: 3099 * RETURN VALUES:
3107 * 0 - success 3100 * 0 - success
3108 * -EIO - i/o error 3101 * -EIO - i/o error
3109 */ 3102 */
3110int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) 3103int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3111{ 3104{
@@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3278 * L2 3271 * L2
3279 * | 3272 * |
3280 * L1---------------------------------L1 3273 * L1---------------------------------L1
3281 * | | 3274 * | |
3282 * L0---------L0---------L0 L0---------L0---------L0 3275 * L0---------L0---------L0 L0---------L0---------L0
3283 * | | | | | | 3276 * | | | | | |
3284 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; 3277 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
3285 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm 3278 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3286 * 3279 *
3287 * <---old---><----------------------------extend-----------------------> 3280 * <---old---><----------------------------extend----------------------->
@@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3307 (long long) blkno, (long long) nblocks, (long long) newsize); 3300 (long long) blkno, (long long) nblocks, (long long) newsize);
3308 3301
3309 /* 3302 /*
3310 * initialize bmap control page. 3303 * initialize bmap control page.
3311 * 3304 *
3312 * all the data in bmap control page should exclude 3305 * all the data in bmap control page should exclude
3313 * the mkfs hidden dmap page. 3306 * the mkfs hidden dmap page.
@@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3330 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; 3323 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
3331 3324
3332 /* 3325 /*
3333 * reconfigure db_agfree[] 3326 * reconfigure db_agfree[]
3334 * from old AG configuration to new AG configuration; 3327 * from old AG configuration to new AG configuration;
3335 * 3328 *
3336 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 3329 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3362 bmp->db_maxag = bmp->db_maxag / k; 3355 bmp->db_maxag = bmp->db_maxag / k;
3363 3356
3364 /* 3357 /*
3365 * extend bmap 3358 * extend bmap
3366 * 3359 *
3367 * update bit maps and corresponding level control pages; 3360 * update bit maps and corresponding level control pages;
3368 * global control page db_nfree, db_agfree[agno], db_maxfreebud; 3361 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
@@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3410 /* compute start L0 */ 3403 /* compute start L0 */
3411 j = 0; 3404 j = 0;
3412 l1leaf = l1dcp->stree + CTLLEAFIND; 3405 l1leaf = l1dcp->stree + CTLLEAFIND;
3413 p += nbperpage; /* 1st L0 of L1.k */ 3406 p += nbperpage; /* 1st L0 of L1.k */
3414 } 3407 }
3415 3408
3416 /* 3409 /*
@@ -3548,7 +3541,7 @@ errout:
3548 return -EIO; 3541 return -EIO;
3549 3542
3550 /* 3543 /*
3551 * finalize bmap control page 3544 * finalize bmap control page
3552 */ 3545 */
3553finalize: 3546finalize:
3554 3547
@@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
3567 int i, n; 3560 int i, n;
3568 3561
3569 /* 3562 /*
3570 * finalize bmap control page 3563 * finalize bmap control page
3571 */ 3564 */
3572//finalize: 3565//finalize:
3573 /* 3566 /*
@@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks)
3953 * convert number of map pages to the zero origin top dmapctl level 3946 * convert number of map pages to the zero origin top dmapctl level
3954 */ 3947 */
3955#define BMAPPGTOLEV(npages) \ 3948#define BMAPPGTOLEV(npages) \
3956 (((npages) <= 3 + MAXL0PAGES) ? 0 \ 3949 (((npages) <= 3 + MAXL0PAGES) ? 0 : \
3957 : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) 3950 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
3958 3951
3959s64 dbMapFileSizeToMapSize(struct inode * ipbmap) 3952s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3960{ 3953{
@@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3981 factor = 3974 factor =
3982 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); 3975 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
3983 complete = (u32) npages / factor; 3976 complete = (u32) npages / factor;
3984 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL 3977 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
3985 : ((i == 1) ? LPERCTL : 1)); 3978 ((i == 1) ? LPERCTL : 1));
3986 3979
3987 /* pages in last/incomplete child */ 3980 /* pages in last/incomplete child */
3988 npages = (u32) npages % factor; 3981 npages = (u32) npages % factor;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 45ea454c74bd..11e6d471b364 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp)
83 * - 1 is added to account for the control page of the map. 83 * - 1 is added to account for the control page of the map.
84 */ 84 */
85#define BLKTODMAP(b,s) \ 85#define BLKTODMAP(b,s) \
86 ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) 86 ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
87 87
88/* 88/*
89 * convert disk block number to the logical block number of the LEVEL 0 89 * convert disk block number to the logical block number of the LEVEL 0
@@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp)
98 * - 1 is added to account for the control page of the map. 98 * - 1 is added to account for the control page of the map.
99 */ 99 */
100#define BLKTOL0(b,s) \ 100#define BLKTOL0(b,s) \
101 (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) 101 (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
102 102
103/* 103/*
104 * convert disk block number to the logical block number of the LEVEL 1 104 * convert disk block number to the logical block number of the LEVEL 1
@@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp)
120 * at the specified level which describes the disk block. 120 * at the specified level which describes the disk block.
121 */ 121 */
122#define BLKTOCTL(b,s,l) \ 122#define BLKTOCTL(b,s,l) \
123 (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) 123 (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
124 124
125/* 125/*
126 * convert aggregate map size to the zero origin dmapctl level of the 126 * convert aggregate map size to the zero origin dmapctl level of the
@@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp)
145 * dmaptree must be consistent with dmapctl. 145 * dmaptree must be consistent with dmapctl.
146 */ 146 */
147struct dmaptree { 147struct dmaptree {
148 __le32 nleafs; /* 4: number of tree leafs */ 148 __le32 nleafs; /* 4: number of tree leafs */
149 __le32 l2nleafs; /* 4: l2 number of tree leafs */ 149 __le32 l2nleafs; /* 4: l2 number of tree leafs */
150 __le32 leafidx; /* 4: index of first tree leaf */ 150 __le32 leafidx; /* 4: index of first tree leaf */
151 __le32 height; /* 4: height of the tree */ 151 __le32 height; /* 4: height of the tree */
152 s8 budmin; /* 1: min l2 tree leaf value to combine */ 152 s8 budmin; /* 1: min l2 tree leaf value to combine */
153 s8 stree[TREESIZE]; /* TREESIZE: tree */ 153 s8 stree[TREESIZE]; /* TREESIZE: tree */
154 u8 pad[2]; /* 2: pad to word boundary */ 154 u8 pad[2]; /* 2: pad to word boundary */
155}; /* - 360 - */ 155}; /* - 360 - */
156 156
157/* 157/*
158 * dmap page per 8K blocks bitmap 158 * dmap page per 8K blocks bitmap
159 */ 159 */
160struct dmap { 160struct dmap {
161 __le32 nblocks; /* 4: num blks covered by this dmap */ 161 __le32 nblocks; /* 4: num blks covered by this dmap */
162 __le32 nfree; /* 4: num of free blks in this dmap */ 162 __le32 nfree; /* 4: num of free blks in this dmap */
163 __le64 start; /* 8: starting blkno for this dmap */ 163 __le64 start; /* 8: starting blkno for this dmap */
164 struct dmaptree tree; /* 360: dmap tree */ 164 struct dmaptree tree; /* 360: dmap tree */
165 u8 pad[1672]; /* 1672: pad to 2048 bytes */ 165 u8 pad[1672]; /* 1672: pad to 2048 bytes */
166 __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ 166 __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */
167 __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ 167 __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */
168}; /* - 4096 - */ 168}; /* - 4096 - */
169 169
170/* 170/*
171 * disk map control page per level. 171 * disk map control page per level.
@@ -173,14 +173,14 @@ struct dmap {
173 * dmapctl must be consistent with dmaptree. 173 * dmapctl must be consistent with dmaptree.
174 */ 174 */
175struct dmapctl { 175struct dmapctl {
176 __le32 nleafs; /* 4: number of tree leafs */ 176 __le32 nleafs; /* 4: number of tree leafs */
177 __le32 l2nleafs; /* 4: l2 number of tree leafs */ 177 __le32 l2nleafs; /* 4: l2 number of tree leafs */
178 __le32 leafidx; /* 4: index of the first tree leaf */ 178 __le32 leafidx; /* 4: index of the first tree leaf */
179 __le32 height; /* 4: height of tree */ 179 __le32 height; /* 4: height of tree */
180 s8 budmin; /* 1: minimum l2 tree leaf value */ 180 s8 budmin; /* 1: minimum l2 tree leaf value */
181 s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ 181 s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */
182 u8 pad[2714]; /* 2714: pad to 4096 */ 182 u8 pad[2714]; /* 2714: pad to 4096 */
183}; /* - 4096 - */ 183}; /* - 4096 - */
184 184
185/* 185/*
186 * common definition for dmaptree within dmap and dmapctl 186 * common definition for dmaptree within dmap and dmapctl
@@ -202,41 +202,41 @@ typedef union dmtree {
202 * on-disk aggregate disk allocation map descriptor. 202 * on-disk aggregate disk allocation map descriptor.
203 */ 203 */
204struct dbmap_disk { 204struct dbmap_disk {
205 __le64 dn_mapsize; /* 8: number of blocks in aggregate */ 205 __le64 dn_mapsize; /* 8: number of blocks in aggregate */
206 __le64 dn_nfree; /* 8: num free blks in aggregate map */ 206 __le64 dn_nfree; /* 8: num free blks in aggregate map */
207 __le32 dn_l2nbperpage; /* 4: number of blks per page */ 207 __le32 dn_l2nbperpage; /* 4: number of blks per page */
208 __le32 dn_numag; /* 4: total number of ags */ 208 __le32 dn_numag; /* 4: total number of ags */
209 __le32 dn_maxlevel; /* 4: number of active ags */ 209 __le32 dn_maxlevel; /* 4: number of active ags */
210 __le32 dn_maxag; /* 4: max active alloc group number */ 210 __le32 dn_maxag; /* 4: max active alloc group number */
211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */ 211 __le32 dn_agpref; /* 4: preferred alloc group (hint) */
212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ 212 __le32 dn_aglevel; /* 4: dmapctl level holding the AG */
213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ 213 __le32 dn_agheigth; /* 4: height in dmapctl of the AG */
214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ 214 __le32 dn_agwidth; /* 4: width in dmapctl of the AG */
215 __le32 dn_agstart; /* 4: start tree index at AG height */ 215 __le32 dn_agstart; /* 4: start tree index at AG height */
216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ 216 __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
217 __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ 217 __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */
218 __le64 dn_agsize; /* 8: num of blks per alloc group */ 218 __le64 dn_agsize; /* 8: num of blks per alloc group */
219 s8 dn_maxfreebud; /* 1: max free buddy system */ 219 s8 dn_maxfreebud; /* 1: max free buddy system */
220 u8 pad[3007]; /* 3007: pad to 4096 */ 220 u8 pad[3007]; /* 3007: pad to 4096 */
221}; /* - 4096 - */ 221}; /* - 4096 - */
222 222
223struct dbmap { 223struct dbmap {
224 s64 dn_mapsize; /* number of blocks in aggregate */ 224 s64 dn_mapsize; /* number of blocks in aggregate */
225 s64 dn_nfree; /* num free blks in aggregate map */ 225 s64 dn_nfree; /* num free blks in aggregate map */
226 int dn_l2nbperpage; /* number of blks per page */ 226 int dn_l2nbperpage; /* number of blks per page */
227 int dn_numag; /* total number of ags */ 227 int dn_numag; /* total number of ags */
228 int dn_maxlevel; /* number of active ags */ 228 int dn_maxlevel; /* number of active ags */
229 int dn_maxag; /* max active alloc group number */ 229 int dn_maxag; /* max active alloc group number */
230 int dn_agpref; /* preferred alloc group (hint) */ 230 int dn_agpref; /* preferred alloc group (hint) */
231 int dn_aglevel; /* dmapctl level holding the AG */ 231 int dn_aglevel; /* dmapctl level holding the AG */
232 int dn_agheigth; /* height in dmapctl of the AG */ 232 int dn_agheigth; /* height in dmapctl of the AG */
233 int dn_agwidth; /* width in dmapctl of the AG */ 233 int dn_agwidth; /* width in dmapctl of the AG */
234 int dn_agstart; /* start tree index at AG height */ 234 int dn_agstart; /* start tree index at AG height */
235 int dn_agl2size; /* l2 num of blks per alloc group */ 235 int dn_agl2size; /* l2 num of blks per alloc group */
236 s64 dn_agfree[MAXAG]; /* per AG free count */ 236 s64 dn_agfree[MAXAG]; /* per AG free count */
237 s64 dn_agsize; /* num of blks per alloc group */ 237 s64 dn_agsize; /* num of blks per alloc group */
238 signed char dn_maxfreebud; /* max free buddy system */ 238 signed char dn_maxfreebud; /* max free buddy system */
239}; /* - 4096 - */ 239}; /* - 4096 - */
240/* 240/*
241 * in-memory aggregate disk allocation map descriptor. 241 * in-memory aggregate disk allocation map descriptor.
242 */ 242 */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6d62f3222892..c14ba3cfa818 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
315 lv = &llck->lv[llck->index]; 315 lv = &llck->lv[llck->index];
316 316
317 /* 317 /*
318 * Linelock slot size is twice the size of directory table 318 * Linelock slot size is twice the size of directory table
319 * slot size. 512 entries per page. 319 * slot size. 512 entries per page.
320 */ 320 */
321 lv->offset = ((index - 2) & 511) >> 1; 321 lv->offset = ((index - 2) & 511) >> 1;
322 lv->length = 1; 322 lv->length = 1;
@@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
615 btstack->nsplit = 1; 615 btstack->nsplit = 1;
616 616
617 /* 617 /*
618 * search down tree from root: 618 * search down tree from root:
619 * 619 *
620 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 620 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
621 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 621 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
659 } 659 }
660 if (cmp == 0) { 660 if (cmp == 0) {
661 /* 661 /*
662 * search hit 662 * search hit
663 */ 663 */
664 /* search hit - leaf page: 664 /* search hit - leaf page:
665 * return the entry found 665 * return the entry found
@@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
723 } 723 }
724 724
725 /* 725 /*
726 * search miss 726 * search miss
727 * 727 *
728 * base is the smallest index with key (Kj) greater than 728 * base is the smallest index with key (Kj) greater than
729 * search key (K) and may be zero or (maxindex + 1) index. 729 * search key (K) and may be zero or (maxindex + 1) index.
@@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip,
834 struct lv *lv; 834 struct lv *lv;
835 835
836 /* 836 /*
837 * retrieve search result 837 * retrieve search result
838 * 838 *
839 * dtSearch() returns (leaf page pinned, index at which to insert). 839 * dtSearch() returns (leaf page pinned, index at which to insert).
840 * n.b. dtSearch() may return index of (maxindex + 1) of 840 * n.b. dtSearch() may return index of (maxindex + 1) of
@@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip,
843 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 843 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
844 844
845 /* 845 /*
846 * insert entry for new key 846 * insert entry for new key
847 */ 847 */
848 if (DO_INDEX(ip)) { 848 if (DO_INDEX(ip)) {
849 if (JFS_IP(ip)->next_index == DIREND) { 849 if (JFS_IP(ip)->next_index == DIREND) {
@@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip,
860 data.leaf.ino = *fsn; 860 data.leaf.ino = *fsn;
861 861
862 /* 862 /*
863 * leaf page does not have enough room for new entry: 863 * leaf page does not have enough room for new entry:
864 * 864 *
865 * extend/split the leaf page; 865 * extend/split the leaf page;
866 * 866 *
867 * dtSplitUp() will insert the entry and unpin the leaf page. 867 * dtSplitUp() will insert the entry and unpin the leaf page.
868 */ 868 */
@@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip,
877 } 877 }
878 878
879 /* 879 /*
880 * leaf page does have enough room for new entry: 880 * leaf page does have enough room for new entry:
881 * 881 *
882 * insert the new data entry into the leaf page; 882 * insert the new data entry into the leaf page;
883 */ 883 */
884 BT_MARK_DIRTY(mp, ip); 884 BT_MARK_DIRTY(mp, ip);
885 /* 885 /*
@@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid,
967 } 967 }
968 968
969 /* 969 /*
970 * split leaf page 970 * split leaf page
971 * 971 *
972 * The split routines insert the new entry, and 972 * The split routines insert the new entry, and
973 * acquire txLock as appropriate. 973 * acquire txLock as appropriate.
974 */ 974 */
975 /* 975 /*
976 * split root leaf page: 976 * split root leaf page:
977 */ 977 */
978 if (sp->header.flag & BT_ROOT) { 978 if (sp->header.flag & BT_ROOT) {
979 /* 979 /*
@@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid,
1012 } 1012 }
1013 1013
1014 /* 1014 /*
1015 * extend first leaf page 1015 * extend first leaf page
1016 * 1016 *
1017 * extend the 1st extent if less than buffer page size 1017 * extend the 1st extent if less than buffer page size
1018 * (dtExtendPage() reurns leaf page unpinned) 1018 * (dtExtendPage() reurns leaf page unpinned)
@@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid,
1068 } 1068 }
1069 1069
1070 /* 1070 /*
1071 * split leaf page <sp> into <sp> and a new right page <rp>. 1071 * split leaf page <sp> into <sp> and a new right page <rp>.
1072 * 1072 *
1073 * return <rp> pinned and its extent descriptor <rpxd> 1073 * return <rp> pinned and its extent descriptor <rpxd>
1074 */ 1074 */
@@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1433 rp->header.freecnt = rp->header.maxslot - fsi; 1433 rp->header.freecnt = rp->header.maxslot - fsi;
1434 1434
1435 /* 1435 /*
1436 * sequential append at tail: append without split 1436 * sequential append at tail: append without split
1437 * 1437 *
1438 * If splitting the last page on a level because of appending 1438 * If splitting the last page on a level because of appending
1439 * a entry to it (skip is maxentry), it's likely that the access is 1439 * a entry to it (skip is maxentry), it's likely that the access is
@@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1467 } 1467 }
1468 1468
1469 /* 1469 /*
1470 * non-sequential insert (at possibly middle page) 1470 * non-sequential insert (at possibly middle page)
1471 */ 1471 */
1472 1472
1473 /* 1473 /*
@@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1508 left = 0; 1508 left = 0;
1509 1509
1510 /* 1510 /*
1511 * compute fill factor for split pages 1511 * compute fill factor for split pages
1512 * 1512 *
1513 * <nxt> traces the next entry to move to rp 1513 * <nxt> traces the next entry to move to rp
1514 * <off> traces the next entry to stay in sp 1514 * <off> traces the next entry to stay in sp
@@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
1551 /* <nxt> poins to the 1st entry to move */ 1551 /* <nxt> poins to the 1st entry to move */
1552 1552
1553 /* 1553 /*
1554 * move entries to right page 1554 * move entries to right page
1555 * 1555 *
1556 * dtMoveEntry() initializes rp and reserves entry for insertion 1556 * dtMoveEntry() initializes rp and reserves entry for insertion
1557 * 1557 *
@@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid,
1677 return (rc); 1677 return (rc);
1678 1678
1679 /* 1679 /*
1680 * extend the extent 1680 * extend the extent
1681 */ 1681 */
1682 pxdlist = split->pxdlist; 1682 pxdlist = split->pxdlist;
1683 pxd = &pxdlist->pxd[pxdlist->npxd]; 1683 pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid,
1722 } 1722 }
1723 1723
1724 /* 1724 /*
1725 * extend the page 1725 * extend the page
1726 */ 1726 */
1727 sp->header.self = *pxd; 1727 sp->header.self = *pxd;
1728 1728
@@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid,
1739 /* update buffer extent descriptor of extended page */ 1739 /* update buffer extent descriptor of extended page */
1740 xlen = lengthPXD(pxd); 1740 xlen = lengthPXD(pxd);
1741 xsize = xlen << JFS_SBI(sb)->l2bsize; 1741 xsize = xlen << JFS_SBI(sb)->l2bsize;
1742#ifdef _STILL_TO_PORT
1743 bmSetXD(smp, xaddr, xsize);
1744#endif /* _STILL_TO_PORT */
1745 1742
1746 /* 1743 /*
1747 * copy old stbl to new stbl at start of extended area 1744 * copy old stbl to new stbl at start of extended area
@@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid,
1836 } 1833 }
1837 1834
1838 /* 1835 /*
1839 * update parent entry on the parent/root page 1836 * update parent entry on the parent/root page
1840 */ 1837 */
1841 /* 1838 /*
1842 * acquire a transaction lock on the parent/root page 1839 * acquire a transaction lock on the parent/root page
@@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid,
1904 sp = &JFS_IP(ip)->i_dtroot; 1901 sp = &JFS_IP(ip)->i_dtroot;
1905 1902
1906 /* 1903 /*
1907 * allocate/initialize a single (right) child page 1904 * allocate/initialize a single (right) child page
1908 * 1905 *
1909 * N.B. at first split, a one (or two) block to fit new entry 1906 * N.B. at first split, a one (or two) block to fit new entry
1910 * is allocated; at subsequent split, a full page is allocated; 1907 * is allocated; at subsequent split, a full page is allocated;
@@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid,
1943 rp->header.prev = 0; 1940 rp->header.prev = 0;
1944 1941
1945 /* 1942 /*
1946 * move in-line root page into new right page extent 1943 * move in-line root page into new right page extent
1947 */ 1944 */
1948 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1945 /* linelock header + copied entries + new stbl (1st slot) in new page */
1949 ASSERT(dtlck->index == 0); 1946 ASSERT(dtlck->index == 0);
@@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid,
2016 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2013 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
2017 2014
2018 /* 2015 /*
2019 * reset parent/root page 2016 * reset parent/root page
2020 * 2017 *
2021 * set the 1st entry offset to 0, which force the left-most key 2018 * set the 1st entry offset to 0, which force the left-most key
2022 * at any level of the tree to be less than any search key. 2019 * at any level of the tree to be less than any search key.
@@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid,
2102 dtpage_t *np; 2099 dtpage_t *np;
2103 2100
2104 /* 2101 /*
2105 * search for the entry to delete: 2102 * search for the entry to delete:
2106 * 2103 *
2107 * dtSearch() returns (leaf page pinned, index at which to delete). 2104 * dtSearch() returns (leaf page pinned, index at which to delete).
2108 */ 2105 */
@@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2253 int i; 2250 int i;
2254 2251
2255 /* 2252 /*
2256 * keep the root leaf page which has become empty 2253 * keep the root leaf page which has become empty
2257 */ 2254 */
2258 if (BT_IS_ROOT(fmp)) { 2255 if (BT_IS_ROOT(fmp)) {
2259 /* 2256 /*
@@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2269 } 2266 }
2270 2267
2271 /* 2268 /*
2272 * free the non-root leaf page 2269 * free the non-root leaf page
2273 */ 2270 */
2274 /* 2271 /*
2275 * acquire a transaction lock on the page 2272 * acquire a transaction lock on the page
@@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2299 discard_metapage(fmp); 2296 discard_metapage(fmp);
2300 2297
2301 /* 2298 /*
2302 * propagate page deletion up the directory tree 2299 * propagate page deletion up the directory tree
2303 * 2300 *
2304 * If the delete from the parent page makes it empty, 2301 * If the delete from the parent page makes it empty,
2305 * continue all the way up the tree. 2302 * continue all the way up the tree.
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
2440 2437
2441#ifdef _NOTYET 2438#ifdef _NOTYET
2442/* 2439/*
2443 * NAME: dtRelocate() 2440 * NAME: dtRelocate()
2444 * 2441 *
2445 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2442 * FUNCTION: relocate dtpage (internal or leaf) of directory;
2446 * This function is mainly used by defragfs utility. 2443 * This function is mainly used by defragfs utility.
2447 */ 2444 */
2448int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2445int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2449 s64 nxaddr) 2446 s64 nxaddr)
@@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2471 xlen); 2468 xlen);
2472 2469
2473 /* 2470 /*
2474 * 1. get the internal parent dtpage covering 2471 * 1. get the internal parent dtpage covering
2475 * router entry for the tartget page to be relocated; 2472 * router entry for the tartget page to be relocated;
2476 */ 2473 */
2477 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2474 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
2478 if (rc) 2475 if (rc)
@@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2483 jfs_info("dtRelocate: parent router entry validated."); 2480 jfs_info("dtRelocate: parent router entry validated.");
2484 2481
2485 /* 2482 /*
2486 * 2. relocate the target dtpage 2483 * 2. relocate the target dtpage
2487 */ 2484 */
2488 /* read in the target page from src extent */ 2485 /* read in the target page from src extent */
2489 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2486 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
@@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2581 2578
2582 /* update the buffer extent descriptor of the dtpage */ 2579 /* update the buffer extent descriptor of the dtpage */
2583 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2580 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
2584#ifdef _STILL_TO_PORT 2581
2585 bmSetXD(mp, nxaddr, xsize);
2586#endif /* _STILL_TO_PORT */
2587 /* unpin the relocated page */ 2582 /* unpin the relocated page */
2588 DT_PUTPAGE(mp); 2583 DT_PUTPAGE(mp);
2589 jfs_info("dtRelocate: target dtpage relocated."); 2584 jfs_info("dtRelocate: target dtpage relocated.");
@@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2594 */ 2589 */
2595 2590
2596 /* 2591 /*
2597 * 3. acquire maplock for the source extent to be freed; 2592 * 3. acquire maplock for the source extent to be freed;
2598 */ 2593 */
2599 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2594 /* for dtpage relocation, write a LOG_NOREDOPAGE record
2600 * for the source dtpage (logredo() will init NoRedoPage 2595 * for the source dtpage (logredo() will init NoRedoPage
@@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2609 pxdlock->index = 1; 2604 pxdlock->index = 1;
2610 2605
2611 /* 2606 /*
2612 * 4. update the parent router entry for relocation; 2607 * 4. update the parent router entry for relocation;
2613 * 2608 *
2614 * acquire tlck for the parent entry covering the target dtpage; 2609 * acquire tlck for the parent entry covering the target dtpage;
2615 * write LOG_REDOPAGE to apply after image only; 2610 * write LOG_REDOPAGE to apply after image only;
@@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
2637 * NAME: dtSearchNode() 2632 * NAME: dtSearchNode()
2638 * 2633 *
2639 * FUNCTION: Search for an dtpage containing a specified address 2634 * FUNCTION: Search for an dtpage containing a specified address
2640 * This function is mainly used by defragfs utility. 2635 * This function is mainly used by defragfs utility.
2641 * 2636 *
2642 * NOTE: Search result on stack, the found page is pinned at exit. 2637 * NOTE: Search result on stack, the found page is pinned at exit.
2643 * The result page must be an internal dtpage. 2638 * The result page must be an internal dtpage.
@@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2660 BT_CLR(btstack); /* reset stack */ 2655 BT_CLR(btstack); /* reset stack */
2661 2656
2662 /* 2657 /*
2663 * descend tree to the level with specified leftmost page 2658 * descend tree to the level with specified leftmost page
2664 * 2659 *
2665 * by convention, root bn = 0. 2660 * by convention, root bn = 0.
2666 */ 2661 */
@@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
2699 } 2694 }
2700 2695
2701 /* 2696 /*
2702 * search each page at the current levevl 2697 * search each page at the current levevl
2703 */ 2698 */
2704 loop: 2699 loop:
2705 stbl = DT_GETSTBL(p); 2700 stbl = DT_GETSTBL(p);
@@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3044 if (DO_INDEX(ip)) { 3039 if (DO_INDEX(ip)) {
3045 /* 3040 /*
3046 * persistent index is stored in directory entries. 3041 * persistent index is stored in directory entries.
3047 * Special cases: 0 = . 3042 * Special cases: 0 = .
3048 * 1 = .. 3043 * 1 = ..
3049 * -1 = End of directory 3044 * -1 = End of directory
3050 */ 3045 */
3051 do_index = 1; 3046 do_index = 1;
3052 3047
@@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3128 /* 3123 /*
3129 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3124 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
3130 * 3125 *
3131 * pn = index = 0: First entry "." 3126 * pn = index = 0: First entry "."
3132 * pn = 0; index = 1: Second entry ".." 3127 * pn = 0; index = 1: Second entry ".."
3133 * pn > 0: Real entries, pn=1 -> leftmost page 3128 * pn > 0: Real entries, pn=1 -> leftmost page
3134 * pn = index = -1: No more entries 3129 * pn = index = -1: No more entries
3135 */ 3130 */
3136 dtpos = filp->f_pos; 3131 dtpos = filp->f_pos;
3137 if (dtpos == 0) { 3132 if (dtpos == 0) {
@@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
3351 BT_CLR(btstack); /* reset stack */ 3346 BT_CLR(btstack); /* reset stack */
3352 3347
3353 /* 3348 /*
3354 * descend leftmost path of the tree 3349 * descend leftmost path of the tree
3355 * 3350 *
3356 * by convention, root bn = 0. 3351 * by convention, root bn = 0.
3357 */ 3352 */
@@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip,
4531 struct ldtentry *entry; 4526 struct ldtentry *entry;
4532 4527
4533 /* 4528 /*
4534 * search for the entry to modify: 4529 * search for the entry to modify:
4535 * 4530 *
4536 * dtSearch() returns (leaf page pinned, index at which to modify). 4531 * dtSearch() returns (leaf page pinned, index at which to modify).
4537 */ 4532 */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index af8513f78648..8561c6ecece0 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -35,7 +35,7 @@ typedef union {
35 35
36 36
37/* 37/*
38 * entry segment/slot 38 * entry segment/slot
39 * 39 *
40 * an entry consists of type dependent head/only segment/slot and 40 * an entry consists of type dependent head/only segment/slot and
41 * additional segments/slots linked via next field; 41 * additional segments/slots linked via next field;
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index a35bdca6a805..7ae1e3281de9 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
34#endif 34#endif
35static s64 extRoundDown(s64 nb); 35static s64 extRoundDown(s64 nb);
36 36
37#define DPD(a) (printk("(a): %d\n",(a))) 37#define DPD(a) (printk("(a): %d\n",(a)))
38#define DPC(a) (printk("(a): %c\n",(a))) 38#define DPC(a) (printk("(a): %c\n",(a)))
39#define DPL1(a) \ 39#define DPL1(a) \
40{ \ 40{ \
41 if ((a) >> 32) \ 41 if ((a) >> 32) \
@@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb);
51 printk("(a): %x\n",(a) << 32); \ 51 printk("(a): %x\n",(a) << 32); \
52} 52}
53 53
54#define DPD1(a) (printk("(a): %d ",(a))) 54#define DPD1(a) (printk("(a): %d ",(a)))
55#define DPX(a) (printk("(a): %08x\n",(a))) 55#define DPX(a) (printk("(a): %08x\n",(a)))
56#define DPX1(a) (printk("(a): %08x ",(a))) 56#define DPX1(a) (printk("(a): %08x ",(a)))
57#define DPS(a) (printk("%s\n",(a))) 57#define DPS(a) (printk("%s\n",(a)))
58#define DPE(a) (printk("\nENTERING: %s\n",(a))) 58#define DPE(a) (printk("\nENTERING: %s\n",(a)))
59#define DPE1(a) (printk("\nENTERING: %s",(a))) 59#define DPE1(a) (printk("\nENTERING: %s",(a)))
60#define DPS1(a) (printk(" %s ",(a))) 60#define DPS1(a) (printk(" %s ",(a)))
61 61
62 62
63/* 63/*
64 * NAME: extAlloc() 64 * NAME: extAlloc()
65 * 65 *
66 * FUNCTION: allocate an extent for a specified page range within a 66 * FUNCTION: allocate an extent for a specified page range within a
67 * file. 67 * file.
68 * 68 *
69 * PARAMETERS: 69 * PARAMETERS:
@@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb);
78 * should be marked as allocated but not recorded. 78 * should be marked as allocated but not recorded.
79 * 79 *
80 * RETURN VALUES: 80 * RETURN VALUES:
81 * 0 - success 81 * 0 - success
82 * -EIO - i/o error. 82 * -EIO - i/o error.
83 * -ENOSPC - insufficient disk resources. 83 * -ENOSPC - insufficient disk resources.
84 */ 84 */
85int 85int
86extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) 86extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
@@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
192 192
193#ifdef _NOTYET 193#ifdef _NOTYET
194/* 194/*
195 * NAME: extRealloc() 195 * NAME: extRealloc()
196 * 196 *
197 * FUNCTION: extend the allocation of a file extent containing a 197 * FUNCTION: extend the allocation of a file extent containing a
198 * partial back last page. 198 * partial back last page.
199 * 199 *
200 * PARAMETERS: 200 * PARAMETERS:
@@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
207 * should be marked as allocated but not recorded. 207 * should be marked as allocated but not recorded.
208 * 208 *
209 * RETURN VALUES: 209 * RETURN VALUES:
210 * 0 - success 210 * 0 - success
211 * -EIO - i/o error. 211 * -EIO - i/o error.
212 * -ENOSPC - insufficient disk resources. 212 * -ENOSPC - insufficient disk resources.
213 */ 213 */
214int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) 214int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
215{ 215{
@@ -345,9 +345,9 @@ exit:
345 345
346 346
347/* 347/*
348 * NAME: extHint() 348 * NAME: extHint()
349 * 349 *
350 * FUNCTION: produce an extent allocation hint for a file offset. 350 * FUNCTION: produce an extent allocation hint for a file offset.
351 * 351 *
352 * PARAMETERS: 352 * PARAMETERS:
353 * ip - the inode of the file. 353 * ip - the inode of the file.
@@ -356,8 +356,8 @@ exit:
356 * the hint. 356 * the hint.
357 * 357 *
358 * RETURN VALUES: 358 * RETURN VALUES:
359 * 0 - success 359 * 0 - success
360 * -EIO - i/o error. 360 * -EIO - i/o error.
361 */ 361 */
362int extHint(struct inode *ip, s64 offset, xad_t * xp) 362int extHint(struct inode *ip, s64 offset, xad_t * xp)
363{ 363{
@@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
387 lxdl.nlxd = 1; 387 lxdl.nlxd = 1;
388 lxdl.lxd = &lxd; 388 lxdl.lxd = &lxd;
389 LXDoffset(&lxd, prev) 389 LXDoffset(&lxd, prev)
390 LXDlength(&lxd, nbperpage); 390 LXDlength(&lxd, nbperpage);
391 391
392 xadl.maxnxad = 1; 392 xadl.maxnxad = 1;
393 xadl.nxad = 0; 393 xadl.nxad = 0;
@@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) 397 if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398 return (rc); 398 return (rc);
399 399
400 /* check if not extent exists for the previous page. 400 /* check if no extent exists for the previous page.
401 * this is possible for sparse files. 401 * this is possible for sparse files.
402 */ 402 */
403 if (xadl.nxad == 0) { 403 if (xadl.nxad == 0) {
404// assert(ISSPARSE(ip)); 404// assert(ISSPARSE(ip));
405 return (0); 405 return (0);
406 } 406 }
407 407
@@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
410 */ 410 */
411 xp->flag &= XAD_NOTRECORDED; 411 xp->flag &= XAD_NOTRECORDED;
412 412
413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { 413 if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
414 jfs_error(ip->i_sb, "extHint: corrupt xtree"); 414 jfs_error(ip->i_sb, "extHint: corrupt xtree");
415 return -EIO; 415 return -EIO;
416 } 416 }
417 417
418 return (0); 418 return (0);
419} 419}
420 420
421 421
422/* 422/*
423 * NAME: extRecord() 423 * NAME: extRecord()
424 * 424 *
425 * FUNCTION: change a page with a file from not recorded to recorded. 425 * FUNCTION: change a page with a file from not recorded to recorded.
426 * 426 *
427 * PARAMETERS: 427 * PARAMETERS:
428 * ip - inode of the file. 428 * ip - inode of the file.
429 * cp - cbuf of the file page. 429 * cp - cbuf of the file page.
430 * 430 *
431 * RETURN VALUES: 431 * RETURN VALUES:
432 * 0 - success 432 * 0 - success
433 * -EIO - i/o error. 433 * -EIO - i/o error.
434 * -ENOSPC - insufficient disk resources. 434 * -ENOSPC - insufficient disk resources.
435 */ 435 */
436int extRecord(struct inode *ip, xad_t * xp) 436int extRecord(struct inode *ip, xad_t * xp)
437{ 437{
@@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp)
451 451
452#ifdef _NOTYET 452#ifdef _NOTYET
453/* 453/*
454 * NAME: extFill() 454 * NAME: extFill()
455 * 455 *
456 * FUNCTION: allocate disk space for a file page that represents 456 * FUNCTION: allocate disk space for a file page that represents
457 * a file hole. 457 * a file hole.
458 * 458 *
459 * PARAMETERS: 459 * PARAMETERS:
@@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp)
461 * cp - cbuf of the file page represent the hole. 461 * cp - cbuf of the file page represent the hole.
462 * 462 *
463 * RETURN VALUES: 463 * RETURN VALUES:
464 * 0 - success 464 * 0 - success
465 * -EIO - i/o error. 465 * -EIO - i/o error.
466 * -ENOSPC - insufficient disk resources. 466 * -ENOSPC - insufficient disk resources.
467 */ 467 */
468int extFill(struct inode *ip, xad_t * xp) 468int extFill(struct inode *ip, xad_t * xp)
469{ 469{
470 int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; 470 int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
471 s64 blkno = offsetXAD(xp) >> ip->i_blkbits; 471 s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
472 472
473// assert(ISSPARSE(ip)); 473// assert(ISSPARSE(ip));
474 474
475 /* initialize the extent allocation hint */ 475 /* initialize the extent allocation hint */
476 XADaddress(xp, 0); 476 XADaddress(xp, 0);
@@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp)
489/* 489/*
490 * NAME: extBalloc() 490 * NAME: extBalloc()
491 * 491 *
492 * FUNCTION: allocate disk blocks to form an extent. 492 * FUNCTION: allocate disk blocks to form an extent.
493 * 493 *
494 * initially, we will try to allocate disk blocks for the 494 * initially, we will try to allocate disk blocks for the
495 * requested size (nblocks). if this fails (nblocks 495 * requested size (nblocks). if this fails (nblocks
@@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp)
513 * allocated block range. 513 * allocated block range.
514 * 514 *
515 * RETURN VALUES: 515 * RETURN VALUES:
516 * 0 - success 516 * 0 - success
517 * -EIO - i/o error. 517 * -EIO - i/o error.
518 * -ENOSPC - insufficient disk resources. 518 * -ENOSPC - insufficient disk resources.
519 */ 519 */
520static int 520static int
521extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) 521extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
@@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
580/* 580/*
581 * NAME: extBrealloc() 581 * NAME: extBrealloc()
582 * 582 *
583 * FUNCTION: attempt to extend an extent's allocation. 583 * FUNCTION: attempt to extend an extent's allocation.
584 * 584 *
585 * Initially, we will try to extend the extent's allocation 585 * Initially, we will try to extend the extent's allocation
586 * in place. If this fails, we'll try to move the extent 586 * in place. If this fails, we'll try to move the extent
@@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
597 * 597 *
598 * PARAMETERS: 598 * PARAMETERS:
599 * ip - the inode of the file. 599 * ip - the inode of the file.
600 * blkno - starting block number of the extents current allocation. 600 * blkno - starting block number of the extents current allocation.
601 * nblks - number of blocks within the extents current allocation. 601 * nblks - number of blocks within the extents current allocation.
602 * newnblks - pointer to a s64 value. on entry, this value is the 602 * newnblks - pointer to a s64 value. on entry, this value is the
603 * the new desired extent size (number of blocks). on 603 * the new desired extent size (number of blocks). on
604 * successful exit, this value is set to the extent's actual 604 * successful exit, this value is set to the extent's actual
@@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
606 * newblkno - the starting block number of the extents new allocation. 606 * newblkno - the starting block number of the extents new allocation.
607 * 607 *
608 * RETURN VALUES: 608 * RETURN VALUES:
609 * 0 - success 609 * 0 - success
610 * -EIO - i/o error. 610 * -EIO - i/o error.
611 * -ENOSPC - insufficient disk resources. 611 * -ENOSPC - insufficient disk resources.
612 */ 612 */
613static int 613static int
614extBrealloc(struct inode *ip, 614extBrealloc(struct inode *ip,
@@ -634,16 +634,16 @@ extBrealloc(struct inode *ip,
634 634
635 635
636/* 636/*
637 * NAME: extRoundDown() 637 * NAME: extRoundDown()
638 * 638 *
639 * FUNCTION: round down a specified number of blocks to the next 639 * FUNCTION: round down a specified number of blocks to the next
640 * smallest power of 2 number. 640 * smallest power of 2 number.
641 * 641 *
642 * PARAMETERS: 642 * PARAMETERS:
643 * nb - the number of blocks to round down. 643 * nb - the number of blocks to round down.
644 * 644 *
645 * RETURN VALUES: 645 * RETURN VALUES:
646 * next smallest power of 2 number. 646 * next smallest power of 2 number.
647 */ 647 */
648static s64 extRoundDown(s64 nb) 648static s64 extRoundDown(s64 nb)
649{ 649{
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 38f70ac03bec..b3f5463fbe52 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -34,9 +34,9 @@
34#define JFS_UNICODE 0x00000001 /* unicode name */ 34#define JFS_UNICODE 0x00000001 /* unicode name */
35 35
36/* mount time flags for error handling */ 36/* mount time flags for error handling */
37#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ 37#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */
38#define JFS_ERR_CONTINUE 0x00000004 /* continue */ 38#define JFS_ERR_CONTINUE 0x00000004 /* continue */
39#define JFS_ERR_PANIC 0x00000008 /* panic */ 39#define JFS_ERR_PANIC 0x00000008 /* panic */
40 40
41/* Quota support */ 41/* Quota support */
42#define JFS_USRQUOTA 0x00000010 42#define JFS_USRQUOTA 0x00000010
@@ -83,7 +83,6 @@
83/* case-insensitive name/directory support */ 83/* case-insensitive name/directory support */
84 84
85#define JFS_AIX 0x80000000 /* AIX support */ 85#define JFS_AIX 0x80000000 /* AIX support */
86/* POSIX name/directory support - Never implemented*/
87 86
88/* 87/*
89 * buffer cache configuration 88 * buffer cache configuration
@@ -113,10 +112,10 @@
113#define IDATASIZE 256 /* inode inline data size */ 112#define IDATASIZE 256 /* inode inline data size */
114#define IXATTRSIZE 128 /* inode inline extended attribute size */ 113#define IXATTRSIZE 128 /* inode inline extended attribute size */
115 114
116#define XTPAGE_SIZE 4096 115#define XTPAGE_SIZE 4096
117#define log2_PAGESIZE 12 116#define log2_PAGESIZE 12
118 117
119#define IAG_SIZE 4096 118#define IAG_SIZE 4096
120#define IAG_EXTENT_SIZE 4096 119#define IAG_EXTENT_SIZE 4096
121#define INOSPERIAG 4096 /* number of disk inodes per iag */ 120#define INOSPERIAG 4096 /* number of disk inodes per iag */
122#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ 121#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index c6530227cda6..3870ba8b9086 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *);
93static void copy_to_dinode(struct dinode *, struct inode *); 93static void copy_to_dinode(struct dinode *, struct inode *);
94 94
95/* 95/*
96 * NAME: diMount() 96 * NAME: diMount()
97 * 97 *
98 * FUNCTION: initialize the incore inode map control structures for 98 * FUNCTION: initialize the incore inode map control structures for
99 * a fileset or aggregate init time. 99 * a fileset or aggregate init time.
100 * 100 *
101 * the inode map's control structure (dinomap) is 101 * the inode map's control structure (dinomap) is
102 * brought in from disk and placed in virtual memory. 102 * brought in from disk and placed in virtual memory.
103 * 103 *
104 * PARAMETERS: 104 * PARAMETERS:
105 * ipimap - pointer to inode map inode for the aggregate or fileset. 105 * ipimap - pointer to inode map inode for the aggregate or fileset.
106 * 106 *
107 * RETURN VALUES: 107 * RETURN VALUES:
108 * 0 - success 108 * 0 - success
109 * -ENOMEM - insufficient free virtual memory. 109 * -ENOMEM - insufficient free virtual memory.
110 * -EIO - i/o error. 110 * -EIO - i/o error.
111 */ 111 */
112int diMount(struct inode *ipimap) 112int diMount(struct inode *ipimap)
113{ 113{
@@ -180,18 +180,18 @@ int diMount(struct inode *ipimap)
180 180
181 181
182/* 182/*
183 * NAME: diUnmount() 183 * NAME: diUnmount()
184 * 184 *
185 * FUNCTION: write to disk the incore inode map control structures for 185 * FUNCTION: write to disk the incore inode map control structures for
186 * a fileset or aggregate at unmount time. 186 * a fileset or aggregate at unmount time.
187 * 187 *
188 * PARAMETERS: 188 * PARAMETERS:
189 * ipimap - pointer to inode map inode for the aggregate or fileset. 189 * ipimap - pointer to inode map inode for the aggregate or fileset.
190 * 190 *
191 * RETURN VALUES: 191 * RETURN VALUES:
192 * 0 - success 192 * 0 - success
193 * -ENOMEM - insufficient free virtual memory. 193 * -ENOMEM - insufficient free virtual memory.
194 * -EIO - i/o error. 194 * -EIO - i/o error.
195 */ 195 */
196int diUnmount(struct inode *ipimap, int mounterror) 196int diUnmount(struct inode *ipimap, int mounterror)
197{ 197{
@@ -274,9 +274,9 @@ int diSync(struct inode *ipimap)
274 274
275 275
276/* 276/*
277 * NAME: diRead() 277 * NAME: diRead()
278 * 278 *
279 * FUNCTION: initialize an incore inode from disk. 279 * FUNCTION: initialize an incore inode from disk.
280 * 280 *
281 * on entry, the specified incore inode should itself 281 * on entry, the specified incore inode should itself
282 * specify the disk inode number corresponding to the 282 * specify the disk inode number corresponding to the
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
285 * this routine handles incore inode initialization for 285 * this routine handles incore inode initialization for
286 * both "special" and "regular" inodes. special inodes 286 * both "special" and "regular" inodes. special inodes
287 * are those required early in the mount process and 287 * are those required early in the mount process and
288 * require special handling since much of the file system 288 * require special handling since much of the file system
289 * is not yet initialized. these "special" inodes are 289 * is not yet initialized. these "special" inodes are
290 * identified by a NULL inode map inode pointer and are 290 * identified by a NULL inode map inode pointer and are
291 * actually initialized by a call to diReadSpecial(). 291 * actually initialized by a call to diReadSpecial().
@@ -298,12 +298,12 @@ int diSync(struct inode *ipimap)
298 * incore inode. 298 * incore inode.
299 * 299 *
300 * PARAMETERS: 300 * PARAMETERS:
301 * ip - pointer to incore inode to be initialized from disk. 301 * ip - pointer to incore inode to be initialized from disk.
302 * 302 *
303 * RETURN VALUES: 303 * RETURN VALUES:
304 * 0 - success 304 * 0 - success
305 * -EIO - i/o error. 305 * -EIO - i/o error.
306 * -ENOMEM - insufficient memory 306 * -ENOMEM - insufficient memory
307 * 307 *
308 */ 308 */
309int diRead(struct inode *ip) 309int diRead(struct inode *ip)
@@ -410,26 +410,26 @@ int diRead(struct inode *ip)
410 410
411 411
412/* 412/*
413 * NAME: diReadSpecial() 413 * NAME: diReadSpecial()
414 * 414 *
415 * FUNCTION: initialize a 'special' inode from disk. 415 * FUNCTION: initialize a 'special' inode from disk.
416 * 416 *
417 * this routine handles aggregate level inodes. The 417 * this routine handles aggregate level inodes. The
418 * inode cache cannot differentiate between the 418 * inode cache cannot differentiate between the
419 * aggregate inodes and the filesystem inodes, so we 419 * aggregate inodes and the filesystem inodes, so we
420 * handle these here. We don't actually use the aggregate 420 * handle these here. We don't actually use the aggregate
421 * inode map, since these inodes are at a fixed location 421 * inode map, since these inodes are at a fixed location
422 * and in some cases the aggregate inode map isn't initialized 422 * and in some cases the aggregate inode map isn't initialized
423 * yet. 423 * yet.
424 * 424 *
425 * PARAMETERS: 425 * PARAMETERS:
426 * sb - filesystem superblock 426 * sb - filesystem superblock
427 * inum - aggregate inode number 427 * inum - aggregate inode number
428 * secondary - 1 if secondary aggregate inode table 428 * secondary - 1 if secondary aggregate inode table
429 * 429 *
430 * RETURN VALUES: 430 * RETURN VALUES:
431 * new inode - success 431 * new inode - success
432 * NULL - i/o error. 432 * NULL - i/o error.
433 */ 433 */
434struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) 434struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
435{ 435{
@@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
502} 502}
503 503
504/* 504/*
505 * NAME: diWriteSpecial() 505 * NAME: diWriteSpecial()
506 * 506 *
507 * FUNCTION: Write the special inode to disk 507 * FUNCTION: Write the special inode to disk
508 * 508 *
509 * PARAMETERS: 509 * PARAMETERS:
510 * ip - special inode 510 * ip - special inode
511 * secondary - 1 if secondary aggregate inode table 511 * secondary - 1 if secondary aggregate inode table
512 * 512 *
513 * RETURN VALUES: none 513 * RETURN VALUES: none
@@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary)
554} 554}
555 555
556/* 556/*
557 * NAME: diFreeSpecial() 557 * NAME: diFreeSpecial()
558 * 558 *
559 * FUNCTION: Free allocated space for special inode 559 * FUNCTION: Free allocated space for special inode
560 */ 560 */
561void diFreeSpecial(struct inode *ip) 561void diFreeSpecial(struct inode *ip)
562{ 562{
@@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip)
572 572
573 573
574/* 574/*
575 * NAME: diWrite() 575 * NAME: diWrite()
576 * 576 *
577 * FUNCTION: write the on-disk inode portion of the in-memory inode 577 * FUNCTION: write the on-disk inode portion of the in-memory inode
578 * to its corresponding on-disk inode. 578 * to its corresponding on-disk inode.
579 * 579 *
580 * on entry, the specified incore inode should itself 580 * on entry, the specified incore inode should itself
@@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip)
589 * 589 *
590 * PARAMETERS: 590 * PARAMETERS:
591 * tid - transaction id 591 * tid - transaction id
592 * ip - pointer to incore inode to be written to the inode extent. 592 * ip - pointer to incore inode to be written to the inode extent.
593 * 593 *
594 * RETURN VALUES: 594 * RETURN VALUES:
595 * 0 - success 595 * 0 - success
596 * -EIO - i/o error. 596 * -EIO - i/o error.
597 */ 597 */
598int diWrite(tid_t tid, struct inode *ip) 598int diWrite(tid_t tid, struct inode *ip)
599{ 599{
@@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip)
730 ilinelock = (struct linelock *) & tlck->lock; 730 ilinelock = (struct linelock *) & tlck->lock;
731 731
732 /* 732 /*
733 * regular file: 16 byte (XAD slot) granularity 733 * regular file: 16 byte (XAD slot) granularity
734 */ 734 */
735 if (type & tlckXTREE) { 735 if (type & tlckXTREE) {
736 xtpage_t *p, *xp; 736 xtpage_t *p, *xp;
@@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip)
755 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 755 xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
756 } 756 }
757 /* 757 /*
758 * directory: 32 byte (directory entry slot) granularity 758 * directory: 32 byte (directory entry slot) granularity
759 */ 759 */
760 else if (type & tlckDTREE) { 760 else if (type & tlckDTREE) {
761 dtpage_t *p, *xp; 761 dtpage_t *p, *xp;
@@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip)
800 } 800 }
801 801
802 /* 802 /*
803 * lock/copy inode base: 128 byte slot granularity 803 * lock/copy inode base: 128 byte slot granularity
804 */ 804 */
805// baseDinode:
806 lv = & dilinelock->lv[dilinelock->index]; 805 lv = & dilinelock->lv[dilinelock->index];
807 lv->offset = dioffset >> L2INODESLOTSIZE; 806 lv->offset = dioffset >> L2INODESLOTSIZE;
808 copy_to_dinode(dp, ip); 807 copy_to_dinode(dp, ip);
@@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip)
813 lv->length = 1; 812 lv->length = 1;
814 dilinelock->index++; 813 dilinelock->index++;
815 814
816#ifdef _JFS_FASTDASD
817 /*
818 * We aren't logging changes to the DASD used in directory inodes,
819 * but we need to write them to disk. If we don't unmount cleanly,
820 * mount will recalculate the DASD used.
821 */
822 if (S_ISDIR(ip->i_mode)
823 && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
824 memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
825#endif /* _JFS_FASTDASD */
826
827 /* release the buffer holding the updated on-disk inode. 815 /* release the buffer holding the updated on-disk inode.
828 * the buffer will be later written by commit processing. 816 * the buffer will be later written by commit processing.
829 */ 817 */
@@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip)
834 822
835 823
836/* 824/*
837 * NAME: diFree(ip) 825 * NAME: diFree(ip)
838 * 826 *
839 * FUNCTION: free a specified inode from the inode working map 827 * FUNCTION: free a specified inode from the inode working map
840 * for a fileset or aggregate. 828 * for a fileset or aggregate.
841 * 829 *
842 * if the inode to be freed represents the first (only) 830 * if the inode to be freed represents the first (only)
@@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip)
865 * any updates and are held until all updates are complete. 853 * any updates and are held until all updates are complete.
866 * 854 *
867 * PARAMETERS: 855 * PARAMETERS:
868 * ip - inode to be freed. 856 * ip - inode to be freed.
869 * 857 *
870 * RETURN VALUES: 858 * RETURN VALUES:
871 * 0 - success 859 * 0 - success
872 * -EIO - i/o error. 860 * -EIO - i/o error.
873 */ 861 */
874int diFree(struct inode *ip) 862int diFree(struct inode *ip)
875{ 863{
@@ -902,7 +890,8 @@ int diFree(struct inode *ip)
902 * the map. 890 * the map.
903 */ 891 */
904 if (iagno >= imap->im_nextiag) { 892 if (iagno >= imap->im_nextiag) {
905 dump_mem("imap", imap, 32); 893 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
894 imap, 32, 0);
906 jfs_error(ip->i_sb, 895 jfs_error(ip->i_sb,
907 "diFree: inum = %d, iagno = %d, nextiag = %d", 896 "diFree: inum = %d, iagno = %d, nextiag = %d",
908 (uint) inum, iagno, imap->im_nextiag); 897 (uint) inum, iagno, imap->im_nextiag);
@@ -964,8 +953,8 @@ int diFree(struct inode *ip)
964 return -EIO; 953 return -EIO;
965 } 954 }
966 /* 955 /*
967 * inode extent still has some inodes or below low water mark: 956 * inode extent still has some inodes or below low water mark:
968 * keep the inode extent; 957 * keep the inode extent;
969 */ 958 */
970 if (bitmap || 959 if (bitmap ||
971 imap->im_agctl[agno].numfree < 96 || 960 imap->im_agctl[agno].numfree < 96 ||
@@ -1047,12 +1036,12 @@ int diFree(struct inode *ip)
1047 1036
1048 1037
1049 /* 1038 /*
1050 * inode extent has become free and above low water mark: 1039 * inode extent has become free and above low water mark:
1051 * free the inode extent; 1040 * free the inode extent;
1052 */ 1041 */
1053 1042
1054 /* 1043 /*
1055 * prepare to update iag list(s) (careful update step 1) 1044 * prepare to update iag list(s) (careful update step 1)
1056 */ 1045 */
1057 amp = bmp = cmp = dmp = NULL; 1046 amp = bmp = cmp = dmp = NULL;
1058 fwd = back = -1; 1047 fwd = back = -1;
@@ -1152,7 +1141,7 @@ int diFree(struct inode *ip)
1152 invalidate_pxd_metapages(ip, freepxd); 1141 invalidate_pxd_metapages(ip, freepxd);
1153 1142
1154 /* 1143 /*
1155 * update iag list(s) (careful update step 2) 1144 * update iag list(s) (careful update step 2)
1156 */ 1145 */
1157 /* add the iag to the ag extent free list if this is the 1146 /* add the iag to the ag extent free list if this is the
1158 * first free extent for the iag. 1147 * first free extent for the iag.
@@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1338 1327
1339 1328
1340/* 1329/*
1341 * NAME: diAlloc(pip,dir,ip) 1330 * NAME: diAlloc(pip,dir,ip)
1342 * 1331 *
1343 * FUNCTION: allocate a disk inode from the inode working map 1332 * FUNCTION: allocate a disk inode from the inode working map
1344 * for a fileset or aggregate. 1333 * for a fileset or aggregate.
1345 * 1334 *
1346 * PARAMETERS: 1335 * PARAMETERS:
1347 * pip - pointer to incore inode for the parent inode. 1336 * pip - pointer to incore inode for the parent inode.
1348 * dir - 'true' if the new disk inode is for a directory. 1337 * dir - 'true' if the new disk inode is for a directory.
1349 * ip - pointer to a new inode 1338 * ip - pointer to a new inode
1350 * 1339 *
1351 * RETURN VALUES: 1340 * RETURN VALUES:
1352 * 0 - success. 1341 * 0 - success.
1353 * -ENOSPC - insufficient disk resources. 1342 * -ENOSPC - insufficient disk resources.
1354 * -EIO - i/o error. 1343 * -EIO - i/o error.
1355 */ 1344 */
1356int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1345int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1357{ 1346{
@@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1433 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1422 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1434 1423
1435 /* 1424 /*
1436 * try to allocate from the IAG 1425 * try to allocate from the IAG
1437 */ 1426 */
1438 /* check if the inode may be allocated from the iag 1427 /* check if the inode may be allocated from the iag
1439 * (i.e. the iag has free inodes or new extent can be added). 1428 * (i.e. the iag has free inodes or new extent can be added).
@@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1633 1622
1634 1623
1635/* 1624/*
1636 * NAME: diAllocAG(imap,agno,dir,ip) 1625 * NAME: diAllocAG(imap,agno,dir,ip)
1637 * 1626 *
1638 * FUNCTION: allocate a disk inode from the allocation group. 1627 * FUNCTION: allocate a disk inode from the allocation group.
1639 * 1628 *
1640 * this routine first determines if a new extent of free 1629 * this routine first determines if a new extent of free
1641 * inodes should be added for the allocation group, with 1630 * inodes should be added for the allocation group, with
@@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1649 * PRE CONDITION: Already have the AG lock for this AG. 1638 * PRE CONDITION: Already have the AG lock for this AG.
1650 * 1639 *
1651 * PARAMETERS: 1640 * PARAMETERS:
1652 * imap - pointer to inode map control structure. 1641 * imap - pointer to inode map control structure.
1653 * agno - allocation group to allocate from. 1642 * agno - allocation group to allocate from.
1654 * dir - 'true' if the new disk inode is for a directory. 1643 * dir - 'true' if the new disk inode is for a directory.
1655 * ip - pointer to the new inode to be filled in on successful return 1644 * ip - pointer to the new inode to be filled in on successful return
1656 * with the disk inode number allocated, its extent address 1645 * with the disk inode number allocated, its extent address
1657 * and the start of the ag. 1646 * and the start of the ag.
1658 * 1647 *
1659 * RETURN VALUES: 1648 * RETURN VALUES:
1660 * 0 - success. 1649 * 0 - success.
1661 * -ENOSPC - insufficient disk resources. 1650 * -ENOSPC - insufficient disk resources.
1662 * -EIO - i/o error. 1651 * -EIO - i/o error.
1663 */ 1652 */
1664static int 1653static int
1665diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1654diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1709 1698
1710 1699
1711/* 1700/*
1712 * NAME: diAllocAny(imap,agno,dir,iap) 1701 * NAME: diAllocAny(imap,agno,dir,iap)
1713 * 1702 *
1714 * FUNCTION: allocate a disk inode from any other allocation group. 1703 * FUNCTION: allocate a disk inode from any other allocation group.
1715 * 1704 *
1716 * this routine is called when an allocation attempt within 1705 * this routine is called when an allocation attempt within
1717 * the primary allocation group has failed. if attempts to 1706 * the primary allocation group has failed. if attempts to
@@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1719 * specified primary group. 1708 * specified primary group.
1720 * 1709 *
1721 * PARAMETERS: 1710 * PARAMETERS:
1722 * imap - pointer to inode map control structure. 1711 * imap - pointer to inode map control structure.
1723 * agno - primary allocation group (to avoid). 1712 * agno - primary allocation group (to avoid).
1724 * dir - 'true' if the new disk inode is for a directory. 1713 * dir - 'true' if the new disk inode is for a directory.
1725 * ip - pointer to a new inode to be filled in on successful return 1714 * ip - pointer to a new inode to be filled in on successful return
1726 * with the disk inode number allocated, its extent address 1715 * with the disk inode number allocated, its extent address
1727 * and the start of the ag. 1716 * and the start of the ag.
1728 * 1717 *
1729 * RETURN VALUES: 1718 * RETURN VALUES:
1730 * 0 - success. 1719 * 0 - success.
1731 * -ENOSPC - insufficient disk resources. 1720 * -ENOSPC - insufficient disk resources.
1732 * -EIO - i/o error. 1721 * -EIO - i/o error.
1733 */ 1722 */
1734static int 1723static int
1735diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1724diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1772 1761
1773 1762
1774/* 1763/*
1775 * NAME: diAllocIno(imap,agno,ip) 1764 * NAME: diAllocIno(imap,agno,ip)
1776 * 1765 *
1777 * FUNCTION: allocate a disk inode from the allocation group's free 1766 * FUNCTION: allocate a disk inode from the allocation group's free
1778 * inode list, returning an error if this free list is 1767 * inode list, returning an error if this free list is
1779 * empty (i.e. no iags on the list). 1768 * empty (i.e. no iags on the list).
1780 * 1769 *
@@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1785 * PRE CONDITION: Already have AG lock for this AG. 1774 * PRE CONDITION: Already have AG lock for this AG.
1786 * 1775 *
1787 * PARAMETERS: 1776 * PARAMETERS:
1788 * imap - pointer to inode map control structure. 1777 * imap - pointer to inode map control structure.
1789 * agno - allocation group. 1778 * agno - allocation group.
1790 * ip - pointer to new inode to be filled in on successful return 1779 * ip - pointer to new inode to be filled in on successful return
1791 * with the disk inode number allocated, its extent address 1780 * with the disk inode number allocated, its extent address
1792 * and the start of the ag. 1781 * and the start of the ag.
1793 * 1782 *
1794 * RETURN VALUES: 1783 * RETURN VALUES:
1795 * 0 - success. 1784 * 0 - success.
1796 * -ENOSPC - insufficient disk resources. 1785 * -ENOSPC - insufficient disk resources.
1797 * -EIO - i/o error. 1786 * -EIO - i/o error.
1798 */ 1787 */
1799static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1788static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1800{ 1789{
@@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1890 1879
1891 1880
1892/* 1881/*
1893 * NAME: diAllocExt(imap,agno,ip) 1882 * NAME: diAllocExt(imap,agno,ip)
1894 * 1883 *
1895 * FUNCTION: add a new extent of free inodes to an iag, allocating 1884 * FUNCTION: add a new extent of free inodes to an iag, allocating
1896 * an inode from this extent to satisfy the current allocation 1885 * an inode from this extent to satisfy the current allocation
@@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1910 * for the purpose of satisfying this request. 1899 * for the purpose of satisfying this request.
1911 * 1900 *
1912 * PARAMETERS: 1901 * PARAMETERS:
1913 * imap - pointer to inode map control structure. 1902 * imap - pointer to inode map control structure.
1914 * agno - allocation group number. 1903 * agno - allocation group number.
1915 * ip - pointer to new inode to be filled in on successful return 1904 * ip - pointer to new inode to be filled in on successful return
1916 * with the disk inode number allocated, its extent address 1905 * with the disk inode number allocated, its extent address
1917 * and the start of the ag. 1906 * and the start of the ag.
1918 * 1907 *
1919 * RETURN VALUES: 1908 * RETURN VALUES:
1920 * 0 - success. 1909 * 0 - success.
1921 * -ENOSPC - insufficient disk resources. 1910 * -ENOSPC - insufficient disk resources.
1922 * -EIO - i/o error. 1911 * -EIO - i/o error.
1923 */ 1912 */
1924static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1913static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1925{ 1914{
@@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
2010 1999
2011 2000
2012/* 2001/*
2013 * NAME: diAllocBit(imap,iagp,ino) 2002 * NAME: diAllocBit(imap,iagp,ino)
2014 * 2003 *
2015 * FUNCTION: allocate a backed inode from an iag. 2004 * FUNCTION: allocate a backed inode from an iag.
2016 * 2005 *
@@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
2030 * this AG. Must have read lock on imap inode. 2019 * this AG. Must have read lock on imap inode.
2031 * 2020 *
2032 * PARAMETERS: 2021 * PARAMETERS:
2033 * imap - pointer to inode map control structure. 2022 * imap - pointer to inode map control structure.
2034 * iagp - pointer to iag. 2023 * iagp - pointer to iag.
2035 * ino - inode number to be allocated within the iag. 2024 * ino - inode number to be allocated within the iag.
2036 * 2025 *
2037 * RETURN VALUES: 2026 * RETURN VALUES:
2038 * 0 - success. 2027 * 0 - success.
2039 * -ENOSPC - insufficient disk resources. 2028 * -ENOSPC - insufficient disk resources.
2040 * -EIO - i/o error. 2029 * -EIO - i/o error.
2041 */ 2030 */
2042static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2031static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2043{ 2032{
@@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2144 2133
2145 2134
2146/* 2135/*
2147 * NAME: diNewExt(imap,iagp,extno) 2136 * NAME: diNewExt(imap,iagp,extno)
2148 * 2137 *
2149 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2138 * FUNCTION: initialize a new extent of inodes for an iag, allocating
2150 * the first inode of the extent for use for the current 2139 * the first inode of the extent for use for the current
2151 * allocation request. 2140 * allocation request.
2152 * 2141 *
2153 * disk resources are allocated for the new extent of inodes 2142 * disk resources are allocated for the new extent of inodes
2154 * and the inodes themselves are initialized to reflect their 2143 * and the inodes themselves are initialized to reflect their
@@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2177 * this AG. Must have read lock on imap inode. 2166 * this AG. Must have read lock on imap inode.
2178 * 2167 *
2179 * PARAMETERS: 2168 * PARAMETERS:
2180 * imap - pointer to inode map control structure. 2169 * imap - pointer to inode map control structure.
2181 * iagp - pointer to iag. 2170 * iagp - pointer to iag.
2182 * extno - extent number. 2171 * extno - extent number.
2183 * 2172 *
2184 * RETURN VALUES: 2173 * RETURN VALUES:
2185 * 0 - success. 2174 * 0 - success.
2186 * -ENOSPC - insufficient disk resources. 2175 * -ENOSPC - insufficient disk resources.
2187 * -EIO - i/o error. 2176 * -EIO - i/o error.
2188 */ 2177 */
2189static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2178static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2190{ 2179{
@@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2430 2419
2431 2420
2432/* 2421/*
2433 * NAME: diNewIAG(imap,iagnop,agno) 2422 * NAME: diNewIAG(imap,iagnop,agno)
2434 * 2423 *
2435 * FUNCTION: allocate a new iag for an allocation group. 2424 * FUNCTION: allocate a new iag for an allocation group.
2436 * 2425 *
@@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2443 * and returned to satisfy the request. 2432 * and returned to satisfy the request.
2444 * 2433 *
2445 * PARAMETERS: 2434 * PARAMETERS:
2446 * imap - pointer to inode map control structure. 2435 * imap - pointer to inode map control structure.
2447 * iagnop - pointer to an iag number set with the number of the 2436 * iagnop - pointer to an iag number set with the number of the
2448 * newly allocated iag upon successful return. 2437 * newly allocated iag upon successful return.
2449 * agno - allocation group number. 2438 * agno - allocation group number.
2450 * bpp - Buffer pointer to be filled in with new IAG's buffer 2439 * bpp - Buffer pointer to be filled in with new IAG's buffer
2451 * 2440 *
2452 * RETURN VALUES: 2441 * RETURN VALUES:
2453 * 0 - success. 2442 * 0 - success.
2454 * -ENOSPC - insufficient disk resources. 2443 * -ENOSPC - insufficient disk resources.
2455 * -EIO - i/o error. 2444 * -EIO - i/o error.
2456 * 2445 *
2457 * serialization: 2446 * serialization:
2458 * AG lock held on entry/exit; 2447 * AG lock held on entry/exit;
@@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2461 * 2450 *
2462 * note: new iag transaction: 2451 * note: new iag transaction:
2463 * . synchronously write iag; 2452 * . synchronously write iag;
2464 * . write log of xtree and inode of imap; 2453 * . write log of xtree and inode of imap;
2465 * . commit; 2454 * . commit;
2466 * . synchronous write of xtree (right to left, bottom to top); 2455 * . synchronous write of xtree (right to left, bottom to top);
2467 * . at start of logredo(): init in-memory imap with one additional iag page; 2456 * . at start of logredo(): init in-memory imap with one additional iag page;
@@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2481 s64 xaddr = 0; 2470 s64 xaddr = 0;
2482 s64 blkno; 2471 s64 blkno;
2483 tid_t tid; 2472 tid_t tid;
2484#ifdef _STILL_TO_PORT
2485 xad_t xad;
2486#endif /* _STILL_TO_PORT */
2487 struct inode *iplist[1]; 2473 struct inode *iplist[1];
2488 2474
2489 /* pick up pointers to the inode map and mount inodes */ 2475 /* pick up pointers to the inode map and mount inodes */
@@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2674} 2660}
2675 2661
2676/* 2662/*
2677 * NAME: diIAGRead() 2663 * NAME: diIAGRead()
2678 * 2664 *
2679 * FUNCTION: get the buffer for the specified iag within a fileset 2665 * FUNCTION: get the buffer for the specified iag within a fileset
2680 * or aggregate inode map. 2666 * or aggregate inode map.
2681 * 2667 *
2682 * PARAMETERS: 2668 * PARAMETERS:
2683 * imap - pointer to inode map control structure. 2669 * imap - pointer to inode map control structure.
2684 * iagno - iag number. 2670 * iagno - iag number.
2685 * bpp - point to buffer pointer to be filled in on successful 2671 * bpp - point to buffer pointer to be filled in on successful
2686 * exit. 2672 * exit.
2687 * 2673 *
2688 * SERIALIZATION: 2674 * SERIALIZATION:
@@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2691 * the read lock is unnecessary.) 2677 * the read lock is unnecessary.)
2692 * 2678 *
2693 * RETURN VALUES: 2679 * RETURN VALUES:
2694 * 0 - success. 2680 * 0 - success.
2695 * -EIO - i/o error. 2681 * -EIO - i/o error.
2696 */ 2682 */
2697static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2683static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2698{ 2684{
@@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2712} 2698}
2713 2699
2714/* 2700/*
2715 * NAME: diFindFree() 2701 * NAME: diFindFree()
2716 * 2702 *
2717 * FUNCTION: find the first free bit in a word starting at 2703 * FUNCTION: find the first free bit in a word starting at
2718 * the specified bit position. 2704 * the specified bit position.
2719 * 2705 *
2720 * PARAMETERS: 2706 * PARAMETERS:
2721 * word - word to be examined. 2707 * word - word to be examined.
2722 * start - starting bit position. 2708 * start - starting bit position.
2723 * 2709 *
2724 * RETURN VALUES: 2710 * RETURN VALUES:
2725 * bit position of first free bit in the word or 32 if 2711 * bit position of first free bit in the word or 32 if
2726 * no free bits were found. 2712 * no free bits were found.
2727 */ 2713 */
2728static int diFindFree(u32 word, int start) 2714static int diFindFree(u32 word, int start)
@@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2897 atomic_read(&imap->im_numfree)); 2883 atomic_read(&imap->im_numfree));
2898 2884
2899 /* 2885 /*
2900 * reconstruct imap 2886 * reconstruct imap
2901 * 2887 *
2902 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2888 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2903 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2889 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2913 } 2899 }
2914 2900
2915 /* 2901 /*
2916 * process each iag page of the map. 2902 * process each iag page of the map.
2917 * 2903 *
2918 * rebuild AG Free Inode List, AG Free Inode Extent List; 2904 * rebuild AG Free Inode List, AG Free Inode Extent List;
2919 */ 2905 */
@@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2932 2918
2933 /* leave free iag in the free iag list */ 2919 /* leave free iag in the free iag list */
2934 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2920 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2935 release_metapage(bp); 2921 release_metapage(bp);
2936 continue; 2922 continue;
2937 } 2923 }
2938 2924
@@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
3063} 3049}
3064 3050
3065/* 3051/*
3066 * NAME: copy_from_dinode() 3052 * NAME: copy_from_dinode()
3067 * 3053 *
3068 * FUNCTION: Copies inode info from disk inode to in-memory inode 3054 * FUNCTION: Copies inode info from disk inode to in-memory inode
3069 * 3055 *
3070 * RETURN VALUES: 3056 * RETURN VALUES:
3071 * 0 - success 3057 * 0 - success
3072 * -ENOMEM - insufficient memory 3058 * -ENOMEM - insufficient memory
3073 */ 3059 */
3074static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3060static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3075{ 3061{
@@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3151} 3137}
3152 3138
3153/* 3139/*
3154 * NAME: copy_to_dinode() 3140 * NAME: copy_to_dinode()
3155 * 3141 *
3156 * FUNCTION: Copies inode info from in-memory inode to disk inode 3142 * FUNCTION: Copies inode info from in-memory inode to disk inode
3157 */ 3143 */
3158static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3144static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3159{ 3145{
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 4f9c346ed498..610a0e9d8941 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -24,17 +24,17 @@
24 * jfs_imap.h: disk inode manager 24 * jfs_imap.h: disk inode manager
25 */ 25 */
26 26
27#define EXTSPERIAG 128 /* number of disk inode extent per iag */ 27#define EXTSPERIAG 128 /* number of disk inode extent per iag */
28#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ 28#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */
29#define SMAPSZ 4 /* number of words per summary map */ 29#define SMAPSZ 4 /* number of words per summary map */
30#define EXTSPERSUM 32 /* number of extents per summary map entry */ 30#define EXTSPERSUM 32 /* number of extents per summary map entry */
31#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ 31#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */
32#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ 32#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */
33#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ 33#define MAXIAGS ((1<<20)-1) /* maximum number of iags */
34#define MAXAG 128 /* maximum number of allocation groups */ 34#define MAXAG 128 /* maximum number of allocation groups */
35 35
36#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ 36#define AMAPSIZE 512 /* bytes in the IAG allocation maps */
37#define SMAPSIZE 16 /* bytes in the IAG summary maps */ 37#define SMAPSIZE 16 /* bytes in the IAG summary maps */
38 38
39/* convert inode number to iag number */ 39/* convert inode number to iag number */
40#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) 40#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG)
@@ -60,31 +60,31 @@
60 * inode allocation group page (per 4096 inodes of an AG) 60 * inode allocation group page (per 4096 inodes of an AG)
61 */ 61 */
62struct iag { 62struct iag {
63 __le64 agstart; /* 8: starting block of ag */ 63 __le64 agstart; /* 8: starting block of ag */
64 __le32 iagnum; /* 4: inode allocation group number */ 64 __le32 iagnum; /* 4: inode allocation group number */
65 __le32 inofreefwd; /* 4: ag inode free list forward */ 65 __le32 inofreefwd; /* 4: ag inode free list forward */
66 __le32 inofreeback; /* 4: ag inode free list back */ 66 __le32 inofreeback; /* 4: ag inode free list back */
67 __le32 extfreefwd; /* 4: ag inode extent free list forward */ 67 __le32 extfreefwd; /* 4: ag inode extent free list forward */
68 __le32 extfreeback; /* 4: ag inode extent free list back */ 68 __le32 extfreeback; /* 4: ag inode extent free list back */
69 __le32 iagfree; /* 4: iag free list */ 69 __le32 iagfree; /* 4: iag free list */
70 70
71 /* summary map: 1 bit per inode extent */ 71 /* summary map: 1 bit per inode extent */
72 __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; 72 __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes;
73 * note: this indicates free and backed 73 * note: this indicates free and backed
74 * inodes, if the extent is not backed the 74 * inodes, if the extent is not backed the
75 * value will be 1. if the extent is 75 * value will be 1. if the extent is
76 * backed but all inodes are being used the 76 * backed but all inodes are being used the
77 * value will be 1. if the extent is 77 * value will be 1. if the extent is
78 * backed but at least one of the inodes is 78 * backed but at least one of the inodes is
79 * free the value will be 0. 79 * free the value will be 0.
80 */ 80 */
81 __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ 81 __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */
82 __le32 nfreeinos; /* 4: number of free inodes */ 82 __le32 nfreeinos; /* 4: number of free inodes */
83 __le32 nfreeexts; /* 4: number of free extents */ 83 __le32 nfreeexts; /* 4: number of free extents */
84 /* (72) */ 84 /* (72) */
85 u8 pad[1976]; /* 1976: pad to 2048 bytes */ 85 u8 pad[1976]; /* 1976: pad to 2048 bytes */
86 /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ 86 /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
87 __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ 87 __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */
88 __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ 88 __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */
89 pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ 89 pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */
90}; /* (4096) */ 90}; /* (4096) */
@@ -93,44 +93,44 @@ struct iag {
93 * per AG control information (in inode map control page) 93 * per AG control information (in inode map control page)
94 */ 94 */
95struct iagctl_disk { 95struct iagctl_disk {
96 __le32 inofree; /* 4: free inode list anchor */ 96 __le32 inofree; /* 4: free inode list anchor */
97 __le32 extfree; /* 4: free extent list anchor */ 97 __le32 extfree; /* 4: free extent list anchor */
98 __le32 numinos; /* 4: number of backed inodes */ 98 __le32 numinos; /* 4: number of backed inodes */
99 __le32 numfree; /* 4: number of free inodes */ 99 __le32 numfree; /* 4: number of free inodes */
100}; /* (16) */ 100}; /* (16) */
101 101
102struct iagctl { 102struct iagctl {
103 int inofree; /* free inode list anchor */ 103 int inofree; /* free inode list anchor */
104 int extfree; /* free extent list anchor */ 104 int extfree; /* free extent list anchor */
105 int numinos; /* number of backed inodes */ 105 int numinos; /* number of backed inodes */
106 int numfree; /* number of free inodes */ 106 int numfree; /* number of free inodes */
107}; 107};
108 108
109/* 109/*
110 * per fileset/aggregate inode map control page 110 * per fileset/aggregate inode map control page
111 */ 111 */
112struct dinomap_disk { 112struct dinomap_disk {
113 __le32 in_freeiag; /* 4: free iag list anchor */ 113 __le32 in_freeiag; /* 4: free iag list anchor */
114 __le32 in_nextiag; /* 4: next free iag number */ 114 __le32 in_nextiag; /* 4: next free iag number */
115 __le32 in_numinos; /* 4: num of backed inodes */ 115 __le32 in_numinos; /* 4: num of backed inodes */
116 __le32 in_numfree; /* 4: num of free backed inodes */ 116 __le32 in_numfree; /* 4: num of free backed inodes */
117 __le32 in_nbperiext; /* 4: num of blocks per inode extent */ 117 __le32 in_nbperiext; /* 4: num of blocks per inode extent */
118 __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ 118 __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */
119 __le32 in_diskblock; /* 4: for standalone test driver */ 119 __le32 in_diskblock; /* 4: for standalone test driver */
120 __le32 in_maxag; /* 4: for standalone test driver */ 120 __le32 in_maxag; /* 4: for standalone test driver */
121 u8 pad[2016]; /* 2016: pad to 2048 */ 121 u8 pad[2016]; /* 2016: pad to 2048 */
122 struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ 122 struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
123}; /* (4096) */ 123}; /* (4096) */
124 124
125struct dinomap { 125struct dinomap {
126 int in_freeiag; /* free iag list anchor */ 126 int in_freeiag; /* free iag list anchor */
127 int in_nextiag; /* next free iag number */ 127 int in_nextiag; /* next free iag number */
128 int in_numinos; /* num of backed inodes */ 128 int in_numinos; /* num of backed inodes */
129 int in_numfree; /* num of free backed inodes */ 129 int in_numfree; /* num of free backed inodes */
130 int in_nbperiext; /* num of blocks per inode extent */ 130 int in_nbperiext; /* num of blocks per inode extent */
131 int in_l2nbperiext; /* l2 of in_nbperiext */ 131 int in_l2nbperiext; /* l2 of in_nbperiext */
132 int in_diskblock; /* for standalone test driver */ 132 int in_diskblock; /* for standalone test driver */
133 int in_maxag; /* for standalone test driver */ 133 int in_maxag; /* for standalone test driver */
134 struct iagctl in_agctl[MAXAG]; /* AG control information */ 134 struct iagctl in_agctl[MAXAG]; /* AG control information */
135}; 135};
136 136
@@ -139,9 +139,9 @@ struct dinomap {
139 */ 139 */
140struct inomap { 140struct inomap {
141 struct dinomap im_imap; /* 4096: inode allocation control */ 141 struct dinomap im_imap; /* 4096: inode allocation control */
142 struct inode *im_ipimap; /* 4: ptr to inode for imap */ 142 struct inode *im_ipimap; /* 4: ptr to inode for imap */
143 struct mutex im_freelock; /* 4: iag free list lock */ 143 struct mutex im_freelock; /* 4: iag free list lock */
144 struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ 144 struct mutex im_aglock[MAXAG]; /* 512: per AG locks */
145 u32 *im_DBGdimap; 145 u32 *im_DBGdimap;
146 atomic_t im_numinos; /* num of backed inodes */ 146 atomic_t im_numinos; /* num of backed inodes */
147 atomic_t im_numfree; /* num of free backed inodes */ 147 atomic_t im_numfree; /* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 8f453eff3c83..cb8f30985ad1 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -40,7 +40,7 @@ struct jfs_inode_info {
40 uint mode2; /* jfs-specific mode */ 40 uint mode2; /* jfs-specific mode */
41 uint saved_uid; /* saved for uid mount option */ 41 uint saved_uid; /* saved for uid mount option */
42 uint saved_gid; /* saved for gid mount option */ 42 uint saved_gid; /* saved for gid mount option */
43 pxd_t ixpxd; /* inode extent descriptor */ 43 pxd_t ixpxd; /* inode extent descriptor */
44 dxd_t acl; /* dxd describing acl */ 44 dxd_t acl; /* dxd describing acl */
45 dxd_t ea; /* dxd describing ea */ 45 dxd_t ea; /* dxd describing ea */
46 time_t otime; /* time created */ 46 time_t otime; /* time created */
@@ -190,7 +190,7 @@ struct jfs_sb_info {
190 uint gengen; /* inode generation generator*/ 190 uint gengen; /* inode generation generator*/
191 uint inostamp; /* shows inode belongs to fileset*/ 191 uint inostamp; /* shows inode belongs to fileset*/
192 192
193 /* Formerly in ipbmap */ 193 /* Formerly in ipbmap */
194 struct bmap *bmap; /* incore bmap descriptor */ 194 struct bmap *bmap; /* incore bmap descriptor */
195 struct nls_table *nls_tab; /* current codepage */ 195 struct nls_table *nls_tab; /* current codepage */
196 struct inode *direct_inode; /* metadata inode */ 196 struct inode *direct_inode; /* metadata inode */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 44a2f33cb98d..de3e4a506dbc 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
244 goto writeRecord; 244 goto writeRecord;
245 245
246 /* 246 /*
247 * initialize/update page/transaction recovery lsn 247 * initialize/update page/transaction recovery lsn
248 */ 248 */
249 lsn = log->lsn; 249 lsn = log->lsn;
250 250
@@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
263 } 263 }
264 264
265 /* 265 /*
266 * initialize/update lsn of tblock of the page 266 * initialize/update lsn of tblock of the page
267 * 267 *
268 * transaction inherits oldest lsn of pages associated 268 * transaction inherits oldest lsn of pages associated
269 * with allocation/deallocation of resources (their 269 * with allocation/deallocation of resources (their
@@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
307 LOGSYNC_UNLOCK(log, flags); 307 LOGSYNC_UNLOCK(log, flags);
308 308
309 /* 309 /*
310 * write the log record 310 * write the log record
311 */ 311 */
312 writeRecord: 312 writeRecord:
313 lsn = lmWriteRecord(log, tblk, lrd, tlck); 313 lsn = lmWriteRecord(log, tblk, lrd, tlck);
@@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
372 goto moveLrd; 372 goto moveLrd;
373 373
374 /* 374 /*
375 * move log record data 375 * move log record data
376 */ 376 */
377 /* retrieve source meta-data page to log */ 377 /* retrieve source meta-data page to log */
378 if (tlck->flag & tlckPAGELOCK) { 378 if (tlck->flag & tlckPAGELOCK) {
@@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
465 } 465 }
466 466
467 /* 467 /*
468 * move log record descriptor 468 * move log record descriptor
469 */ 469 */
470 moveLrd: 470 moveLrd:
471 lrd->length = cpu_to_le16(len); 471 lrd->length = cpu_to_le16(len);
@@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log)
574 LOGGC_LOCK(log); 574 LOGGC_LOCK(log);
575 575
576 /* 576 /*
577 * write or queue the full page at the tail of write queue 577 * write or queue the full page at the tail of write queue
578 */ 578 */
579 /* get the tail tblk on commit queue */ 579 /* get the tail tblk on commit queue */
580 if (list_empty(&log->cqueue)) 580 if (list_empty(&log->cqueue))
@@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log)
625 LOGGC_UNLOCK(log); 625 LOGGC_UNLOCK(log);
626 626
627 /* 627 /*
628 * allocate/initialize next page 628 * allocate/initialize next page
629 */ 629 */
630 /* if log wraps, the first data page of log is 2 630 /* if log wraps, the first data page of log is 2
631 * (0 never used, 1 is superblock). 631 * (0 never used, 1 is superblock).
@@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
953 } 953 }
954 954
955 /* 955 /*
956 * forward syncpt 956 * forward syncpt
957 */ 957 */
958 /* if last sync is same as last syncpt, 958 /* if last sync is same as last syncpt,
959 * invoke sync point forward processing to update sync. 959 * invoke sync point forward processing to update sync.
@@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
989 lsn = log->lsn; 989 lsn = log->lsn;
990 990
991 /* 991 /*
992 * setup next syncpt trigger (SWAG) 992 * setup next syncpt trigger (SWAG)
993 */ 993 */
994 logsize = log->logsize; 994 logsize = log->logsize;
995 995
@@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
1000 if (more < 2 * LOGPSIZE) { 1000 if (more < 2 * LOGPSIZE) {
1001 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); 1001 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
1002 /* 1002 /*
1003 * log wrapping 1003 * log wrapping
1004 * 1004 *
1005 * option 1 - panic ? No.! 1005 * option 1 - panic ? No.!
1006 * option 2 - shutdown file systems 1006 * option 2 - shutdown file systems
1007 * associated with log ? 1007 * associated with log ?
1008 * option 3 - extend log ? 1008 * option 3 - extend log ?
1009 */ 1009 */
1010 /* 1010 /*
@@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
1062/* 1062/*
1063 * NAME: lmLogOpen() 1063 * NAME: lmLogOpen()
1064 * 1064 *
1065 * FUNCTION: open the log on first open; 1065 * FUNCTION: open the log on first open;
1066 * insert filesystem in the active list of the log. 1066 * insert filesystem in the active list of the log.
1067 * 1067 *
1068 * PARAMETER: ipmnt - file system mount inode 1068 * PARAMETER: ipmnt - file system mount inode
@@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb)
1113 init_waitqueue_head(&log->syncwait); 1113 init_waitqueue_head(&log->syncwait);
1114 1114
1115 /* 1115 /*
1116 * external log as separate logical volume 1116 * external log as separate logical volume
1117 * 1117 *
1118 * file systems to log may have n-to-1 relationship; 1118 * file systems to log may have n-to-1 relationship;
1119 */ 1119 */
@@ -1155,7 +1155,7 @@ journal_found:
1155 return 0; 1155 return 0;
1156 1156
1157 /* 1157 /*
1158 * unwind on error 1158 * unwind on error
1159 */ 1159 */
1160 shutdown: /* unwind lbmLogInit() */ 1160 shutdown: /* unwind lbmLogInit() */
1161 list_del(&log->journal_list); 1161 list_del(&log->journal_list);
@@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log)
1427 return 0; 1427 return 0;
1428 1428
1429 /* 1429 /*
1430 * unwind on error 1430 * unwind on error
1431 */ 1431 */
1432 errout30: /* release log page */ 1432 errout30: /* release log page */
1433 log->wqueue = NULL; 1433 log->wqueue = NULL;
@@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb)
1480 1480
1481 if (test_bit(log_INLINELOG, &log->flag)) { 1481 if (test_bit(log_INLINELOG, &log->flag)) {
1482 /* 1482 /*
1483 * in-line log in host file system 1483 * in-line log in host file system
1484 */ 1484 */
1485 rc = lmLogShutdown(log); 1485 rc = lmLogShutdown(log);
1486 kfree(log); 1486 kfree(log);
@@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb)
1504 goto out; 1504 goto out;
1505 1505
1506 /* 1506 /*
1507 * external log as separate logical volume 1507 * external log as separate logical volume
1508 */ 1508 */
1509 list_del(&log->journal_list); 1509 list_del(&log->journal_list);
1510 bdev = log->bdev; 1510 bdev = log->bdev;
@@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1622 if (!list_empty(&log->synclist)) { 1622 if (!list_empty(&log->synclist)) {
1623 struct logsyncblk *lp; 1623 struct logsyncblk *lp;
1624 1624
1625 printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1625 list_for_each_entry(lp, &log->synclist, synclist) { 1626 list_for_each_entry(lp, &log->synclist, synclist) {
1626 if (lp->xflag & COMMIT_PAGE) { 1627 if (lp->xflag & COMMIT_PAGE) {
1627 struct metapage *mp = (struct metapage *)lp; 1628 struct metapage *mp = (struct metapage *)lp;
1628 dump_mem("orphan metapage", lp, 1629 print_hex_dump(KERN_ERR, "metapage: ",
1629 sizeof(struct metapage)); 1630 DUMP_PREFIX_ADDRESS, 16, 4,
1630 dump_mem("page", mp->page, sizeof(struct page)); 1631 mp, sizeof(struct metapage), 0);
1631 } 1632 print_hex_dump(KERN_ERR, "page: ",
1632 else 1633 DUMP_PREFIX_ADDRESS, 16,
1633 dump_mem("orphan tblock", lp, 1634 sizeof(long), mp->page,
1634 sizeof(struct tblock)); 1635 sizeof(struct page), 0);
1636 } else
1637 print_hex_dump(KERN_ERR, "tblock:",
1638 DUMP_PREFIX_ADDRESS, 16, 4,
1639 lp, sizeof(struct tblock), 0);
1635 } 1640 }
1636 } 1641 }
1642#else
1643 WARN_ON(!list_empty(&log->synclist));
1637#endif 1644#endif
1638 //assert(list_empty(&log->synclist));
1639 clear_bit(log_FLUSH, &log->flag); 1645 clear_bit(log_FLUSH, &log->flag);
1640} 1646}
1641 1647
@@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log)
1723 * 1729 *
1724 * PARAMETER: log - pointer to logs inode. 1730 * PARAMETER: log - pointer to logs inode.
1725 * fsdev - kdev_t of filesystem. 1731 * fsdev - kdev_t of filesystem.
1726 * serial - pointer to returned log serial number 1732 * serial - pointer to returned log serial number
1727 * activate - insert/remove device from active list. 1733 * activate - insert/remove device from active list.
1728 * 1734 *
1729 * RETURN: 0 - success 1735 * RETURN: 0 - success
@@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp)
1963 * FUNCTION: add a log buffer to the log redrive list 1969 * FUNCTION: add a log buffer to the log redrive list
1964 * 1970 *
1965 * PARAMETER: 1971 * PARAMETER:
1966 * bp - log buffer 1972 * bp - log buffer
1967 * 1973 *
1968 * NOTES: 1974 * NOTES:
1969 * Takes log_redrive_lock. 1975 * Takes log_redrive_lock.
@@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2054 bp->l_flag = flag; 2060 bp->l_flag = flag;
2055 2061
2056 /* 2062 /*
2057 * insert bp at tail of write queue associated with log 2063 * insert bp at tail of write queue associated with log
2058 * 2064 *
2059 * (request is either for bp already/currently at head of queue 2065 * (request is either for bp already/currently at head of queue
2060 * or new bp to be inserted at tail) 2066 * or new bp to be inserted at tail)
@@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2117 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2123 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2118 2124
2119 /* 2125 /*
2120 * initiate pageout of the page 2126 * initiate pageout of the page
2121 */ 2127 */
2122 lbmStartIO(bp); 2128 lbmStartIO(bp);
2123} 2129}
@@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2128 * 2134 *
2129 * FUNCTION: Interface to DD strategy routine 2135 * FUNCTION: Interface to DD strategy routine
2130 * 2136 *
2131 * RETURN: none 2137 * RETURN: none
2132 * 2138 *
2133 * serialization: LCACHE_LOCK() is NOT held during log i/o; 2139 * serialization: LCACHE_LOCK() is NOT held during log i/o;
2134 */ 2140 */
@@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2222 bio_put(bio); 2228 bio_put(bio);
2223 2229
2224 /* 2230 /*
2225 * pagein completion 2231 * pagein completion
2226 */ 2232 */
2227 if (bp->l_flag & lbmREAD) { 2233 if (bp->l_flag & lbmREAD) {
2228 bp->l_flag &= ~lbmREAD; 2234 bp->l_flag &= ~lbmREAD;
@@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2236 } 2242 }
2237 2243
2238 /* 2244 /*
2239 * pageout completion 2245 * pageout completion
2240 * 2246 *
2241 * the bp at the head of write queue has completed pageout. 2247 * the bp at the head of write queue has completed pageout.
2242 * 2248 *
@@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2302 } 2308 }
2303 2309
2304 /* 2310 /*
2305 * synchronous pageout: 2311 * synchronous pageout:
2306 * 2312 *
2307 * buffer has not necessarily been removed from write queue 2313 * buffer has not necessarily been removed from write queue
2308 * (e.g., synchronous write of partial-page with COMMIT): 2314 * (e.g., synchronous write of partial-page with COMMIT):
@@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2316 } 2322 }
2317 2323
2318 /* 2324 /*
2319 * Group Commit pageout: 2325 * Group Commit pageout:
2320 */ 2326 */
2321 else if (bp->l_flag & lbmGC) { 2327 else if (bp->l_flag & lbmGC) {
2322 LCACHE_UNLOCK(flags); 2328 LCACHE_UNLOCK(flags);
@@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2324 } 2330 }
2325 2331
2326 /* 2332 /*
2327 * asynchronous pageout: 2333 * asynchronous pageout:
2328 * 2334 *
2329 * buffer must have been removed from write queue: 2335 * buffer must have been removed from write queue:
2330 * insert buffer at head of freelist where it can be recycled 2336 * insert buffer at head of freelist where it can be recycled
@@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg)
2375 * FUNCTION: format file system log 2381 * FUNCTION: format file system log
2376 * 2382 *
2377 * PARAMETERS: 2383 * PARAMETERS:
2378 * log - volume log 2384 * log - volume log
2379 * logAddress - start address of log space in FS block 2385 * logAddress - start address of log space in FS block
2380 * logSize - length of log space in FS block; 2386 * logSize - length of log space in FS block;
2381 * 2387 *
@@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2407 npages = logSize >> sbi->l2nbperpage; 2413 npages = logSize >> sbi->l2nbperpage;
2408 2414
2409 /* 2415 /*
2410 * log space: 2416 * log space:
2411 * 2417 *
2412 * page 0 - reserved; 2418 * page 0 - reserved;
2413 * page 1 - log superblock; 2419 * page 1 - log superblock;
2414 * page 2 - log data page: A SYNC log record is written 2420 * page 2 - log data page: A SYNC log record is written
2415 * into this page at logform time; 2421 * into this page at logform time;
2416 * pages 3-N - log data page: set to empty log data pages; 2422 * pages 3-N - log data page: set to empty log data pages;
2417 */ 2423 */
2418 /* 2424 /*
2419 * init log superblock: log page 1 2425 * init log superblock: log page 1
2420 */ 2426 */
2421 logsuper = (struct logsuper *) bp->l_ldata; 2427 logsuper = (struct logsuper *) bp->l_ldata;
2422 2428
@@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2436 goto exit; 2442 goto exit;
2437 2443
2438 /* 2444 /*
2439 * init pages 2 to npages-1 as log data pages: 2445 * init pages 2 to npages-1 as log data pages:
2440 * 2446 *
2441 * log page sequence number (lpsn) initialization: 2447 * log page sequence number (lpsn) initialization:
2442 * 2448 *
@@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2479 goto exit; 2485 goto exit;
2480 2486
2481 /* 2487 /*
2482 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) 2488 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
2483 */ 2489 */
2484 for (lspn = 0; lspn < npages - 3; lspn++) { 2490 for (lspn = 0; lspn < npages - 3; lspn++) {
2485 lp->h.page = lp->t.page = cpu_to_le32(lspn); 2491 lp->h.page = lp->t.page = cpu_to_le32(lspn);
@@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
2495 rc = 0; 2501 rc = 0;
2496exit: 2502exit:
2497 /* 2503 /*
2498 * finalize log 2504 * finalize log
2499 */ 2505 */
2500 /* release the buffer */ 2506 /* release the buffer */
2501 lbmFree(bp); 2507 lbmFree(bp);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index a53fb17ea219..1f85ef0ec045 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -144,7 +144,7 @@ struct logpage {
144 * 144 *
145 * (this comment should be rewritten !) 145 * (this comment should be rewritten !)
146 * jfs uses only "after" log records (only a single writer is allowed 146 * jfs uses only "after" log records (only a single writer is allowed
147 * in a page, pages are written to temporary paging space if 147 * in a page, pages are written to temporary paging space if
148 * they must be written to disk before commit, and i/o is 148 * they must be written to disk before commit, and i/o is
149 * scheduled for modified pages to their home location after 149 * scheduled for modified pages to their home location after
150 * the log records containing the after values and the commit 150 * the log records containing the after values and the commit
@@ -153,7 +153,7 @@ struct logpage {
153 * 153 *
154 * a log record consists of a data area of variable length followed by 154 * a log record consists of a data area of variable length followed by
155 * a descriptor of fixed size LOGRDSIZE bytes. 155 * a descriptor of fixed size LOGRDSIZE bytes.
156 * the data area is rounded up to an integral number of 4-bytes and 156 * the data area is rounded up to an integral number of 4-bytes and
157 * must be no longer than LOGPSIZE. 157 * must be no longer than LOGPSIZE.
158 * the descriptor is of size of multiple of 4-bytes and aligned on a 158 * the descriptor is of size of multiple of 4-bytes and aligned on a
159 * 4-byte boundary. 159 * 4-byte boundary.
@@ -215,13 +215,13 @@ struct lrd {
215 union { 215 union {
216 216
217 /* 217 /*
218 * COMMIT: commit 218 * COMMIT: commit
219 * 219 *
220 * transaction commit: no type-dependent information; 220 * transaction commit: no type-dependent information;
221 */ 221 */
222 222
223 /* 223 /*
224 * REDOPAGE: after-image 224 * REDOPAGE: after-image
225 * 225 *
226 * apply after-image; 226 * apply after-image;
227 * 227 *
@@ -236,7 +236,7 @@ struct lrd {
236 } redopage; /* (20) */ 236 } redopage; /* (20) */
237 237
238 /* 238 /*
239 * NOREDOPAGE: the page is freed 239 * NOREDOPAGE: the page is freed
240 * 240 *
241 * do not apply after-image records which precede this record 241 * do not apply after-image records which precede this record
242 * in the log with the same page block number to this page. 242 * in the log with the same page block number to this page.
@@ -252,7 +252,7 @@ struct lrd {
252 } noredopage; /* (20) */ 252 } noredopage; /* (20) */
253 253
254 /* 254 /*
255 * UPDATEMAP: update block allocation map 255 * UPDATEMAP: update block allocation map
256 * 256 *
257 * either in-line PXD, 257 * either in-line PXD,
258 * or out-of-line XADLIST; 258 * or out-of-line XADLIST;
@@ -268,7 +268,7 @@ struct lrd {
268 } updatemap; /* (20) */ 268 } updatemap; /* (20) */
269 269
270 /* 270 /*
271 * NOREDOINOEXT: the inode extent is freed 271 * NOREDOINOEXT: the inode extent is freed
272 * 272 *
273 * do not apply after-image records which precede this 273 * do not apply after-image records which precede this
274 * record in the log with any of the 4 page block 274 * record in the log with any of the 4 page block
@@ -286,7 +286,7 @@ struct lrd {
286 } noredoinoext; /* (20) */ 286 } noredoinoext; /* (20) */
287 287
288 /* 288 /*
289 * SYNCPT: log sync point 289 * SYNCPT: log sync point
290 * 290 *
291 * replay log up to syncpt address specified; 291 * replay log up to syncpt address specified;
292 */ 292 */
@@ -295,13 +295,13 @@ struct lrd {
295 } syncpt; 295 } syncpt;
296 296
297 /* 297 /*
298 * MOUNT: file system mount 298 * MOUNT: file system mount
299 * 299 *
300 * file system mount: no type-dependent information; 300 * file system mount: no type-dependent information;
301 */ 301 */
302 302
303 /* 303 /*
304 * ? FREEXTENT: free specified extent(s) 304 * ? FREEXTENT: free specified extent(s)
305 * 305 *
306 * free specified extent(s) from block allocation map 306 * free specified extent(s) from block allocation map
307 * N.B.: nextents should be length of data/sizeof(xad_t) 307 * N.B.: nextents should be length of data/sizeof(xad_t)
@@ -314,7 +314,7 @@ struct lrd {
314 } freextent; 314 } freextent;
315 315
316 /* 316 /*
317 * ? NOREDOFILE: this file is freed 317 * ? NOREDOFILE: this file is freed
318 * 318 *
319 * do not apply records which precede this record in the log 319 * do not apply records which precede this record in the log
320 * with the same inode number. 320 * with the same inode number.
@@ -330,7 +330,7 @@ struct lrd {
330 } noredofile; 330 } noredofile;
331 331
332 /* 332 /*
333 * ? NEWPAGE: 333 * ? NEWPAGE:
334 * 334 *
335 * metadata type dependent 335 * metadata type dependent
336 */ 336 */
@@ -342,7 +342,7 @@ struct lrd {
342 } newpage; 342 } newpage;
343 343
344 /* 344 /*
345 * ? DUMMY: filler 345 * ? DUMMY: filler
346 * 346 *
347 * no type-dependent information 347 * no type-dependent information
348 */ 348 */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 43d4f69afbec..77c7f1129dde 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -472,7 +472,8 @@ add_failed:
472 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); 472 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
473 goto skip; 473 goto skip;
474dump_bio: 474dump_bio:
475 dump_mem("bio", bio, sizeof(*bio)); 475 print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
476 4, bio, sizeof(*bio), 0);
476skip: 477skip:
477 bio_put(bio); 478 bio_put(bio);
478 unlock_page(page); 479 unlock_page(page);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 4dd479834897..644429acb8c0 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb);
80 */ 80 */
81int jfs_mount(struct super_block *sb) 81int jfs_mount(struct super_block *sb)
82{ 82{
83 int rc = 0; /* Return code */ 83 int rc = 0; /* Return code */
84 struct jfs_sb_info *sbi = JFS_SBI(sb); 84 struct jfs_sb_info *sbi = JFS_SBI(sb);
85 struct inode *ipaimap = NULL; 85 struct inode *ipaimap = NULL;
86 struct inode *ipaimap2 = NULL; 86 struct inode *ipaimap2 = NULL;
@@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb)
169 sbi->ipaimap2 = NULL; 169 sbi->ipaimap2 = NULL;
170 170
171 /* 171 /*
172 * mount (the only/single) fileset 172 * mount (the only/single) fileset
173 */ 173 */
174 /* 174 /*
175 * open fileset inode allocation map (aka fileset inode) 175 * open fileset inode allocation map (aka fileset inode)
@@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb)
195 goto out; 195 goto out;
196 196
197 /* 197 /*
198 * unwind on error 198 * unwind on error
199 */ 199 */
200 errout41: /* close fileset inode allocation map inode */ 200 errout41: /* close fileset inode allocation map inode */
201 diFreeSpecial(ipimap); 201 diFreeSpecial(ipimap);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 25430d0b0d59..7aa1f7004eaf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -18,7 +18,7 @@
18 */ 18 */
19 19
20/* 20/*
21 * jfs_txnmgr.c: transaction manager 21 * jfs_txnmgr.c: transaction manager
22 * 22 *
23 * notes: 23 * notes:
24 * transaction starts with txBegin() and ends with txCommit() 24 * transaction starts with txBegin() and ends with txCommit()
@@ -60,7 +60,7 @@
60#include "jfs_debug.h" 60#include "jfs_debug.h"
61 61
62/* 62/*
63 * transaction management structures 63 * transaction management structures
64 */ 64 */
65static struct { 65static struct {
66 int freetid; /* index of a free tid structure */ 66 int freetid; /* index of a free tid structure */
@@ -103,19 +103,19 @@ module_param(nTxLock, int, 0);
103MODULE_PARM_DESC(nTxLock, 103MODULE_PARM_DESC(nTxLock,
104 "Number of transaction locks (max:65536)"); 104 "Number of transaction locks (max:65536)");
105 105
106struct tblock *TxBlock; /* transaction block table */ 106struct tblock *TxBlock; /* transaction block table */
107static int TxLockLWM; /* Low water mark for number of txLocks used */ 107static int TxLockLWM; /* Low water mark for number of txLocks used */
108static int TxLockHWM; /* High water mark for number of txLocks used */ 108static int TxLockHWM; /* High water mark for number of txLocks used */
109static int TxLockVHWM; /* Very High water mark */ 109static int TxLockVHWM; /* Very High water mark */
110struct tlock *TxLock; /* transaction lock table */ 110struct tlock *TxLock; /* transaction lock table */
111 111
112/* 112/*
113 * transaction management lock 113 * transaction management lock
114 */ 114 */
115static DEFINE_SPINLOCK(jfsTxnLock); 115static DEFINE_SPINLOCK(jfsTxnLock);
116 116
117#define TXN_LOCK() spin_lock(&jfsTxnLock) 117#define TXN_LOCK() spin_lock(&jfsTxnLock)
118#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) 118#define TXN_UNLOCK() spin_unlock(&jfsTxnLock)
119 119
120#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); 120#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock);
121#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) 121#define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags)
@@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
148#define TXN_WAKEUP(event) wake_up_all(event) 148#define TXN_WAKEUP(event) wake_up_all(event)
149 149
150/* 150/*
151 * statistics 151 * statistics
152 */ 152 */
153static struct { 153static struct {
154 tid_t maxtid; /* 4: biggest tid ever used */ 154 tid_t maxtid; /* 4: biggest tid ever used */
@@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
181static void LogSyncRelease(struct metapage * mp); 181static void LogSyncRelease(struct metapage * mp);
182 182
183/* 183/*
184 * transaction block/lock management 184 * transaction block/lock management
185 * --------------------------------- 185 * ---------------------------------
186 */ 186 */
187 187
188/* 188/*
@@ -227,9 +227,9 @@ static void txLockFree(lid_t lid)
227} 227}
228 228
229/* 229/*
230 * NAME: txInit() 230 * NAME: txInit()
231 * 231 *
232 * FUNCTION: initialize transaction management structures 232 * FUNCTION: initialize transaction management structures
233 * 233 *
234 * RETURN: 234 * RETURN:
235 * 235 *
@@ -333,9 +333,9 @@ int txInit(void)
333} 333}
334 334
335/* 335/*
336 * NAME: txExit() 336 * NAME: txExit()
337 * 337 *
338 * FUNCTION: clean up when module is unloaded 338 * FUNCTION: clean up when module is unloaded
339 */ 339 */
340void txExit(void) 340void txExit(void)
341{ 341{
@@ -346,12 +346,12 @@ void txExit(void)
346} 346}
347 347
348/* 348/*
349 * NAME: txBegin() 349 * NAME: txBegin()
350 * 350 *
351 * FUNCTION: start a transaction. 351 * FUNCTION: start a transaction.
352 * 352 *
353 * PARAMETER: sb - superblock 353 * PARAMETER: sb - superblock
354 * flag - force for nested tx; 354 * flag - force for nested tx;
355 * 355 *
356 * RETURN: tid - transaction id 356 * RETURN: tid - transaction id
357 * 357 *
@@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag)
447} 447}
448 448
449/* 449/*
450 * NAME: txBeginAnon() 450 * NAME: txBeginAnon()
451 * 451 *
452 * FUNCTION: start an anonymous transaction. 452 * FUNCTION: start an anonymous transaction.
453 * Blocks if logsync or available tlocks are low to prevent 453 * Blocks if logsync or available tlocks are low to prevent
454 * anonymous tlocks from depleting supply. 454 * anonymous tlocks from depleting supply.
455 * 455 *
456 * PARAMETER: sb - superblock 456 * PARAMETER: sb - superblock
457 * 457 *
458 * RETURN: none 458 * RETURN: none
459 */ 459 */
@@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb)
489} 489}
490 490
491/* 491/*
492 * txEnd() 492 * txEnd()
493 * 493 *
494 * function: free specified transaction block. 494 * function: free specified transaction block.
495 * 495 *
496 * logsync barrier processing: 496 * logsync barrier processing:
497 * 497 *
498 * serialization: 498 * serialization:
499 */ 499 */
@@ -577,13 +577,13 @@ wakeup:
577} 577}
578 578
579/* 579/*
580 * txLock() 580 * txLock()
581 * 581 *
582 * function: acquire a transaction lock on the specified <mp> 582 * function: acquire a transaction lock on the specified <mp>
583 * 583 *
584 * parameter: 584 * parameter:
585 * 585 *
586 * return: transaction lock id 586 * return: transaction lock id
587 * 587 *
588 * serialization: 588 * serialization:
589 */ 589 */
@@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
829 /* Only locks on ipimap or ipaimap should reach here */ 829 /* Only locks on ipimap or ipaimap should reach here */
830 /* assert(jfs_ip->fileset == AGGREGATE_I); */ 830 /* assert(jfs_ip->fileset == AGGREGATE_I); */
831 if (jfs_ip->fileset != AGGREGATE_I) { 831 if (jfs_ip->fileset != AGGREGATE_I) {
832 jfs_err("txLock: trying to lock locked page!"); 832 printk(KERN_ERR "txLock: trying to lock locked page!");
833 dump_mem("ip", ip, sizeof(struct inode)); 833 print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
834 dump_mem("mp", mp, sizeof(struct metapage)); 834 ip, sizeof(*ip), 0);
835 dump_mem("Locker's tblk", tid_to_tblock(tid), 835 print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
836 sizeof(struct tblock)); 836 mp, sizeof(*mp), 0);
837 dump_mem("Tlock", tlck, sizeof(struct tlock)); 837 print_hex_dump(KERN_ERR, "Locker's tblock: ",
838 DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
839 sizeof(struct tblock), 0);
840 print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
841 tlck, sizeof(*tlck), 0);
838 BUG(); 842 BUG();
839 } 843 }
840 INCREMENT(stattx.waitlock); /* statistics */ 844 INCREMENT(stattx.waitlock); /* statistics */
@@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
857} 861}
858 862
859/* 863/*
860 * NAME: txRelease() 864 * NAME: txRelease()
861 * 865 *
862 * FUNCTION: Release buffers associated with transaction locks, but don't 866 * FUNCTION: Release buffers associated with transaction locks, but don't
863 * mark homeok yet. This allows other transactions to modify 867 * mark homeok yet. This allows other transactions to modify
864 * buffers, but won't let them go to disk until commit record 868 * buffers, but won't let them go to disk until commit record
865 * actually gets written. 869 * actually gets written.
866 * 870 *
867 * PARAMETER: 871 * PARAMETER:
868 * tblk - 872 * tblk -
869 * 873 *
870 * RETURN: Errors from subroutines. 874 * RETURN: Errors from subroutines.
871 */ 875 */
872static void txRelease(struct tblock * tblk) 876static void txRelease(struct tblock * tblk)
873{ 877{
@@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk)
896} 900}
897 901
898/* 902/*
899 * NAME: txUnlock() 903 * NAME: txUnlock()
900 * 904 *
901 * FUNCTION: Initiates pageout of pages modified by tid in journalled 905 * FUNCTION: Initiates pageout of pages modified by tid in journalled
902 * objects and frees their lockwords. 906 * objects and frees their lockwords.
903 */ 907 */
904static void txUnlock(struct tblock * tblk) 908static void txUnlock(struct tblock * tblk)
905{ 909{
@@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk)
983} 987}
984 988
985/* 989/*
986 * txMaplock() 990 * txMaplock()
987 * 991 *
988 * function: allocate a transaction lock for freed page/entry; 992 * function: allocate a transaction lock for freed page/entry;
989 * for freed page, maplock is used as xtlock/dtlock type; 993 * for freed page, maplock is used as xtlock/dtlock type;
990 */ 994 */
991struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) 995struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
992{ 996{
@@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
1057} 1061}
1058 1062
1059/* 1063/*
1060 * txLinelock() 1064 * txLinelock()
1061 * 1065 *
1062 * function: allocate a transaction lock for log vector list 1066 * function: allocate a transaction lock for log vector list
1063 */ 1067 */
@@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock)
1092} 1096}
1093 1097
1094/* 1098/*
1095 * transaction commit management 1099 * transaction commit management
1096 * ----------------------------- 1100 * -----------------------------
1097 */ 1101 */
1098 1102
1099/* 1103/*
1100 * NAME: txCommit() 1104 * NAME: txCommit()
1101 * 1105 *
1102 * FUNCTION: commit the changes to the objects specified in 1106 * FUNCTION: commit the changes to the objects specified in
1103 * clist. For journalled segments only the 1107 * clist. For journalled segments only the
1104 * changes of the caller are committed, ie by tid. 1108 * changes of the caller are committed, ie by tid.
1105 * for non-journalled segments the data are flushed to 1109 * for non-journalled segments the data are flushed to
1106 * disk and then the change to the disk inode and indirect 1110 * disk and then the change to the disk inode and indirect
1107 * blocks committed (so blocks newly allocated to the 1111 * blocks committed (so blocks newly allocated to the
1108 * segment will be made a part of the segment atomically). 1112 * segment will be made a part of the segment atomically).
1109 * 1113 *
1110 * all of the segments specified in clist must be in 1114 * all of the segments specified in clist must be in
1111 * one file system. no more than 6 segments are needed 1115 * one file system. no more than 6 segments are needed
1112 * to handle all unix svcs. 1116 * to handle all unix svcs.
1113 * 1117 *
1114 * if the i_nlink field (i.e. disk inode link count) 1118 * if the i_nlink field (i.e. disk inode link count)
1115 * is zero, and the type of inode is a regular file or 1119 * is zero, and the type of inode is a regular file or
1116 * directory, or symbolic link , the inode is truncated 1120 * directory, or symbolic link , the inode is truncated
1117 * to zero length. the truncation is committed but the 1121 * to zero length. the truncation is committed but the
1118 * VM resources are unaffected until it is closed (see 1122 * VM resources are unaffected until it is closed (see
1119 * iput and iclose). 1123 * iput and iclose).
1120 * 1124 *
1121 * PARAMETER: 1125 * PARAMETER:
1122 * 1126 *
1123 * RETURN: 1127 * RETURN:
1124 * 1128 *
1125 * serialization: 1129 * serialization:
1126 * on entry the inode lock on each segment is assumed 1130 * on entry the inode lock on each segment is assumed
1127 * to be held. 1131 * to be held.
1128 * 1132 *
1129 * i/o error: 1133 * i/o error:
1130 */ 1134 */
@@ -1175,7 +1179,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1175 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) 1179 if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
1176 tblk->xflag |= COMMIT_LAZY; 1180 tblk->xflag |= COMMIT_LAZY;
1177 /* 1181 /*
1178 * prepare non-journaled objects for commit 1182 * prepare non-journaled objects for commit
1179 * 1183 *
1180 * flush data pages of non-journaled file 1184 * flush data pages of non-journaled file
1181 * to prevent the file getting non-initialized disk blocks 1185 * to prevent the file getting non-initialized disk blocks
@@ -1186,7 +1190,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1186 cd.nip = nip; 1190 cd.nip = nip;
1187 1191
1188 /* 1192 /*
1189 * acquire transaction lock on (on-disk) inodes 1193 * acquire transaction lock on (on-disk) inodes
1190 * 1194 *
1191 * update on-disk inode from in-memory inode 1195 * update on-disk inode from in-memory inode
1192 * acquiring transaction locks for AFTER records 1196 * acquiring transaction locks for AFTER records
@@ -1262,7 +1266,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1262 } 1266 }
1263 1267
1264 /* 1268 /*
1265 * write log records from transaction locks 1269 * write log records from transaction locks
1266 * 1270 *
1267 * txUpdateMap() resets XAD_NEW in XAD. 1271 * txUpdateMap() resets XAD_NEW in XAD.
1268 */ 1272 */
@@ -1294,7 +1298,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1294 !test_cflag(COMMIT_Nolink, tblk->u.ip))); 1298 !test_cflag(COMMIT_Nolink, tblk->u.ip)));
1295 1299
1296 /* 1300 /*
1297 * write COMMIT log record 1301 * write COMMIT log record
1298 */ 1302 */
1299 lrd->type = cpu_to_le16(LOG_COMMIT); 1303 lrd->type = cpu_to_le16(LOG_COMMIT);
1300 lrd->length = 0; 1304 lrd->length = 0;
@@ -1303,7 +1307,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1303 lmGroupCommit(log, tblk); 1307 lmGroupCommit(log, tblk);
1304 1308
1305 /* 1309 /*
1306 * - transaction is now committed - 1310 * - transaction is now committed -
1307 */ 1311 */
1308 1312
1309 /* 1313 /*
@@ -1314,11 +1318,11 @@ int txCommit(tid_t tid, /* transaction identifier */
1314 txForce(tblk); 1318 txForce(tblk);
1315 1319
1316 /* 1320 /*
1317 * update allocation map. 1321 * update allocation map.
1318 * 1322 *
1319 * update inode allocation map and inode: 1323 * update inode allocation map and inode:
1320 * free pager lock on memory object of inode if any. 1324 * free pager lock on memory object of inode if any.
1321 * update block allocation map. 1325 * update block allocation map.
1322 * 1326 *
1323 * txUpdateMap() resets XAD_NEW in XAD. 1327 * txUpdateMap() resets XAD_NEW in XAD.
1324 */ 1328 */
@@ -1326,7 +1330,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1326 txUpdateMap(tblk); 1330 txUpdateMap(tblk);
1327 1331
1328 /* 1332 /*
1329 * free transaction locks and pageout/free pages 1333 * free transaction locks and pageout/free pages
1330 */ 1334 */
1331 txRelease(tblk); 1335 txRelease(tblk);
1332 1336
@@ -1335,7 +1339,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1335 1339
1336 1340
1337 /* 1341 /*
1338 * reset in-memory object state 1342 * reset in-memory object state
1339 */ 1343 */
1340 for (k = 0; k < cd.nip; k++) { 1344 for (k = 0; k < cd.nip; k++) {
1341 ip = cd.iplist[k]; 1345 ip = cd.iplist[k];
@@ -1358,11 +1362,11 @@ int txCommit(tid_t tid, /* transaction identifier */
1358} 1362}
1359 1363
1360/* 1364/*
1361 * NAME: txLog() 1365 * NAME: txLog()
1362 * 1366 *
1363 * FUNCTION: Writes AFTER log records for all lines modified 1367 * FUNCTION: Writes AFTER log records for all lines modified
1364 * by tid for segments specified by inodes in comdata. 1368 * by tid for segments specified by inodes in comdata.
1365 * Code assumes only WRITELOCKS are recorded in lockwords. 1369 * Code assumes only WRITELOCKS are recorded in lockwords.
1366 * 1370 *
1367 * PARAMETERS: 1371 * PARAMETERS:
1368 * 1372 *
@@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
1421} 1425}
1422 1426
1423/* 1427/*
1424 * diLog() 1428 * diLog()
1425 * 1429 *
1426 * function: log inode tlock and format maplock to update bmap; 1430 * function: log inode tlock and format maplock to update bmap;
1427 */ 1431 */
1428static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1432static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1429 struct tlock * tlck, struct commit * cd) 1433 struct tlock * tlck, struct commit * cd)
1430{ 1434{
1431 int rc = 0; 1435 int rc = 0;
1432 struct metapage *mp; 1436 struct metapage *mp;
@@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1442 pxd = &lrd->log.redopage.pxd; 1446 pxd = &lrd->log.redopage.pxd;
1443 1447
1444 /* 1448 /*
1445 * inode after image 1449 * inode after image
1446 */ 1450 */
1447 if (tlck->type & tlckENTRY) { 1451 if (tlck->type & tlckENTRY) {
1448 /* log after-image for logredo(): */ 1452 /* log after-image for logredo(): */
@@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1456 tlck->flag |= tlckWRITEPAGE; 1460 tlck->flag |= tlckWRITEPAGE;
1457 } else if (tlck->type & tlckFREE) { 1461 } else if (tlck->type & tlckFREE) {
1458 /* 1462 /*
1459 * free inode extent 1463 * free inode extent
1460 * 1464 *
1461 * (pages of the freed inode extent have been invalidated and 1465 * (pages of the freed inode extent have been invalidated and
1462 * a maplock for free of the extent has been formatted at 1466 * a maplock for free of the extent has been formatted at
@@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1498 jfs_err("diLog: UFO type tlck:0x%p", tlck); 1502 jfs_err("diLog: UFO type tlck:0x%p", tlck);
1499#ifdef _JFS_WIP 1503#ifdef _JFS_WIP
1500 /* 1504 /*
1501 * alloc/free external EA extent 1505 * alloc/free external EA extent
1502 * 1506 *
1503 * a maplock for txUpdateMap() to update bPWMAP for alloc/free 1507 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
1504 * of the extent has been formatted at txLock() time; 1508 * of the extent has been formatted at txLock() time;
@@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1534} 1538}
1535 1539
1536/* 1540/*
1537 * dataLog() 1541 * dataLog()
1538 * 1542 *
1539 * function: log data tlock 1543 * function: log data tlock
1540 */ 1544 */
1541static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1545static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1542 struct tlock * tlck) 1546 struct tlock * tlck)
@@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1580} 1584}
1581 1585
1582/* 1586/*
1583 * dtLog() 1587 * dtLog()
1584 * 1588 *
1585 * function: log dtree tlock and format maplock to update bmap; 1589 * function: log dtree tlock and format maplock to update bmap;
1586 */ 1590 */
1587static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1591static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1588 struct tlock * tlck) 1592 struct tlock * tlck)
@@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1603 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1607 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
1604 1608
1605 /* 1609 /*
1606 * page extension via relocation: entry insertion; 1610 * page extension via relocation: entry insertion;
1607 * page extension in-place: entry insertion; 1611 * page extension in-place: entry insertion;
1608 * new right page from page split, reinitialized in-line 1612 * new right page from page split, reinitialized in-line
1609 * root from root page split: entry insertion; 1613 * root from root page split: entry insertion;
1610 */ 1614 */
1611 if (tlck->type & (tlckNEW | tlckEXTEND)) { 1615 if (tlck->type & (tlckNEW | tlckEXTEND)) {
1612 /* log after-image of the new page for logredo(): 1616 /* log after-image of the new page for logredo():
@@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1641 } 1645 }
1642 1646
1643 /* 1647 /*
1644 * entry insertion/deletion, 1648 * entry insertion/deletion,
1645 * sibling page link update (old right page before split); 1649 * sibling page link update (old right page before split);
1646 */ 1650 */
1647 if (tlck->type & (tlckENTRY | tlckRELINK)) { 1651 if (tlck->type & (tlckENTRY | tlckRELINK)) {
1648 /* log after-image for logredo(): */ 1652 /* log after-image for logredo(): */
@@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1658 } 1662 }
1659 1663
1660 /* 1664 /*
1661 * page deletion: page has been invalidated 1665 * page deletion: page has been invalidated
1662 * page relocation: source extent 1666 * page relocation: source extent
1663 * 1667 *
1664 * a maplock for free of the page has been formatted 1668 * a maplock for free of the page has been formatted
1665 * at txLock() time); 1669 * at txLock() time);
1666 */ 1670 */
1667 if (tlck->type & (tlckFREE | tlckRELOCATE)) { 1671 if (tlck->type & (tlckFREE | tlckRELOCATE)) {
1668 /* log LOG_NOREDOPAGE of the deleted page for logredo() 1672 /* log LOG_NOREDOPAGE of the deleted page for logredo()
@@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1683} 1687}
1684 1688
1685/* 1689/*
1686 * xtLog() 1690 * xtLog()
1687 * 1691 *
1688 * function: log xtree tlock and format maplock to update bmap; 1692 * function: log xtree tlock and format maplock to update bmap;
1689 */ 1693 */
1690static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 1694static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1691 struct tlock * tlck) 1695 struct tlock * tlck)
@@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1725 xadlock = (struct xdlistlock *) maplock; 1729 xadlock = (struct xdlistlock *) maplock;
1726 1730
1727 /* 1731 /*
1728 * entry insertion/extension; 1732 * entry insertion/extension;
1729 * sibling page link update (old right page before split); 1733 * sibling page link update (old right page before split);
1730 */ 1734 */
1731 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { 1735 if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
1732 /* log after-image for logredo(): 1736 /* log after-image for logredo():
@@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1801 } 1805 }
1802 1806
1803 /* 1807 /*
1804 * page deletion: file deletion/truncation (ref. xtTruncate()) 1808 * page deletion: file deletion/truncation (ref. xtTruncate())
1805 * 1809 *
1806 * (page will be invalidated after log is written and bmap 1810 * (page will be invalidated after log is written and bmap
1807 * is updated from the page); 1811 * is updated from the page);
@@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1908 } 1912 }
1909 1913
1910 /* 1914 /*
1911 * page/entry truncation: file truncation (ref. xtTruncate()) 1915 * page/entry truncation: file truncation (ref. xtTruncate())
1912 * 1916 *
1913 * |----------+------+------+---------------| 1917 * |----------+------+------+---------------|
1914 * | | | 1918 * | | |
1915 * | | hwm - hwm before truncation 1919 * | | hwm - hwm before truncation
1916 * | next - truncation point 1920 * | next - truncation point
1917 * lwm - lwm before truncation 1921 * lwm - lwm before truncation
1918 * header ? 1922 * header ?
1919 */ 1923 */
1920 if (tlck->type & tlckTRUNCATE) { 1924 if (tlck->type & tlckTRUNCATE) {
@@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1937 twm = xtlck->twm.offset; 1941 twm = xtlck->twm.offset;
1938 1942
1939 /* 1943 /*
1940 * write log records 1944 * write log records
1941 */ 1945 */
1942 /* log after-image for logredo(): 1946 /* log after-image for logredo():
1943 * 1947 *
@@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1997 } 2001 }
1998 2002
1999 /* 2003 /*
2000 * format maplock(s) for txUpdateMap() to update bmap 2004 * format maplock(s) for txUpdateMap() to update bmap
2001 */ 2005 */
2002 maplock->index = 0; 2006 maplock->index = 0;
2003 2007
@@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2069} 2073}
2070 2074
2071/* 2075/*
2072 * mapLog() 2076 * mapLog()
2073 * 2077 *
2074 * function: log from maplock of freed data extents; 2078 * function: log from maplock of freed data extents;
2075 */ 2079 */
2076static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 2080static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2077 struct tlock * tlck) 2081 struct tlock * tlck)
@@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2081 pxd_t *pxd; 2085 pxd_t *pxd;
2082 2086
2083 /* 2087 /*
2084 * page relocation: free the source page extent 2088 * page relocation: free the source page extent
2085 * 2089 *
2086 * a maplock for txUpdateMap() for free of the page 2090 * a maplock for txUpdateMap() for free of the page
2087 * has been formatted at txLock() time saving the src 2091 * has been formatted at txLock() time saving the src
@@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2155} 2159}
2156 2160
2157/* 2161/*
2158 * txEA() 2162 * txEA()
2159 * 2163 *
2160 * function: acquire maplock for EA/ACL extents or 2164 * function: acquire maplock for EA/ACL extents or
2161 * set COMMIT_INLINE flag; 2165 * set COMMIT_INLINE flag;
2162 */ 2166 */
2163void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) 2167void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2164{ 2168{
@@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
2207} 2211}
2208 2212
2209/* 2213/*
2210 * txForce() 2214 * txForce()
2211 * 2215 *
2212 * function: synchronously write pages locked by transaction 2216 * function: synchronously write pages locked by transaction
2213 * after txLog() but before txUpdateMap(); 2217 * after txLog() but before txUpdateMap();
2214 */ 2218 */
2215static void txForce(struct tblock * tblk) 2219static void txForce(struct tblock * tblk)
2216{ 2220{
@@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk)
2273} 2277}
2274 2278
2275/* 2279/*
2276 * txUpdateMap() 2280 * txUpdateMap()
2277 * 2281 *
2278 * function: update persistent allocation map (and working map 2282 * function: update persistent allocation map (and working map
2279 * if appropriate); 2283 * if appropriate);
2280 * 2284 *
2281 * parameter: 2285 * parameter:
2282 */ 2286 */
@@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk)
2298 2302
2299 2303
2300 /* 2304 /*
2301 * update block allocation map 2305 * update block allocation map
2302 * 2306 *
2303 * update allocation state in pmap (and wmap) and 2307 * update allocation state in pmap (and wmap) and
2304 * update lsn of the pmap page; 2308 * update lsn of the pmap page;
@@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk)
2382 } 2386 }
2383 } 2387 }
2384 /* 2388 /*
2385 * update inode allocation map 2389 * update inode allocation map
2386 * 2390 *
2387 * update allocation state in pmap and 2391 * update allocation state in pmap and
2388 * update lsn of the pmap page; 2392 * update lsn of the pmap page;
@@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk)
2407} 2411}
2408 2412
2409/* 2413/*
2410 * txAllocPMap() 2414 * txAllocPMap()
2411 * 2415 *
2412 * function: allocate from persistent map; 2416 * function: allocate from persistent map;
2413 * 2417 *
2414 * parameter: 2418 * parameter:
2415 * ipbmap - 2419 * ipbmap -
2416 * malock - 2420 * malock -
2417 * xad list: 2421 * xad list:
2418 * pxd: 2422 * pxd:
2419 * 2423 *
2420 * maptype - 2424 * maptype -
2421 * allocate from persistent map; 2425 * allocate from persistent map;
2422 * free from persistent map; 2426 * free from persistent map;
2423 * (e.g., tmp file - free from working map at releae 2427 * (e.g., tmp file - free from working map at releae
2424 * of last reference); 2428 * of last reference);
2425 * free from persistent and working map; 2429 * free from persistent and working map;
2426 * 2430 *
2427 * lsn - log sequence number; 2431 * lsn - log sequence number;
2428 */ 2432 */
2429static void txAllocPMap(struct inode *ip, struct maplock * maplock, 2433static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2430 struct tblock * tblk) 2434 struct tblock * tblk)
@@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2478} 2482}
2479 2483
2480/* 2484/*
2481 * txFreeMap() 2485 * txFreeMap()
2482 * 2486 *
2483 * function: free from persistent and/or working map; 2487 * function: free from persistent and/or working map;
2484 * 2488 *
2485 * todo: optimization 2489 * todo: optimization
2486 */ 2490 */
@@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip,
2579} 2583}
2580 2584
2581/* 2585/*
2582 * txFreelock() 2586 * txFreelock()
2583 * 2587 *
2584 * function: remove tlock from inode anonymous locklist 2588 * function: remove tlock from inode anonymous locklist
2585 */ 2589 */
2586void txFreelock(struct inode *ip) 2590void txFreelock(struct inode *ip)
2587{ 2591{
@@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip)
2619} 2623}
2620 2624
2621/* 2625/*
2622 * txAbort() 2626 * txAbort()
2623 * 2627 *
2624 * function: abort tx before commit; 2628 * function: abort tx before commit;
2625 * 2629 *
@@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty)
2679} 2683}
2680 2684
2681/* 2685/*
2682 * txLazyCommit(void) 2686 * txLazyCommit(void)
2683 * 2687 *
2684 * All transactions except those changing ipimap (COMMIT_FORCE) are 2688 * All transactions except those changing ipimap (COMMIT_FORCE) are
2685 * processed by this routine. This insures that the inode and block 2689 * processed by this routine. This insures that the inode and block
@@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk)
2728} 2732}
2729 2733
2730/* 2734/*
2731 * jfs_lazycommit(void) 2735 * jfs_lazycommit(void)
2732 * 2736 *
2733 * To be run as a kernel daemon. If lbmIODone is called in an interrupt 2737 * To be run as a kernel daemon. If lbmIODone is called in an interrupt
2734 * context, or where blocking is not wanted, this routine will process 2738 * context, or where blocking is not wanted, this routine will process
@@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb)
2913} 2917}
2914 2918
2915/* 2919/*
2916 * jfs_sync(void) 2920 * jfs_sync(void)
2917 * 2921 *
2918 * To be run as a kernel daemon. This is awakened when tlocks run low. 2922 * To be run as a kernel daemon. This is awakened when tlocks run low.
2919 * We write any inodes that have anonymous tlocks so they will become 2923 * We write any inodes that have anonymous tlocks so they will become
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 7863cf21afca..ab7288937019 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -94,7 +94,7 @@ extern struct tblock *TxBlock; /* transaction block table */
94 */ 94 */
95struct tlock { 95struct tlock {
96 lid_t next; /* 2: index next lockword on tid locklist 96 lid_t next; /* 2: index next lockword on tid locklist
97 * next lockword on freelist 97 * next lockword on freelist
98 */ 98 */
99 tid_t tid; /* 2: transaction id holding lock */ 99 tid_t tid; /* 2: transaction id holding lock */
100 100
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 09b252958687..649f9817accd 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -21,7 +21,7 @@
21/* 21/*
22 * jfs_types.h: 22 * jfs_types.h:
23 * 23 *
24 * basic type/utility definitions 24 * basic type/utility definitions
25 * 25 *
26 * note: this header file must be the 1st include file 26 * note: this header file must be the 1st include file
27 * of JFS include list in all JFS .c file. 27 * of JFS include list in all JFS .c file.
@@ -54,8 +54,8 @@ struct timestruc_t {
54 */ 54 */
55 55
56#define LEFTMOSTONE 0x80000000 56#define LEFTMOSTONE 0x80000000
57#define HIGHORDER 0x80000000u /* high order bit on */ 57#define HIGHORDER 0x80000000u /* high order bit on */
58#define ONES 0xffffffffu /* all bit on */ 58#define ONES 0xffffffffu /* all bit on */
59 59
60/* 60/*
61 * logical xd (lxd) 61 * logical xd (lxd)
@@ -148,7 +148,7 @@ typedef struct {
148#define sizeDXD(dxd) le32_to_cpu((dxd)->size) 148#define sizeDXD(dxd) le32_to_cpu((dxd)->size)
149 149
150/* 150/*
151 * directory entry argument 151 * directory entry argument
152 */ 152 */
153struct component_name { 153struct component_name {
154 int namlen; 154 int namlen;
@@ -160,14 +160,14 @@ struct component_name {
160 * DASD limit information - stored in directory inode 160 * DASD limit information - stored in directory inode
161 */ 161 */
162struct dasd { 162struct dasd {
163 u8 thresh; /* Alert Threshold (in percent) */ 163 u8 thresh; /* Alert Threshold (in percent) */
164 u8 delta; /* Alert Threshold delta (in percent) */ 164 u8 delta; /* Alert Threshold delta (in percent) */
165 u8 rsrvd1; 165 u8 rsrvd1;
166 u8 limit_hi; /* DASD limit (in logical blocks) */ 166 u8 limit_hi; /* DASD limit (in logical blocks) */
167 __le32 limit_lo; /* DASD limit (in logical blocks) */ 167 __le32 limit_lo; /* DASD limit (in logical blocks) */
168 u8 rsrvd2[3]; 168 u8 rsrvd2[3];
169 u8 used_hi; /* DASD usage (in logical blocks) */ 169 u8 used_hi; /* DASD usage (in logical blocks) */
170 __le32 used_lo; /* DASD usage (in logical blocks) */ 170 __le32 used_lo; /* DASD usage (in logical blocks) */
171}; 171};
172 172
173#define DASDLIMIT(dasdp) \ 173#define DASDLIMIT(dasdp) \
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index a386f48c73fc..7971f37534a3 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb)
60 jfs_info("UnMount JFS: sb:0x%p", sb); 60 jfs_info("UnMount JFS: sb:0x%p", sb);
61 61
62 /* 62 /*
63 * update superblock and close log 63 * update superblock and close log
64 * 64 *
65 * if mounted read-write and log based recovery was enabled 65 * if mounted read-write and log based recovery was enabled
66 */ 66 */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index acc97c46d8a4..1543906a2e0d 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -16,7 +16,7 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18/* 18/*
19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager
20 */ 20 */
21 21
22#include <linux/fs.h> 22#include <linux/fs.h>
@@ -32,30 +32,30 @@
32/* 32/*
33 * xtree local flag 33 * xtree local flag
34 */ 34 */
35#define XT_INSERT 0x00000001 35#define XT_INSERT 0x00000001
36 36
37/* 37/*
38 * xtree key/entry comparison: extent offset 38 * xtree key/entry comparison: extent offset
39 * 39 *
40 * return: 40 * return:
41 * -1: k < start of extent 41 * -1: k < start of extent
42 * 0: start_of_extent <= k <= end_of_extent 42 * 0: start_of_extent <= k <= end_of_extent
43 * 1: k > end_of_extent 43 * 1: k > end_of_extent
44 */ 44 */
45#define XT_CMP(CMP, K, X, OFFSET64)\ 45#define XT_CMP(CMP, K, X, OFFSET64)\
46{\ 46{\
47 OFFSET64 = offsetXAD(X);\ 47 OFFSET64 = offsetXAD(X);\
48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
49 ((K) < OFFSET64) ? -1 : 0;\ 49 ((K) < OFFSET64) ? -1 : 0;\
50} 50}
51 51
52/* write a xad entry */ 52/* write a xad entry */
53#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 53#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
54{\ 54{\
55 (XAD)->flag = (FLAG);\ 55 (XAD)->flag = (FLAG);\
56 XADoffset((XAD), (OFF));\ 56 XADoffset((XAD), (OFF));\
57 XADlength((XAD), (LEN));\ 57 XADlength((XAD), (LEN));\
58 XADaddress((XAD), (ADDR));\ 58 XADaddress((XAD), (ADDR));\
59} 59}
60 60
61#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 61#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
@@ -76,13 +76,13 @@
76 MP = NULL;\ 76 MP = NULL;\
77 RC = -EIO;\ 77 RC = -EIO;\
78 }\ 78 }\
79 }\ 79 }\
80} 80}
81 81
82/* for consistency */ 82/* for consistency */
83#define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 83#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
84 84
85#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 85#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
87/* xtree entry parameter descriptor */ 87/* xtree entry parameter descriptor */
88struct xtsplit { 88struct xtsplit {
@@ -97,7 +97,7 @@ struct xtsplit {
97 97
98 98
99/* 99/*
100 * statistics 100 * statistics
101 */ 101 */
102#ifdef CONFIG_JFS_STATISTICS 102#ifdef CONFIG_JFS_STATISTICS
103static struct { 103static struct {
@@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
136#endif /* _STILL_TO_PORT */ 136#endif /* _STILL_TO_PORT */
137 137
138/* 138/*
139 * xtLookup() 139 * xtLookup()
140 * 140 *
141 * function: map a single page into a physical extent; 141 * function: map a single page into a physical extent;
142 */ 142 */
@@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart,
179 } 179 }
180 180
181 /* 181 /*
182 * compute the physical extent covering logical extent 182 * compute the physical extent covering logical extent
183 * 183 *
184 * N.B. search may have failed (e.g., hole in sparse file), 184 * N.B. search may have failed (e.g., hole in sparse file),
185 * and returned the index of the next entry. 185 * and returned the index of the next entry.
@@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart,
220 220
221 221
222/* 222/*
223 * xtLookupList() 223 * xtLookupList()
224 * 224 *
225 * function: map a single logical extent into a list of physical extent; 225 * function: map a single logical extent into a list of physical extent;
226 * 226 *
227 * parameter: 227 * parameter:
228 * struct inode *ip, 228 * struct inode *ip,
229 * struct lxdlist *lxdlist, lxd list (in) 229 * struct lxdlist *lxdlist, lxd list (in)
230 * struct xadlist *xadlist, xad list (in/out) 230 * struct xadlist *xadlist, xad list (in/out)
231 * int flag) 231 * int flag)
232 * 232 *
233 * coverage of lxd by xad under assumption of 233 * coverage of lxd by xad under assumption of
234 * . lxd's are ordered and disjoint. 234 * . lxd's are ordered and disjoint.
235 * . xad's are ordered and disjoint. 235 * . xad's are ordered and disjoint.
236 * 236 *
237 * return: 237 * return:
238 * 0: success 238 * 0: success
239 * 239 *
240 * note: a page being written (even a single byte) is backed fully, 240 * note: a page being written (even a single byte) is backed fully,
241 * except the last page which is only backed with blocks 241 * except the last page which is only backed with blocks
242 * required to cover the last byte; 242 * required to cover the last byte;
243 * the extent backing a page is fully contained within an xad; 243 * the extent backing a page is fully contained within an xad;
244 */ 244 */
245int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, 245int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
246 struct xadlist * xadlist, int flag) 246 struct xadlist * xadlist, int flag)
@@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
284 return rc; 284 return rc;
285 285
286 /* 286 /*
287 * compute the physical extent covering logical extent 287 * compute the physical extent covering logical extent
288 * 288 *
289 * N.B. search may have failed (e.g., hole in sparse file), 289 * N.B. search may have failed (e.g., hole in sparse file),
290 * and returned the index of the next entry. 290 * and returned the index of the next entry.
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
343 if (lstart >= size) 343 if (lstart >= size)
344 goto mapend; 344 goto mapend;
345 345
346 /* compare with the current xad */ 346 /* compare with the current xad */
347 goto compare1; 347 goto compare1;
348 } 348 }
349 /* lxd is covered by xad */ 349 /* lxd is covered by xad */
@@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
430 /* 430 /*
431 * lxd is partially covered by xad 431 * lxd is partially covered by xad
432 */ 432 */
433 else { /* (xend < lend) */ 433 else { /* (xend < lend) */
434 434
435 /* 435 /*
436 * get next xad 436 * get next xad
@@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
477 477
478 478
479/* 479/*
480 * xtSearch() 480 * xtSearch()
481 * 481 *
482 * function: search for the xad entry covering specified offset. 482 * function: search for the xad entry covering specified offset.
483 * 483 *
484 * parameters: 484 * parameters:
485 * ip - file object; 485 * ip - file object;
486 * xoff - extent offset; 486 * xoff - extent offset;
487 * nextp - address of next extent (if any) for search miss 487 * nextp - address of next extent (if any) for search miss
488 * cmpp - comparison result: 488 * cmpp - comparison result:
489 * btstack - traverse stack; 489 * btstack - traverse stack;
490 * flag - search process flag (XT_INSERT); 490 * flag - search process flag (XT_INSERT);
491 * 491 *
492 * returns: 492 * returns:
493 * btstack contains (bn, index) of search path traversed to the entry. 493 * btstack contains (bn, index) of search path traversed to the entry.
494 * *cmpp is set to result of comparison with the entry returned. 494 * *cmpp is set to result of comparison with the entry returned.
495 * the page containing the entry is pinned at exit. 495 * the page containing the entry is pinned at exit.
496 */ 496 */
497static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, 497static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
498 int *cmpp, struct btstack * btstack, int flag) 498 int *cmpp, struct btstack * btstack, int flag)
@@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
517 btstack->nsplit = 0; 517 btstack->nsplit = 0;
518 518
519 /* 519 /*
520 * search down tree from root: 520 * search down tree from root:
521 * 521 *
522 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 522 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
523 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 523 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
642 XT_CMP(cmp, xoff, &p->xad[index], t64); 642 XT_CMP(cmp, xoff, &p->xad[index], t64);
643 if (cmp == 0) { 643 if (cmp == 0) {
644 /* 644 /*
645 * search hit 645 * search hit
646 */ 646 */
647 /* search hit - leaf page: 647 /* search hit - leaf page:
648 * return the entry found 648 * return the entry found
@@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
692 } 692 }
693 693
694 /* 694 /*
695 * search miss 695 * search miss
696 * 696 *
697 * base is the smallest index with key (Kj) greater than 697 * base is the smallest index with key (Kj) greater than
698 * search key (K) and may be zero or maxentry index. 698 * search key (K) and may be zero or maxentry index.
@@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
773} 773}
774 774
775/* 775/*
776 * xtInsert() 776 * xtInsert()
777 * 777 *
778 * function: 778 * function:
779 * 779 *
780 * parameter: 780 * parameter:
781 * tid - transaction id; 781 * tid - transaction id;
782 * ip - file object; 782 * ip - file object;
783 * xflag - extent flag (XAD_NOTRECORDED): 783 * xflag - extent flag (XAD_NOTRECORDED):
784 * xoff - extent offset; 784 * xoff - extent offset;
785 * xlen - extent length; 785 * xlen - extent length;
786 * xaddrp - extent address pointer (in/out): 786 * xaddrp - extent address pointer (in/out):
787 * if (*xaddrp) 787 * if (*xaddrp)
788 * caller allocated data extent at *xaddrp; 788 * caller allocated data extent at *xaddrp;
789 * else 789 * else
790 * allocate data extent and return its xaddr; 790 * allocate data extent and return its xaddr;
791 * flag - 791 * flag -
792 * 792 *
793 * return: 793 * return:
794 */ 794 */
@@ -813,7 +813,7 @@ int xtInsert(tid_t tid, /* transaction id */
813 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 813 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
814 814
815 /* 815 /*
816 * search for the entry location at which to insert: 816 * search for the entry location at which to insert:
817 * 817 *
818 * xtFastSearch() and xtSearch() both returns (leaf page 818 * xtFastSearch() and xtSearch() both returns (leaf page
819 * pinned, index at which to insert). 819 * pinned, index at which to insert).
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid, /* transaction id */
853 } 853 }
854 854
855 /* 855 /*
856 * insert entry for new extent 856 * insert entry for new extent
857 */ 857 */
858 xflag |= XAD_NEW; 858 xflag |= XAD_NEW;
859 859
860 /* 860 /*
861 * if the leaf page is full, split the page and 861 * if the leaf page is full, split the page and
862 * propagate up the router entry for the new page from split 862 * propagate up the router entry for the new page from split
863 * 863 *
864 * The xtSplitUp() will insert the entry and unpin the leaf page. 864 * The xtSplitUp() will insert the entry and unpin the leaf page.
865 */ 865 */
@@ -886,7 +886,7 @@ int xtInsert(tid_t tid, /* transaction id */
886 } 886 }
887 887
888 /* 888 /*
889 * insert the new entry into the leaf page 889 * insert the new entry into the leaf page
890 */ 890 */
891 /* 891 /*
892 * acquire a transaction lock on the leaf page; 892 * acquire a transaction lock on the leaf page;
@@ -930,16 +930,16 @@ int xtInsert(tid_t tid, /* transaction id */
930 930
931 931
932/* 932/*
933 * xtSplitUp() 933 * xtSplitUp()
934 * 934 *
935 * function: 935 * function:
936 * split full pages as propagating insertion up the tree 936 * split full pages as propagating insertion up the tree
937 * 937 *
938 * parameter: 938 * parameter:
939 * tid - transaction id; 939 * tid - transaction id;
940 * ip - file object; 940 * ip - file object;
941 * split - entry parameter descriptor; 941 * split - entry parameter descriptor;
942 * btstack - traverse stack from xtSearch() 942 * btstack - traverse stack from xtSearch()
943 * 943 *
944 * return: 944 * return:
945 */ 945 */
@@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid,
1199 1199
1200 1200
1201/* 1201/*
1202 * xtSplitPage() 1202 * xtSplitPage()
1203 * 1203 *
1204 * function: 1204 * function:
1205 * split a full non-root page into 1205 * split a full non-root page into
1206 * original/split/left page and new right page 1206 * original/split/left page and new right page
1207 * i.e., the original/split page remains as left page. 1207 * i.e., the original/split page remains as left page.
1208 * 1208 *
1209 * parameter: 1209 * parameter:
1210 * int tid, 1210 * int tid,
1211 * struct inode *ip, 1211 * struct inode *ip,
1212 * struct xtsplit *split, 1212 * struct xtsplit *split,
1213 * struct metapage **rmpp, 1213 * struct metapage **rmpp,
1214 * u64 *rbnp, 1214 * u64 *rbnp,
1215 * 1215 *
1216 * return: 1216 * return:
1217 * Pointer to page in which to insert or NULL on error. 1217 * Pointer to page in which to insert or NULL on error.
1218 */ 1218 */
1219static int 1219static int
1220xtSplitPage(tid_t tid, struct inode *ip, 1220xtSplitPage(tid_t tid, struct inode *ip,
@@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
1248 rbn = addressPXD(pxd); 1248 rbn = addressPXD(pxd);
1249 1249
1250 /* Allocate blocks to quota. */ 1250 /* Allocate blocks to quota. */
1251 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1251 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
1252 rc = -EDQUOT; 1252 rc = -EDQUOT;
1253 goto clean_up; 1253 goto clean_up;
1254 } 1254 }
1255 1255
1256 quota_allocation += lengthPXD(pxd); 1256 quota_allocation += lengthPXD(pxd);
@@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
1304 skip = split->index; 1304 skip = split->index;
1305 1305
1306 /* 1306 /*
1307 * sequential append at tail (after last entry of last page) 1307 * sequential append at tail (after last entry of last page)
1308 * 1308 *
1309 * if splitting the last page on a level because of appending 1309 * if splitting the last page on a level because of appending
1310 * a entry to it (skip is maxentry), it's likely that the access is 1310 * a entry to it (skip is maxentry), it's likely that the access is
@@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
1342 } 1342 }
1343 1343
1344 /* 1344 /*
1345 * non-sequential insert (at possibly middle page) 1345 * non-sequential insert (at possibly middle page)
1346 */ 1346 */
1347 1347
1348 /* 1348 /*
@@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip,
1465 1465
1466 1466
1467/* 1467/*
1468 * xtSplitRoot() 1468 * xtSplitRoot()
1469 * 1469 *
1470 * function: 1470 * function:
1471 * split the full root page into 1471 * split the full root page into original/root/split page and new
1472 * original/root/split page and new right page 1472 * right page
1473 * i.e., root remains fixed in tree anchor (inode) and 1473 * i.e., root remains fixed in tree anchor (inode) and the root is
1474 * the root is copied to a single new right child page 1474 * copied to a single new right child page since root page <<
1475 * since root page << non-root page, and 1475 * non-root page, and the split root page contains a single entry
1476 * the split root page contains a single entry for the 1476 * for the new right child page.
1477 * new right child page.
1478 * 1477 *
1479 * parameter: 1478 * parameter:
1480 * int tid, 1479 * int tid,
1481 * struct inode *ip, 1480 * struct inode *ip,
1482 * struct xtsplit *split, 1481 * struct xtsplit *split,
1483 * struct metapage **rmpp) 1482 * struct metapage **rmpp)
1484 * 1483 *
1485 * return: 1484 * return:
1486 * Pointer to page in which to insert or NULL on error. 1485 * Pointer to page in which to insert or NULL on error.
1487 */ 1486 */
1488static int 1487static int
1489xtSplitRoot(tid_t tid, 1488xtSplitRoot(tid_t tid,
@@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid,
1505 INCREMENT(xtStat.split); 1504 INCREMENT(xtStat.split);
1506 1505
1507 /* 1506 /*
1508 * allocate a single (right) child page 1507 * allocate a single (right) child page
1509 */ 1508 */
1510 pxdlist = split->pxdlist; 1509 pxdlist = split->pxdlist;
1511 pxd = &pxdlist->pxd[pxdlist->npxd]; 1510 pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid,
1573 } 1572 }
1574 1573
1575 /* 1574 /*
1576 * reset the root 1575 * reset the root
1577 * 1576 *
1578 * init root with the single entry for the new right page 1577 * init root with the single entry for the new right page
1579 * set the 1st entry offset to 0, which forces the left-most key 1578 * set the 1st entry offset to 0, which forces the left-most key
@@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid,
1610 1609
1611 1610
1612/* 1611/*
1613 * xtExtend() 1612 * xtExtend()
1614 * 1613 *
1615 * function: extend in-place; 1614 * function: extend in-place;
1616 * 1615 *
@@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid, /* transaction id */
1677 goto extendOld; 1676 goto extendOld;
1678 1677
1679 /* 1678 /*
1680 * extent overflow: insert entry for new extent 1679 * extent overflow: insert entry for new extent
1681 */ 1680 */
1682//insertNew: 1681//insertNew:
1683 xoff = offsetXAD(xad) + MAXXLEN; 1682 xoff = offsetXAD(xad) + MAXXLEN;
@@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid, /* transaction id */
1685 nextindex = le16_to_cpu(p->header.nextindex); 1684 nextindex = le16_to_cpu(p->header.nextindex);
1686 1685
1687 /* 1686 /*
1688 * if the leaf page is full, insert the new entry and 1687 * if the leaf page is full, insert the new entry and
1689 * propagate up the router entry for the new page from split 1688 * propagate up the router entry for the new page from split
1690 * 1689 *
1691 * The xtSplitUp() will insert the entry and unpin the leaf page. 1690 * The xtSplitUp() will insert the entry and unpin the leaf page.
1692 */ 1691 */
@@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid, /* transaction id */
1731 } 1730 }
1732 } 1731 }
1733 /* 1732 /*
1734 * insert the new entry into the leaf page 1733 * insert the new entry into the leaf page
1735 */ 1734 */
1736 else { 1735 else {
1737 /* insert the new entry: mark the entry NEW */ 1736 /* insert the new entry: mark the entry NEW */
@@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid, /* transaction id */
1771 1770
1772#ifdef _NOTYET 1771#ifdef _NOTYET
1773/* 1772/*
1774 * xtTailgate() 1773 * xtTailgate()
1775 * 1774 *
1776 * function: split existing 'tail' extent 1775 * function: split existing 'tail' extent
1777 * (split offset >= start offset of tail extent), and 1776 * (split offset >= start offset of tail extent), and
1778 * relocate and extend the split tail half; 1777 * relocate and extend the split tail half;
1779 * 1778 *
1780 * note: existing extent may or may not have been committed. 1779 * note: existing extent may or may not have been committed.
1781 * caller is responsible for pager buffer cache update, and 1780 * caller is responsible for pager buffer cache update, and
@@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid, /* transaction id */
1804 1803
1805/* 1804/*
1806printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1805printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1807 (ulong)xoff, xlen, (ulong)xaddr); 1806 (ulong)xoff, xlen, (ulong)xaddr);
1808*/ 1807*/
1809 1808
1810 /* there must exist extent to be tailgated */ 1809 /* there must exist extent to be tailgated */
@@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1842 xad = &p->xad[index]; 1841 xad = &p->xad[index];
1843/* 1842/*
1844printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1843printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1845 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1844 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
1846*/ 1845*/
1847 if ((llen = xoff - offsetXAD(xad)) == 0) 1846 if ((llen = xoff - offsetXAD(xad)) == 0)
1848 goto updateOld; 1847 goto updateOld;
1849 1848
1850 /* 1849 /*
1851 * partially replace extent: insert entry for new extent 1850 * partially replace extent: insert entry for new extent
1852 */ 1851 */
1853//insertNew: 1852//insertNew:
1854 /* 1853 /*
1855 * if the leaf page is full, insert the new entry and 1854 * if the leaf page is full, insert the new entry and
1856 * propagate up the router entry for the new page from split 1855 * propagate up the router entry for the new page from split
1857 * 1856 *
1858 * The xtSplitUp() will insert the entry and unpin the leaf page. 1857 * The xtSplitUp() will insert the entry and unpin the leaf page.
1859 */ 1858 */
@@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1898 } 1897 }
1899 } 1898 }
1900 /* 1899 /*
1901 * insert the new entry into the leaf page 1900 * insert the new entry into the leaf page
1902 */ 1901 */
1903 else { 1902 else {
1904 /* insert the new entry: mark the entry NEW */ 1903 /* insert the new entry: mark the entry NEW */
@@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
1955#endif /* _NOTYET */ 1954#endif /* _NOTYET */
1956 1955
1957/* 1956/*
1958 * xtUpdate() 1957 * xtUpdate()
1959 * 1958 *
1960 * function: update XAD; 1959 * function: update XAD;
1961 * 1960 *
1962 * update extent for allocated_but_not_recorded or 1961 * update extent for allocated_but_not_recorded or
1963 * compressed extent; 1962 * compressed extent;
1964 * 1963 *
1965 * parameter: 1964 * parameter:
1966 * nxad - new XAD; 1965 * nxad - new XAD;
1967 * logical extent of the specified XAD must be completely 1966 * logical extent of the specified XAD must be completely
1968 * contained by an existing XAD; 1967 * contained by an existing XAD;
1969 */ 1968 */
1970int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1969int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1971{ /* new XAD */ 1970{ /* new XAD */
@@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
2416 2415
2417 2416
2418/* 2417/*
2419 * xtAppend() 2418 * xtAppend()
2420 * 2419 *
2421 * function: grow in append mode from contiguous region specified ; 2420 * function: grow in append mode from contiguous region specified ;
2422 * 2421 *
2423 * parameter: 2422 * parameter:
2424 * tid - transaction id; 2423 * tid - transaction id;
2425 * ip - file object; 2424 * ip - file object;
2426 * xflag - extent flag: 2425 * xflag - extent flag:
2427 * xoff - extent offset; 2426 * xoff - extent offset;
2428 * maxblocks - max extent length; 2427 * maxblocks - max extent length;
2429 * xlen - extent length (in/out); 2428 * xlen - extent length (in/out);
2430 * xaddrp - extent address pointer (in/out): 2429 * xaddrp - extent address pointer (in/out):
2431 * flag - 2430 * flag -
2432 * 2431 *
2433 * return: 2432 * return:
2434 */ 2433 */
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid, /* transaction id */
2460 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2459 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
2461 2460
2462 /* 2461 /*
2463 * search for the entry location at which to insert: 2462 * search for the entry location at which to insert:
2464 * 2463 *
2465 * xtFastSearch() and xtSearch() both return (leaf page 2464 * xtFastSearch() and xtSearch() both return (leaf page
2466 * pinned, index at which to insert). 2465 * pinned, index at which to insert).
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid, /* transaction id */
2482 xlen = min(xlen, (int)(next - xoff)); 2481 xlen = min(xlen, (int)(next - xoff));
2483//insert: 2482//insert:
2484 /* 2483 /*
2485 * insert entry for new extent 2484 * insert entry for new extent
2486 */ 2485 */
2487 xflag |= XAD_NEW; 2486 xflag |= XAD_NEW;
2488 2487
2489 /* 2488 /*
2490 * if the leaf page is full, split the page and 2489 * if the leaf page is full, split the page and
2491 * propagate up the router entry for the new page from split 2490 * propagate up the router entry for the new page from split
2492 * 2491 *
2493 * The xtSplitUp() will insert the entry and unpin the leaf page. 2492 * The xtSplitUp() will insert the entry and unpin the leaf page.
2494 */ 2493 */
@@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid, /* transaction id */
2545 return 0; 2544 return 0;
2546 2545
2547 /* 2546 /*
2548 * insert the new entry into the leaf page 2547 * insert the new entry into the leaf page
2549 */ 2548 */
2550 insertLeaf: 2549 insertLeaf:
2551 /* 2550 /*
@@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid, /* transaction id */
2589 2588
2590/* - TBD for defragmentation/reorganization - 2589/* - TBD for defragmentation/reorganization -
2591 * 2590 *
2592 * xtDelete() 2591 * xtDelete()
2593 * 2592 *
2594 * function: 2593 * function:
2595 * delete the entry with the specified key. 2594 * delete the entry with the specified key.
2596 * 2595 *
2597 * N.B.: whole extent of the entry is assumed to be deleted. 2596 * N.B.: whole extent of the entry is assumed to be deleted.
2598 * 2597 *
2599 * parameter: 2598 * parameter:
2600 * 2599 *
2601 * return: 2600 * return:
2602 * ENOENT: if the entry is not found. 2601 * ENOENT: if the entry is not found.
2603 * 2602 *
2604 * exception: 2603 * exception:
2605 */ 2604 */
@@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2665 2664
2666/* - TBD for defragmentation/reorganization - 2665/* - TBD for defragmentation/reorganization -
2667 * 2666 *
2668 * xtDeleteUp() 2667 * xtDeleteUp()
2669 * 2668 *
2670 * function: 2669 * function:
2671 * free empty pages as propagating deletion up the tree 2670 * free empty pages as propagating deletion up the tree
2672 * 2671 *
2673 * parameter: 2672 * parameter:
2674 * 2673 *
@@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip,
2815 2814
2816 2815
2817/* 2816/*
2818 * NAME: xtRelocate() 2817 * NAME: xtRelocate()
2819 * 2818 *
2820 * FUNCTION: relocate xtpage or data extent of regular file; 2819 * FUNCTION: relocate xtpage or data extent of regular file;
2821 * This function is mainly used by defragfs utility. 2820 * This function is mainly used by defragfs utility.
2822 * 2821 *
2823 * NOTE: This routine does not have the logic to handle 2822 * NOTE: This routine does not have the logic to handle
2824 * uncommitted allocated extent. The caller should call 2823 * uncommitted allocated extent. The caller should call
2825 * txCommit() to commit all the allocation before call 2824 * txCommit() to commit all the allocation before call
2826 * this routine. 2825 * this routine.
2827 */ 2826 */
2828int 2827int
2829xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2828xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
@@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2865 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2864 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
2866 2865
2867 /* 2866 /*
2868 * 1. get and validate the parent xtpage/xad entry 2867 * 1. get and validate the parent xtpage/xad entry
2869 * covering the source extent to be relocated; 2868 * covering the source extent to be relocated;
2870 */ 2869 */
2871 if (xtype == DATAEXT) { 2870 if (xtype == DATAEXT) {
2872 /* search in leaf entry */ 2871 /* search in leaf entry */
@@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2910 jfs_info("xtRelocate: parent xad entry validated."); 2909 jfs_info("xtRelocate: parent xad entry validated.");
2911 2910
2912 /* 2911 /*
2913 * 2. relocate the extent 2912 * 2. relocate the extent
2914 */ 2913 */
2915 if (xtype == DATAEXT) { 2914 if (xtype == DATAEXT) {
2916 /* if the extent is allocated-but-not-recorded 2915 /* if the extent is allocated-but-not-recorded
@@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2923 XT_PUTPAGE(pmp); 2922 XT_PUTPAGE(pmp);
2924 2923
2925 /* 2924 /*
2926 * cmRelocate() 2925 * cmRelocate()
2927 * 2926 *
2928 * copy target data pages to be relocated; 2927 * copy target data pages to be relocated;
2929 * 2928 *
@@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2945 pno = offset >> CM_L2BSIZE; 2944 pno = offset >> CM_L2BSIZE;
2946 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2945 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
2947/* 2946/*
2948 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2947 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
2949 (offset >> CM_L2BSIZE) + 1; 2948 (offset >> CM_L2BSIZE) + 1;
2950*/ 2949*/
2951 sxaddr = oxaddr; 2950 sxaddr = oxaddr;
2952 dxaddr = nxaddr; 2951 dxaddr = nxaddr;
@@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2981 2980
2982 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2981 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
2983 jfs_info("xtRelocate: target data extent relocated."); 2982 jfs_info("xtRelocate: target data extent relocated.");
2984 } else { /* (xtype == XTPAGE) */ 2983 } else { /* (xtype == XTPAGE) */
2985 2984
2986 /* 2985 /*
2987 * read in the target xtpage from the source extent; 2986 * read in the target xtpage from the source extent;
@@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3026 */ 3025 */
3027 if (lmp) { 3026 if (lmp) {
3028 BT_MARK_DIRTY(lmp, ip); 3027 BT_MARK_DIRTY(lmp, ip);
3029 tlck = 3028 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3030 txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
3031 lp->header.next = cpu_to_le64(nxaddr); 3029 lp->header.next = cpu_to_le64(nxaddr);
3032 XT_PUTPAGE(lmp); 3030 XT_PUTPAGE(lmp);
3033 } 3031 }
3034 3032
3035 if (rmp) { 3033 if (rmp) {
3036 BT_MARK_DIRTY(rmp, ip); 3034 BT_MARK_DIRTY(rmp, ip);
3037 tlck = 3035 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3038 txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
3039 rp->header.prev = cpu_to_le64(nxaddr); 3036 rp->header.prev = cpu_to_le64(nxaddr);
3040 XT_PUTPAGE(rmp); 3037 XT_PUTPAGE(rmp);
3041 } 3038 }
@@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3062 * scan may be skipped by commit() and logredo(); 3059 * scan may be skipped by commit() and logredo();
3063 */ 3060 */
3064 BT_MARK_DIRTY(mp, ip); 3061 BT_MARK_DIRTY(mp, ip);
3065 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3062 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
3066 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3063 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
3067 xtlck = (struct xtlock *) & tlck->lock; 3064 xtlck = (struct xtlock *) & tlck->lock;
3068 3065
@@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3084 } 3081 }
3085 3082
3086 /* 3083 /*
3087 * 3. acquire maplock for the source extent to be freed; 3084 * 3. acquire maplock for the source extent to be freed;
3088 * 3085 *
3089 * acquire a maplock saving the src relocated extent address; 3086 * acquire a maplock saving the src relocated extent address;
3090 * to free of the extent at commit time; 3087 * to free of the extent at commit time;
@@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3105 * is no buffer associated with this lock since the buffer 3102 * is no buffer associated with this lock since the buffer
3106 * has been redirected to the target location. 3103 * has been redirected to the target location.
3107 */ 3104 */
3108 else /* (xtype == XTPAGE) */ 3105 else /* (xtype == XTPAGE) */
3109 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3106 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
3110 3107
3111 pxdlock = (struct pxd_lock *) & tlck->lock; 3108 pxdlock = (struct pxd_lock *) & tlck->lock;
@@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3115 pxdlock->index = 1; 3112 pxdlock->index = 1;
3116 3113
3117 /* 3114 /*
3118 * 4. update the parent xad entry for relocation; 3115 * 4. update the parent xad entry for relocation;
3119 * 3116 *
3120 * acquire tlck for the parent entry with XAD_NEW as entry 3117 * acquire tlck for the parent entry with XAD_NEW as entry
3121 * update which will write LOG_REDOPAGE and update bmap for 3118 * update which will write LOG_REDOPAGE and update bmap for
@@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
3143 3140
3144 3141
3145/* 3142/*
3146 * xtSearchNode() 3143 * xtSearchNode()
3147 * 3144 *
3148 * function: search for the internal xad entry covering specified extent. 3145 * function: search for the internal xad entry covering specified extent.
3149 * This function is mainly used by defragfs utility. 3146 * This function is mainly used by defragfs utility.
3150 * 3147 *
3151 * parameters: 3148 * parameters:
3152 * ip - file object; 3149 * ip - file object;
3153 * xad - extent to find; 3150 * xad - extent to find;
3154 * cmpp - comparison result: 3151 * cmpp - comparison result:
3155 * btstack - traverse stack; 3152 * btstack - traverse stack;
3156 * flag - search process flag; 3153 * flag - search process flag;
3157 * 3154 *
3158 * returns: 3155 * returns:
3159 * btstack contains (bn, index) of search path traversed to the entry. 3156 * btstack contains (bn, index) of search path traversed to the entry.
3160 * *cmpp is set to result of comparison with the entry returned. 3157 * *cmpp is set to result of comparison with the entry returned.
3161 * the page containing the entry is pinned at exit. 3158 * the page containing the entry is pinned at exit.
3162 */ 3159 */
3163static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3160static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3164 int *cmpp, struct btstack * btstack, int flag) 3161 int *cmpp, struct btstack * btstack, int flag)
@@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3181 xaddr = addressXAD(xad); 3178 xaddr = addressXAD(xad);
3182 3179
3183 /* 3180 /*
3184 * search down tree from root: 3181 * search down tree from root:
3185 * 3182 *
3186 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3183 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
3187 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3184 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3217 XT_CMP(cmp, xoff, &p->xad[index], t64); 3214 XT_CMP(cmp, xoff, &p->xad[index], t64);
3218 if (cmp == 0) { 3215 if (cmp == 0) {
3219 /* 3216 /*
3220 * search hit 3217 * search hit
3221 * 3218 *
3222 * verify for exact match; 3219 * verify for exact match;
3223 */ 3220 */
@@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3245 } 3242 }
3246 3243
3247 /* 3244 /*
3248 * search miss - non-leaf page: 3245 * search miss - non-leaf page:
3249 * 3246 *
3250 * base is the smallest index with key (Kj) greater than 3247 * base is the smallest index with key (Kj) greater than
3251 * search key (K) and may be zero or maxentry index. 3248 * search key (K) and may be zero or maxentry index.
@@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
3268 3265
3269 3266
3270/* 3267/*
3271 * xtRelink() 3268 * xtRelink()
3272 * 3269 *
3273 * function: 3270 * function:
3274 * link around a freed page. 3271 * link around a freed page.
3275 * 3272 *
3276 * Parameter: 3273 * Parameter:
3277 * int tid, 3274 * int tid,
3278 * struct inode *ip, 3275 * struct inode *ip,
3279 * xtpage_t *p) 3276 * xtpage_t *p)
3280 * 3277 *
3281 * returns: 3278 * returns:
3282 */ 3279 */
@@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
3338 3335
3339 3336
3340/* 3337/*
3341 * xtInitRoot() 3338 * xtInitRoot()
3342 * 3339 *
3343 * initialize file root (inline in inode) 3340 * initialize file root (inline in inode)
3344 */ 3341 */
@@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip)
3385#define MAX_TRUNCATE_LEAVES 50 3382#define MAX_TRUNCATE_LEAVES 50
3386 3383
3387/* 3384/*
3388 * xtTruncate() 3385 * xtTruncate()
3389 * 3386 *
3390 * function: 3387 * function:
3391 * traverse for truncation logging backward bottom up; 3388 * traverse for truncation logging backward bottom up;
3392 * terminate at the last extent entry at the current subtree 3389 * terminate at the last extent entry at the current subtree
3393 * root page covering new down size. 3390 * root page covering new down size.
3394 * truncation may occur within the last extent entry. 3391 * truncation may occur within the last extent entry.
3395 * 3392 *
3396 * parameter: 3393 * parameter:
3397 * int tid, 3394 * int tid,
3398 * struct inode *ip, 3395 * struct inode *ip,
3399 * s64 newsize, 3396 * s64 newsize,
3400 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3397 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
3401 * 3398 *
3402 * return: 3399 * return:
3403 * 3400 *
3404 * note: 3401 * note:
3405 * PWMAP: 3402 * PWMAP:
3406 * 1. truncate (non-COMMIT_NOLINK file) 3403 * 1. truncate (non-COMMIT_NOLINK file)
3407 * by jfs_truncate() or jfs_open(O_TRUNC): 3404 * by jfs_truncate() or jfs_open(O_TRUNC):
3408 * xtree is updated; 3405 * xtree is updated;
3409 * 2. truncate index table of directory when last entry removed 3406 * 2. truncate index table of directory when last entry removed
3410 * map update via tlock at commit time; 3407 * map update via tlock at commit time;
3411 * PMAP: 3408 * PMAP:
3412 * Call xtTruncate_pmap instead 3409 * Call xtTruncate_pmap instead
3413 * WMAP: 3410 * WMAP:
3414 * 1. remove (free zero link count) on last reference release 3411 * 1. remove (free zero link count) on last reference release
3415 * (pmap has been freed at commit zero link count); 3412 * (pmap has been freed at commit zero link count);
3416 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3413 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
3417 * xtree is updated; 3414 * xtree is updated;
3418 * map update directly at truncation time; 3415 * map update directly at truncation time;
3419 * 3416 *
3420 * if (DELETE) 3417 * if (DELETE)
3421 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3418 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
3422 * else if (TRUNCATE) 3419 * else if (TRUNCATE)
3423 * must write LOG_NOREDOPAGE for deleted index page; 3420 * must write LOG_NOREDOPAGE for deleted index page;
3424 * 3421 *
3425 * pages may already have been tlocked by anonymous transactions 3422 * pages may already have been tlocked by anonymous transactions
3426 * during file growth (i.e., write) before truncation; 3423 * during file growth (i.e., write) before truncation;
@@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3493 * retained in the new sized file. 3490 * retained in the new sized file.
3494 * if type is PMAP, the data and index pages are NOT 3491 * if type is PMAP, the data and index pages are NOT
3495 * freed, and the data and index blocks are NOT freed 3492 * freed, and the data and index blocks are NOT freed
3496 * from working map. 3493 * from working map.
3497 * (this will allow continued access of data/index of 3494 * (this will allow continued access of data/index of
3498 * temporary file (zerolink count file truncated to zero-length)). 3495 * temporary file (zerolink count file truncated to zero-length)).
3499 */ 3496 */
@@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3542 goto getChild; 3539 goto getChild;
3543 3540
3544 /* 3541 /*
3545 * leaf page 3542 * leaf page
3546 */ 3543 */
3547 freed = 0; 3544 freed = 0;
3548 3545
@@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3916 } 3913 }
3917 3914
3918 /* 3915 /*
3919 * internal page: go down to child page of current entry 3916 * internal page: go down to child page of current entry
3920 */ 3917 */
3921 getChild: 3918 getChild:
3922 /* save current parent entry for the child page */ 3919 /* save current parent entry for the child page */
@@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3965 3962
3966 3963
3967/* 3964/*
3968 * xtTruncate_pmap() 3965 * xtTruncate_pmap()
3969 * 3966 *
3970 * function: 3967 * function:
3971 * Perform truncate to zero length for deleted file, leaving the 3968 * Perform truncate to zero length for deleted file, leaving the
@@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
3974 * is committed to disk. 3971 * is committed to disk.
3975 * 3972 *
3976 * parameter: 3973 * parameter:
3977 * tid_t tid, 3974 * tid_t tid,
3978 * struct inode *ip, 3975 * struct inode *ip,
3979 * s64 committed_size) 3976 * s64 committed_size)
3980 * 3977 *
3981 * return: new committed size 3978 * return: new committed size
3982 * 3979 *
@@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4050 } 4047 }
4051 4048
4052 /* 4049 /*
4053 * leaf page 4050 * leaf page
4054 */ 4051 */
4055 4052
4056 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4053 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
@@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4062 xoff = offsetXAD(xad); 4059 xoff = offsetXAD(xad);
4063 xlen = lengthXAD(xad); 4060 xlen = lengthXAD(xad);
4064 XT_PUTPAGE(mp); 4061 XT_PUTPAGE(mp);
4065 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4062 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
4066 } 4063 }
4067 tlck = txLock(tid, ip, mp, tlckXTREE); 4064 tlck = txLock(tid, ip, mp, tlckXTREE);
4068 tlck->type = tlckXTREE | tlckFREE; 4065 tlck->type = tlckXTREE | tlckFREE;
@@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4099 */ 4096 */
4100 tlck = txLock(tid, ip, mp, tlckXTREE); 4097 tlck = txLock(tid, ip, mp, tlckXTREE);
4101 xtlck = (struct xtlock *) & tlck->lock; 4098 xtlck = (struct xtlock *) & tlck->lock;
4102 xtlck->hwm.offset = 4099 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
4103 le16_to_cpu(p->header.nextindex) - 1;
4104 tlck->type = tlckXTREE | tlckFREE; 4100 tlck->type = tlckXTREE | tlckFREE;
4105 4101
4106 XT_PUTPAGE(mp); 4102 XT_PUTPAGE(mp);
@@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4118 else 4114 else
4119 index--; 4115 index--;
4120 /* 4116 /*
4121 * internal page: go down to child page of current entry 4117 * internal page: go down to child page of current entry
4122 */ 4118 */
4123 getChild: 4119 getChild:
4124 /* save current parent entry for the child page */ 4120 /* save current parent entry for the child page */
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 164f6f2b1019..70815c8a3d6a 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -19,14 +19,14 @@
19#define _H_JFS_XTREE 19#define _H_JFS_XTREE
20 20
21/* 21/*
22 * jfs_xtree.h: extent allocation descriptor B+-tree manager 22 * jfs_xtree.h: extent allocation descriptor B+-tree manager
23 */ 23 */
24 24
25#include "jfs_btree.h" 25#include "jfs_btree.h"
26 26
27 27
28/* 28/*
29 * extent allocation descriptor (xad) 29 * extent allocation descriptor (xad)
30 */ 30 */
31typedef struct xad { 31typedef struct xad {
32 unsigned flag:8; /* 1: flag */ 32 unsigned flag:8; /* 1: flag */
@@ -38,30 +38,30 @@ typedef struct xad {
38 __le32 addr2; /* 4: address in unit of fsblksize */ 38 __le32 addr2; /* 4: address in unit of fsblksize */
39} xad_t; /* (16) */ 39} xad_t; /* (16) */
40 40
41#define MAXXLEN ((1 << 24) - 1) 41#define MAXXLEN ((1 << 24) - 1)
42 42
43#define XTSLOTSIZE 16 43#define XTSLOTSIZE 16
44#define L2XTSLOTSIZE 4 44#define L2XTSLOTSIZE 4
45 45
46/* xad_t field construction */ 46/* xad_t field construction */
47#define XADoffset(xad, offset64)\ 47#define XADoffset(xad, offset64)\
48{\ 48{\
49 (xad)->off1 = ((u64)offset64) >> 32;\ 49 (xad)->off1 = ((u64)offset64) >> 32;\
50 (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ 50 (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
51} 51}
52#define XADaddress(xad, address64)\ 52#define XADaddress(xad, address64)\
53{\ 53{\
54 (xad)->addr1 = ((u64)address64) >> 32;\ 54 (xad)->addr1 = ((u64)address64) >> 32;\
55 (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ 55 (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
56} 56}
57#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) 57#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32)
58 58
59/* xad_t field extraction */ 59/* xad_t field extraction */
60#define offsetXAD(xad)\ 60#define offsetXAD(xad)\
61 ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) 61 ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
62#define addressXAD(xad)\ 62#define addressXAD(xad)\
63 ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) 63 ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
64#define lengthXAD(xad) __le24_to_cpu((xad)->len) 64#define lengthXAD(xad) __le24_to_cpu((xad)->len)
65 65
66/* xad list */ 66/* xad list */
67struct xadlist { 67struct xadlist {
@@ -71,22 +71,22 @@ struct xadlist {
71}; 71};
72 72
73/* xad_t flags */ 73/* xad_t flags */
74#define XAD_NEW 0x01 /* new */ 74#define XAD_NEW 0x01 /* new */
75#define XAD_EXTENDED 0x02 /* extended */ 75#define XAD_EXTENDED 0x02 /* extended */
76#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ 76#define XAD_COMPRESSED 0x04 /* compressed with recorded length */
77#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ 77#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */
78#define XAD_COW 0x10 /* copy-on-write */ 78#define XAD_COW 0x10 /* copy-on-write */
79 79
80 80
81/* possible values for maxentry */ 81/* possible values for maxentry */
82#define XTROOTINITSLOT_DIR 6 82#define XTROOTINITSLOT_DIR 6
83#define XTROOTINITSLOT 10 83#define XTROOTINITSLOT 10
84#define XTROOTMAXSLOT 18 84#define XTROOTMAXSLOT 18
85#define XTPAGEMAXSLOT 256 85#define XTPAGEMAXSLOT 256
86#define XTENTRYSTART 2 86#define XTENTRYSTART 2
87 87
88/* 88/*
89 * xtree page: 89 * xtree page:
90 */ 90 */
91typedef union { 91typedef union {
92 struct xtheader { 92 struct xtheader {
@@ -106,7 +106,7 @@ typedef union {
106} xtpage_t; 106} xtpage_t;
107 107
108/* 108/*
109 * external declaration 109 * external declaration
110 */ 110 */
111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, 111extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
112 int *pflag, s64 * paddr, int *plen, int flag); 112 int *pflag, s64 * paddr, int *plen, int flag);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 41c204771262..25161c4121e4 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
328 * dentry - child directory dentry 328 * dentry - child directory dentry
329 * 329 *
330 * RETURN: -EINVAL - if name is . or .. 330 * RETURN: -EINVAL - if name is . or ..
331 * -EINVAL - if . or .. exist but are invalid. 331 * -EINVAL - if . or .. exist but are invalid.
332 * errors from subroutines 332 * errors from subroutines
333 * 333 *
334 * note: 334 * note:
@@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
517 inode_dec_link_count(ip); 517 inode_dec_link_count(ip);
518 518
519 /* 519 /*
520 * commit zero link count object 520 * commit zero link count object
521 */ 521 */
522 if (ip->i_nlink == 0) { 522 if (ip->i_nlink == 0) {
523 assert(!test_cflag(COMMIT_Nolink, ip)); 523 assert(!test_cflag(COMMIT_Nolink, ip));
@@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
596/* 596/*
597 * NAME: commitZeroLink() 597 * NAME: commitZeroLink()
598 * 598 *
599 * FUNCTION: for non-directory, called by jfs_remove(), 599 * FUNCTION: for non-directory, called by jfs_remove(),
600 * truncate a regular file, directory or symbolic 600 * truncate a regular file, directory or symbolic
601 * link to zero length. return 0 if type is not 601 * link to zero length. return 0 if type is not
602 * one of these. 602 * one of these.
@@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
676/* 676/*
677 * NAME: jfs_free_zero_link() 677 * NAME: jfs_free_zero_link()
678 * 678 *
679 * FUNCTION: for non-directory, called by iClose(), 679 * FUNCTION: for non-directory, called by iClose(),
680 * free resources of a file from cache and WORKING map 680 * free resources of a file from cache and WORKING map
681 * for a file previously committed with zero link count 681 * for a file previously committed with zero link count
682 * while associated with a pager object, 682 * while associated with a pager object,
@@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry,
855 * NAME: jfs_symlink(dip, dentry, name) 855 * NAME: jfs_symlink(dip, dentry, name)
856 * 856 *
857 * FUNCTION: creates a symbolic link to <symlink> by name <name> 857 * FUNCTION: creates a symbolic link to <symlink> by name <name>
858 * in directory <dip> 858 * in directory <dip>
859 * 859 *
860 * PARAMETER: dip - parent directory vnode 860 * PARAMETER: dip - parent directory vnode
861 * dentry - dentry of symbolic link 861 * dentry - dentry of symbolic link
862 * name - the path name of the existing object 862 * name - the path name of the existing object
863 * that will be the source of the link 863 * that will be the source of the link
864 * 864 *
865 * RETURN: errors from subroutines 865 * RETURN: errors from subroutines
866 * 866 *
@@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1052 1052
1053 1053
1054/* 1054/*
1055 * NAME: jfs_rename 1055 * NAME: jfs_rename
1056 * 1056 *
1057 * FUNCTION: rename a file or directory 1057 * FUNCTION: rename a file or directory
1058 */ 1058 */
1059static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, 1059static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1060 struct inode *new_dir, struct dentry *new_dentry) 1060 struct inode *new_dir, struct dentry *new_dentry)
@@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1331 1331
1332 1332
1333/* 1333/*
1334 * NAME: jfs_mknod 1334 * NAME: jfs_mknod
1335 * 1335 *
1336 * FUNCTION: Create a special file (device) 1336 * FUNCTION: Create a special file (device)
1337 */ 1337 */
1338static int jfs_mknod(struct inode *dir, struct dentry *dentry, 1338static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1339 int mode, dev_t rdev) 1339 int mode, dev_t rdev)
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 79d625f3f733..71984ee95346 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -29,17 +29,17 @@
29#include "jfs_txnmgr.h" 29#include "jfs_txnmgr.h"
30#include "jfs_debug.h" 30#include "jfs_debug.h"
31 31
32#define BITSPERPAGE (PSIZE << 3) 32#define BITSPERPAGE (PSIZE << 3)
33#define L2MEGABYTE 20 33#define L2MEGABYTE 20
34#define MEGABYTE (1 << L2MEGABYTE) 34#define MEGABYTE (1 << L2MEGABYTE)
35#define MEGABYTE32 (MEGABYTE << 5) 35#define MEGABYTE32 (MEGABYTE << 5)
36 36
37/* convert block number to bmap file page number */ 37/* convert block number to bmap file page number */
38#define BLKTODMAPN(b)\ 38#define BLKTODMAPN(b)\
39 (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) 39 (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
40 40
41/* 41/*
42 * jfs_extendfs() 42 * jfs_extendfs()
43 * 43 *
44 * function: extend file system; 44 * function: extend file system;
45 * 45 *
@@ -48,9 +48,9 @@
48 * workspace space 48 * workspace space
49 * 49 *
50 * input: 50 * input:
51 * new LVSize: in LV blocks (required) 51 * new LVSize: in LV blocks (required)
52 * new LogSize: in LV blocks (optional) 52 * new LogSize: in LV blocks (optional)
53 * new FSSize: in LV blocks (optional) 53 * new FSSize: in LV blocks (optional)
54 * 54 *
55 * new configuration: 55 * new configuration:
56 * 1. set new LogSize as specified or default from new LVSize; 56 * 1. set new LogSize as specified or default from new LVSize;
@@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
125 } 125 }
126 126
127 /* 127 /*
128 * reconfigure LV spaces 128 * reconfigure LV spaces
129 * --------------------- 129 * ---------------------
130 * 130 *
131 * validate new size, or, if not specified, determine new size 131 * validate new size, or, if not specified, determine new size
132 */ 132 */
@@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
198 log_formatted = 1; 198 log_formatted = 1;
199 } 199 }
200 /* 200 /*
201 * quiesce file system 201 * quiesce file system
202 * 202 *
203 * (prepare to move the inline log and to prevent map update) 203 * (prepare to move the inline log and to prevent map update)
204 * 204 *
@@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
270 } 270 }
271 271
272 /* 272 /*
273 * extend block allocation map 273 * extend block allocation map
274 * --------------------------- 274 * ---------------------------
275 * 275 *
276 * extendfs() for new extension, retry after crash recovery; 276 * extendfs() for new extension, retry after crash recovery;
277 * 277 *
@@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
283 * s_size: aggregate size in physical blocks; 283 * s_size: aggregate size in physical blocks;
284 */ 284 */
285 /* 285 /*
286 * compute the new block allocation map configuration 286 * compute the new block allocation map configuration
287 * 287 *
288 * map dinode: 288 * map dinode:
289 * di_size: map file size in byte; 289 * di_size: map file size in byte;
@@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
301 newNpages = BLKTODMAPN(t64) + 1; 301 newNpages = BLKTODMAPN(t64) + 1;
302 302
303 /* 303 /*
304 * extend map from current map (WITHOUT growing mapfile) 304 * extend map from current map (WITHOUT growing mapfile)
305 * 305 *
306 * map new extension with unmapped part of the last partial 306 * map new extension with unmapped part of the last partial
307 * dmap page, if applicable, and extra page(s) allocated 307 * dmap page, if applicable, and extra page(s) allocated
@@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
341 XSize -= nblocks; 341 XSize -= nblocks;
342 342
343 /* 343 /*
344 * grow map file to cover remaining extension 344 * grow map file to cover remaining extension
345 * and/or one extra dmap page for next extendfs(); 345 * and/or one extra dmap page for next extendfs();
346 * 346 *
347 * allocate new map pages and its backing blocks, and 347 * allocate new map pages and its backing blocks, and
348 * update map file xtree 348 * update map file xtree
@@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
422 dbFinalizeBmap(ipbmap); 422 dbFinalizeBmap(ipbmap);
423 423
424 /* 424 /*
425 * update inode allocation map 425 * update inode allocation map
426 * --------------------------- 426 * ---------------------------
427 * 427 *
428 * move iag lists from old to new iag; 428 * move iag lists from old to new iag;
429 * agstart field is not updated for logredo() to reconstruct 429 * agstart field is not updated for logredo() to reconstruct
@@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
442 } 442 }
443 443
444 /* 444 /*
445 * finalize 445 * finalize
446 * -------- 446 * --------
447 * 447 *
448 * extension is committed when on-disk super block is 448 * extension is committed when on-disk super block is
449 * updated with new descriptors: logredo will recover 449 * updated with new descriptors: logredo will recover
@@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
480 diFreeSpecial(ipbmap2); 480 diFreeSpecial(ipbmap2);
481 481
482 /* 482 /*
483 * update superblock 483 * update superblock
484 */ 484 */
485 if ((rc = readSuper(sb, &bh))) 485 if ((rc = readSuper(sb, &bh)))
486 goto error_out; 486 goto error_out;
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
530 530
531 resume: 531 resume:
532 /* 532 /*
533 * resume file system transactions 533 * resume file system transactions
534 */ 534 */
535 txResume(sb); 535 txResume(sb);
536 536
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index b753ba216450..b2375f0774b7 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -63,9 +63,9 @@
63 * 63 *
64 * On-disk: 64 * On-disk:
65 * 65 *
66 * FEALISTs are stored on disk using blocks allocated by dbAlloc() and 66 * FEALISTs are stored on disk using blocks allocated by dbAlloc() and
67 * written directly. An EA list may be in-lined in the inode if there is 67 * written directly. An EA list may be in-lined in the inode if there is
68 * sufficient room available. 68 * sufficient room available.
69 */ 69 */
70 70
71struct ea_buffer { 71struct ea_buffer {
@@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
590 size_check: 590 size_check:
591 if (EALIST_SIZE(ea_buf->xattr) != ea_size) { 591 if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
592 printk(KERN_ERR "ea_get: invalid extended attribute\n"); 592 printk(KERN_ERR "ea_get: invalid extended attribute\n");
593 dump_mem("xattr", ea_buf->xattr, ea_size); 593 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
594 ea_buf->xattr, ea_size, 1);
594 ea_release(inode, ea_buf); 595 ea_release(inode, ea_buf);
595 rc = -EIO; 596 rc = -EIO;
596 goto clean_up; 597 goto clean_up;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1d7570..17765f697e50 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
23 .aio_write = generic_file_aio_write, 23 .aio_write = generic_file_aio_write,
24 .mmap = generic_file_mmap, 24 .mmap = generic_file_mmap,
25 .fsync = minix_sync_file, 25 .fsync = minix_sync_file,
26 .sendfile = generic_file_sendfile, 26 .splice_read = generic_file_splice_read,
27}; 27};
28 28
29const struct inode_operations minix_file_inode_operations = { 29const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4e4a08..8689b736fdd9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
41static int nfs_file_release(struct inode *, struct file *); 41static int nfs_file_release(struct inode *, struct file *);
42static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); 42static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
43static int nfs_file_mmap(struct file *, struct vm_area_struct *); 43static int nfs_file_mmap(struct file *, struct vm_area_struct *);
44static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); 44static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
45 struct pipe_inode_info *pipe,
46 size_t count, unsigned int flags);
45static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, 47static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
46 unsigned long nr_segs, loff_t pos); 48 unsigned long nr_segs, loff_t pos);
47static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 49static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
65 .fsync = nfs_fsync, 67 .fsync = nfs_fsync,
66 .lock = nfs_lock, 68 .lock = nfs_lock,
67 .flock = nfs_flock, 69 .flock = nfs_flock,
68 .sendfile = nfs_file_sendfile, 70 .splice_read = nfs_file_splice_read,
69 .check_flags = nfs_check_flags, 71 .check_flags = nfs_check_flags,
70}; 72};
71 73
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
224} 226}
225 227
226static ssize_t 228static ssize_t
227nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, 229nfs_file_splice_read(struct file *filp, loff_t *ppos,
228 read_actor_t actor, void *target) 230 struct pipe_inode_info *pipe, size_t count,
231 unsigned int flags)
229{ 232{
230 struct dentry *dentry = filp->f_path.dentry; 233 struct dentry *dentry = filp->f_path.dentry;
231 struct inode *inode = dentry->d_inode; 234 struct inode *inode = dentry->d_inode;
232 ssize_t res; 235 ssize_t res;
233 236
234 dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n", 237 dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
235 dentry->d_parent->d_name.name, dentry->d_name.name, 238 dentry->d_parent->d_name.name, dentry->d_name.name,
236 (unsigned long) count, (unsigned long long) *ppos); 239 (unsigned long) count, (unsigned long long) *ppos);
237 240
238 res = nfs_revalidate_mapping(inode, filp->f_mapping); 241 res = nfs_revalidate_mapping(inode, filp->f_mapping);
239 if (!res) 242 if (!res)
240 res = generic_file_sendfile(filp, ppos, count, actor, target); 243 res = generic_file_splice_read(filp, ppos, pipe, count, flags);
241 return res; 244 return res;
242} 245}
243 246
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa245b5d5..8604e35bd48e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/major.h> 25#include <linux/major.h>
26#include <linux/ext2_fs.h> 26#include <linux/splice.h>
27#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/fcntl.h> 29#include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
801} 801}
802 802
803/* 803/*
804 * Grab and keep cached pages assosiated with a file in the svc_rqst 804 * Grab and keep cached pages associated with a file in the svc_rqst
805 * so that they can be passed to the netowork sendmsg/sendpage routines 805 * so that they can be passed to the network sendmsg/sendpage routines
806 * directrly. They will be released after the sending has completed. 806 * directly. They will be released after the sending has completed.
807 */ 807 */
808static int 808static int
809nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) 809nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
810 struct splice_desc *sd)
810{ 811{
811 unsigned long count = desc->count; 812 struct svc_rqst *rqstp = sd->u.data;
812 struct svc_rqst *rqstp = desc->arg.data;
813 struct page **pp = rqstp->rq_respages + rqstp->rq_resused; 813 struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
814 struct page *page = buf->page;
815 size_t size;
816 int ret;
817
818 ret = buf->ops->confirm(pipe, buf);
819 if (unlikely(ret))
820 return ret;
814 821
815 if (size > count) 822 size = sd->len;
816 size = count;
817 823
818 if (rqstp->rq_res.page_len == 0) { 824 if (rqstp->rq_res.page_len == 0) {
819 get_page(page); 825 get_page(page);
820 put_page(*pp); 826 put_page(*pp);
821 *pp = page; 827 *pp = page;
822 rqstp->rq_resused++; 828 rqstp->rq_resused++;
823 rqstp->rq_res.page_base = offset; 829 rqstp->rq_res.page_base = buf->offset;
824 rqstp->rq_res.page_len = size; 830 rqstp->rq_res.page_len = size;
825 } else if (page != pp[-1]) { 831 } else if (page != pp[-1]) {
826 get_page(page); 832 get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
832 } else 838 } else
833 rqstp->rq_res.page_len += size; 839 rqstp->rq_res.page_len += size;
834 840
835 desc->count = count - size;
836 desc->written += size;
837 return size; 841 return size;
838} 842}
839 843
844static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
845 struct splice_desc *sd)
846{
847 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
848}
849
840static __be32 850static __be32
841nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, 851nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
842 loff_t offset, struct kvec *vec, int vlen, unsigned long *count) 852 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,15 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
861 if (ra && ra->p_set) 871 if (ra && ra->p_set)
862 file->f_ra = ra->p_ra; 872 file->f_ra = ra->p_ra;
863 873
864 if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { 874 if (file->f_op->splice_read && rqstp->rq_splice_ok) {
865 rqstp->rq_resused = 1; 875 struct splice_desc sd = {
866 host_err = file->f_op->sendfile(file, &offset, *count, 876 .len = 0,
867 nfsd_read_actor, rqstp); 877 .total_len = *count,
878 .pos = offset,
879 .u.data = rqstp,
880 };
881
882 host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
868 } else { 883 } else {
869 oldfs = get_fs(); 884 oldfs = get_fs();
870 set_fs(KERNEL_DS); 885 set_fs(KERNEL_DS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed56390b582..ffcc504a1667 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
2276 mounted filesystem. */ 2276 mounted filesystem. */
2277 .mmap = generic_file_mmap, /* Mmap file. */ 2277 .mmap = generic_file_mmap, /* Mmap file. */
2278 .open = ntfs_file_open, /* Open file. */ 2278 .open = ntfs_file_open, /* Open file. */
2279 .sendfile = generic_file_sendfile, /* Zero-copy data send with 2279 .splice_read = generic_file_splice_read /* Zero-copy data send with
2280 the data source being on 2280 the data source being on
2281 the ntfs partition. We do 2281 the ntfs partition. We do
2282 not need to care about the 2282 not need to care about the
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c96431bbc..4979b6675717 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/uio.h> 32#include <linux/uio.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/pipe_fs_i.h> 34#include <linux/splice.h>
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37 37
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1583 ssize_t copied = 0; 1583 ssize_t copied = 0;
1584 struct ocfs2_splice_write_priv sp; 1584 struct ocfs2_splice_write_priv sp;
1585 1585
1586 ret = buf->ops->pin(pipe, buf); 1586 ret = buf->ops->confirm(pipe, buf);
1587 if (ret) 1587 if (ret)
1588 goto out; 1588 goto out;
1589 1589
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
1604 * might enter ocfs2_buffered_write_cluster() more 1604 * might enter ocfs2_buffered_write_cluster() more
1605 * than once, so keep track of our progress here. 1605 * than once, so keep track of our progress here.
1606 */ 1606 */
1607 copied = ocfs2_buffered_write_cluster(sd->file, 1607 copied = ocfs2_buffered_write_cluster(sd->u.file,
1608 (loff_t)sd->pos + total, 1608 (loff_t)sd->pos + total,
1609 count, 1609 count,
1610 ocfs2_map_and_write_splice_data, 1610 ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1636 int ret, err; 1636 int ret, err;
1637 struct address_space *mapping = out->f_mapping; 1637 struct address_space *mapping = out->f_mapping;
1638 struct inode *inode = mapping->host; 1638 struct inode *inode = mapping->host;
1639 1639 struct splice_desc sd = {
1640 ret = __splice_from_pipe(pipe, out, ppos, len, flags, 1640 .total_len = len,
1641 ocfs2_splice_write_actor); 1641 .flags = flags,
1642 .pos = *ppos,
1643 .u.file = out,
1644 };
1645
1646 ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
1642 if (ret > 0) { 1647 if (ret > 0) {
1643 *ppos += ret; 1648 *ppos += ret;
1644 1649
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
1817const struct file_operations ocfs2_fops = { 1822const struct file_operations ocfs2_fops = {
1818 .read = do_sync_read, 1823 .read = do_sync_read,
1819 .write = do_sync_write, 1824 .write = do_sync_write,
1820 .sendfile = generic_file_sendfile,
1821 .mmap = ocfs2_mmap, 1825 .mmap = ocfs2_mmap,
1822 .fsync = ocfs2_sync_file, 1826 .fsync = ocfs2_sync_file,
1823 .release = ocfs2_file_release, 1827 .release = ocfs2_file_release,
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 9f7ad4244f63..1e064c4a4f86 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -45,7 +45,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
45{ 45{
46 int blocksize, offset, size,res; 46 int blocksize, offset, size,res;
47 loff_t i_size; 47 loff_t i_size;
48 dasd_information_t *info; 48 dasd_information2_t *info;
49 struct hd_geometry *geo; 49 struct hd_geometry *geo;
50 char type[5] = {0,}; 50 char type[5] = {0,};
51 char name[7] = {0,}; 51 char name[7] = {0,};
@@ -64,14 +64,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
64 if (i_size == 0) 64 if (i_size == 0)
65 goto out_exit; 65 goto out_exit;
66 66
67 if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL) 67 info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
68 if (info == NULL)
68 goto out_exit; 69 goto out_exit;
69 if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL) 70 geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
71 if (geo == NULL)
70 goto out_nogeo; 72 goto out_nogeo;
71 if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) 73 label = kmalloc(sizeof(union label_t), GFP_KERNEL);
74 if (label == NULL)
72 goto out_nolab; 75 goto out_nolab;
73 76
74 if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || 77 if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
75 ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) 78 ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
76 goto out_freeall; 79 goto out_freeall;
77 80
@@ -96,84 +99,108 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
96 res = 1; 99 res = 1;
97 100
98 /* 101 /*
99 * Three different types: CMS1, VOL1 and LNX1/unlabeled 102 * Three different formats: LDL, CDL and unformated disk
103 *
104 * identified by info->format
105 *
106 * unformated disks we do not have to care about
100 */ 107 */
101 if (strncmp(type, "CMS1", 4) == 0) { 108 if (info->format == DASD_FORMAT_LDL) {
102 /* 109 if (strncmp(type, "CMS1", 4) == 0) {
103 * VM style CMS1 labeled disk 110 /*
104 */ 111 * VM style CMS1 labeled disk
105 if (label->cms.disk_offset != 0) { 112 */
106 printk("CMS1/%8s(MDSK):", name); 113 if (label->cms.disk_offset != 0) {
107 /* disk is reserved minidisk */ 114 printk("CMS1/%8s(MDSK):", name);
108 blocksize = label->cms.block_size; 115 /* disk is reserved minidisk */
109 offset = label->cms.disk_offset; 116 blocksize = label->cms.block_size;
110 size = (label->cms.block_count - 1) * (blocksize >> 9); 117 offset = label->cms.disk_offset;
118 size = (label->cms.block_count - 1)
119 * (blocksize >> 9);
120 } else {
121 printk("CMS1/%8s:", name);
122 offset = (info->label_block + 1);
123 size = i_size >> 9;
124 }
111 } else { 125 } else {
112 printk("CMS1/%8s:", name); 126 /*
127 * Old style LNX1 or unlabeled disk
128 */
129 if (strncmp(type, "LNX1", 4) == 0)
130 printk ("LNX1/%8s:", name);
131 else
132 printk("(nonl)");
113 offset = (info->label_block + 1); 133 offset = (info->label_block + 1);
114 size = i_size >> 9; 134 size = i_size >> 9;
115 } 135 }
116 put_partition(state, 1, offset*(blocksize >> 9), 136 put_partition(state, 1, offset*(blocksize >> 9),
117 size-offset*(blocksize >> 9)); 137 size-offset*(blocksize >> 9));
118 } else if ((strncmp(type, "VOL1", 4) == 0) && 138 } else if (info->format == DASD_FORMAT_CDL) {
119 (!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
120 /* 139 /*
121 * New style VOL1 labeled disk 140 * New style CDL formatted disk
122 */ 141 */
123 unsigned int blk; 142 unsigned int blk;
124 int counter; 143 int counter;
125 144
126 printk("VOL1/%8s:", name);
127
128 /* get block number and read then go through format1 labels */
129 blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
130 counter = 0;
131 while ((data = read_dev_sector(bdev, blk*(blocksize/512),
132 &sect)) != NULL) {
133 struct vtoc_format1_label f1;
134
135 memcpy(&f1, data, sizeof(struct vtoc_format1_label));
136 put_dev_sector(sect);
137
138 /* skip FMT4 / FMT5 / FMT7 labels */
139 if (f1.DS1FMTID == _ascebc['4']
140 || f1.DS1FMTID == _ascebc['5']
141 || f1.DS1FMTID == _ascebc['7']) {
142 blk++;
143 continue;
144 }
145
146 /* only FMT1 valid at this point */
147 if (f1.DS1FMTID != _ascebc['1'])
148 break;
149
150 /* OK, we got valid partition data */
151 offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
152 size = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
153 offset + geo->sectors;
154 if (counter >= state->limit)
155 break;
156 put_partition(state, counter + 1,
157 offset * (blocksize >> 9),
158 size * (blocksize >> 9));
159 counter++;
160 blk++;
161 }
162 if (!data)
163 /* Are we not supposed to report this ? */
164 goto out_readerr;
165 } else {
166 /* 145 /*
167 * Old style LNX1 or unlabeled disk 146 * check if VOL1 label is available
147 * if not, something is wrong, skipping partition detection
168 */ 148 */
169 if (strncmp(type, "LNX1", 4) == 0) 149 if (strncmp(type, "VOL1", 4) == 0) {
170 printk ("LNX1/%8s:", name); 150 printk("VOL1/%8s:", name);
171 else 151 /*
172 printk("(nonl)/%8s:", name); 152 * get block number and read then go through format1
173 offset = (info->label_block + 1); 153 * labels
174 size = i_size >> 9; 154 */
175 put_partition(state, 1, offset*(blocksize >> 9), 155 blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
176 size-offset*(blocksize >> 9)); 156 counter = 0;
157 data = read_dev_sector(bdev, blk * (blocksize/512),
158 &sect);
159 while (data != NULL) {
160 struct vtoc_format1_label f1;
161
162 memcpy(&f1, data,
163 sizeof(struct vtoc_format1_label));
164 put_dev_sector(sect);
165
166 /* skip FMT4 / FMT5 / FMT7 labels */
167 if (f1.DS1FMTID == _ascebc['4']
168 || f1.DS1FMTID == _ascebc['5']
169 || f1.DS1FMTID == _ascebc['7']) {
170 blk++;
171 data = read_dev_sector(bdev, blk *
172 (blocksize/512),
173 &sect);
174 continue;
175 }
176
177 /* only FMT1 valid at this point */
178 if (f1.DS1FMTID != _ascebc['1'])
179 break;
180
181 /* OK, we got valid partition data */
182 offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
183 size = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
184 offset + geo->sectors;
185 if (counter >= state->limit)
186 break;
187 put_partition(state, counter + 1,
188 offset * (blocksize >> 9),
189 size * (blocksize >> 9));
190 counter++;
191 blk++;
192 data = read_dev_sector(bdev,
193 blk * (blocksize/512),
194 &sect);
195 }
196
197 if (!data)
198 /* Are we not supposed to report this ? */
199 goto out_readerr;
200 } else
201 printk(KERN_WARNING "Warning, expected Label VOL1 not "
202 "found, treating as CDL formated Disk");
203
177 } 204 }
178 205
179 printk("\n"); 206 printk("\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a89592bdf57..d007830d9c87 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
164 page_cache_release(page); 164 page_cache_release(page);
165} 165}
166 166
167/**
168 * generic_pipe_buf_map - virtually map a pipe buffer
169 * @pipe: the pipe that the buffer belongs to
170 * @buf: the buffer that should be mapped
171 * @atomic: whether to use an atomic map
172 *
173 * Description:
174 * This function returns a kernel virtual address mapping for the
175 * passed in @pipe_buffer. If @atomic is set, an atomic map is provided
176 * and the caller has to be careful not to fault before calling
177 * the unmap function.
178 *
179 * Note that this function occupies KM_USER0 if @atomic != 0.
180 */
167void *generic_pipe_buf_map(struct pipe_inode_info *pipe, 181void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
168 struct pipe_buffer *buf, int atomic) 182 struct pipe_buffer *buf, int atomic)
169{ 183{
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
175 return kmap(buf->page); 189 return kmap(buf->page);
176} 190}
177 191
192/**
193 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
194 * @pipe: the pipe that the buffer belongs to
195 * @buf: the buffer that should be unmapped
196 * @map_data: the data that the mapping function returned
197 *
198 * Description:
199 * This function undoes the mapping that ->map() provided.
200 */
178void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, 201void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
179 struct pipe_buffer *buf, void *map_data) 202 struct pipe_buffer *buf, void *map_data)
180{ 203{
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
185 kunmap(buf->page); 208 kunmap(buf->page);
186} 209}
187 210
211/**
212 * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
213 * @pipe: the pipe that the buffer belongs to
214 * @buf: the buffer to attempt to steal
215 *
216 * Description:
217 * This function attempts to steal the @struct page attached to
218 * @buf. If successful, this function returns 0 and returns with
219 * the page locked. The caller may then reuse the page for whatever
220 * he wishes, the typical use is insertion into a different file
221 * page cache.
222 */
188int generic_pipe_buf_steal(struct pipe_inode_info *pipe, 223int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
189 struct pipe_buffer *buf) 224 struct pipe_buffer *buf)
190{ 225{
191 struct page *page = buf->page; 226 struct page *page = buf->page;
192 227
228 /*
229 * A reference of one is golden, that means that the owner of this
230 * page is the only one holding a reference to it. lock the page
231 * and return OK.
232 */
193 if (page_count(page) == 1) { 233 if (page_count(page) == 1) {
194 lock_page(page); 234 lock_page(page);
195 return 0; 235 return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
198 return 1; 238 return 1;
199} 239}
200 240
201void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) 241/**
242 * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
243 * @pipe: the pipe that the buffer belongs to
244 * @buf: the buffer to get a reference to
245 *
246 * Description:
247 * This function grabs an extra reference to @buf. It's used in
248 * in the tee() system call, when we duplicate the buffers in one
249 * pipe into another.
250 */
251void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
202{ 252{
203 page_cache_get(buf->page); 253 page_cache_get(buf->page);
204} 254}
205 255
206int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) 256/**
257 * generic_pipe_buf_confirm - verify contents of the pipe buffer
258 * @pipe: the pipe that the buffer belongs to
259 * @buf: the buffer to confirm
260 *
261 * Description:
262 * This function does nothing, because the generic pipe code uses
263 * pages that are always good when inserted into the pipe.
264 */
265int generic_pipe_buf_confirm(struct pipe_inode_info *info,
266 struct pipe_buffer *buf)
207{ 267{
208 return 0; 268 return 0;
209} 269}
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
212 .can_merge = 1, 272 .can_merge = 1,
213 .map = generic_pipe_buf_map, 273 .map = generic_pipe_buf_map,
214 .unmap = generic_pipe_buf_unmap, 274 .unmap = generic_pipe_buf_unmap,
215 .pin = generic_pipe_buf_pin, 275 .confirm = generic_pipe_buf_confirm,
216 .release = anon_pipe_buf_release, 276 .release = anon_pipe_buf_release,
217 .steal = generic_pipe_buf_steal, 277 .steal = generic_pipe_buf_steal,
218 .get = generic_pipe_buf_get, 278 .get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
252 if (chars > total_len) 312 if (chars > total_len)
253 chars = total_len; 313 chars = total_len;
254 314
255 error = ops->pin(pipe, buf); 315 error = ops->confirm(pipe, buf);
256 if (error) { 316 if (error) {
257 if (!ret) 317 if (!ret)
258 error = ret; 318 error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
373 int error, atomic = 1; 433 int error, atomic = 1;
374 void *addr; 434 void *addr;
375 435
376 error = ops->pin(pipe, buf); 436 error = ops->confirm(pipe, buf);
377 if (error) 437 if (error)
378 goto out; 438 goto out;
379 439
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c0381..98e78e2f18d6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
165 rcu_read_lock(); 165 rcu_read_lock();
166 buffer += sprintf(buffer, 166 buffer += sprintf(buffer,
167 "State:\t%s\n" 167 "State:\t%s\n"
168 "SleepAVG:\t%lu%%\n"
169 "Tgid:\t%d\n" 168 "Tgid:\t%d\n"
170 "Pid:\t%d\n" 169 "Pid:\t%d\n"
171 "PPid:\t%d\n" 170 "PPid:\t%d\n"
@@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
173 "Uid:\t%d\t%d\t%d\t%d\n" 172 "Uid:\t%d\t%d\t%d\t%d\n"
174 "Gid:\t%d\t%d\t%d\t%d\n", 173 "Gid:\t%d\t%d\t%d\t%d\n",
175 get_task_state(p), 174 get_task_state(p),
176 (p->sleep_avg/1024)*100/(1020000000/1024),
177 p->tgid, p->pid, 175 p->tgid, p->pid,
178 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, 176 pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
179 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, 177 pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
@@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer)
312 return buffer - orig; 310 return buffer - orig;
313} 311}
314 312
313static clock_t task_utime(struct task_struct *p)
314{
315 clock_t utime = cputime_to_clock_t(p->utime),
316 total = utime + cputime_to_clock_t(p->stime);
317 u64 temp;
318
319 /*
320 * Use CFS's precise accounting:
321 */
322 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
323
324 if (total) {
325 temp *= utime;
326 do_div(temp, total);
327 }
328 utime = (clock_t)temp;
329
330 return utime;
331}
332
333static clock_t task_stime(struct task_struct *p)
334{
335 clock_t stime = cputime_to_clock_t(p->stime);
336
337 /*
338 * Use CFS's precise accounting. (we subtract utime from
339 * the total, to make sure the total observed by userspace
340 * grows monotonically - apps rely on that):
341 */
342 stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
343
344 return stime;
345}
346
347
315static int do_task_stat(struct task_struct *task, char * buffer, int whole) 348static int do_task_stat(struct task_struct *task, char * buffer, int whole)
316{ 349{
317 unsigned long vsize, eip, esp, wchan = ~0UL; 350 unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
326 unsigned long long start_time; 359 unsigned long long start_time;
327 unsigned long cmin_flt = 0, cmaj_flt = 0; 360 unsigned long cmin_flt = 0, cmaj_flt = 0;
328 unsigned long min_flt = 0, maj_flt = 0; 361 unsigned long min_flt = 0, maj_flt = 0;
329 cputime_t cutime, cstime, utime, stime; 362 cputime_t cutime, cstime;
363 clock_t utime, stime;
330 unsigned long rsslim = 0; 364 unsigned long rsslim = 0;
331 char tcomm[sizeof(task->comm)]; 365 char tcomm[sizeof(task->comm)];
332 unsigned long flags; 366 unsigned long flags;
@@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
344 378
345 sigemptyset(&sigign); 379 sigemptyset(&sigign);
346 sigemptyset(&sigcatch); 380 sigemptyset(&sigcatch);
347 cutime = cstime = utime = stime = cputime_zero; 381 cutime = cstime = cputime_zero;
382 utime = stime = 0;
348 383
349 rcu_read_lock(); 384 rcu_read_lock();
350 if (lock_task_sighand(task, &flags)) { 385 if (lock_task_sighand(task, &flags)) {
@@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
370 do { 405 do {
371 min_flt += t->min_flt; 406 min_flt += t->min_flt;
372 maj_flt += t->maj_flt; 407 maj_flt += t->maj_flt;
373 utime = cputime_add(utime, t->utime); 408 utime += task_utime(t);
374 stime = cputime_add(stime, t->stime); 409 stime += task_stime(t);
375 t = next_thread(t); 410 t = next_thread(t);
376 } while (t != task); 411 } while (t != task);
377 412
378 min_flt += sig->min_flt; 413 min_flt += sig->min_flt;
379 maj_flt += sig->maj_flt; 414 maj_flt += sig->maj_flt;
380 utime = cputime_add(utime, sig->utime); 415 utime += cputime_to_clock_t(sig->utime);
381 stime = cputime_add(stime, sig->stime); 416 stime += cputime_to_clock_t(sig->stime);
382 } 417 }
383 418
384 sid = signal_session(sig); 419 sid = signal_session(sig);
@@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
394 if (!whole) { 429 if (!whole) {
395 min_flt = task->min_flt; 430 min_flt = task->min_flt;
396 maj_flt = task->maj_flt; 431 maj_flt = task->maj_flt;
397 utime = task->utime; 432 utime = task_utime(task);
398 stime = task->stime; 433 stime = task_stime(task);
399 } 434 }
400 435
401 /* scale priority and nice values from timeslices to -20..20 */ 436 /* scale priority and nice values from timeslices to -20..20 */
@@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
426 cmin_flt, 461 cmin_flt,
427 maj_flt, 462 maj_flt,
428 cmaj_flt, 463 cmaj_flt,
429 cputime_to_clock_t(utime), 464 utime,
430 cputime_to_clock_t(stime), 465 stime,
431 cputime_to_clock_t(cutime), 466 cputime_to_clock_t(cutime),
432 cputime_to_clock_t(cstime), 467 cputime_to_clock_t(cstime),
433 priority, 468 priority,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4e..46ea5d56e1bb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
296 */ 296 */
297static int proc_pid_schedstat(struct task_struct *task, char *buffer) 297static int proc_pid_schedstat(struct task_struct *task, char *buffer)
298{ 298{
299 return sprintf(buffer, "%lu %lu %lu\n", 299 return sprintf(buffer, "%llu %llu %lu\n",
300 task->sched_info.cpu_time, 300 task->sched_info.cpu_time,
301 task->sched_info.run_delay, 301 task->sched_info.run_delay,
302 task->sched_info.pcnt); 302 task->sched_info.pcnt);
@@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = {
929}; 929};
930#endif 930#endif
931 931
932#ifdef CONFIG_SCHED_DEBUG
933/*
934 * Print out various scheduling related per-task fields:
935 */
936static int sched_show(struct seq_file *m, void *v)
937{
938 struct inode *inode = m->private;
939 struct task_struct *p;
940
941 WARN_ON(!inode);
942
943 p = get_proc_task(inode);
944 if (!p)
945 return -ESRCH;
946 proc_sched_show_task(p, m);
947
948 put_task_struct(p);
949
950 return 0;
951}
952
953static ssize_t
954sched_write(struct file *file, const char __user *buf,
955 size_t count, loff_t *offset)
956{
957 struct inode *inode = file->f_path.dentry->d_inode;
958 struct task_struct *p;
959
960 WARN_ON(!inode);
961
962 p = get_proc_task(inode);
963 if (!p)
964 return -ESRCH;
965 proc_sched_set_task(p);
966
967 put_task_struct(p);
968
969 return count;
970}
971
972static int sched_open(struct inode *inode, struct file *filp)
973{
974 int ret;
975
976 ret = single_open(filp, sched_show, NULL);
977 if (!ret) {
978 struct seq_file *m = filp->private_data;
979
980 m->private = inode;
981 }
982 return ret;
983}
984
985static const struct file_operations proc_pid_sched_operations = {
986 .open = sched_open,
987 .read = seq_read,
988 .write = sched_write,
989 .llseek = seq_lseek,
990 .release = seq_release,
991};
992
993#endif
994
932static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 995static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
933{ 996{
934 struct inode *inode = dentry->d_inode; 997 struct inode *inode = dentry->d_inode;
@@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = {
1963 INF("environ", S_IRUSR, pid_environ), 2026 INF("environ", S_IRUSR, pid_environ),
1964 INF("auxv", S_IRUSR, pid_auxv), 2027 INF("auxv", S_IRUSR, pid_auxv),
1965 INF("status", S_IRUGO, pid_status), 2028 INF("status", S_IRUGO, pid_status),
2029#ifdef CONFIG_SCHED_DEBUG
2030 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2031#endif
1966 INF("cmdline", S_IRUGO, pid_cmdline), 2032 INF("cmdline", S_IRUGO, pid_cmdline),
1967 INF("stat", S_IRUGO, tgid_stat), 2033 INF("stat", S_IRUGO, tgid_stat),
1968 INF("statm", S_IRUGO, pid_statm), 2034 INF("statm", S_IRUGO, pid_statm),
@@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = {
2247 INF("environ", S_IRUSR, pid_environ), 2313 INF("environ", S_IRUSR, pid_environ),
2248 INF("auxv", S_IRUSR, pid_auxv), 2314 INF("auxv", S_IRUSR, pid_auxv),
2249 INF("status", S_IRUGO, pid_status), 2315 INF("status", S_IRUGO, pid_status),
2316#ifdef CONFIG_SCHED_DEBUG
2317 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2318#endif
2250 INF("cmdline", S_IRUGO, pid_cmdline), 2319 INF("cmdline", S_IRUGO, pid_cmdline),
2251 INF("stat", S_IRUGO, tid_stat), 2320 INF("stat", S_IRUGO, tid_stat),
2252 INF("statm", S_IRUGO, pid_statm), 2321 INF("statm", S_IRUGO, pid_statm),
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 44649981bbc8..867f42b02035 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
25 .read = do_sync_read, 25 .read = do_sync_read,
26 .aio_read = generic_file_aio_read, 26 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_mmap, 27 .mmap = generic_file_mmap,
28 .sendfile = generic_file_sendfile, 28 .splice_read = generic_file_splice_read,
29#ifdef CONFIG_QNX4FS_RW 29#ifdef CONFIG_QNX4FS_RW
30 .write = do_sync_write, 30 .write = do_sync_write,
31 .aio_write = generic_file_aio_write, 31 .aio_write = generic_file_aio_write,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774a124f..97bdc0b2f9d2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
41 .aio_write = generic_file_aio_write, 41 .aio_write = generic_file_aio_write,
42 .mmap = generic_file_mmap, 42 .mmap = generic_file_mmap,
43 .fsync = simple_sync_file, 43 .fsync = simple_sync_file,
44 .sendfile = generic_file_sendfile, 44 .splice_read = generic_file_splice_read,
45 .llseek = generic_file_llseek, 45 .llseek = generic_file_llseek,
46}; 46};
47 47
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5d258c40a2fd..cad2b7ace630 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
42 .write = do_sync_write, 42 .write = do_sync_write,
43 .aio_write = generic_file_aio_write, 43 .aio_write = generic_file_aio_write,
44 .fsync = simple_sync_file, 44 .fsync = simple_sync_file,
45 .sendfile = generic_file_sendfile, 45 .splice_read = generic_file_splice_read,
46 .llseek = generic_file_llseek, 46 .llseek = generic_file_llseek,
47}; 47};
48 48
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008f015b..507ddff48a9a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/splice.h>
18#include "read_write.h" 19#include "read_write.h"
19 20
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
25 .read = do_sync_read, 26 .read = do_sync_read,
26 .aio_read = generic_file_aio_read, 27 .aio_read = generic_file_aio_read,
27 .mmap = generic_file_readonly_mmap, 28 .mmap = generic_file_readonly_mmap,
28 .sendfile = generic_file_sendfile, 29 .splice_read = generic_file_splice_read,
29}; 30};
30 31
31EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
708 struct inode * in_inode, * out_inode; 709 struct inode * in_inode, * out_inode;
709 loff_t pos; 710 loff_t pos;
710 ssize_t retval; 711 ssize_t retval;
711 int fput_needed_in, fput_needed_out; 712 int fput_needed_in, fput_needed_out, fl;
712 713
713 /* 714 /*
714 * Get input file, and verify that it is ok.. 715 * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
723 in_inode = in_file->f_path.dentry->d_inode; 724 in_inode = in_file->f_path.dentry->d_inode;
724 if (!in_inode) 725 if (!in_inode)
725 goto fput_in; 726 goto fput_in;
726 if (!in_file->f_op || !in_file->f_op->sendfile) 727 if (!in_file->f_op || !in_file->f_op->splice_read)
727 goto fput_in; 728 goto fput_in;
728 retval = -ESPIPE; 729 retval = -ESPIPE;
729 if (!ppos) 730 if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
776 count = max - pos; 777 count = max - pos;
777 } 778 }
778 779
779 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 780 fl = 0;
781#if 0
782 /*
783 * We need to debate whether we can enable this or not. The
784 * man page documents EAGAIN return for the output at least,
785 * and the application is arguably buggy if it doesn't expect
786 * EAGAIN on a non-blocking file descriptor.
787 */
788 if (in_file->f_flags & O_NONBLOCK)
789 fl = SPLICE_F_NONBLOCK;
790#endif
791 retval = do_splice_direct(in_file, ppos, out_file, count, fl);
780 792
781 if (retval > 0) { 793 if (retval > 0) {
782 add_rchar(current, retval); 794 add_rchar(current, retval);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a68580f..30eebfb1b2d8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
1531 .open = generic_file_open, 1531 .open = generic_file_open,
1532 .release = reiserfs_file_release, 1532 .release = reiserfs_file_release,
1533 .fsync = reiserfs_sync_file, 1533 .fsync = reiserfs_sync_file,
1534 .sendfile = generic_file_sendfile,
1535 .aio_read = generic_file_aio_read, 1534 .aio_read = generic_file_aio_read,
1536 .aio_write = generic_file_aio_write, 1535 .aio_write = generic_file_aio_write,
1537 .splice_read = generic_file_splice_read, 1536 .splice_read = generic_file_splice_read,
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0ac22af7afe5..49194a4e6b91 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -447,3 +447,37 @@ int seq_puts(struct seq_file *m, const char *s)
447 return -1; 447 return -1;
448} 448}
449EXPORT_SYMBOL(seq_puts); 449EXPORT_SYMBOL(seq_puts);
450
451struct list_head *seq_list_start(struct list_head *head, loff_t pos)
452{
453 struct list_head *lh;
454
455 list_for_each(lh, head)
456 if (pos-- == 0)
457 return lh;
458
459 return NULL;
460}
461
462EXPORT_SYMBOL(seq_list_start);
463
464struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
465{
466 if (!pos)
467 return head;
468
469 return seq_list_start(head, pos - 1);
470}
471
472EXPORT_SYMBOL(seq_list_start_head);
473
474struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
475{
476 struct list_head *lh;
477
478 lh = ((struct list_head *)v)->next;
479 ++*ppos;
480 return lh == head ? NULL : lh;
481}
482
483EXPORT_SYMBOL(seq_list_next);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8aa54c0..c5d78a7e492b 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
262} 262}
263 263
264static ssize_t 264static ssize_t
265smb_file_sendfile(struct file *file, loff_t *ppos, 265smb_file_splice_read(struct file *file, loff_t *ppos,
266 size_t count, read_actor_t actor, void *target) 266 struct pipe_inode_info *pipe, size_t count,
267 unsigned int flags)
267{ 268{
268 struct dentry *dentry = file->f_path.dentry; 269 struct dentry *dentry = file->f_path.dentry;
269 ssize_t status; 270 ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
277 DENTRY_PATH(dentry), status); 278 DENTRY_PATH(dentry), status);
278 goto out; 279 goto out;
279 } 280 }
280 status = generic_file_sendfile(file, ppos, count, actor, target); 281 status = generic_file_splice_read(file, ppos, pipe, count, flags);
281out: 282out:
282 return status; 283 return status;
283} 284}
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
416 .open = smb_file_open, 417 .open = smb_file_open,
417 .release = smb_file_release, 418 .release = smb_file_release,
418 .fsync = smb_fsync, 419 .fsync = smb_fsync,
419 .sendfile = smb_file_sendfile, 420 .splice_read = smb_file_splice_read,
420}; 421};
421 422
422const struct inode_operations smb_file_inode_operations = 423const struct inode_operations smb_file_inode_operations =
diff --git a/fs/splice.c b/fs/splice.c
index e7d7080de2f9..ed2ce995475c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/file.h> 21#include <linux/file.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/pipe_fs_i.h> 23#include <linux/splice.h>
24#include <linux/mm_inline.h> 24#include <linux/mm_inline.h>
25#include <linux/swap.h> 25#include <linux/swap.h>
26#include <linux/writeback.h> 26#include <linux/writeback.h>
@@ -29,22 +29,6 @@
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h> 30#include <linux/uio.h>
31 31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
36
37/*
38 * Passed to splice_to_pipe
39 */
40struct splice_pipe_desc {
41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
44 unsigned int flags; /* splice flags */
45 const struct pipe_buf_operations *ops;/* ops associated with output pipe */
46};
47
48/* 32/*
49 * Attempt to steal a page from a pipe buffer. This should perhaps go into 33 * Attempt to steal a page from a pipe buffer. This should perhaps go into
50 * a vm helper function, it's already simplified quite a bit by the 34 * a vm helper function, it's already simplified quite a bit by the
@@ -101,8 +85,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
101 buf->flags &= ~PIPE_BUF_FLAG_LRU; 85 buf->flags &= ~PIPE_BUF_FLAG_LRU;
102} 86}
103 87
104static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe, 88/*
105 struct pipe_buffer *buf) 89 * Check whether the contents of buf is OK to access. Since the content
90 * is a page cache page, IO may be in flight.
91 */
92static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
93 struct pipe_buffer *buf)
106{ 94{
107 struct page *page = buf->page; 95 struct page *page = buf->page;
108 int err; 96 int err;
@@ -143,7 +131,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
143 .can_merge = 0, 131 .can_merge = 0,
144 .map = generic_pipe_buf_map, 132 .map = generic_pipe_buf_map,
145 .unmap = generic_pipe_buf_unmap, 133 .unmap = generic_pipe_buf_unmap,
146 .pin = page_cache_pipe_buf_pin, 134 .confirm = page_cache_pipe_buf_confirm,
147 .release = page_cache_pipe_buf_release, 135 .release = page_cache_pipe_buf_release,
148 .steal = page_cache_pipe_buf_steal, 136 .steal = page_cache_pipe_buf_steal,
149 .get = generic_pipe_buf_get, 137 .get = generic_pipe_buf_get,
@@ -163,18 +151,25 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
163 .can_merge = 0, 151 .can_merge = 0,
164 .map = generic_pipe_buf_map, 152 .map = generic_pipe_buf_map,
165 .unmap = generic_pipe_buf_unmap, 153 .unmap = generic_pipe_buf_unmap,
166 .pin = generic_pipe_buf_pin, 154 .confirm = generic_pipe_buf_confirm,
167 .release = page_cache_pipe_buf_release, 155 .release = page_cache_pipe_buf_release,
168 .steal = user_page_pipe_buf_steal, 156 .steal = user_page_pipe_buf_steal,
169 .get = generic_pipe_buf_get, 157 .get = generic_pipe_buf_get,
170}; 158};
171 159
172/* 160/**
173 * Pipe output worker. This sets up our pipe format with the page cache 161 * splice_to_pipe - fill passed data into a pipe
174 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 162 * @pipe: pipe to fill
163 * @spd: data to fill
164 *
165 * Description:
166 * @spd contains a map of pages and len/offset tuples, along with
167 * the struct pipe_buf_operations associated with these pages. This
168 * function will link that data to the pipe.
169 *
175 */ 170 */
176static ssize_t splice_to_pipe(struct pipe_inode_info *pipe, 171ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
177 struct splice_pipe_desc *spd) 172 struct splice_pipe_desc *spd)
178{ 173{
179 unsigned int spd_pages = spd->nr_pages; 174 unsigned int spd_pages = spd->nr_pages;
180 int ret, do_wakeup, page_nr; 175 int ret, do_wakeup, page_nr;
@@ -201,6 +196,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
201 buf->page = spd->pages[page_nr]; 196 buf->page = spd->pages[page_nr];
202 buf->offset = spd->partial[page_nr].offset; 197 buf->offset = spd->partial[page_nr].offset;
203 buf->len = spd->partial[page_nr].len; 198 buf->len = spd->partial[page_nr].len;
199 buf->private = spd->partial[page_nr].private;
204 buf->ops = spd->ops; 200 buf->ops = spd->ops;
205 if (spd->flags & SPLICE_F_GIFT) 201 if (spd->flags & SPLICE_F_GIFT)
206 buf->flags |= PIPE_BUF_FLAG_GIFT; 202 buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -296,19 +292,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
296 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages); 292 page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
297 293
298 /* 294 /*
299 * Now fill in the holes:
300 */
301 error = 0;
302
303 /*
304 * Lookup the (hopefully) full range of pages we need. 295 * Lookup the (hopefully) full range of pages we need.
305 */ 296 */
306 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages); 297 spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
307 298
308 /* 299 /*
309 * If find_get_pages_contig() returned fewer pages than we needed, 300 * If find_get_pages_contig() returned fewer pages than we needed,
310 * allocate the rest. 301 * allocate the rest and fill in the holes.
311 */ 302 */
303 error = 0;
312 index += spd.nr_pages; 304 index += spd.nr_pages;
313 while (spd.nr_pages < nr_pages) { 305 while (spd.nr_pages < nr_pages) {
314 /* 306 /*
@@ -470,11 +462,16 @@ fill_it:
470/** 462/**
471 * generic_file_splice_read - splice data from file to a pipe 463 * generic_file_splice_read - splice data from file to a pipe
472 * @in: file to splice from 464 * @in: file to splice from
465 * @ppos: position in @in
473 * @pipe: pipe to splice to 466 * @pipe: pipe to splice to
474 * @len: number of bytes to splice 467 * @len: number of bytes to splice
475 * @flags: splice modifier flags 468 * @flags: splice modifier flags
476 * 469 *
477 * Will read pages from given file and fill them into a pipe. 470 * Description:
471 * Will read pages from given file and fill them into a pipe. Can be
472 * used as long as the address_space operations for the source implement
473 * a readpage() hook.
474 *
478 */ 475 */
479ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 476ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
480 struct pipe_inode_info *pipe, size_t len, 477 struct pipe_inode_info *pipe, size_t len,
@@ -528,11 +525,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
528static int pipe_to_sendpage(struct pipe_inode_info *pipe, 525static int pipe_to_sendpage(struct pipe_inode_info *pipe,
529 struct pipe_buffer *buf, struct splice_desc *sd) 526 struct pipe_buffer *buf, struct splice_desc *sd)
530{ 527{
531 struct file *file = sd->file; 528 struct file *file = sd->u.file;
532 loff_t pos = sd->pos; 529 loff_t pos = sd->pos;
533 int ret, more; 530 int ret, more;
534 531
535 ret = buf->ops->pin(pipe, buf); 532 ret = buf->ops->confirm(pipe, buf);
536 if (!ret) { 533 if (!ret) {
537 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 534 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
538 535
@@ -566,7 +563,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
566static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 563static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
567 struct splice_desc *sd) 564 struct splice_desc *sd)
568{ 565{
569 struct file *file = sd->file; 566 struct file *file = sd->u.file;
570 struct address_space *mapping = file->f_mapping; 567 struct address_space *mapping = file->f_mapping;
571 unsigned int offset, this_len; 568 unsigned int offset, this_len;
572 struct page *page; 569 struct page *page;
@@ -576,7 +573,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
576 /* 573 /*
577 * make sure the data in this buffer is uptodate 574 * make sure the data in this buffer is uptodate
578 */ 575 */
579 ret = buf->ops->pin(pipe, buf); 576 ret = buf->ops->confirm(pipe, buf);
580 if (unlikely(ret)) 577 if (unlikely(ret))
581 return ret; 578 return ret;
582 579
@@ -663,36 +660,37 @@ out_ret:
663 return ret; 660 return ret;
664} 661}
665 662
666/* 663/**
667 * Pipe input worker. Most of this logic works like a regular pipe, the 664 * __splice_from_pipe - splice data from a pipe to given actor
668 * key here is the 'actor' worker passed in that actually moves the data 665 * @pipe: pipe to splice from
669 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 666 * @sd: information to @actor
667 * @actor: handler that splices the data
668 *
669 * Description:
670 * This function does little more than loop over the pipe and call
671 * @actor to do the actual moving of a single struct pipe_buffer to
672 * the desired destination. See pipe_to_file, pipe_to_sendpage, or
673 * pipe_to_user.
674 *
670 */ 675 */
671ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, 676ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
672 struct file *out, loff_t *ppos, size_t len, 677 splice_actor *actor)
673 unsigned int flags, splice_actor *actor)
674{ 678{
675 int ret, do_wakeup, err; 679 int ret, do_wakeup, err;
676 struct splice_desc sd;
677 680
678 ret = 0; 681 ret = 0;
679 do_wakeup = 0; 682 do_wakeup = 0;
680 683
681 sd.total_len = len;
682 sd.flags = flags;
683 sd.file = out;
684 sd.pos = *ppos;
685
686 for (;;) { 684 for (;;) {
687 if (pipe->nrbufs) { 685 if (pipe->nrbufs) {
688 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; 686 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
689 const struct pipe_buf_operations *ops = buf->ops; 687 const struct pipe_buf_operations *ops = buf->ops;
690 688
691 sd.len = buf->len; 689 sd->len = buf->len;
692 if (sd.len > sd.total_len) 690 if (sd->len > sd->total_len)
693 sd.len = sd.total_len; 691 sd->len = sd->total_len;
694 692
695 err = actor(pipe, buf, &sd); 693 err = actor(pipe, buf, sd);
696 if (err <= 0) { 694 if (err <= 0) {
697 if (!ret && err != -ENODATA) 695 if (!ret && err != -ENODATA)
698 ret = err; 696 ret = err;
@@ -704,10 +702,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
704 buf->offset += err; 702 buf->offset += err;
705 buf->len -= err; 703 buf->len -= err;
706 704
707 sd.len -= err; 705 sd->len -= err;
708 sd.pos += err; 706 sd->pos += err;
709 sd.total_len -= err; 707 sd->total_len -= err;
710 if (sd.len) 708 if (sd->len)
711 continue; 709 continue;
712 710
713 if (!buf->len) { 711 if (!buf->len) {
@@ -719,7 +717,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
719 do_wakeup = 1; 717 do_wakeup = 1;
720 } 718 }
721 719
722 if (!sd.total_len) 720 if (!sd->total_len)
723 break; 721 break;
724 } 722 }
725 723
@@ -732,7 +730,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
732 break; 730 break;
733 } 731 }
734 732
735 if (flags & SPLICE_F_NONBLOCK) { 733 if (sd->flags & SPLICE_F_NONBLOCK) {
736 if (!ret) 734 if (!ret)
737 ret = -EAGAIN; 735 ret = -EAGAIN;
738 break; 736 break;
@@ -766,12 +764,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
766} 764}
767EXPORT_SYMBOL(__splice_from_pipe); 765EXPORT_SYMBOL(__splice_from_pipe);
768 766
767/**
768 * splice_from_pipe - splice data from a pipe to a file
769 * @pipe: pipe to splice from
770 * @out: file to splice to
771 * @ppos: position in @out
772 * @len: how many bytes to splice
773 * @flags: splice modifier flags
774 * @actor: handler that splices the data
775 *
776 * Description:
777 * See __splice_from_pipe. This function locks the input and output inodes,
778 * otherwise it's identical to __splice_from_pipe().
779 *
780 */
769ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 781ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
770 loff_t *ppos, size_t len, unsigned int flags, 782 loff_t *ppos, size_t len, unsigned int flags,
771 splice_actor *actor) 783 splice_actor *actor)
772{ 784{
773 ssize_t ret; 785 ssize_t ret;
774 struct inode *inode = out->f_mapping->host; 786 struct inode *inode = out->f_mapping->host;
787 struct splice_desc sd = {
788 .total_len = len,
789 .flags = flags,
790 .pos = *ppos,
791 .u.file = out,
792 };
775 793
776 /* 794 /*
777 * The actor worker might be calling ->prepare_write and 795 * The actor worker might be calling ->prepare_write and
@@ -780,7 +798,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
780 * pipe->inode, we have to order lock acquiry here. 798 * pipe->inode, we have to order lock acquiry here.
781 */ 799 */
782 inode_double_lock(inode, pipe->inode); 800 inode_double_lock(inode, pipe->inode);
783 ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); 801 ret = __splice_from_pipe(pipe, &sd, actor);
784 inode_double_unlock(inode, pipe->inode); 802 inode_double_unlock(inode, pipe->inode);
785 803
786 return ret; 804 return ret;
@@ -790,12 +808,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
790 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes 808 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
791 * @pipe: pipe info 809 * @pipe: pipe info
792 * @out: file to write to 810 * @out: file to write to
811 * @ppos: position in @out
793 * @len: number of bytes to splice 812 * @len: number of bytes to splice
794 * @flags: splice modifier flags 813 * @flags: splice modifier flags
795 * 814 *
796 * Will either move or copy pages (determined by @flags options) from 815 * Description:
797 * the given pipe inode to the given file. The caller is responsible 816 * Will either move or copy pages (determined by @flags options) from
798 * for acquiring i_mutex on both inodes. 817 * the given pipe inode to the given file. The caller is responsible
818 * for acquiring i_mutex on both inodes.
799 * 819 *
800 */ 820 */
801ssize_t 821ssize_t
@@ -804,6 +824,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
804{ 824{
805 struct address_space *mapping = out->f_mapping; 825 struct address_space *mapping = out->f_mapping;
806 struct inode *inode = mapping->host; 826 struct inode *inode = mapping->host;
827 struct splice_desc sd = {
828 .total_len = len,
829 .flags = flags,
830 .pos = *ppos,
831 .u.file = out,
832 };
807 ssize_t ret; 833 ssize_t ret;
808 int err; 834 int err;
809 835
@@ -811,7 +837,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
811 if (unlikely(err)) 837 if (unlikely(err))
812 return err; 838 return err;
813 839
814 ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 840 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
815 if (ret > 0) { 841 if (ret > 0) {
816 unsigned long nr_pages; 842 unsigned long nr_pages;
817 843
@@ -841,11 +867,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
841 * generic_file_splice_write - splice data from a pipe to a file 867 * generic_file_splice_write - splice data from a pipe to a file
842 * @pipe: pipe info 868 * @pipe: pipe info
843 * @out: file to write to 869 * @out: file to write to
870 * @ppos: position in @out
844 * @len: number of bytes to splice 871 * @len: number of bytes to splice
845 * @flags: splice modifier flags 872 * @flags: splice modifier flags
846 * 873 *
847 * Will either move or copy pages (determined by @flags options) from 874 * Description:
848 * the given pipe inode to the given file. 875 * Will either move or copy pages (determined by @flags options) from
876 * the given pipe inode to the given file.
849 * 877 *
850 */ 878 */
851ssize_t 879ssize_t
@@ -896,13 +924,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
896 924
897/** 925/**
898 * generic_splice_sendpage - splice data from a pipe to a socket 926 * generic_splice_sendpage - splice data from a pipe to a socket
899 * @inode: pipe inode 927 * @pipe: pipe to splice from
900 * @out: socket to write to 928 * @out: socket to write to
929 * @ppos: position in @out
901 * @len: number of bytes to splice 930 * @len: number of bytes to splice
902 * @flags: splice modifier flags 931 * @flags: splice modifier flags
903 * 932 *
904 * Will send @len bytes from the pipe to a network socket. No data copying 933 * Description:
905 * is involved. 934 * Will send @len bytes from the pipe to a network socket. No data copying
935 * is involved.
906 * 936 *
907 */ 937 */
908ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 938ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -956,14 +986,27 @@ static long do_splice_to(struct file *in, loff_t *ppos,
956 return in->f_op->splice_read(in, ppos, pipe, len, flags); 986 return in->f_op->splice_read(in, ppos, pipe, len, flags);
957} 987}
958 988
959long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 989/**
960 size_t len, unsigned int flags) 990 * splice_direct_to_actor - splices data directly between two non-pipes
991 * @in: file to splice from
992 * @sd: actor information on where to splice to
993 * @actor: handles the data splicing
994 *
995 * Description:
996 * This is a special case helper to splice directly between two
997 * points, without requiring an explicit pipe. Internally an allocated
998 * pipe is cached in the process, and reused during the life time of
999 * that process.
1000 *
1001 */
1002ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1003 splice_direct_actor *actor)
961{ 1004{
962 struct pipe_inode_info *pipe; 1005 struct pipe_inode_info *pipe;
963 long ret, bytes; 1006 long ret, bytes;
964 loff_t out_off;
965 umode_t i_mode; 1007 umode_t i_mode;
966 int i; 1008 size_t len;
1009 int i, flags;
967 1010
968 /* 1011 /*
969 * We require the input being a regular file, as we don't want to 1012 * We require the input being a regular file, as we don't want to
@@ -999,7 +1042,13 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
999 */ 1042 */
1000 ret = 0; 1043 ret = 0;
1001 bytes = 0; 1044 bytes = 0;
1002 out_off = 0; 1045 len = sd->total_len;
1046 flags = sd->flags;
1047
1048 /*
1049 * Don't block on output, we have to drain the direct pipe.
1050 */
1051 sd->flags &= ~SPLICE_F_NONBLOCK;
1003 1052
1004 while (len) { 1053 while (len) {
1005 size_t read_len, max_read_len; 1054 size_t read_len, max_read_len;
@@ -1009,19 +1058,19 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1009 */ 1058 */
1010 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE)); 1059 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
1011 1060
1012 ret = do_splice_to(in, ppos, pipe, max_read_len, flags); 1061 ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags);
1013 if (unlikely(ret < 0)) 1062 if (unlikely(ret < 0))
1014 goto out_release; 1063 goto out_release;
1015 1064
1016 read_len = ret; 1065 read_len = ret;
1066 sd->total_len = read_len;
1017 1067
1018 /* 1068 /*
1019 * NOTE: nonblocking mode only applies to the input. We 1069 * NOTE: nonblocking mode only applies to the input. We
1020 * must not do the output in nonblocking mode as then we 1070 * must not do the output in nonblocking mode as then we
1021 * could get stuck data in the internal pipe: 1071 * could get stuck data in the internal pipe:
1022 */ 1072 */
1023 ret = do_splice_from(pipe, out, &out_off, read_len, 1073 ret = actor(pipe, sd);
1024 flags & ~SPLICE_F_NONBLOCK);
1025 if (unlikely(ret < 0)) 1074 if (unlikely(ret < 0))
1026 goto out_release; 1075 goto out_release;
1027 1076
@@ -1066,6 +1115,48 @@ out_release:
1066 return bytes; 1115 return bytes;
1067 1116
1068 return ret; 1117 return ret;
1118
1119}
1120EXPORT_SYMBOL(splice_direct_to_actor);
1121
1122static int direct_splice_actor(struct pipe_inode_info *pipe,
1123 struct splice_desc *sd)
1124{
1125 struct file *file = sd->u.file;
1126
1127 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
1128}
1129
1130/**
1131 * do_splice_direct - splices data directly between two files
1132 * @in: file to splice from
1133 * @ppos: input file offset
1134 * @out: file to splice to
1135 * @len: number of bytes to splice
1136 * @flags: splice modifier flags
1137 *
1138 * Description:
1139 * For use by do_sendfile(). splice can easily emulate sendfile, but
1140 * doing it in the application would incur an extra system call
1141 * (splice in + splice out, as compared to just sendfile()). So this helper
1142 * can splice directly through a process-private pipe.
1143 *
1144 */
1145long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1146 size_t len, unsigned int flags)
1147{
1148 struct splice_desc sd = {
1149 .len = len,
1150 .total_len = len,
1151 .flags = flags,
1152 .pos = *ppos,
1153 .u.file = out,
1154 };
1155 size_t ret;
1156
1157 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1158 *ppos = sd.pos;
1159 return ret;
1069} 1160}
1070 1161
1071/* 1162/*
@@ -1248,28 +1339,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1248 return error; 1339 return error;
1249} 1340}
1250 1341
1342static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1343 struct splice_desc *sd)
1344{
1345 char *src;
1346 int ret;
1347
1348 ret = buf->ops->confirm(pipe, buf);
1349 if (unlikely(ret))
1350 return ret;
1351
1352 /*
1353 * See if we can use the atomic maps, by prefaulting in the
1354 * pages and doing an atomic copy
1355 */
1356 if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
1357 src = buf->ops->map(pipe, buf, 1);
1358 ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
1359 sd->len);
1360 buf->ops->unmap(pipe, buf, src);
1361 if (!ret) {
1362 ret = sd->len;
1363 goto out;
1364 }
1365 }
1366
1367 /*
1368 * No dice, use slow non-atomic map and copy
1369 */
1370 src = buf->ops->map(pipe, buf, 0);
1371
1372 ret = sd->len;
1373 if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
1374 ret = -EFAULT;
1375
1376out:
1377 if (ret > 0)
1378 sd->u.userptr += ret;
1379 buf->ops->unmap(pipe, buf, src);
1380 return ret;
1381}
1382
1383/*
1384 * For lack of a better implementation, implement vmsplice() to userspace
1385 * as a simple copy of the pipes pages to the user iov.
1386 */
1387static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1388 unsigned long nr_segs, unsigned int flags)
1389{
1390 struct pipe_inode_info *pipe;
1391 struct splice_desc sd;
1392 ssize_t size;
1393 int error;
1394 long ret;
1395
1396 pipe = pipe_info(file->f_path.dentry->d_inode);
1397 if (!pipe)
1398 return -EBADF;
1399
1400 if (pipe->inode)
1401 mutex_lock(&pipe->inode->i_mutex);
1402
1403 error = ret = 0;
1404 while (nr_segs) {
1405 void __user *base;
1406 size_t len;
1407
1408 /*
1409 * Get user address base and length for this iovec.
1410 */
1411 error = get_user(base, &iov->iov_base);
1412 if (unlikely(error))
1413 break;
1414 error = get_user(len, &iov->iov_len);
1415 if (unlikely(error))
1416 break;
1417
1418 /*
1419 * Sanity check this iovec. 0 read succeeds.
1420 */
1421 if (unlikely(!len))
1422 break;
1423 if (unlikely(!base)) {
1424 error = -EFAULT;
1425 break;
1426 }
1427
1428 sd.len = 0;
1429 sd.total_len = len;
1430 sd.flags = flags;
1431 sd.u.userptr = base;
1432 sd.pos = 0;
1433
1434 size = __splice_from_pipe(pipe, &sd, pipe_to_user);
1435 if (size < 0) {
1436 if (!ret)
1437 ret = size;
1438
1439 break;
1440 }
1441
1442 ret += size;
1443
1444 if (size < len)
1445 break;
1446
1447 nr_segs--;
1448 iov++;
1449 }
1450
1451 if (pipe->inode)
1452 mutex_unlock(&pipe->inode->i_mutex);
1453
1454 if (!ret)
1455 ret = error;
1456
1457 return ret;
1458}
1459
1251/* 1460/*
1252 * vmsplice splices a user address range into a pipe. It can be thought of 1461 * vmsplice splices a user address range into a pipe. It can be thought of
1253 * as splice-from-memory, where the regular splice is splice-from-file (or 1462 * as splice-from-memory, where the regular splice is splice-from-file (or
1254 * to file). In both cases the output is a pipe, naturally. 1463 * to file). In both cases the output is a pipe, naturally.
1255 *
1256 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1257 * not the other way around. Splicing from user memory is a simple operation
1258 * that can be supported without any funky alignment restrictions or nasty
1259 * vm tricks. We simply map in the user memory and fill them into a pipe.
1260 * The reverse isn't quite as easy, though. There are two possible solutions
1261 * for that:
1262 *
1263 * - memcpy() the data internally, at which point we might as well just
1264 * do a regular read() on the buffer anyway.
1265 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1266 * has restriction limitations on both ends of the pipe).
1267 *
1268 * Alas, it isn't here.
1269 *
1270 */ 1464 */
1271static long do_vmsplice(struct file *file, const struct iovec __user *iov, 1465static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1272 unsigned long nr_segs, unsigned int flags) 1466 unsigned long nr_segs, unsigned int flags)
1273{ 1467{
1274 struct pipe_inode_info *pipe; 1468 struct pipe_inode_info *pipe;
1275 struct page *pages[PIPE_BUFFERS]; 1469 struct page *pages[PIPE_BUFFERS];
@@ -1284,10 +1478,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1284 pipe = pipe_info(file->f_path.dentry->d_inode); 1478 pipe = pipe_info(file->f_path.dentry->d_inode);
1285 if (!pipe) 1479 if (!pipe)
1286 return -EBADF; 1480 return -EBADF;
1287 if (unlikely(nr_segs > UIO_MAXIOV))
1288 return -EINVAL;
1289 else if (unlikely(!nr_segs))
1290 return 0;
1291 1481
1292 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, 1482 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
1293 flags & SPLICE_F_GIFT); 1483 flags & SPLICE_F_GIFT);
@@ -1297,6 +1487,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1297 return splice_to_pipe(pipe, &spd); 1487 return splice_to_pipe(pipe, &spd);
1298} 1488}
1299 1489
1490/*
1491 * Note that vmsplice only really supports true splicing _from_ user memory
1492 * to a pipe, not the other way around. Splicing from user memory is a simple
1493 * operation that can be supported without any funky alignment restrictions
1494 * or nasty vm tricks. We simply map in the user memory and fill them into
1495 * a pipe. The reverse isn't quite as easy, though. There are two possible
1496 * solutions for that:
1497 *
1498 * - memcpy() the data internally, at which point we might as well just
1499 * do a regular read() on the buffer anyway.
1500 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1501 * has restriction limitations on both ends of the pipe).
1502 *
1503 * Currently we punt and implement it as a normal copy, see pipe_to_user().
1504 *
1505 */
1300asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, 1506asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1301 unsigned long nr_segs, unsigned int flags) 1507 unsigned long nr_segs, unsigned int flags)
1302{ 1508{
@@ -1304,11 +1510,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1304 long error; 1510 long error;
1305 int fput; 1511 int fput;
1306 1512
1513 if (unlikely(nr_segs > UIO_MAXIOV))
1514 return -EINVAL;
1515 else if (unlikely(!nr_segs))
1516 return 0;
1517
1307 error = -EBADF; 1518 error = -EBADF;
1308 file = fget_light(fd, &fput); 1519 file = fget_light(fd, &fput);
1309 if (file) { 1520 if (file) {
1310 if (file->f_mode & FMODE_WRITE) 1521 if (file->f_mode & FMODE_WRITE)
1311 error = do_vmsplice(file, iov, nr_segs, flags); 1522 error = vmsplice_to_pipe(file, iov, nr_segs, flags);
1523 else if (file->f_mode & FMODE_READ)
1524 error = vmsplice_to_user(file, iov, nr_segs, flags);
1312 1525
1313 fput_light(file, fput); 1526 fput_light(file, fput);
1314 } 1527 }
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb9020b..589be21d884e 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
27 .aio_write = generic_file_aio_write, 27 .aio_write = generic_file_aio_write,
28 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
29 .fsync = sysv_sync_file, 29 .fsync = sysv_sync_file,
30 .sendfile = generic_file_sendfile, 30 .splice_read = generic_file_splice_read,
31}; 31};
32 32
33const struct inode_operations sysv_file_inode_operations = { 33const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764685e7..df070bee8d4f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
261 .aio_write = udf_file_aio_write, 261 .aio_write = udf_file_aio_write,
262 .release = udf_release_file, 262 .release = udf_release_file,
263 .fsync = udf_fsync_file, 263 .fsync = udf_fsync_file,
264 .sendfile = generic_file_sendfile, 264 .splice_read = generic_file_splice_read,
265}; 265};
266 266
267const struct inode_operations udf_file_inode_operations = { 267const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e096323bad4..6705d74c6d2d 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
60 .mmap = generic_file_mmap, 60 .mmap = generic_file_mmap,
61 .open = generic_file_open, 61 .open = generic_file_open,
62 .fsync = ufs_sync_file, 62 .fsync = ufs_sync_file,
63 .sendfile = generic_file_sendfile, 63 .splice_read = generic_file_splice_read,
64}; 64};
diff --git a/fs/utimes.c b/fs/utimes.c
index 480f7c8c29da..b3c88952465f 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
106 if (IS_IMMUTABLE(inode)) 106 if (IS_IMMUTABLE(inode))
107 goto dput_and_out; 107 goto dput_and_out;
108 108
109 if (current->fsuid != inode->i_uid && 109 if (current->fsuid != inode->i_uid) {
110 (error = vfs_permission(&nd, MAY_WRITE)) != 0) 110 if (f) {
111 goto dput_and_out; 111 if (!(f->f_mode & FMODE_WRITE))
112 goto dput_and_out;
113 } else {
114 error = vfs_permission(&nd, MAY_WRITE);
115 if (error)
116 goto dput_and_out;
117 }
118 }
112 } 119 }
113 mutex_lock(&inode->i_mutex); 120 mutex_lock(&inode->i_mutex);
114 error = notify_change(dentry, &newattrs); 121 error = notify_change(dentry, &newattrs);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc961355..8c43cd2e237a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
124} 124}
125 125
126STATIC ssize_t 126STATIC ssize_t
127xfs_file_sendfile(
128 struct file *filp,
129 loff_t *pos,
130 size_t count,
131 read_actor_t actor,
132 void *target)
133{
134 return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
135 filp, pos, 0, count, actor, target, NULL);
136}
137
138STATIC ssize_t
139xfs_file_sendfile_invis(
140 struct file *filp,
141 loff_t *pos,
142 size_t count,
143 read_actor_t actor,
144 void *target)
145{
146 return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
147 filp, pos, IO_INVIS, count, actor, target, NULL);
148}
149
150STATIC ssize_t
151xfs_file_splice_read( 127xfs_file_splice_read(
152 struct file *infilp, 128 struct file *infilp,
153 loff_t *ppos, 129 loff_t *ppos,
@@ -452,7 +428,6 @@ const struct file_operations xfs_file_operations = {
452 .write = do_sync_write, 428 .write = do_sync_write,
453 .aio_read = xfs_file_aio_read, 429 .aio_read = xfs_file_aio_read,
454 .aio_write = xfs_file_aio_write, 430 .aio_write = xfs_file_aio_write,
455 .sendfile = xfs_file_sendfile,
456 .splice_read = xfs_file_splice_read, 431 .splice_read = xfs_file_splice_read,
457 .splice_write = xfs_file_splice_write, 432 .splice_write = xfs_file_splice_write,
458 .unlocked_ioctl = xfs_file_ioctl, 433 .unlocked_ioctl = xfs_file_ioctl,
@@ -475,7 +450,6 @@ const struct file_operations xfs_invis_file_operations = {
475 .write = do_sync_write, 450 .write = do_sync_write,
476 .aio_read = xfs_file_aio_read_invis, 451 .aio_read = xfs_file_aio_read_invis,
477 .aio_write = xfs_file_aio_write_invis, 452 .aio_write = xfs_file_aio_write_invis,
478 .sendfile = xfs_file_sendfile_invis,
479 .splice_read = xfs_file_splice_read_invis, 453 .splice_read = xfs_file_splice_read_invis,
480 .splice_write = xfs_file_splice_write_invis, 454 .splice_write = xfs_file_splice_write_invis,
481 .unlocked_ioctl = xfs_file_ioctl_invis, 455 .unlocked_ioctl = xfs_file_ioctl_invis,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad7dd4d..af24a457d3a3 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
101 * Feature macros (disable/enable) 101 * Feature macros (disable/enable)
102 */ 102 */
103#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ 103#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
104#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
105#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ 104#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
106#ifdef CONFIG_SMP 105#ifdef CONFIG_SMP
107#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 106#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ed90403f0ee7..765ec16a6e39 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -287,50 +287,6 @@ xfs_read(
287} 287}
288 288
289ssize_t 289ssize_t
290xfs_sendfile(
291 bhv_desc_t *bdp,
292 struct file *filp,
293 loff_t *offset,
294 int ioflags,
295 size_t count,
296 read_actor_t actor,
297 void *target,
298 cred_t *credp)
299{
300 xfs_inode_t *ip = XFS_BHVTOI(bdp);
301 xfs_mount_t *mp = ip->i_mount;
302 ssize_t ret;
303
304 XFS_STATS_INC(xs_read_calls);
305 if (XFS_FORCED_SHUTDOWN(mp))
306 return -EIO;
307
308 xfs_ilock(ip, XFS_IOLOCK_SHARED);
309
310 if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
311 (!(ioflags & IO_INVIS))) {
312 bhv_vrwlock_t locktype = VRWLOCK_READ;
313 int error;
314
315 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
316 *offset, count,
317 FILP_DELAY_FLAG(filp), &locktype);
318 if (error) {
319 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
320 return -error;
321 }
322 }
323 xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
324 (void *)(unsigned long)target, count, *offset, ioflags);
325 ret = generic_file_sendfile(filp, offset, count, actor, target);
326 if (ret > 0)
327 XFS_STATS_ADD(xs_read_bytes, ret);
328
329 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
330 return ret;
331}
332
333ssize_t
334xfs_splice_read( 290xfs_splice_read(
335 bhv_desc_t *bdp, 291 bhv_desc_t *bdp,
336 struct file *infilp, 292 struct file *infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1d2161..7c60a1eed88b 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
90extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *, 90extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
91 const struct iovec *, unsigned int, 91 const struct iovec *, unsigned int,
92 loff_t *, int, struct cred *); 92 loff_t *, int, struct cred *);
93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *, 93extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
97 struct pipe_inode_info *, size_t, int, int, 94 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 95 struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01843d1..013048a92643 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -139,9 +139,6 @@ typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
139typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *, 139typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
140 const struct iovec *, unsigned int, 140 const struct iovec *, unsigned int,
141 loff_t *, int, struct cred *); 141 loff_t *, int, struct cred *);
142typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
143 loff_t *, int, size_t, read_actor_t,
144 void *, struct cred *);
145typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *, 142typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
146 struct pipe_inode_info *, size_t, int, int, 143 struct pipe_inode_info *, size_t, int, int,
147 struct cred *); 144 struct cred *);
@@ -206,7 +203,6 @@ typedef struct bhv_vnodeops {
206 vop_close_t vop_close; 203 vop_close_t vop_close;
207 vop_read_t vop_read; 204 vop_read_t vop_read;
208 vop_write_t vop_write; 205 vop_write_t vop_write;
209 vop_sendfile_t vop_sendfile;
210 vop_splice_read_t vop_splice_read; 206 vop_splice_read_t vop_splice_read;
211 vop_splice_write_t vop_splice_write; 207 vop_splice_write_t vop_splice_write;
212 vop_ioctl_t vop_ioctl; 208 vop_ioctl_t vop_ioctl;
@@ -254,8 +250,6 @@ typedef struct bhv_vnodeops {
254 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 250 VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
255#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \ 251#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \
256 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr) 252 VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
257#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \
258 VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
259#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \ 253#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \
260 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr) 254 VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
261#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \ 255#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed578f0..70bc82f65311 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4680,9 +4680,6 @@ bhv_vnodeops_t xfs_vnodeops = {
4680 .vop_open = xfs_open, 4680 .vop_open = xfs_open,
4681 .vop_close = xfs_close, 4681 .vop_close = xfs_close,
4682 .vop_read = xfs_read, 4682 .vop_read = xfs_read,
4683#ifdef HAVE_SENDFILE
4684 .vop_sendfile = xfs_sendfile,
4685#endif
4686#ifdef HAVE_SPLICE 4683#ifdef HAVE_SPLICE
4687 .vop_splice_read = xfs_splice_read, 4684 .vop_splice_read = xfs_splice_read,
4688 .vop_splice_write = xfs_splice_write, 4685 .vop_splice_write = xfs_splice_write,