aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-08-29 16:40:27 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-08-29 16:40:27 -0400
commitc1b054d03f5b31c33eaa0b267c629b118eaf3790 (patch)
tree9333907ca767be24fcb3667877242976c3e3c8dd /fs
parent559fb51ba7e66fe298b8355fabde1275b7def35f (diff)
parentbf4e70e54cf31dcca48d279c7f7e71328eebe749 (diff)
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig75
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/adfs.h4
-rw-r--r--fs/afs/kafsasyncd.c2
-rw-r--r--fs/afs/kafstimod.c2
-rw-r--r--fs/afs/mntpt.c8
-rw-r--r--fs/aio.c9
-rw-r--r--fs/attr.c33
-rw-r--r--fs/autofs/symlink.c5
-rw-r--r--fs/autofs4/autofs_i.h1
-rw-r--r--fs/autofs4/inode.c73
-rw-r--r--fs/autofs4/symlink.c4
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/befs/linuxvfs.c10
-rw-r--r--fs/bio.c11
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/buffer.c86
-rw-r--r--fs/char_dev.c18
-rw-r--r--fs/cifs/CHANGES6
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifssmb.c3
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/link.c6
-rw-r--r--fs/cifs/misc.c1
-rw-r--r--fs/compat.c12
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/dcookies.c6
-rw-r--r--fs/debugfs/inode.c12
-rw-r--r--fs/devfs/base.c4
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dquot.c23
-rw-r--r--fs/eventpoll.c195
-rw-r--r--fs/exec.c35
-rw-r--r--fs/ext2/Makefile1
-rw-r--r--fs/ext2/acl.c12
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/ext2.h11
-rw-r--r--fs/ext2/file.c14
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/inode.c31
-rw-r--r--fs/ext2/namei.c12
-rw-r--r--fs/ext2/super.c51
-rw-r--r--fs/ext2/symlink.c4
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext2/xip.c93
-rw-r--r--fs/ext2/xip.h25
-rw-r--r--fs/ext3/acl.c17
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/balloc.c135
-rw-r--r--fs/ext3/file.c4
-rw-r--r--fs/ext3/ialloc.c2
-rw-r--r--fs/ext3/inode.c9
-rw-r--r--fs/ext3/namei.c37
-rw-r--r--fs/ext3/super.c128
-rw-r--r--fs/ext3/symlink.c4
-rw-r--r--fs/ext3/xattr.c4
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/inode.c21
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/file_table.c60
-rw-r--r--fs/freevxfs/vxfs.h1
-rw-r--r--fs/freevxfs/vxfs_bmap.c2
-rw-r--r--fs/freevxfs/vxfs_fshead.c11
-rw-r--r--fs/freevxfs/vxfs_immed.c6
-rw-r--r--fs/freevxfs/vxfs_kcompat.h49
-rw-r--r--fs/freevxfs/vxfs_lookup.c8
-rw-r--r--fs/freevxfs/vxfs_olt.c10
-rw-r--r--fs/freevxfs/vxfs_subr.c1
-rw-r--r--fs/freevxfs/vxfs_super.c7
-rw-r--r--fs/fs-writeback.c64
-rw-r--r--fs/hfs/bnode.c2
-rw-r--r--fs/hfs/extent.c3
-rw-r--r--fs/hfsplus/bnode.c2
-rw-r--r--fs/hfsplus/extents.c4
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/hostfs/hostfs_kern.c11
-rw-r--r--fs/hostfs/hostfs_user.c16
-rw-r--r--fs/hppfs/hppfs_kern.c43
-rw-r--r--fs/inode.c108
-rw-r--r--fs/inotify.c1057
-rw-r--r--fs/ioprio.c174
-rw-r--r--fs/isofs/compress.c6
-rw-r--r--fs/jbd/journal.c4
-rw-r--r--fs/jffs/intrep.c117
-rw-r--r--fs/jffs/intrep.h2
-rw-r--r--fs/jffs/jffs_fm.c105
-rw-r--r--fs/jffs/jffs_fm.h3
-rw-r--r--fs/jffs2/Makefile5
-rw-r--r--fs/jffs2/README.Locking6
-rw-r--r--fs/jffs2/background.c15
-rw-r--r--fs/jffs2/build.c16
-rw-r--r--fs/jffs2/compr_zlib.c4
-rw-r--r--fs/jffs2/dir.c46
-rw-r--r--fs/jffs2/erase.c178
-rw-r--r--fs/jffs2/file.c5
-rw-r--r--fs/jffs2/fs.c24
-rw-r--r--fs/jffs2/gc.c41
-rw-r--r--fs/jffs2/nodelist.c101
-rw-r--r--fs/jffs2/nodelist.h21
-rw-r--r--fs/jffs2/nodemgmt.c31
-rw-r--r--fs/jffs2/os-linux.h62
-rw-r--r--fs/jffs2/read.c32
-rw-r--r--fs/jffs2/readinode.c103
-rw-r--r--fs/jffs2/scan.c39
-rw-r--r--fs/jffs2/super.c17
-rw-r--r--fs/jffs2/symlink.c50
-rw-r--r--fs/jffs2/wbuf.c164
-rw-r--r--fs/jffs2/write.c55
-rw-r--r--fs/jfs/acl.c11
-rw-r--r--fs/jfs/inode.c4
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_dmap.c344
-rw-r--r--fs/jfs/jfs_dtree.c212
-rw-r--r--fs/jfs/jfs_dtree.h7
-rw-r--r--fs/jfs/jfs_imap.c105
-rw-r--r--fs/jfs/jfs_logmgr.c43
-rw-r--r--fs/jfs/jfs_logmgr.h2
-rw-r--r--fs/jfs/jfs_metapage.c11
-rw-r--r--fs/jfs/jfs_txnmgr.c20
-rw-r--r--fs/jfs/jfs_unicode.c7
-rw-r--r--fs/jfs/jfs_xtree.c340
-rw-r--r--fs/jfs/jfs_xtree.h6
-rw-r--r--fs/jfs/super.c5
-rw-r--r--fs/jfs/symlink.c4
-rw-r--r--fs/jfs/xattr.c13
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/clntproc.c2
-rw-r--r--fs/lockd/svc.c8
-rw-r--r--fs/locks.c85
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/namei.c91
-rw-r--r--fs/namespace.c134
-rw-r--r--fs/ncpfs/dir.c13
-rw-r--r--fs/ncpfs/ncplib_kernel.c40
-rw-r--r--fs/ncpfs/ncplib_kernel.h3
-rw-r--r--fs/nfs/dir.c28
-rw-r--r--fs/nfs/direct.c5
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/inode.c197
-rw-r--r--fs/nfs/nfs3acl.c18
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/read.c8
-rw-r--r--fs/nfs/symlink.c37
-rw-r--r--fs/nfs_common/nfsacl.c1
-rw-r--r--fs/nfsd/Makefile2
-rw-r--r--fs/nfsd/nfs4acl.c4
-rw-r--r--fs/nfsd/nfs4callback.c13
-rw-r--r--fs/nfsd/nfs4idmap.c12
-rw-r--r--fs/nfsd/nfs4proc.c32
-rw-r--r--fs/nfsd/nfs4recover.c421
-rw-r--r--fs/nfsd/nfs4state.c1253
-rw-r--r--fs/nfsd/nfs4xdr.c22
-rw-r--r--fs/nfsd/nfsctl.c28
-rw-r--r--fs/nfsd/nfssvc.c3
-rw-r--r--fs/nfsd/vfs.c30
-rw-r--r--fs/ntfs/ChangeLog182
-rw-r--r--fs/ntfs/Makefile4
-rw-r--r--fs/ntfs/aops.c167
-rw-r--r--fs/ntfs/attrib.c630
-rw-r--r--fs/ntfs/attrib.h16
-rw-r--r--fs/ntfs/compress.c46
-rw-r--r--fs/ntfs/debug.c15
-rw-r--r--fs/ntfs/dir.c32
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/index.c16
-rw-r--r--fs/ntfs/inode.c530
-rw-r--r--fs/ntfs/inode.h7
-rw-r--r--fs/ntfs/layout.h83
-rw-r--r--fs/ntfs/lcnalloc.c72
-rw-r--r--fs/ntfs/logfile.c11
-rw-r--r--fs/ntfs/mft.c229
-rw-r--r--fs/ntfs/namei.c34
-rw-r--r--fs/ntfs/ntfs.h8
-rw-r--r--fs/ntfs/runlist.c278
-rw-r--r--fs/ntfs/runlist.h16
-rw-r--r--fs/ntfs/super.c692
-rw-r--r--fs/ntfs/sysctl.c4
-rw-r--r--fs/ntfs/sysctl.h2
-rw-r--r--fs/ntfs/time.h4
-rw-r--r--fs/ntfs/types.h10
-rw-r--r--fs/ntfs/unistr.c2
-rw-r--r--fs/ntfs/usnjrnl.c84
-rw-r--r--fs/ntfs/usnjrnl.h205
-rw-r--r--fs/ntfs/volume.h12
-rw-r--r--fs/open.c48
-rw-r--r--fs/partitions/Makefile1
-rw-r--r--fs/partitions/check.c3
-rw-r--r--fs/partitions/check.h4
-rw-r--r--fs/partitions/msdos.c4
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/proc_misc.c8
-rw-r--r--fs/proc/vmcore.c669
-rw-r--r--fs/qnx4/dir.c2
-rw-r--r--fs/qnx4/inode.c4
-rw-r--r--fs/quota.c60
-rw-r--r--fs/read_write.c35
-rw-r--r--fs/reiserfs/bitmap.c1842
-rw-r--r--fs/reiserfs/dir.c488
-rw-r--r--fs/reiserfs/do_balan.c3236
-rw-r--r--fs/reiserfs/file.c2564
-rw-r--r--fs/reiserfs/fix_node.c4051
-rw-r--r--fs/reiserfs/hashes.c193
-rw-r--r--fs/reiserfs/ibalance.c1844
-rw-r--r--fs/reiserfs/inode.c4909
-rw-r--r--fs/reiserfs/ioctl.c197
-rw-r--r--fs/reiserfs/item_ops.c979
-rw-r--r--fs/reiserfs/journal.c6841
-rw-r--r--fs/reiserfs/lbalance.c2218
-rw-r--r--fs/reiserfs/namei.c2574
-rw-r--r--fs/reiserfs/objectid.c303
-rw-r--r--fs/reiserfs/prints.c1003
-rw-r--r--fs/reiserfs/procfs.c695
-rw-r--r--fs/reiserfs/resize.c207
-rw-r--r--fs/reiserfs/stree.c3369
-rw-r--r--fs/reiserfs/super.c3604
-rw-r--r--fs/reiserfs/tail_conversion.c463
-rw-r--r--fs/reiserfs/xattr.c2174
-rw-r--r--fs/reiserfs/xattr_acl.c647
-rw-r--r--fs/reiserfs/xattr_security.c54
-rw-r--r--fs/reiserfs/xattr_trusted.c70
-rw-r--r--fs/reiserfs/xattr_user.c89
-rw-r--r--fs/smbfs/symlink.c6
-rw-r--r--fs/super.c84
-rw-r--r--fs/sysfs/dir.c5
-rw-r--r--fs/sysfs/file.c35
-rw-r--r--fs/sysfs/group.c4
-rw-r--r--fs/sysfs/inode.c16
-rw-r--r--fs/sysfs/symlink.c6
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/sysv/symlink.c4
-rw-r--r--fs/udf/namei.c6
-rw-r--r--fs/ufs/symlink.c4
-rw-r--r--fs/xattr.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
241 files changed, 31310 insertions, 25591 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index a7c0cc3203cb..e54be7058359 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -29,6 +29,7 @@ config EXT2_FS_XATTR
29config EXT2_FS_POSIX_ACL 29config EXT2_FS_POSIX_ACL
30 bool "Ext2 POSIX Access Control Lists" 30 bool "Ext2 POSIX Access Control Lists"
31 depends on EXT2_FS_XATTR 31 depends on EXT2_FS_XATTR
32 select FS_POSIX_ACL
32 help 33 help
33 Posix Access Control Lists (ACLs) support permissions for users and 34 Posix Access Control Lists (ACLs) support permissions for users and
34 groups beyond the owner/group/world scheme. 35 groups beyond the owner/group/world scheme.
@@ -50,6 +51,23 @@ config EXT2_FS_SECURITY
50 If you are not using a security module that requires using 51 If you are not using a security module that requires using
51 extended attributes for file security labels, say N. 52 extended attributes for file security labels, say N.
52 53
54config EXT2_FS_XIP
55 bool "Ext2 execute in place support"
56 depends on EXT2_FS
57 help
58 Execute in place can be used on memory-backed block devices. If you
59 enable this option, you can select to mount block devices which are
60 capable of this feature without using the page cache.
61
62 If you do not use a block device that is capable of using this,
63 or if unsure, say N.
64
65config FS_XIP
66# execute in place
67 bool
68 depends on EXT2_FS_XIP
69 default y
70
53config EXT3_FS 71config EXT3_FS
54 tristate "Ext3 journalling file system support" 72 tristate "Ext3 journalling file system support"
55 help 73 help
@@ -97,6 +115,7 @@ config EXT3_FS_XATTR
97config EXT3_FS_POSIX_ACL 115config EXT3_FS_POSIX_ACL
98 bool "Ext3 POSIX Access Control Lists" 116 bool "Ext3 POSIX Access Control Lists"
99 depends on EXT3_FS_XATTR 117 depends on EXT3_FS_XATTR
118 select FS_POSIX_ACL
100 help 119 help
101 Posix Access Control Lists (ACLs) support permissions for users and 120 Posix Access Control Lists (ACLs) support permissions for users and
102 groups beyond the owner/group/world scheme. 121 groups beyond the owner/group/world scheme.
@@ -224,6 +243,7 @@ config REISERFS_FS_XATTR
224config REISERFS_FS_POSIX_ACL 243config REISERFS_FS_POSIX_ACL
225 bool "ReiserFS POSIX Access Control Lists" 244 bool "ReiserFS POSIX Access Control Lists"
226 depends on REISERFS_FS_XATTR 245 depends on REISERFS_FS_XATTR
246 select FS_POSIX_ACL
227 help 247 help
228 Posix Access Control Lists (ACLs) support permissions for users and 248 Posix Access Control Lists (ACLs) support permissions for users and
229 groups beyond the owner/group/world scheme. 249 groups beyond the owner/group/world scheme.
@@ -257,6 +277,7 @@ config JFS_FS
257config JFS_POSIX_ACL 277config JFS_POSIX_ACL
258 bool "JFS POSIX Access Control Lists" 278 bool "JFS POSIX Access Control Lists"
259 depends on JFS_FS 279 depends on JFS_FS
280 select FS_POSIX_ACL
260 help 281 help
261 Posix Access Control Lists (ACLs) support permissions for users and 282 Posix Access Control Lists (ACLs) support permissions for users and
262 groups beyond the owner/group/world scheme. 283 groups beyond the owner/group/world scheme.
@@ -301,8 +322,7 @@ config FS_POSIX_ACL
301# Never use this symbol for ifdefs. 322# Never use this symbol for ifdefs.
302# 323#
303 bool 324 bool
304 depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFSD_V4 325 default n
305 default y
306 326
307source "fs/xfs/Kconfig" 327source "fs/xfs/Kconfig"
308 328
@@ -339,6 +359,22 @@ config ROMFS_FS
339 If you don't know whether you need it, then you don't need it: 359 If you don't know whether you need it, then you don't need it:
340 answer N. 360 answer N.
341 361
362config INOTIFY
363 bool "Inotify file change notification support"
364 default y
365 ---help---
366 Say Y here to enable inotify support and the associated system
367 calls. Inotify is a file change notification system and a
368 replacement for dnotify. Inotify fixes numerous shortcomings in
369 dnotify and introduces several new features. It allows monitoring
370 of both files and directories via a single open fd. Other features
371 include multiple file events, one-shot support, and unmount
372 notification.
373
374 For more information, see Documentation/filesystems/inotify.txt
375
376 If unsure, say Y.
377
342config QUOTA 378config QUOTA
343 bool "Quota support" 379 bool "Quota support"
344 help 380 help
@@ -717,6 +753,12 @@ config PROC_KCORE
717 bool "/proc/kcore support" if !ARM 753 bool "/proc/kcore support" if !ARM
718 depends on PROC_FS && MMU 754 depends on PROC_FS && MMU
719 755
756config PROC_VMCORE
757 bool "/proc/vmcore support (EXPERIMENTAL)"
758 depends on PROC_FS && EMBEDDED && EXPERIMENTAL && CRASH_DUMP
759 help
760 Exports the dump image of crashed kernel in ELF format.
761
720config SYSFS 762config SYSFS
721 bool "sysfs file system support" if EMBEDDED 763 bool "sysfs file system support" if EMBEDDED
722 default y 764 default y
@@ -1013,26 +1055,18 @@ config JFFS2_FS_DEBUG
1013 If reporting bugs, please try to have available a full dump of the 1055 If reporting bugs, please try to have available a full dump of the
1014 messages at debug level 1 while the misbehaviour was occurring. 1056 messages at debug level 1 while the misbehaviour was occurring.
1015 1057
1016config JFFS2_FS_NAND 1058config JFFS2_FS_WRITEBUFFER
1017 bool "JFFS2 support for NAND flash" 1059 bool "JFFS2 write-buffering support"
1018 depends on JFFS2_FS 1060 depends on JFFS2_FS
1019 default n 1061 default y
1020 help 1062 help
1021 This enables the support for NAND flash in JFFS2. NAND is a newer 1063 This enables the write-buffering support in JFFS2.
1022 type of flash chip design than the traditional NOR flash, with
1023 higher density but a handful of characteristics which make it more
1024 interesting for the file system to use.
1025 1064
1026 Say 'N' unless you have NAND flash. 1065 This functionality is required to support JFFS2 on the following
1027 1066 types of flash devices:
1028config JFFS2_FS_NOR_ECC 1067 - NAND flash
1029 bool "JFFS2 support for ECC'd NOR flash (EXPERIMENTAL)" 1068 - NOR flash with transparent ECC
1030 depends on JFFS2_FS && EXPERIMENTAL 1069 - DataFlash
1031 default n
1032 help
1033 This enables the experimental support for NOR flash with transparent
1034 ECC for JFFS2. This type of flash chip is not common, however it is
1035 available from ST Microelectronics.
1036 1070
1037config JFFS2_COMPRESSION_OPTIONS 1071config JFFS2_COMPRESSION_OPTIONS
1038 bool "Advanced compression options for JFFS2" 1072 bool "Advanced compression options for JFFS2"
@@ -1413,6 +1447,9 @@ config NFSD_V4
1413 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1447 bool "Provide NFSv4 server support (EXPERIMENTAL)"
1414 depends on NFSD_V3 && EXPERIMENTAL 1448 depends on NFSD_V3 && EXPERIMENTAL
1415 select NFSD_TCP 1449 select NFSD_TCP
1450 select CRYPTO_MD5
1451 select CRYPTO
1452 select FS_POSIX_ACL
1416 help 1453 help
1417 If you would like to include the NFSv4 server as well as the NFSv2 1454 If you would like to include the NFSv4 server as well as the NFSv2
1418 and NFSv3 servers, say Y here. This feature is experimental, and 1455 and NFSv3 servers, say Y here. This feature is experimental, and
diff --git a/fs/Makefile b/fs/Makefile
index fc92e59e9faf..cf95eb894fd5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,9 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
13 ioprio.o
13 14
15obj-$(CONFIG_INOTIFY) += inotify.o
14obj-$(CONFIG_EPOLL) += eventpoll.o 16obj-$(CONFIG_EPOLL) += eventpoll.o
15obj-$(CONFIG_COMPAT) += compat.o 17obj-$(CONFIG_COMPAT) += compat.o
16 18
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 63f5df9afb71..fd528433de43 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -97,7 +97,7 @@ extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
97extern struct inode_operations adfs_file_inode_operations; 97extern struct inode_operations adfs_file_inode_operations;
98extern struct file_operations adfs_file_operations; 98extern struct file_operations adfs_file_operations;
99 99
100extern inline __u32 signed_asl(__u32 val, signed int shift) 100static inline __u32 signed_asl(__u32 val, signed int shift)
101{ 101{
102 if (shift >= 0) 102 if (shift >= 0)
103 val <<= shift; 103 val <<= shift;
@@ -112,7 +112,7 @@ extern inline __u32 signed_asl(__u32 val, signed int shift)
112 * 112 *
113 * The root directory ID should always be looked up in the map [3.4] 113 * The root directory ID should always be looked up in the map [3.4]
114 */ 114 */
115extern inline int 115static inline int
116__adfs_block_map(struct super_block *sb, unsigned int object_id, 116__adfs_block_map(struct super_block *sb, unsigned int object_id,
117 unsigned int block) 117 unsigned int block)
118{ 118{
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 6fc88ae8ad94..7ac07d0d47b9 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -116,7 +116,7 @@ static int kafsasyncd(void *arg)
116 remove_wait_queue(&kafsasyncd_sleepq, &myself); 116 remove_wait_queue(&kafsasyncd_sleepq, &myself);
117 set_current_state(TASK_RUNNING); 117 set_current_state(TASK_RUNNING);
118 118
119 try_to_freeze(PF_FREEZE); 119 try_to_freeze();
120 120
121 /* discard pending signals */ 121 /* discard pending signals */
122 afs_discard_my_signals(); 122 afs_discard_my_signals();
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 86e710dd057e..65bc05ab8182 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -91,7 +91,7 @@ static int kafstimod(void *arg)
91 complete_and_exit(&kafstimod_dead, 0); 91 complete_and_exit(&kafstimod_dead, 0);
92 } 92 }
93 93
94 try_to_freeze(PF_FREEZE); 94 try_to_freeze();
95 95
96 /* discard pending signals */ 96 /* discard pending signals */
97 afs_discard_my_signals(); 97 afs_discard_my_signals();
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index bfc28abe1cb1..31ee06590de5 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -30,7 +30,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
30 struct dentry *dentry, 30 struct dentry *dentry,
31 struct nameidata *nd); 31 struct nameidata *nd);
32static int afs_mntpt_open(struct inode *inode, struct file *file); 32static int afs_mntpt_open(struct inode *inode, struct file *file);
33static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); 33static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
34 34
35struct file_operations afs_mntpt_file_operations = { 35struct file_operations afs_mntpt_file_operations = {
36 .open = afs_mntpt_open, 36 .open = afs_mntpt_open,
@@ -233,7 +233,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
233/* 233/*
234 * follow a link from a mountpoint directory, thus causing it to be mounted 234 * follow a link from a mountpoint directory, thus causing it to be mounted
235 */ 235 */
236static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) 236static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
237{ 237{
238 struct vfsmount *newmnt; 238 struct vfsmount *newmnt;
239 struct dentry *old_dentry; 239 struct dentry *old_dentry;
@@ -249,7 +249,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
249 newmnt = afs_mntpt_do_automount(dentry); 249 newmnt = afs_mntpt_do_automount(dentry);
250 if (IS_ERR(newmnt)) { 250 if (IS_ERR(newmnt)) {
251 path_release(nd); 251 path_release(nd);
252 return PTR_ERR(newmnt); 252 return (void *)newmnt;
253 } 253 }
254 254
255 old_dentry = nd->dentry; 255 old_dentry = nd->dentry;
@@ -267,7 +267,7 @@ static int afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
267 } 267 }
268 268
269 kleave(" = %d", err); 269 kleave(" = %d", err);
270 return err; 270 return ERR_PTR(err);
271} /* end afs_mntpt_follow_link() */ 271} /* end afs_mntpt_follow_link() */
272 272
273/*****************************************************************************/ 273/*****************************************************************************/
diff --git a/fs/aio.c b/fs/aio.c
index 7afa222f6802..06d7d4390fe7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -58,6 +58,7 @@ static DEFINE_SPINLOCK(fput_lock);
58static LIST_HEAD(fput_head); 58static LIST_HEAD(fput_head);
59 59
60static void aio_kick_handler(void *); 60static void aio_kick_handler(void *);
61static void aio_queue_work(struct kioctx *);
61 62
62/* aio_setup 63/* aio_setup
63 * Creates the slab caches used by the aio routines, panic on 64 * Creates the slab caches used by the aio routines, panic on
@@ -747,6 +748,14 @@ out:
747 * has already been kicked */ 748 * has already been kicked */
748 if (kiocbIsKicked(iocb)) { 749 if (kiocbIsKicked(iocb)) {
749 __queue_kicked_iocb(iocb); 750 __queue_kicked_iocb(iocb);
751
752 /*
753 * __queue_kicked_iocb will always return 1 here, because
754 * iocb->ki_run_list is empty at this point so it should
755 * be safe to unconditionally queue the context into the
756 * work queue.
757 */
758 aio_queue_work(ctx);
750 } 759 }
751 } 760 }
752 return ret; 761 return ret;
diff --git a/fs/attr.c b/fs/attr.c
index c3c76fe78346..b1796fb9e524 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -10,7 +10,7 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/dnotify.h> 13#include <linux/fsnotify.h>
14#include <linux/fcntl.h> 14#include <linux/fcntl.h>
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16#include <linux/security.h> 16#include <linux/security.h>
@@ -107,31 +107,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
107out: 107out:
108 return error; 108 return error;
109} 109}
110
111EXPORT_SYMBOL(inode_setattr); 110EXPORT_SYMBOL(inode_setattr);
112 111
113int setattr_mask(unsigned int ia_valid)
114{
115 unsigned long dn_mask = 0;
116
117 if (ia_valid & ATTR_UID)
118 dn_mask |= DN_ATTRIB;
119 if (ia_valid & ATTR_GID)
120 dn_mask |= DN_ATTRIB;
121 if (ia_valid & ATTR_SIZE)
122 dn_mask |= DN_MODIFY;
123 /* both times implies a utime(s) call */
124 if ((ia_valid & (ATTR_ATIME|ATTR_MTIME)) == (ATTR_ATIME|ATTR_MTIME))
125 dn_mask |= DN_ATTRIB;
126 else if (ia_valid & ATTR_ATIME)
127 dn_mask |= DN_ACCESS;
128 else if (ia_valid & ATTR_MTIME)
129 dn_mask |= DN_MODIFY;
130 if (ia_valid & ATTR_MODE)
131 dn_mask |= DN_ATTRIB;
132 return dn_mask;
133}
134
135int notify_change(struct dentry * dentry, struct iattr * attr) 112int notify_change(struct dentry * dentry, struct iattr * attr)
136{ 113{
137 struct inode *inode = dentry->d_inode; 114 struct inode *inode = dentry->d_inode;
@@ -197,11 +174,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
197 if (ia_valid & ATTR_SIZE) 174 if (ia_valid & ATTR_SIZE)
198 up_write(&dentry->d_inode->i_alloc_sem); 175 up_write(&dentry->d_inode->i_alloc_sem);
199 176
200 if (!error) { 177 if (!error)
201 unsigned long dn_mask = setattr_mask(ia_valid); 178 fsnotify_change(dentry, ia_valid);
202 if (dn_mask) 179
203 dnotify_parent(dentry, dn_mask);
204 }
205 return error; 180 return error;
206} 181}
207 182
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index f028396f1383..52e8772b066e 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -12,11 +12,12 @@
12 12
13#include "autofs_i.h" 13#include "autofs_i.h"
14 14
15static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd) 15/* Nothing to release.. */
16static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
16{ 17{
17 char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; 18 char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data;
18 nd_set_link(nd, s); 19 nd_set_link(nd, s);
19 return 0; 20 return NULL;
20} 21}
21 22
22struct inode_operations autofs_symlink_inode_operations = { 23struct inode_operations autofs_symlink_inode_operations = {
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 9c09641ce907..fca83e28edcf 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -92,6 +92,7 @@ struct autofs_wait_queue {
92 92
93struct autofs_sb_info { 93struct autofs_sb_info {
94 u32 magic; 94 u32 magic;
95 struct dentry *root;
95 struct file *pipe; 96 struct file *pipe;
96 pid_t oz_pgrp; 97 pid_t oz_pgrp;
97 int catatonic; 98 int catatonic;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 4bb14cc68040..0a3c05d10167 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -16,6 +16,7 @@
16#include <linux/pagemap.h> 16#include <linux/pagemap.h>
17#include <linux/parser.h> 17#include <linux/parser.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/smp_lock.h>
19#include "autofs_i.h" 20#include "autofs_i.h"
20#include <linux/module.h> 21#include <linux/module.h>
21 22
@@ -76,6 +77,66 @@ void autofs4_free_ino(struct autofs_info *ino)
76 kfree(ino); 77 kfree(ino);
77} 78}
78 79
80/*
81 * Deal with the infamous "Busy inodes after umount ..." message.
82 *
83 * Clean up the dentry tree. This happens with autofs if the user
84 * space program goes away due to a SIGKILL, SIGSEGV etc.
85 */
86static void autofs4_force_release(struct autofs_sb_info *sbi)
87{
88 struct dentry *this_parent = sbi->root;
89 struct list_head *next;
90
91 spin_lock(&dcache_lock);
92repeat:
93 next = this_parent->d_subdirs.next;
94resume:
95 while (next != &this_parent->d_subdirs) {
96 struct dentry *dentry = list_entry(next, struct dentry, d_child);
97
98 /* Negative dentry - don`t care */
99 if (!simple_positive(dentry)) {
100 next = next->next;
101 continue;
102 }
103
104 if (!list_empty(&dentry->d_subdirs)) {
105 this_parent = dentry;
106 goto repeat;
107 }
108
109 next = next->next;
110 spin_unlock(&dcache_lock);
111
112 DPRINTK("dentry %p %.*s",
113 dentry, (int)dentry->d_name.len, dentry->d_name.name);
114
115 dput(dentry);
116 spin_lock(&dcache_lock);
117 }
118
119 if (this_parent != sbi->root) {
120 struct dentry *dentry = this_parent;
121
122 next = this_parent->d_child.next;
123 this_parent = this_parent->d_parent;
124 spin_unlock(&dcache_lock);
125 DPRINTK("parent dentry %p %.*s",
126 dentry, (int)dentry->d_name.len, dentry->d_name.name);
127 dput(dentry);
128 spin_lock(&dcache_lock);
129 goto resume;
130 }
131 spin_unlock(&dcache_lock);
132
133 dput(sbi->root);
134 sbi->root = NULL;
135 shrink_dcache_sb(sbi->sb);
136
137 return;
138}
139
79static void autofs4_put_super(struct super_block *sb) 140static void autofs4_put_super(struct super_block *sb)
80{ 141{
81 struct autofs_sb_info *sbi = autofs4_sbi(sb); 142 struct autofs_sb_info *sbi = autofs4_sbi(sb);
@@ -85,6 +146,10 @@ static void autofs4_put_super(struct super_block *sb)
85 if ( !sbi->catatonic ) 146 if ( !sbi->catatonic )
86 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ 147 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
87 148
149 /* Clean up and release dangling references */
150 if (sbi)
151 autofs4_force_release(sbi);
152
88 kfree(sbi); 153 kfree(sbi);
89 154
90 DPRINTK("shutting down"); 155 DPRINTK("shutting down");
@@ -199,6 +264,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
199 264
200 s->s_fs_info = sbi; 265 s->s_fs_info = sbi;
201 sbi->magic = AUTOFS_SBI_MAGIC; 266 sbi->magic = AUTOFS_SBI_MAGIC;
267 sbi->root = NULL;
202 sbi->catatonic = 0; 268 sbi->catatonic = 0;
203 sbi->exp_timeout = 0; 269 sbi->exp_timeout = 0;
204 sbi->oz_pgrp = process_group(current); 270 sbi->oz_pgrp = process_group(current);
@@ -267,6 +333,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
267 sbi->pipe = pipe; 333 sbi->pipe = pipe;
268 334
269 /* 335 /*
336 * Take a reference to the root dentry so we get a chance to
337 * clean up the dentry tree on umount.
338 * See autofs4_force_release.
339 */
340 sbi->root = dget(root);
341
342 /*
270 * Success! Install the root dentry now to indicate completion. 343 * Success! Install the root dentry now to indicate completion.
271 */ 344 */
272 s->s_root = root; 345 s->s_root = root;
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index c265a66edf0f..2ea2c98fd84b 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,11 +12,11 @@
12 12
13#include "autofs_i.h" 13#include "autofs_i.h"
14 14
15static int autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) 15static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
16{ 16{
17 struct autofs_info *ino = autofs4_dentry_ino(dentry); 17 struct autofs_info *ino = autofs4_dentry_ino(dentry);
18 nd_set_link(nd, (char *)ino->u.symlink); 18 nd_set_link(nd, (char *)ino->u.symlink);
19 return 0; 19 return NULL;
20} 20}
21 21
22struct inode_operations autofs4_symlink_inode_operations = { 22struct inode_operations autofs4_symlink_inode_operations = {
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index fa2348dcd671..3df86285a1c7 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -231,8 +231,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
231 int type = (notify == NFY_MOUNT ? 231 int type = (notify == NFY_MOUNT ?
232 autofs_ptype_missing : autofs_ptype_expire_multi); 232 autofs_ptype_missing : autofs_ptype_expire_multi);
233 233
234 DPRINTK(("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", 234 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
235 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify)); 235 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
236 236
237 /* autofs4_notify_daemon() may block */ 237 /* autofs4_notify_daemon() may block */
238 autofs4_notify_daemon(sbi, wq, type); 238 autofs4_notify_daemon(sbi, wq, type);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index de5bb280a828..e0a6025f1d06 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -41,8 +41,8 @@ static struct inode *befs_alloc_inode(struct super_block *sb);
41static void befs_destroy_inode(struct inode *inode); 41static void befs_destroy_inode(struct inode *inode);
42static int befs_init_inodecache(void); 42static int befs_init_inodecache(void);
43static void befs_destroy_inodecache(void); 43static void befs_destroy_inodecache(void);
44static int befs_follow_link(struct dentry *, struct nameidata *); 44static void *befs_follow_link(struct dentry *, struct nameidata *);
45static void befs_put_link(struct dentry *, struct nameidata *); 45static void befs_put_link(struct dentry *, struct nameidata *, void *);
46static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, 46static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
47 char **out, int *out_len); 47 char **out, int *out_len);
48static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, 48static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -461,7 +461,7 @@ befs_destroy_inodecache(void)
461 * The data stream become link name. Unless the LONG_SYMLINK 461 * The data stream become link name. Unless the LONG_SYMLINK
462 * flag is set. 462 * flag is set.
463 */ 463 */
464static int 464static void *
465befs_follow_link(struct dentry *dentry, struct nameidata *nd) 465befs_follow_link(struct dentry *dentry, struct nameidata *nd)
466{ 466{
467 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 467 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
@@ -487,10 +487,10 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
487 } 487 }
488 488
489 nd_set_link(nd, link); 489 nd_set_link(nd, link);
490 return 0; 490 return NULL;
491} 491}
492 492
493static void befs_put_link(struct dentry *dentry, struct nameidata *nd) 493static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
494{ 494{
495 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 495 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
496 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 496 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
diff --git a/fs/bio.c b/fs/bio.c
index 3a1472acc361..1f2d4649b188 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -52,7 +52,7 @@ struct biovec_slab {
52 */ 52 */
53 53
54#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } 54#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
55static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] = { 55static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
56 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), 56 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
57}; 57};
58#undef BV 58#undef BV
@@ -248,19 +248,16 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src)
248{ 248{
249 request_queue_t *q = bdev_get_queue(bio_src->bi_bdev); 249 request_queue_t *q = bdev_get_queue(bio_src->bi_bdev);
250 250
251 memcpy(bio->bi_io_vec, bio_src->bi_io_vec, bio_src->bi_max_vecs * sizeof(struct bio_vec)); 251 memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
252 bio_src->bi_max_vecs * sizeof(struct bio_vec));
252 253
253 bio->bi_sector = bio_src->bi_sector; 254 bio->bi_sector = bio_src->bi_sector;
254 bio->bi_bdev = bio_src->bi_bdev; 255 bio->bi_bdev = bio_src->bi_bdev;
255 bio->bi_flags |= 1 << BIO_CLONED; 256 bio->bi_flags |= 1 << BIO_CLONED;
256 bio->bi_rw = bio_src->bi_rw; 257 bio->bi_rw = bio_src->bi_rw;
257
258 /*
259 * notes -- maybe just leave bi_idx alone. assume identical mapping
260 * for the clone
261 */
262 bio->bi_vcnt = bio_src->bi_vcnt; 258 bio->bi_vcnt = bio_src->bi_vcnt;
263 bio->bi_size = bio_src->bi_size; 259 bio->bi_size = bio_src->bi_size;
260 bio->bi_idx = bio_src->bi_idx;
264 bio_phys_segments(q, bio); 261 bio_phys_segments(q, bio);
265 bio_hw_segments(q, bio); 262 bio_hw_segments(q, bio);
266} 263}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c0cbd1bc1a02..e0df94c37b7e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
777 return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); 777 return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
778} 778}
779 779
780static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd, 780static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
781 unsigned long arg)
782{ 781{
783 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); 782 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
784} 783}
@@ -803,7 +802,7 @@ struct file_operations def_blk_fops = {
803 .aio_write = blkdev_file_aio_write, 802 .aio_write = blkdev_file_aio_write,
804 .mmap = generic_file_mmap, 803 .mmap = generic_file_mmap,
805 .fsync = block_fsync, 804 .fsync = block_fsync,
806 .ioctl = block_ioctl, 805 .unlocked_ioctl = block_ioctl,
807#ifdef CONFIG_COMPAT 806#ifdef CONFIG_COMPAT
808 .compat_ioctl = compat_blkdev_ioctl, 807 .compat_ioctl = compat_blkdev_ioctl,
809#endif 808#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index 0befa724ab98..6a25d7df89b1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -278,7 +278,7 @@ EXPORT_SYMBOL(thaw_bdev);
278 */ 278 */
279static void do_sync(unsigned long wait) 279static void do_sync(unsigned long wait)
280{ 280{
281 wakeup_bdflush(0); 281 wakeup_pdflush(0);
282 sync_inodes(0); /* All mappings, inodes and their blockdevs */ 282 sync_inodes(0); /* All mappings, inodes and their blockdevs */
283 DQUOT_SYNC(NULL); 283 DQUOT_SYNC(NULL);
284 sync_supers(); /* Write the superblocks */ 284 sync_supers(); /* Write the superblocks */
@@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
331 return ret; 331 return ret;
332} 332}
333 333
334asmlinkage long sys_fsync(unsigned int fd) 334static long do_fsync(unsigned int fd, int datasync)
335{ 335{
336 struct file * file; 336 struct file * file;
337 struct address_space *mapping; 337 struct address_space *mapping;
@@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd)
342 if (!file) 342 if (!file)
343 goto out; 343 goto out;
344 344
345 mapping = file->f_mapping;
346
347 ret = -EINVAL; 345 ret = -EINVAL;
348 if (!file->f_op || !file->f_op->fsync) { 346 if (!file->f_op || !file->f_op->fsync) {
349 /* Why? We can still call filemap_fdatawrite */ 347 /* Why? We can still call filemap_fdatawrite */
350 goto out_putf; 348 goto out_putf;
351 } 349 }
352 350
351 mapping = file->f_mapping;
352
353 current->flags |= PF_SYNCWRITE; 353 current->flags |= PF_SYNCWRITE;
354 ret = filemap_fdatawrite(mapping); 354 ret = filemap_fdatawrite(mapping);
355 355
@@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd)
358 * which could cause livelocks in fsync_buffers_list 358 * which could cause livelocks in fsync_buffers_list
359 */ 359 */
360 down(&mapping->host->i_sem); 360 down(&mapping->host->i_sem);
361 err = file->f_op->fsync(file, file->f_dentry, 0); 361 err = file->f_op->fsync(file, file->f_dentry, datasync);
362 if (!ret) 362 if (!ret)
363 ret = err; 363 ret = err;
364 up(&mapping->host->i_sem); 364 up(&mapping->host->i_sem);
@@ -373,39 +373,14 @@ out:
373 return ret; 373 return ret;
374} 374}
375 375
376asmlinkage long sys_fdatasync(unsigned int fd) 376asmlinkage long sys_fsync(unsigned int fd)
377{ 377{
378 struct file * file; 378 return do_fsync(fd, 0);
379 struct address_space *mapping; 379}
380 int ret, err;
381
382 ret = -EBADF;
383 file = fget(fd);
384 if (!file)
385 goto out;
386
387 ret = -EINVAL;
388 if (!file->f_op || !file->f_op->fsync)
389 goto out_putf;
390
391 mapping = file->f_mapping;
392
393 current->flags |= PF_SYNCWRITE;
394 ret = filemap_fdatawrite(mapping);
395 down(&mapping->host->i_sem);
396 err = file->f_op->fsync(file, file->f_dentry, 1);
397 if (!ret)
398 ret = err;
399 up(&mapping->host->i_sem);
400 err = filemap_fdatawait(mapping);
401 if (!ret)
402 ret = err;
403 current->flags &= ~PF_SYNCWRITE;
404 380
405out_putf: 381asmlinkage long sys_fdatasync(unsigned int fd)
406 fput(file); 382{
407out: 383 return do_fsync(fd, 1);
408 return ret;
409} 384}
410 385
411/* 386/*
@@ -522,7 +497,7 @@ static void free_more_memory(void)
522 struct zone **zones; 497 struct zone **zones;
523 pg_data_t *pgdat; 498 pg_data_t *pgdat;
524 499
525 wakeup_bdflush(1024); 500 wakeup_pdflush(1024);
526 yield(); 501 yield();
527 502
528 for_each_pgdat(pgdat) { 503 for_each_pgdat(pgdat) {
@@ -538,8 +513,8 @@ static void free_more_memory(void)
538 */ 513 */
539static void end_buffer_async_read(struct buffer_head *bh, int uptodate) 514static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
540{ 515{
541 static DEFINE_SPINLOCK(page_uptodate_lock);
542 unsigned long flags; 516 unsigned long flags;
517 struct buffer_head *first;
543 struct buffer_head *tmp; 518 struct buffer_head *tmp;
544 struct page *page; 519 struct page *page;
545 int page_uptodate = 1; 520 int page_uptodate = 1;
@@ -561,7 +536,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
561 * two buffer heads end IO at almost the same time and both 536 * two buffer heads end IO at almost the same time and both
562 * decide that the page is now completely done. 537 * decide that the page is now completely done.
563 */ 538 */
564 spin_lock_irqsave(&page_uptodate_lock, flags); 539 first = page_buffers(page);
540 local_irq_save(flags);
541 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
565 clear_buffer_async_read(bh); 542 clear_buffer_async_read(bh);
566 unlock_buffer(bh); 543 unlock_buffer(bh);
567 tmp = bh; 544 tmp = bh;
@@ -574,7 +551,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
574 } 551 }
575 tmp = tmp->b_this_page; 552 tmp = tmp->b_this_page;
576 } while (tmp != bh); 553 } while (tmp != bh);
577 spin_unlock_irqrestore(&page_uptodate_lock, flags); 554 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
555 local_irq_restore(flags);
578 556
579 /* 557 /*
580 * If none of the buffers had errors and they are all 558 * If none of the buffers had errors and they are all
@@ -586,7 +564,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
586 return; 564 return;
587 565
588still_busy: 566still_busy:
589 spin_unlock_irqrestore(&page_uptodate_lock, flags); 567 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
568 local_irq_restore(flags);
590 return; 569 return;
591} 570}
592 571
@@ -597,8 +576,8 @@ still_busy:
597void end_buffer_async_write(struct buffer_head *bh, int uptodate) 576void end_buffer_async_write(struct buffer_head *bh, int uptodate)
598{ 577{
599 char b[BDEVNAME_SIZE]; 578 char b[BDEVNAME_SIZE];
600 static DEFINE_SPINLOCK(page_uptodate_lock);
601 unsigned long flags; 579 unsigned long flags;
580 struct buffer_head *first;
602 struct buffer_head *tmp; 581 struct buffer_head *tmp;
603 struct page *page; 582 struct page *page;
604 583
@@ -619,7 +598,10 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
619 SetPageError(page); 598 SetPageError(page);
620 } 599 }
621 600
622 spin_lock_irqsave(&page_uptodate_lock, flags); 601 first = page_buffers(page);
602 local_irq_save(flags);
603 bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
604
623 clear_buffer_async_write(bh); 605 clear_buffer_async_write(bh);
624 unlock_buffer(bh); 606 unlock_buffer(bh);
625 tmp = bh->b_this_page; 607 tmp = bh->b_this_page;
@@ -630,12 +612,14 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
630 } 612 }
631 tmp = tmp->b_this_page; 613 tmp = tmp->b_this_page;
632 } 614 }
633 spin_unlock_irqrestore(&page_uptodate_lock, flags); 615 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
616 local_irq_restore(flags);
634 end_page_writeback(page); 617 end_page_writeback(page);
635 return; 618 return;
636 619
637still_busy: 620still_busy:
638 spin_unlock_irqrestore(&page_uptodate_lock, flags); 621 bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
622 local_irq_restore(flags);
639 return; 623 return;
640} 624}
641 625
@@ -1951,7 +1935,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1951 if (err) 1935 if (err)
1952 break; 1936 break;
1953 if (buffer_new(bh)) { 1937 if (buffer_new(bh)) {
1954 clear_buffer_new(bh);
1955 unmap_underlying_metadata(bh->b_bdev, 1938 unmap_underlying_metadata(bh->b_bdev,
1956 bh->b_blocknr); 1939 bh->b_blocknr);
1957 if (PageUptodate(page)) { 1940 if (PageUptodate(page)) {
@@ -1993,9 +1976,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1993 if (!buffer_uptodate(*wait_bh)) 1976 if (!buffer_uptodate(*wait_bh))
1994 err = -EIO; 1977 err = -EIO;
1995 } 1978 }
1996 if (!err) 1979 if (!err) {
1997 return err; 1980 bh = head;
1998 1981 do {
1982 if (buffer_new(bh))
1983 clear_buffer_new(bh);
1984 } while ((bh = bh->b_this_page) != head);
1985 return 0;
1986 }
1999 /* Error case: */ 1987 /* Error case: */
2000 /* 1988 /*
2001 * Zero out any newly allocated blocks to avoid exposing stale 1989 * Zero out any newly allocated blocks to avoid exposing stale
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c1e3537909fc..3b1b1eefdbb0 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -56,10 +56,21 @@ int get_chrdev_list(char *page)
56 56
57 down(&chrdevs_lock); 57 down(&chrdevs_lock);
58 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { 58 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
59 for (cd = chrdevs[i]; cd; cd = cd->next) 59 for (cd = chrdevs[i]; cd; cd = cd->next) {
60 /*
61 * if the current name, plus the 5 extra characters
62 * in the device line for this entry
63 * would run us off the page, we're done
64 */
65 if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
66 goto page_full;
67
68
60 len += sprintf(page+len, "%3d %s\n", 69 len += sprintf(page+len, "%3d %s\n",
61 cd->major, cd->name); 70 cd->major, cd->name);
71 }
62 } 72 }
73page_full:
63 up(&chrdevs_lock); 74 up(&chrdevs_lock);
64 75
65 return len; 76 return len;
@@ -139,7 +150,7 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct)
139 struct char_device_struct *cd = NULL, **cp; 150 struct char_device_struct *cd = NULL, **cp;
140 int i = major_to_index(major); 151 int i = major_to_index(major);
141 152
142 up(&chrdevs_lock); 153 down(&chrdevs_lock);
143 for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) 154 for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next)
144 if ((*cp)->major == major && 155 if ((*cp)->major == major &&
145 (*cp)->baseminor == baseminor && 156 (*cp)->baseminor == baseminor &&
@@ -266,8 +277,9 @@ static struct kobject *cdev_get(struct cdev *p)
266void cdev_put(struct cdev *p) 277void cdev_put(struct cdev *p)
267{ 278{
268 if (p) { 279 if (p) {
280 struct module *owner = p->owner;
269 kobject_put(&p->kobj); 281 kobject_put(&p->kobj);
270 module_put(p->owner); 282 module_put(owner);
271 } 283 }
272} 284}
273 285
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index dab4774ee7bb..3196d4c4eed3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,9 @@
1Version 1.35
2------------
3Add writepage performance improvements. Fix path name conversions
4for long filenames on mounts which were done with "mapchars" mount option
5specified.
6
1Version 1.34 7Version 1.34
2------------ 8------------
3Fix error mapping of the TOO_MANY_LINKS (hardlinks) case. 9Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 78af5850c558..1fd21f66f243 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -83,8 +83,8 @@ extern int cifs_dir_notify(struct file *, unsigned long arg);
83extern struct dentry_operations cifs_dentry_ops; 83extern struct dentry_operations cifs_dentry_ops;
84 84
85/* Functions related to symlinks */ 85/* Functions related to symlinks */
86extern int cifs_follow_link(struct dentry *direntry, struct nameidata *nd); 86extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
87extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd); 87extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *);
88extern int cifs_readlink(struct dentry *direntry, char __user *buffer, 88extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
89 int buflen); 89 int buflen);
90extern int cifs_symlink(struct inode *inode, struct dentry *direntry, 90extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3c628bf667a5..0db0b313d715 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2602,6 +2602,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
2602 if(name_len < PATH_MAX) { 2602 if(name_len < PATH_MAX) {
2603 memcpy(pSMB->ResumeFileName, psrch_inf->presume_name, name_len); 2603 memcpy(pSMB->ResumeFileName, psrch_inf->presume_name, name_len);
2604 byte_count += name_len; 2604 byte_count += name_len;
2605 /* 14 byte parm len above enough for 2 byte null terminator */
2606 pSMB->ResumeFileName[name_len] = 0;
2607 pSMB->ResumeFileName[name_len+1] = 0;
2605 } else { 2608 } else {
2606 rc = -EINVAL; 2609 rc = -EINVAL;
2607 goto FNext2_err_exit; 2610 goto FNext2_err_exit;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 30ab70ce5547..3497125189df 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -643,7 +643,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
643 netfid, length, 643 netfid, length,
644 pfLock->fl_start, numUnlock, numLock, lockType, 644 pfLock->fl_start, numUnlock, numLock, lockType,
645 wait_flag); 645 wait_flag);
646 if (rc == 0 && (pfLock->fl_flags & FL_POSIX)) 646 if (pfLock->fl_flags & FL_POSIX)
647 posix_lock_file_wait(file, pfLock); 647 posix_lock_file_wait(file, pfLock);
648 FreeXid(xid); 648 FreeXid(xid);
649 return rc; 649 return rc;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index bde0fabfece0..ab925ef4f863 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -92,7 +92,7 @@ cifs_hl_exit:
92 return rc; 92 return rc;
93} 93}
94 94
95int 95void *
96cifs_follow_link(struct dentry *direntry, struct nameidata *nd) 96cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
97{ 97{
98 struct inode *inode = direntry->d_inode; 98 struct inode *inode = direntry->d_inode;
@@ -148,7 +148,7 @@ out:
148out_no_free: 148out_no_free:
149 FreeXid(xid); 149 FreeXid(xid);
150 nd_set_link(nd, target_path); 150 nd_set_link(nd, target_path);
151 return 0; 151 return NULL; /* No cookie */
152} 152}
153 153
154int 154int
@@ -330,7 +330,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
330 return rc; 330 return rc;
331} 331}
332 332
333void cifs_put_link(struct dentry *direntry, struct nameidata *nd) 333void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
334{ 334{
335 char *p = nd_get_link(nd); 335 char *p = nd_get_link(nd);
336 if (!IS_ERR(p)) 336 if (!IS_ERR(p))
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 072b4ee8c53e..20ae4153f791 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -611,6 +611,7 @@ cifsConvertToUCS(__le16 * target, const char *source, int maxlen,
611 src_char = source[i]; 611 src_char = source[i];
612 switch (src_char) { 612 switch (src_char) {
613 case 0: 613 case 0:
614 target[j] = 0;
614 goto ctoUCS_out; 615 goto ctoUCS_out;
615 case ':': 616 case ':':
616 target[j] = cpu_to_le16(UNI_COLON); 617 target[j] = cpu_to_le16(UNI_COLON);
diff --git a/fs/compat.c b/fs/compat.c
index 728cd8365384..6b06b6bae35e 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -37,7 +37,7 @@
37#include <linux/ctype.h> 37#include <linux/ctype.h>
38#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/dirent.h> 39#include <linux/dirent.h>
40#include <linux/dnotify.h> 40#include <linux/fsnotify.h>
41#include <linux/highuid.h> 41#include <linux/highuid.h>
42#include <linux/sunrpc/svc.h> 42#include <linux/sunrpc/svc.h>
43#include <linux/nfsd/nfsd.h> 43#include <linux/nfsd/nfsd.h>
@@ -1307,9 +1307,13 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1307out: 1307out:
1308 if (iov != iovstack) 1308 if (iov != iovstack)
1309 kfree(iov); 1309 kfree(iov);
1310 if ((ret + (type == READ)) > 0) 1310 if ((ret + (type == READ)) > 0) {
1311 dnotify_parent(file->f_dentry, 1311 struct dentry *dentry = file->f_dentry;
1312 (type == READ) ? DN_ACCESS : DN_MODIFY); 1312 if (type == READ)
1313 fsnotify_access(dentry);
1314 else
1315 fsnotify_modify(dentry);
1316 }
1313 return ret; 1317 return ret;
1314} 1318}
1315 1319
diff --git a/fs/dcache.c b/fs/dcache.c
index 3aa8a7e980d8..a15a2e1f5520 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -19,6 +19,7 @@
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/fsnotify.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
23#include <linux/init.h> 24#include <linux/init.h>
24#include <linux/smp_lock.h> 25#include <linux/smp_lock.h>
@@ -101,6 +102,7 @@ static inline void dentry_iput(struct dentry * dentry)
101 list_del_init(&dentry->d_alias); 102 list_del_init(&dentry->d_alias);
102 spin_unlock(&dentry->d_lock); 103 spin_unlock(&dentry->d_lock);
103 spin_unlock(&dcache_lock); 104 spin_unlock(&dcache_lock);
105 fsnotify_inoderemove(inode);
104 if (dentry->d_op && dentry->d_op->d_iput) 106 if (dentry->d_op && dentry->d_op->d_iput)
105 dentry->d_op->d_iput(dentry, inode); 107 dentry->d_op->d_iput(dentry, inode);
106 else 108 else
@@ -1165,13 +1167,16 @@ out:
1165 1167
1166void d_delete(struct dentry * dentry) 1168void d_delete(struct dentry * dentry)
1167{ 1169{
1170 int isdir = 0;
1168 /* 1171 /*
1169 * Are we the only user? 1172 * Are we the only user?
1170 */ 1173 */
1171 spin_lock(&dcache_lock); 1174 spin_lock(&dcache_lock);
1172 spin_lock(&dentry->d_lock); 1175 spin_lock(&dentry->d_lock);
1176 isdir = S_ISDIR(dentry->d_inode->i_mode);
1173 if (atomic_read(&dentry->d_count) == 1) { 1177 if (atomic_read(&dentry->d_count) == 1) {
1174 dentry_iput(dentry); 1178 dentry_iput(dentry);
1179 fsnotify_nameremove(dentry, isdir);
1175 return; 1180 return;
1176 } 1181 }
1177 1182
@@ -1180,6 +1185,8 @@ void d_delete(struct dentry * dentry)
1180 1185
1181 spin_unlock(&dentry->d_lock); 1186 spin_unlock(&dentry->d_lock);
1182 spin_unlock(&dcache_lock); 1187 spin_unlock(&dcache_lock);
1188
1189 fsnotify_nameremove(dentry, isdir);
1183} 1190}
1184 1191
1185static void __d_rehash(struct dentry * entry, struct hlist_head *list) 1192static void __d_rehash(struct dentry * entry, struct hlist_head *list)
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 581aac959cd3..02aa0ddc582a 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -94,12 +94,10 @@ static struct dcookie_struct * alloc_dcookie(struct dentry * dentry,
94 if (!dcs) 94 if (!dcs)
95 return NULL; 95 return NULL;
96 96
97 atomic_inc(&dentry->d_count);
98 atomic_inc(&vfsmnt->mnt_count);
99 dentry->d_cookie = dcs; 97 dentry->d_cookie = dcs;
100 98
101 dcs->dentry = dentry; 99 dcs->dentry = dget(dentry);
102 dcs->vfsmnt = vfsmnt; 100 dcs->vfsmnt = mntget(vfsmnt);
103 hash_dcookie(dcs); 101 hash_dcookie(dcs);
104 102
105 return dcs; 103 return dcs;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b529786699e7..a86ac4aeaedb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
110 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 110 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
111} 111}
112 112
113static struct dentry * get_dentry(struct dentry *parent, const char *name)
114{
115 struct qstr qstr;
116
117 qstr.name = name;
118 qstr.len = strlen(name);
119 qstr.hash = full_name_hash(name,qstr.len);
120 return lookup_hash(&qstr,parent);
121}
122
123static struct super_block *debug_get_sb(struct file_system_type *fs_type, 113static struct super_block *debug_get_sb(struct file_system_type *fs_type,
124 int flags, const char *dev_name, 114 int flags, const char *dev_name,
125 void *data) 115 void *data)
@@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
157 147
158 *dentry = NULL; 148 *dentry = NULL;
159 down(&parent->d_inode->i_sem); 149 down(&parent->d_inode->i_sem);
160 *dentry = get_dentry (parent, name); 150 *dentry = lookup_one_len(name, parent, strlen(name));
161 if (!IS_ERR(dentry)) { 151 if (!IS_ERR(dentry)) {
162 if ((mode & S_IFMT) == S_IFDIR) 152 if ((mode & S_IFMT) == S_IFDIR)
163 error = debugfs_mkdir(parent->d_inode, *dentry, mode); 153 error = debugfs_mkdir(parent->d_inode, *dentry, mode);
diff --git a/fs/devfs/base.c b/fs/devfs/base.c
index 1ecfe1f184d4..8b679b67e5e0 100644
--- a/fs/devfs/base.c
+++ b/fs/devfs/base.c
@@ -2491,11 +2491,11 @@ static int devfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
2491 return 0; 2491 return 0;
2492} /* End Function devfs_mknod */ 2492} /* End Function devfs_mknod */
2493 2493
2494static int devfs_follow_link(struct dentry *dentry, struct nameidata *nd) 2494static void *devfs_follow_link(struct dentry *dentry, struct nameidata *nd)
2495{ 2495{
2496 struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode); 2496 struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode);
2497 nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV)); 2497 nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV));
2498 return 0; 2498 return NULL;
2499} /* End Function devfs_follow_link */ 2499} /* End Function devfs_follow_link */
2500 2500
2501static struct inode_operations devfs_iops = { 2501static struct inode_operations devfs_iops = {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1d55e7e67342..0d06097bc995 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio)
215static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) 215static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
216{ 216{
217 if (dio->end_io && dio->result) 217 if (dio->end_io && dio->result)
218 dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private); 218 dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
219 if (dio->lock_type == DIO_LOCKING) 219 if (dio->lock_type == DIO_LOCKING)
220 up_read(&dio->inode->i_alloc_sem); 220 up_read(&dio->inode->i_alloc_sem);
221} 221}
diff --git a/fs/dquot.c b/fs/dquot.c
index 3995ce7907cc..b9732335bcdc 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -409,13 +409,10 @@ out_dqlock:
409 * for this sb+type at all. */ 409 * for this sb+type at all. */
410static void invalidate_dquots(struct super_block *sb, int type) 410static void invalidate_dquots(struct super_block *sb, int type)
411{ 411{
412 struct dquot *dquot; 412 struct dquot *dquot, *tmp;
413 struct list_head *head;
414 413
415 spin_lock(&dq_list_lock); 414 spin_lock(&dq_list_lock);
416 for (head = inuse_list.next; head != &inuse_list;) { 415 list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
417 dquot = list_entry(head, struct dquot, dq_inuse);
418 head = head->next;
419 if (dquot->dq_sb != sb) 416 if (dquot->dq_sb != sb)
420 continue; 417 continue;
421 if (dquot->dq_type != type) 418 if (dquot->dq_type != type)
@@ -1519,14 +1516,22 @@ out_path:
1519 * This function is used when filesystem needs to initialize quotas 1516 * This function is used when filesystem needs to initialize quotas
1520 * during mount time. 1517 * during mount time.
1521 */ 1518 */
1522int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry) 1519int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
1520 int format_id, int type)
1523{ 1521{
1522 struct dentry *dentry;
1524 int error; 1523 int error;
1525 1524
1525 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
1526 if (IS_ERR(dentry))
1527 return PTR_ERR(dentry);
1528
1526 error = security_quota_on(dentry); 1529 error = security_quota_on(dentry);
1527 if (error) 1530 if (!error)
1528 return error; 1531 error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
1529 return vfs_quota_on_inode(dentry->d_inode, type, format_id); 1532
1533 dput(dentry);
1534 return error;
1530} 1535}
1531 1536
1532/* Generic routine for getting common part of quota structure */ 1537/* Generic routine for getting common part of quota structure */
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e333655a..6ab1dd0ca904 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
101/* Maximum number of poll wake up nests we are allowing */ 101/* Maximum number of poll wake up nests we are allowing */
102#define EP_MAX_POLLWAKE_NESTS 4 102#define EP_MAX_POLLWAKE_NESTS 4
103 103
104/* Macro to allocate a "struct epitem" from the slab cache */
105#define EPI_MEM_ALLOC() (struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
106
107/* Macro to free a "struct epitem" to the slab cache */
108#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
109
110/* Macro to allocate a "struct eppoll_entry" from the slab cache */
111#define PWQ_MEM_ALLOC() (struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
112
113/* Macro to free a "struct eppoll_entry" to the slab cache */
114#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
115
116/* Fast test to see if the file is an evenpoll file */
117#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
118
119/* Setup the structure that is used as key for the rb-tree */
120#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
121
122/* Compare rb-tree keys */
123#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
124 ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
125
126/* Special initialization for the rb-tree node to detect linkage */
127#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
128
129/* Removes a node from the rb-tree and marks it for a fast is-linked check */
130#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
131
132/* Fast check to verify that the item is linked to the main rb-tree */
133#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
134
135/*
136 * Remove the item from the list and perform its initialization.
137 * This is useful for us because we can test if the item is linked
138 * using "EP_IS_LINKED(p)".
139 */
140#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
141
142/* Tells us if the item is currently linked */
143#define EP_IS_LINKED(p) (!list_empty(p))
144
145/* Get the "struct epitem" from a wait queue pointer */
146#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
147
148/* Get the "struct epitem" from an epoll queue wrapper */
149#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
150
151/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
152#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
153
154
155struct epoll_filefd { 104struct epoll_filefd {
156 struct file *file; 105 struct file *file;
157 int fd; 106 int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
357 306
358 307
359 308
309/* Fast test to see if the file is an evenpoll file */
310static inline int is_file_epoll(struct file *f)
311{
312 return f->f_op == &eventpoll_fops;
313}
314
315/* Setup the structure that is used as key for the rb-tree */
316static inline void ep_set_ffd(struct epoll_filefd *ffd,
317 struct file *file, int fd)
318{
319 ffd->file = file;
320 ffd->fd = fd;
321}
322
323/* Compare rb-tree keys */
324static inline int ep_cmp_ffd(struct epoll_filefd *p1,
325 struct epoll_filefd *p2)
326{
327 return (p1->file > p2->file ? +1:
328 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
329}
330
331/* Special initialization for the rb-tree node to detect linkage */
332static inline void ep_rb_initnode(struct rb_node *n)
333{
334 n->rb_parent = n;
335}
336
337/* Removes a node from the rb-tree and marks it for a fast is-linked check */
338static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
339{
340 rb_erase(n, r);
341 n->rb_parent = n;
342}
343
344/* Fast check to verify that the item is linked to the main rb-tree */
345static inline int ep_rb_linked(struct rb_node *n)
346{
347 return n->rb_parent != n;
348}
349
350/*
351 * Remove the item from the list and perform its initialization.
352 * This is useful for us because we can test if the item is linked
353 * using "ep_is_linked(p)".
354 */
355static inline void ep_list_del(struct list_head *p)
356{
357 list_del(p);
358 INIT_LIST_HEAD(p);
359}
360
361/* Tells us if the item is currently linked */
362static inline int ep_is_linked(struct list_head *p)
363{
364 return !list_empty(p);
365}
366
367/* Get the "struct epitem" from a wait queue pointer */
368static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
369{
370 return container_of(p, struct eppoll_entry, wait)->base;
371}
372
373/* Get the "struct epitem" from an epoll queue wrapper */
374static inline struct epitem * ep_item_from_epqueue(poll_table *p)
375{
376 return container_of(p, struct ep_pqueue, pt)->epi;
377}
378
379/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
380static inline int ep_op_hash_event(int op)
381{
382 return op != EPOLL_CTL_DEL;
383}
384
360/* Initialize the poll safe wake up structure */ 385/* Initialize the poll safe wake up structure */
361static void ep_poll_safewake_init(struct poll_safewake *psw) 386static void ep_poll_safewake_init(struct poll_safewake *psw)
362{ 387{
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
456 epi = list_entry(lsthead->next, struct epitem, fllink); 481 epi = list_entry(lsthead->next, struct epitem, fllink);
457 482
458 ep = epi->ep; 483 ep = epi->ep;
459 EP_LIST_DEL(&epi->fllink); 484 ep_list_del(&epi->fllink);
460 down_write(&ep->sem); 485 down_write(&ep->sem);
461 ep_remove(ep, epi); 486 ep_remove(ep, epi);
462 up_write(&ep->sem); 487 up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
534 current, epfd, op, fd, event)); 559 current, epfd, op, fd, event));
535 560
536 error = -EFAULT; 561 error = -EFAULT;
537 if (EP_OP_HASH_EVENT(op) && 562 if (ep_op_hash_event(op) &&
538 copy_from_user(&epds, event, sizeof(struct epoll_event))) 563 copy_from_user(&epds, event, sizeof(struct epoll_event)))
539 goto eexit_1; 564 goto eexit_1;
540 565
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
560 * adding an epoll file descriptor inside itself. 585 * adding an epoll file descriptor inside itself.
561 */ 586 */
562 error = -EINVAL; 587 error = -EINVAL;
563 if (file == tfile || !IS_FILE_EPOLL(file)) 588 if (file == tfile || !is_file_epoll(file))
564 goto eexit_3; 589 goto eexit_3;
565 590
566 /* 591 /*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
656 * the user passed to us _is_ an eventpoll file. 681 * the user passed to us _is_ an eventpoll file.
657 */ 682 */
658 error = -EINVAL; 683 error = -EINVAL;
659 if (!IS_FILE_EPOLL(file)) 684 if (!is_file_epoll(file))
660 goto eexit_2; 685 goto eexit_2;
661 686
662 /* 687 /*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
831 struct epitem *epi, *epir = NULL; 856 struct epitem *epi, *epir = NULL;
832 struct epoll_filefd ffd; 857 struct epoll_filefd ffd;
833 858
834 EP_SET_FFD(&ffd, file, fd); 859 ep_set_ffd(&ffd, file, fd);
835 read_lock_irqsave(&ep->lock, flags); 860 read_lock_irqsave(&ep->lock, flags);
836 for (rbp = ep->rbr.rb_node; rbp; ) { 861 for (rbp = ep->rbr.rb_node; rbp; ) {
837 epi = rb_entry(rbp, struct epitem, rbn); 862 epi = rb_entry(rbp, struct epitem, rbn);
838 kcmp = EP_CMP_FFD(&ffd, &epi->ffd); 863 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
839 if (kcmp > 0) 864 if (kcmp > 0)
840 rbp = rbp->rb_right; 865 rbp = rbp->rb_right;
841 else if (kcmp < 0) 866 else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
875{ 900{
876 901
877 if (atomic_dec_and_test(&epi->usecnt)) 902 if (atomic_dec_and_test(&epi->usecnt))
878 EPI_MEM_FREE(epi); 903 kmem_cache_free(epi_cache, epi);
879} 904}
880 905
881 906
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
886static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, 911static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
887 poll_table *pt) 912 poll_table *pt)
888{ 913{
889 struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt); 914 struct epitem *epi = ep_item_from_epqueue(pt);
890 struct eppoll_entry *pwq; 915 struct eppoll_entry *pwq;
891 916
892 if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) { 917 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
893 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); 918 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
894 pwq->whead = whead; 919 pwq->whead = whead;
895 pwq->base = epi; 920 pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
912 while (*p) { 937 while (*p) {
913 parent = *p; 938 parent = *p;
914 epic = rb_entry(parent, struct epitem, rbn); 939 epic = rb_entry(parent, struct epitem, rbn);
915 kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd); 940 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
916 if (kcmp > 0) 941 if (kcmp > 0)
917 p = &parent->rb_right; 942 p = &parent->rb_right;
918 else 943 else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
932 struct ep_pqueue epq; 957 struct ep_pqueue epq;
933 958
934 error = -ENOMEM; 959 error = -ENOMEM;
935 if (!(epi = EPI_MEM_ALLOC())) 960 if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
936 goto eexit_1; 961 goto eexit_1;
937 962
938 /* Item initialization follow here ... */ 963 /* Item initialization follow here ... */
939 EP_RB_INITNODE(&epi->rbn); 964 ep_rb_initnode(&epi->rbn);
940 INIT_LIST_HEAD(&epi->rdllink); 965 INIT_LIST_HEAD(&epi->rdllink);
941 INIT_LIST_HEAD(&epi->fllink); 966 INIT_LIST_HEAD(&epi->fllink);
942 INIT_LIST_HEAD(&epi->txlink); 967 INIT_LIST_HEAD(&epi->txlink);
943 INIT_LIST_HEAD(&epi->pwqlist); 968 INIT_LIST_HEAD(&epi->pwqlist);
944 epi->ep = ep; 969 epi->ep = ep;
945 EP_SET_FFD(&epi->ffd, tfile, fd); 970 ep_set_ffd(&epi->ffd, tfile, fd);
946 epi->event = *event; 971 epi->event = *event;
947 atomic_set(&epi->usecnt, 1); 972 atomic_set(&epi->usecnt, 1);
948 epi->nwait = 0; 973 epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
978 ep_rbtree_insert(ep, epi); 1003 ep_rbtree_insert(ep, epi);
979 1004
980 /* If the file is already "ready" we drop it inside the ready list */ 1005 /* If the file is already "ready" we drop it inside the ready list */
981 if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) { 1006 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
982 list_add_tail(&epi->rdllink, &ep->rdllist); 1007 list_add_tail(&epi->rdllink, &ep->rdllist);
983 1008
984 /* Notify waiting tasks that events are available */ 1009 /* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
1007 * allocated wait queue. 1032 * allocated wait queue.
1008 */ 1033 */
1009 write_lock_irqsave(&ep->lock, flags); 1034 write_lock_irqsave(&ep->lock, flags);
1010 if (EP_IS_LINKED(&epi->rdllink)) 1035 if (ep_is_linked(&epi->rdllink))
1011 EP_LIST_DEL(&epi->rdllink); 1036 ep_list_del(&epi->rdllink);
1012 write_unlock_irqrestore(&ep->lock, flags); 1037 write_unlock_irqrestore(&ep->lock, flags);
1013 1038
1014 EPI_MEM_FREE(epi); 1039 kmem_cache_free(epi_cache, epi);
1015eexit_1: 1040eexit_1:
1016 return error; 1041 return error;
1017} 1042}
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1050 * If the item is not linked to the hash it means that it's on its 1075 * If the item is not linked to the hash it means that it's on its
1051 * way toward the removal. Do nothing in this case. 1076 * way toward the removal. Do nothing in this case.
1052 */ 1077 */
1053 if (EP_RB_LINKED(&epi->rbn)) { 1078 if (ep_rb_linked(&epi->rbn)) {
1054 /* 1079 /*
1055 * If the item is "hot" and it is not registered inside the ready 1080 * If the item is "hot" and it is not registered inside the ready
1056 * list, push it inside. If the item is not "hot" and it is currently 1081 * list, push it inside. If the item is not "hot" and it is currently
1057 * registered inside the ready list, unlink it. 1082 * registered inside the ready list, unlink it.
1058 */ 1083 */
1059 if (revents & event->events) { 1084 if (revents & event->events) {
1060 if (!EP_IS_LINKED(&epi->rdllink)) { 1085 if (!ep_is_linked(&epi->rdllink)) {
1061 list_add_tail(&epi->rdllink, &ep->rdllist); 1086 list_add_tail(&epi->rdllink, &ep->rdllist);
1062 1087
1063 /* Notify waiting tasks that events are available */ 1088 /* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
1097 while (!list_empty(lsthead)) { 1122 while (!list_empty(lsthead)) {
1098 pwq = list_entry(lsthead->next, struct eppoll_entry, llink); 1123 pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
1099 1124
1100 EP_LIST_DEL(&pwq->llink); 1125 ep_list_del(&pwq->llink);
1101 remove_wait_queue(pwq->whead, &pwq->wait); 1126 remove_wait_queue(pwq->whead, &pwq->wait);
1102 PWQ_MEM_FREE(pwq); 1127 kmem_cache_free(pwq_cache, pwq);
1103 } 1128 }
1104 } 1129 }
1105} 1130}
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1118 * The check protects us from doing a double unlink ( crash ). 1143 * The check protects us from doing a double unlink ( crash ).
1119 */ 1144 */
1120 error = -ENOENT; 1145 error = -ENOENT;
1121 if (!EP_RB_LINKED(&epi->rbn)) 1146 if (!ep_rb_linked(&epi->rbn))
1122 goto eexit_1; 1147 goto eexit_1;
1123 1148
1124 /* 1149 /*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1133 * This operation together with the above check closes the door to 1158 * This operation together with the above check closes the door to
1134 * double unlinks. 1159 * double unlinks.
1135 */ 1160 */
1136 EP_RB_ERASE(&epi->rbn, &ep->rbr); 1161 ep_rb_erase(&epi->rbn, &ep->rbr);
1137 1162
1138 /* 1163 /*
1139 * If the item we are going to remove is inside the ready file descriptors 1164 * If the item we are going to remove is inside the ready file descriptors
1140 * we want to remove it from this list to avoid stale events. 1165 * we want to remove it from this list to avoid stale events.
1141 */ 1166 */
1142 if (EP_IS_LINKED(&epi->rdllink)) 1167 if (ep_is_linked(&epi->rdllink))
1143 EP_LIST_DEL(&epi->rdllink); 1168 ep_list_del(&epi->rdllink);
1144 1169
1145 error = 0; 1170 error = 0;
1146eexit_1: 1171eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
1174 1199
1175 /* Remove the current item from the list of epoll hooks */ 1200 /* Remove the current item from the list of epoll hooks */
1176 spin_lock(&file->f_ep_lock); 1201 spin_lock(&file->f_ep_lock);
1177 if (EP_IS_LINKED(&epi->fllink)) 1202 if (ep_is_linked(&epi->fllink))
1178 EP_LIST_DEL(&epi->fllink); 1203 ep_list_del(&epi->fllink);
1179 spin_unlock(&file->f_ep_lock); 1204 spin_unlock(&file->f_ep_lock);
1180 1205
1181 /* We need to acquire the write IRQ lock before calling ep_unlink() */ 1206 /* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1210{ 1235{
1211 int pwake = 0; 1236 int pwake = 0;
1212 unsigned long flags; 1237 unsigned long flags;
1213 struct epitem *epi = EP_ITEM_FROM_WAIT(wait); 1238 struct epitem *epi = ep_item_from_wait(wait);
1214 struct eventpoll *ep = epi->ep; 1239 struct eventpoll *ep = epi->ep;
1215 1240
1216 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", 1241 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1228 goto is_disabled; 1253 goto is_disabled;
1229 1254
1230 /* If this file is already in the ready list we exit soon */ 1255 /* If this file is already in the ready list we exit soon */
1231 if (EP_IS_LINKED(&epi->rdllink)) 1256 if (ep_is_linked(&epi->rdllink))
1232 goto is_linked; 1257 goto is_linked;
1233 1258
1234 list_add_tail(&epi->rdllink, &ep->rdllist); 1259 list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
1307 lnk = lnk->next; 1332 lnk = lnk->next;
1308 1333
1309 /* If this file is already in the ready list we exit soon */ 1334 /* If this file is already in the ready list we exit soon */
1310 if (!EP_IS_LINKED(&epi->txlink)) { 1335 if (!ep_is_linked(&epi->txlink)) {
1311 /* 1336 /*
1312 * This is initialized in this way so that the default 1337 * This is initialized in this way so that the default
1313 * behaviour of the reinjecting code will be to push back 1338 * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
1322 /* 1347 /*
1323 * Unlink the item from the ready list. 1348 * Unlink the item from the ready list.
1324 */ 1349 */
1325 EP_LIST_DEL(&epi->rdllink); 1350 ep_list_del(&epi->rdllink);
1326 } 1351 }
1327 } 1352 }
1328 1353
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1401 epi = list_entry(txlist->next, struct epitem, txlink); 1426 epi = list_entry(txlist->next, struct epitem, txlink);
1402 1427
1403 /* Unlink the current item from the transfer list */ 1428 /* Unlink the current item from the transfer list */
1404 EP_LIST_DEL(&epi->txlink); 1429 ep_list_del(&epi->txlink);
1405 1430
1406 /* 1431 /*
1407 * If the item is no longer linked to the interest set, we don't 1432 * If the item is no longer linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1410 * item is set to have an Edge Triggered behaviour, we don't have 1435 * item is set to have an Edge Triggered behaviour, we don't have
1411 * to push it back either. 1436 * to push it back either.
1412 */ 1437 */
1413 if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) && 1438 if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
1414 (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) { 1439 (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
1415 list_add_tail(&epi->rdllink, &ep->rdllist); 1440 list_add_tail(&epi->rdllink, &ep->rdllist);
1416 ricnt++; 1441 ricnt++;
1417 } 1442 }
diff --git a/fs/exec.c b/fs/exec.c
index 3a4b35a14c0d..222ab1c572d8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,6 +58,9 @@
58 58
59int core_uses_pid; 59int core_uses_pid;
60char core_pattern[65] = "core"; 60char core_pattern[65] = "core";
61int suid_dumpable = 0;
62
63EXPORT_SYMBOL(suid_dumpable);
61/* The maximal length of core_pattern is also specified in sysctl.c */ 64/* The maximal length of core_pattern is also specified in sysctl.c */
62 65
63static struct linux_binfmt *formats; 66static struct linux_binfmt *formats;
@@ -639,6 +642,18 @@ static inline int de_thread(struct task_struct *tsk)
639 count = 2; 642 count = 2;
640 if (thread_group_leader(current)) 643 if (thread_group_leader(current))
641 count = 1; 644 count = 1;
645 else {
646 /*
647 * The SIGALRM timer survives the exec, but needs to point
648 * at us as the new group leader now. We have a race with
649 * a timer firing now getting the old leader, so we need to
650 * synchronize with any firing (by calling del_timer_sync)
651 * before we can safely let the old group leader die.
652 */
653 sig->real_timer.data = (unsigned long)current;
654 if (del_timer_sync(&sig->real_timer))
655 add_timer(&sig->real_timer);
656 }
642 while (atomic_read(&sig->count) > count) { 657 while (atomic_read(&sig->count) > count) {
643 sig->group_exit_task = current; 658 sig->group_exit_task = current;
644 sig->notify_count = count; 659 sig->notify_count = count;
@@ -864,6 +879,9 @@ int flush_old_exec(struct linux_binprm * bprm)
864 879
865 if (current->euid == current->uid && current->egid == current->gid) 880 if (current->euid == current->uid && current->egid == current->gid)
866 current->mm->dumpable = 1; 881 current->mm->dumpable = 1;
882 else
883 current->mm->dumpable = suid_dumpable;
884
867 name = bprm->filename; 885 name = bprm->filename;
868 886
869 /* Copies the binary name from after last slash */ 887 /* Copies the binary name from after last slash */
@@ -884,7 +902,7 @@ int flush_old_exec(struct linux_binprm * bprm)
884 permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || 902 permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
885 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { 903 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
886 suid_keys(current); 904 suid_keys(current);
887 current->mm->dumpable = 0; 905 current->mm->dumpable = suid_dumpable;
888 } 906 }
889 907
890 /* An exec changes our domain. We are no longer part of the thread 908 /* An exec changes our domain. We are no longer part of the thread
@@ -1432,6 +1450,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1432 struct inode * inode; 1450 struct inode * inode;
1433 struct file * file; 1451 struct file * file;
1434 int retval = 0; 1452 int retval = 0;
1453 int fsuid = current->fsuid;
1454 int flag = 0;
1435 1455
1436 binfmt = current->binfmt; 1456 binfmt = current->binfmt;
1437 if (!binfmt || !binfmt->core_dump) 1457 if (!binfmt || !binfmt->core_dump)
@@ -1441,6 +1461,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1441 up_write(&mm->mmap_sem); 1461 up_write(&mm->mmap_sem);
1442 goto fail; 1462 goto fail;
1443 } 1463 }
1464
1465 /*
1466 * We cannot trust fsuid as being the "true" uid of the
1467 * process nor do we know its entire history. We only know it
1468 * was tainted so we dump it as root in mode 2.
1469 */
1470 if (mm->dumpable == 2) { /* Setuid core dump mode */
1471 flag = O_EXCL; /* Stop rewrite attacks */
1472 current->fsuid = 0; /* Dump root private */
1473 }
1444 mm->dumpable = 0; 1474 mm->dumpable = 0;
1445 init_completion(&mm->core_done); 1475 init_completion(&mm->core_done);
1446 spin_lock_irq(&current->sighand->siglock); 1476 spin_lock_irq(&current->sighand->siglock);
@@ -1466,7 +1496,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1466 lock_kernel(); 1496 lock_kernel();
1467 format_corename(corename, core_pattern, signr); 1497 format_corename(corename, core_pattern, signr);
1468 unlock_kernel(); 1498 unlock_kernel();
1469 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); 1499 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
1470 if (IS_ERR(file)) 1500 if (IS_ERR(file))
1471 goto fail_unlock; 1501 goto fail_unlock;
1472 inode = file->f_dentry->d_inode; 1502 inode = file->f_dentry->d_inode;
@@ -1491,6 +1521,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1491close_fail: 1521close_fail:
1492 filp_close(file, NULL); 1522 filp_close(file, NULL);
1493fail_unlock: 1523fail_unlock:
1524 current->fsuid = fsuid;
1494 complete_all(&mm->core_done); 1525 complete_all(&mm->core_done);
1495fail: 1526fail:
1496 return retval; 1527 return retval;
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index ee240a14e70f..c5d02da73bc3 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o 11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o 12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o
13ext2-$(CONFIG_EXT2_FS_XIP) += xip.o
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 25f4a64fd6bc..213148c36ebe 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -396,12 +396,12 @@ static size_t
396ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, 396ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
397 const char *name, size_t name_len) 397 const char *name, size_t name_len)
398{ 398{
399 const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); 399 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
400 400
401 if (!test_opt(inode->i_sb, POSIX_ACL)) 401 if (!test_opt(inode->i_sb, POSIX_ACL))
402 return 0; 402 return 0;
403 if (list && size <= list_size) 403 if (list && size <= list_size)
404 memcpy(list, XATTR_NAME_ACL_ACCESS, size); 404 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
405 return size; 405 return size;
406} 406}
407 407
@@ -409,12 +409,12 @@ static size_t
409ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, 409ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
410 const char *name, size_t name_len) 410 const char *name, size_t name_len)
411{ 411{
412 const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); 412 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
413 413
414 if (!test_opt(inode->i_sb, POSIX_ACL)) 414 if (!test_opt(inode->i_sb, POSIX_ACL))
415 return 0; 415 return 0;
416 if (list && size <= list_size) 416 if (list && size <= list_size)
417 memcpy(list, XATTR_NAME_ACL_DEFAULT, size); 417 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
418 return size; 418 return size;
419} 419}
420 420
@@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name,
506} 506}
507 507
508struct xattr_handler ext2_xattr_acl_access_handler = { 508struct xattr_handler ext2_xattr_acl_access_handler = {
509 .prefix = XATTR_NAME_ACL_ACCESS, 509 .prefix = POSIX_ACL_XATTR_ACCESS,
510 .list = ext2_xattr_list_acl_access, 510 .list = ext2_xattr_list_acl_access,
511 .get = ext2_xattr_get_acl_access, 511 .get = ext2_xattr_get_acl_access,
512 .set = ext2_xattr_set_acl_access, 512 .set = ext2_xattr_set_acl_access,
513}; 513};
514 514
515struct xattr_handler ext2_xattr_acl_default_handler = { 515struct xattr_handler ext2_xattr_acl_default_handler = {
516 .prefix = XATTR_NAME_ACL_DEFAULT, 516 .prefix = POSIX_ACL_XATTR_DEFAULT,
517 .list = ext2_xattr_list_acl_default, 517 .list = ext2_xattr_list_acl_default,
518 .get = ext2_xattr_get_acl_default, 518 .get = ext2_xattr_get_acl_default,
519 .set = ext2_xattr_set_acl_default, 519 .set = ext2_xattr_set_acl_default,
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index fed96ae81a7d..0bde85bafe38 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -4,7 +4,7 @@
4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> 4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
5*/ 5*/
6 6
7#include <linux/xattr_acl.h> 7#include <linux/posix_acl_xattr.h>
8 8
9#define EXT2_ACL_VERSION 0x0001 9#define EXT2_ACL_VERSION 0x0001
10 10
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f0fd726c3f1..e977f8566d14 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -2,6 +2,15 @@
2#include <linux/ext2_fs.h> 2#include <linux/ext2_fs.h>
3 3
4/* 4/*
5 * ext2 mount options
6 */
7struct ext2_mount_options {
8 unsigned long s_mount_opt;
9 uid_t s_resuid;
10 gid_t s_resgid;
11};
12
13/*
5 * second extended file system inode data in memory 14 * second extended file system inode data in memory
6 */ 15 */
7struct ext2_inode_info { 16struct ext2_inode_info {
@@ -147,9 +156,11 @@ extern struct file_operations ext2_dir_operations;
147/* file.c */ 156/* file.c */
148extern struct inode_operations ext2_file_inode_operations; 157extern struct inode_operations ext2_file_inode_operations;
149extern struct file_operations ext2_file_operations; 158extern struct file_operations ext2_file_operations;
159extern struct file_operations ext2_xip_file_operations;
150 160
151/* inode.c */ 161/* inode.c */
152extern struct address_space_operations ext2_aops; 162extern struct address_space_operations ext2_aops;
163extern struct address_space_operations ext2_aops_xip;
153extern struct address_space_operations ext2_nobh_aops; 164extern struct address_space_operations ext2_nobh_aops;
154 165
155/* namei.c */ 166/* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f5e86141ec54..a484412fc782 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,6 +55,20 @@ struct file_operations ext2_file_operations = {
55 .sendfile = generic_file_sendfile, 55 .sendfile = generic_file_sendfile,
56}; 56};
57 57
58#ifdef CONFIG_EXT2_FS_XIP
/* file_operations used for regular files on "xip" (execute-in-place)
 * mounts: read/write/mmap/sendfile go through the xip_* helpers instead
 * of the paths used by the regular ext2_file_operations; the remaining
 * entries (llseek, ioctl, open, release, fsync) are shared. */
59struct file_operations ext2_xip_file_operations = {
60 .llseek = generic_file_llseek,
61 .read = xip_file_read,
62 .write = xip_file_write,
63 .ioctl = ext2_ioctl,
64 .mmap = xip_file_mmap,
65 .open = generic_file_open,
66 .release = ext2_release_file,
67 .fsync = ext2_sync_file,
68 .sendfile = xip_file_sendfile,
69};
70#endif
71
58struct inode_operations ext2_file_inode_operations = { 72struct inode_operations ext2_file_inode_operations = {
59 .truncate = ext2_truncate, 73 .truncate = ext2_truncate,
60#ifdef CONFIG_EXT2_FS_XATTR 74#ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 77e059149212..161f156d98c8 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -612,6 +612,7 @@ got:
612 err = ext2_init_acl(inode, dir); 612 err = ext2_init_acl(inode, dir);
613 if (err) { 613 if (err) {
614 DQUOT_FREE_INODE(inode); 614 DQUOT_FREE_INODE(inode);
615 DQUOT_DROP(inode);
615 goto fail2; 616 goto fail2;
616 } 617 }
617 mark_inode_dirty(inode); 618 mark_inode_dirty(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a50d9db4b6e4..53dceb0c6593 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
33#include <linux/mpage.h> 33#include <linux/mpage.h>
34#include "ext2.h" 34#include "ext2.h"
35#include "acl.h" 35#include "acl.h"
36#include "xip.h"
36 37
37MODULE_AUTHOR("Remy Card and others"); 38MODULE_AUTHOR("Remy Card and others");
38MODULE_DESCRIPTION("Second Extended Filesystem"); 39MODULE_DESCRIPTION("Second Extended Filesystem");
@@ -594,6 +595,16 @@ out:
594 if (err) 595 if (err)
595 goto cleanup; 596 goto cleanup;
596 597
598 if (ext2_use_xip(inode->i_sb)) {
599 /*
600 * we need to clear the block
601 */
602 err = ext2_clear_xip_target (inode,
603 le32_to_cpu(chain[depth-1].key));
604 if (err)
605 goto cleanup;
606 }
607
597 if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) 608 if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
598 goto changed; 609 goto changed;
599 610
@@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = {
691 .writepages = ext2_writepages, 702 .writepages = ext2_writepages,
692}; 703};
693 704
/* Minimal address_space_operations for XIP inodes: only bmap and
 * get_xip_page are provided — NOTE(review): XIP appears to bypass the
 * normal readpage/writepage paths entirely; confirm against xip.c. */
705struct address_space_operations ext2_aops_xip = {
706 .bmap = ext2_bmap,
707 .get_xip_page = ext2_get_xip_page,
708};
709
694struct address_space_operations ext2_nobh_aops = { 710struct address_space_operations ext2_nobh_aops = {
695 .readpage = ext2_readpage, 711 .readpage = ext2_readpage,
696 .readpages = ext2_readpages, 712 .readpages = ext2_readpages,
@@ -910,7 +926,9 @@ void ext2_truncate (struct inode * inode)
910 iblock = (inode->i_size + blocksize-1) 926 iblock = (inode->i_size + blocksize-1)
911 >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); 927 >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
912 928
913 if (test_opt(inode->i_sb, NOBH)) 929 if (mapping_is_xip(inode->i_mapping))
930 xip_truncate_page(inode->i_mapping, inode->i_size);
931 else if (test_opt(inode->i_sb, NOBH))
914 nobh_truncate_page(inode->i_mapping, inode->i_size); 932 nobh_truncate_page(inode->i_mapping, inode->i_size);
915 else 933 else
916 block_truncate_page(inode->i_mapping, 934 block_truncate_page(inode->i_mapping,
@@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode)
1110 1128
1111 if (S_ISREG(inode->i_mode)) { 1129 if (S_ISREG(inode->i_mode)) {
1112 inode->i_op = &ext2_file_inode_operations; 1130 inode->i_op = &ext2_file_inode_operations;
1113 inode->i_fop = &ext2_file_operations; 1131 if (ext2_use_xip(inode->i_sb)) {
1114 if (test_opt(inode->i_sb, NOBH)) 1132 inode->i_mapping->a_ops = &ext2_aops_xip;
1133 inode->i_fop = &ext2_xip_file_operations;
1134 } else if (test_opt(inode->i_sb, NOBH)) {
1115 inode->i_mapping->a_ops = &ext2_nobh_aops; 1135 inode->i_mapping->a_ops = &ext2_nobh_aops;
1116 else 1136 inode->i_fop = &ext2_file_operations;
1137 } else {
1117 inode->i_mapping->a_ops = &ext2_aops; 1138 inode->i_mapping->a_ops = &ext2_aops;
1139 inode->i_fop = &ext2_file_operations;
1140 }
1118 } else if (S_ISDIR(inode->i_mode)) { 1141 } else if (S_ISDIR(inode->i_mode)) {
1119 inode->i_op = &ext2_dir_inode_operations; 1142 inode->i_op = &ext2_dir_inode_operations;
1120 inode->i_fop = &ext2_dir_operations; 1143 inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3176b3d3ffa8..c5513953c825 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -34,6 +34,7 @@
34#include "ext2.h" 34#include "ext2.h"
35#include "xattr.h" 35#include "xattr.h"
36#include "acl.h" 36#include "acl.h"
37#include "xip.h"
37 38
38/* 39/*
39 * Couple of helper functions - make the code slightly cleaner. 40 * Couple of helper functions - make the code slightly cleaner.
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
127 int err = PTR_ERR(inode); 128 int err = PTR_ERR(inode);
128 if (!IS_ERR(inode)) { 129 if (!IS_ERR(inode)) {
129 inode->i_op = &ext2_file_inode_operations; 130 inode->i_op = &ext2_file_inode_operations;
130 inode->i_fop = &ext2_file_operations; 131 if (ext2_use_xip(inode->i_sb)) {
131 if (test_opt(inode->i_sb, NOBH)) 132 inode->i_mapping->a_ops = &ext2_aops_xip;
133 inode->i_fop = &ext2_xip_file_operations;
134 } else if (test_opt(inode->i_sb, NOBH)) {
132 inode->i_mapping->a_ops = &ext2_nobh_aops; 135 inode->i_mapping->a_ops = &ext2_nobh_aops;
133 else 136 inode->i_fop = &ext2_file_operations;
137 } else {
134 inode->i_mapping->a_ops = &ext2_aops; 138 inode->i_mapping->a_ops = &ext2_aops;
139 inode->i_fop = &ext2_file_operations;
140 }
135 mark_inode_dirty(inode); 141 mark_inode_dirty(inode);
136 err = ext2_add_nondir(dentry, inode); 142 err = ext2_add_nondir(dentry, inode);
137 } 143 }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 661c3d98d946..dcfe331dc4c4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
31#include "ext2.h" 31#include "ext2.h"
32#include "xattr.h" 32#include "xattr.h"
33#include "acl.h" 33#include "acl.h"
34#include "xip.h"
34 35
35static void ext2_sync_super(struct super_block *sb, 36static void ext2_sync_super(struct super_block *sb,
36 struct ext2_super_block *es); 37 struct ext2_super_block *es);
@@ -257,7 +258,7 @@ enum {
257 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 258 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
258 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 259 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
259 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, 260 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
260 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 261 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
261 Opt_ignore, Opt_err, 262 Opt_ignore, Opt_err,
262}; 263};
263 264
@@ -286,6 +287,7 @@ static match_table_t tokens = {
286 {Opt_nouser_xattr, "nouser_xattr"}, 287 {Opt_nouser_xattr, "nouser_xattr"},
287 {Opt_acl, "acl"}, 288 {Opt_acl, "acl"},
288 {Opt_noacl, "noacl"}, 289 {Opt_noacl, "noacl"},
290 {Opt_xip, "xip"},
289 {Opt_ignore, "grpquota"}, 291 {Opt_ignore, "grpquota"},
290 {Opt_ignore, "noquota"}, 292 {Opt_ignore, "noquota"},
291 {Opt_ignore, "quota"}, 293 {Opt_ignore, "quota"},
@@ -397,6 +399,13 @@ static int parse_options (char * options,
397 printk("EXT2 (no)acl options not supported\n"); 399 printk("EXT2 (no)acl options not supported\n");
398 break; 400 break;
399#endif 401#endif
402 case Opt_xip:
403#ifdef CONFIG_EXT2_FS_XIP
404 set_opt (sbi->s_mount_opt, XIP);
405#else
406 printk("EXT2 xip option not supported\n");
407#endif
408 break;
400 case Opt_ignore: 409 case Opt_ignore:
401 break; 410 break;
402 default: 411 default:
@@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
640 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 649 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
641 MS_POSIXACL : 0); 650 MS_POSIXACL : 0);
642 651
652 ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
653 EXT2_MOUNT_XIP if not */
654
643 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && 655 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
644 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || 656 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
645 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 657 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
668 680
669 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); 681 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
670 682
683 if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
684 (sb->s_blocksize != blocksize))) {
685 if (!silent)
686 printk("XIP: Unsupported blocksize\n");
687 goto failed_mount;
688 }
689
671 /* If the blocksize doesn't match, re-read the thing.. */ 690 /* If the blocksize doesn't match, re-read the thing.. */
672 if (sb->s_blocksize != blocksize) { 691 if (sb->s_blocksize != blocksize) {
673 brelse(bh); 692 brelse(bh);
@@ -916,17 +935,34 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
916{ 935{
917 struct ext2_sb_info * sbi = EXT2_SB(sb); 936 struct ext2_sb_info * sbi = EXT2_SB(sb);
918 struct ext2_super_block * es; 937 struct ext2_super_block * es;
938 unsigned long old_mount_opt = sbi->s_mount_opt;
939 struct ext2_mount_options old_opts;
940 unsigned long old_sb_flags;
941 int err;
942
943 /* Store the old options */
944 old_sb_flags = sb->s_flags;
945 old_opts.s_mount_opt = sbi->s_mount_opt;
946 old_opts.s_resuid = sbi->s_resuid;
947 old_opts.s_resgid = sbi->s_resgid;
919 948
920 /* 949 /*
921 * Allow the "check" option to be passed as a remount option. 950 * Allow the "check" option to be passed as a remount option.
922 */ 951 */
923 if (!parse_options (data, sbi)) 952 if (!parse_options (data, sbi)) {
924 return -EINVAL; 953 err = -EINVAL;
954 goto restore_opts;
955 }
925 956
926 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 957 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
927 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 958 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
928 959
929 es = sbi->s_es; 960 es = sbi->s_es;
961 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
962 (old_mount_opt & EXT2_MOUNT_XIP)) &&
963 invalidate_inodes(sb))
964 ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
965 "xip remain in cache (no functional problem)");
930 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 966 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
931 return 0; 967 return 0;
932 if (*flags & MS_RDONLY) { 968 if (*flags & MS_RDONLY) {
@@ -946,7 +982,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
946 printk("EXT2-fs: %s: couldn't remount RDWR because of " 982 printk("EXT2-fs: %s: couldn't remount RDWR because of "
947 "unsupported optional features (%x).\n", 983 "unsupported optional features (%x).\n",
948 sb->s_id, le32_to_cpu(ret)); 984 sb->s_id, le32_to_cpu(ret));
949 return -EROFS; 985 err = -EROFS;
986 goto restore_opts;
950 } 987 }
951 /* 988 /*
952 * Mounting a RDONLY partition read-write, so reread and 989 * Mounting a RDONLY partition read-write, so reread and
@@ -959,6 +996,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
959 } 996 }
960 ext2_sync_super(sb, es); 997 ext2_sync_super(sb, es);
961 return 0; 998 return 0;
999restore_opts:
1000 sbi->s_mount_opt = old_opts.s_mount_opt;
1001 sbi->s_resuid = old_opts.s_resuid;
1002 sbi->s_resgid = old_opts.s_resgid;
1003 sb->s_flags = old_sb_flags;
1004 return err;
962} 1005}
963 1006
964static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) 1007static int ext2_statfs (struct super_block * sb, struct kstatfs * buf)
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 9f7bac01d557..1e67d87cfa91 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -21,11 +21,11 @@
21#include "xattr.h" 21#include "xattr.h"
22#include <linux/namei.h> 22#include <linux/namei.h>
23 23
24static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd) 24static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
25{ 25{
26 struct ext2_inode_info *ei = EXT2_I(dentry->d_inode); 26 struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
27 nd_set_link(nd, (char *)ei->i_data); 27 nd_set_link(nd, (char *)ei->i_data);
28 return 0; 28 return NULL;
29} 29}
30 30
31struct inode_operations ext2_symlink_inode_operations = { 31struct inode_operations ext2_symlink_inode_operations = {
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 27982b500e84..0099462d4271 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -823,7 +823,7 @@ cleanup:
823void 823void
824ext2_xattr_put_super(struct super_block *sb) 824ext2_xattr_put_super(struct super_block *sb)
825{ 825{
826 mb_cache_shrink(ext2_xattr_cache, sb->s_bdev); 826 mb_cache_shrink(sb->s_bdev);
827} 827}
828 828
829 829
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
new file mode 100644
index 000000000000..ca7f00312388
--- /dev/null
+++ b/fs/ext2/xip.c
@@ -0,0 +1,93 @@
1/*
2 * linux/fs/ext2/xip.c
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte (cotte@de.ibm.com)
6 */
7
8#include <linux/mm.h>
9#include <linux/fs.h>
10#include <linux/genhd.h>
11#include <linux/buffer_head.h>
12#include <linux/ext2_fs_sb.h>
13#include <linux/ext2_fs.h>
14#include "ext2.h"
15#include "xip.h"
16
17static inline int
18__inode_direct_access(struct inode *inode, sector_t sector,
19 unsigned long *data)
20{
21 BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
22 return inode->i_sb->s_bdev->bd_disk->fops
23 ->direct_access(inode->i_sb->s_bdev,sector,data);
24}
25
26static inline int
27__ext2_get_sector(struct inode *inode, sector_t offset, int create,
28 sector_t *result)
29{
30 struct buffer_head tmp;
31 int rc;
32
33 memset(&tmp, 0, sizeof(struct buffer_head));
34 rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp,
35 create);
36 *result = tmp.b_blocknr;
37
38 /* did we get a sparse block (hole in the file)? */
39 if (!tmp.b_blocknr && !rc) {
40 BUG_ON(create);
41 rc = -ENODATA;
42 }
43
44 return rc;
45}
46
47int
48ext2_clear_xip_target(struct inode *inode, int block)
49{
50 sector_t sector = block * (PAGE_SIZE/512);
51 unsigned long data;
52 int rc;
53
54 rc = __inode_direct_access(inode, sector, &data);
55 if (!rc)
56 clear_page((void*)data);
57 return rc;
58}
59
60void ext2_xip_verify_sb(struct super_block *sb)
61{
62 struct ext2_sb_info *sbi = EXT2_SB(sb);
63
64 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
65 !sb->s_bdev->bd_disk->fops->direct_access) {
66 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
67 ext2_warning(sb, __FUNCTION__,
68 "ignoring xip option - not supported by bdev");
69 }
70}
71
72struct page *
73ext2_get_xip_page(struct address_space *mapping, sector_t offset,
74 int create)
75{
76 int rc;
77 unsigned long data;
78 sector_t sector;
79
80 /* first, retrieve the sector number */
81 rc = __ext2_get_sector(mapping->host, offset, create, &sector);
82 if (rc)
83 goto error;
84
85 /* retrieve address of the target data */
86 rc = __inode_direct_access
87 (mapping->host, sector * (PAGE_SIZE/512), &data);
88 if (!rc)
89 return virt_to_page(data);
90
91 error:
92 return ERR_PTR(rc);
93}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
new file mode 100644
index 000000000000..aa85331d6c56
--- /dev/null
+++ b/fs/ext2/xip.h
@@ -0,0 +1,25 @@
1/*
2 * linux/fs/ext2/xip.h
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte (cotte@de.ibm.com)
6 */
7
8#ifdef CONFIG_EXT2_FS_XIP
9extern void ext2_xip_verify_sb (struct super_block *);
10extern int ext2_clear_xip_target (struct inode *, int);
11
12static inline int ext2_use_xip (struct super_block *sb)
13{
14 struct ext2_sb_info *sbi = EXT2_SB(sb);
15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
16}
17struct page* ext2_get_xip_page (struct address_space *, sector_t, int);
18#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page)
19#else
20#define mapping_is_xip(map) 0
21#define ext2_xip_verify_sb(sb) do { } while (0)
22#define ext2_use_xip(sb) 0
23#define ext2_clear_xip_target(inode, chain) 0
24#define ext2_get_xip_page NULL
25#endif
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 638c13a26c03..3ac38266fc9e 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode)
393 int retries = 0; 393 int retries = 0;
394 394
395 retry: 395 retry:
396 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); 396 handle = ext3_journal_start(inode,
397 EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
397 if (IS_ERR(handle)) { 398 if (IS_ERR(handle)) {
398 error = PTR_ERR(handle); 399 error = PTR_ERR(handle);
399 ext3_std_error(inode->i_sb, error); 400 ext3_std_error(inode->i_sb, error);
@@ -417,12 +418,12 @@ static size_t
417ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 418ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
418 const char *name, size_t name_len) 419 const char *name, size_t name_len)
419{ 420{
420 const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); 421 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
421 422
422 if (!test_opt(inode->i_sb, POSIX_ACL)) 423 if (!test_opt(inode->i_sb, POSIX_ACL))
423 return 0; 424 return 0;
424 if (list && size <= list_len) 425 if (list && size <= list_len)
425 memcpy(list, XATTR_NAME_ACL_ACCESS, size); 426 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
426 return size; 427 return size;
427} 428}
428 429
@@ -430,12 +431,12 @@ static size_t
430ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 431ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
431 const char *name, size_t name_len) 432 const char *name, size_t name_len)
432{ 433{
433 const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); 434 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
434 435
435 if (!test_opt(inode->i_sb, POSIX_ACL)) 436 if (!test_opt(inode->i_sb, POSIX_ACL))
436 return 0; 437 return 0;
437 if (list && size <= list_len) 438 if (list && size <= list_len)
438 memcpy(list, XATTR_NAME_ACL_DEFAULT, size); 439 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
439 return size; 440 return size;
440} 441}
441 442
@@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
503 acl = NULL; 504 acl = NULL;
504 505
505retry: 506retry:
506 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); 507 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
507 if (IS_ERR(handle)) 508 if (IS_ERR(handle))
508 return PTR_ERR(handle); 509 return PTR_ERR(handle);
509 error = ext3_set_acl(handle, inode, type, acl); 510 error = ext3_set_acl(handle, inode, type, acl);
@@ -535,14 +536,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name,
535} 536}
536 537
537struct xattr_handler ext3_xattr_acl_access_handler = { 538struct xattr_handler ext3_xattr_acl_access_handler = {
538 .prefix = XATTR_NAME_ACL_ACCESS, 539 .prefix = POSIX_ACL_XATTR_ACCESS,
539 .list = ext3_xattr_list_acl_access, 540 .list = ext3_xattr_list_acl_access,
540 .get = ext3_xattr_get_acl_access, 541 .get = ext3_xattr_get_acl_access,
541 .set = ext3_xattr_set_acl_access, 542 .set = ext3_xattr_set_acl_access,
542}; 543};
543 544
544struct xattr_handler ext3_xattr_acl_default_handler = { 545struct xattr_handler ext3_xattr_acl_default_handler = {
545 .prefix = XATTR_NAME_ACL_DEFAULT, 546 .prefix = POSIX_ACL_XATTR_DEFAULT,
546 .list = ext3_xattr_list_acl_default, 547 .list = ext3_xattr_list_acl_default,
547 .get = ext3_xattr_get_acl_default, 548 .get = ext3_xattr_get_acl_default,
548 .set = ext3_xattr_set_acl_default, 549 .set = ext3_xattr_set_acl_default,
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 98af0c0d0ba9..92d50b53a933 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -4,7 +4,7 @@
4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> 4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
5*/ 5*/
6 6
7#include <linux/xattr_acl.h> 7#include <linux/posix_acl_xattr.h>
8 8
9#define EXT3_ACL_VERSION 0x0001 9#define EXT3_ACL_VERSION 0x0001
10 10
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index ccd632fcc6d8..e463dca008e4 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -749,24 +749,24 @@ fail_access:
749 * to find a free region that is of my size and has not 749 * to find a free region that is of my size and has not
750 * been reserved. 750 * been reserved.
751 * 751 *
752 * on succeed, it returns the reservation window to be appended to.
753 * failed, return NULL.
754 */ 752 */
755static struct ext3_reserve_window_node *find_next_reservable_window( 753static int find_next_reservable_window(
756 struct ext3_reserve_window_node *search_head, 754 struct ext3_reserve_window_node *search_head,
757 unsigned long size, int *start_block, 755 struct ext3_reserve_window_node *my_rsv,
756 struct super_block * sb, int start_block,
758 int last_block) 757 int last_block)
759{ 758{
760 struct rb_node *next; 759 struct rb_node *next;
761 struct ext3_reserve_window_node *rsv, *prev; 760 struct ext3_reserve_window_node *rsv, *prev;
762 int cur; 761 int cur;
762 int size = my_rsv->rsv_goal_size;
763 763
764 /* TODO: make the start of the reservation window byte-aligned */ 764 /* TODO: make the start of the reservation window byte-aligned */
765 /* cur = *start_block & ~7;*/ 765 /* cur = *start_block & ~7;*/
766 cur = *start_block; 766 cur = start_block;
767 rsv = search_head; 767 rsv = search_head;
768 if (!rsv) 768 if (!rsv)
769 return NULL; 769 return -1;
770 770
771 while (1) { 771 while (1) {
772 if (cur <= rsv->rsv_end) 772 if (cur <= rsv->rsv_end)
@@ -782,11 +782,11 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
782 * space with expected-size (or more)... 782 * space with expected-size (or more)...
783 */ 783 */
784 if (cur > last_block) 784 if (cur > last_block)
785 return NULL; /* fail */ 785 return -1; /* fail */
786 786
787 prev = rsv; 787 prev = rsv;
788 next = rb_next(&rsv->rsv_node); 788 next = rb_next(&rsv->rsv_node);
789 rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); 789 rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
790 790
791 /* 791 /*
792 * Reached the last reservation, we can just append to the 792 * Reached the last reservation, we can just append to the
@@ -813,8 +813,25 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
813 * return the reservation window that we could append to. 813 * return the reservation window that we could append to.
814 * succeed. 814 * succeed.
815 */ 815 */
816 *start_block = cur; 816
817 return prev; 817 if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
818 rsv_window_remove(sb, my_rsv);
819
820 /*
821 * Let's book the whole avaliable window for now. We will check the
822 * disk bitmap later and then, if there are free blocks then we adjust
823 * the window size if it's larger than requested.
824 * Otherwise, we will remove this node from the tree next time
825 * call find_next_reservable_window.
826 */
827 my_rsv->rsv_start = cur;
828 my_rsv->rsv_end = cur + size - 1;
829 my_rsv->rsv_alloc_hit = 0;
830
831 if (prev != my_rsv)
832 ext3_rsv_window_add(sb, my_rsv);
833
834 return 0;
818} 835}
819 836
820/** 837/**
@@ -852,6 +869,7 @@ static struct ext3_reserve_window_node *find_next_reservable_window(
852 * @sb: the super block 869 * @sb: the super block
853 * @group: the group we are trying to allocate in 870 * @group: the group we are trying to allocate in
854 * @bitmap_bh: the block group block bitmap 871 * @bitmap_bh: the block group block bitmap
872 *
855 */ 873 */
856static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, 874static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
857 int goal, struct super_block *sb, 875 int goal, struct super_block *sb,
@@ -860,10 +878,10 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
860 struct ext3_reserve_window_node *search_head; 878 struct ext3_reserve_window_node *search_head;
861 int group_first_block, group_end_block, start_block; 879 int group_first_block, group_end_block, start_block;
862 int first_free_block; 880 int first_free_block;
863 int reservable_space_start;
864 struct ext3_reserve_window_node *prev_rsv;
865 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; 881 struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
866 unsigned long size; 882 unsigned long size;
883 int ret;
884 spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
867 885
868 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + 886 group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
869 group * EXT3_BLOCKS_PER_GROUP(sb); 887 group * EXT3_BLOCKS_PER_GROUP(sb);
@@ -875,6 +893,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
875 start_block = goal + group_first_block; 893 start_block = goal + group_first_block;
876 894
877 size = my_rsv->rsv_goal_size; 895 size = my_rsv->rsv_goal_size;
896
878 if (!rsv_is_empty(&my_rsv->rsv_window)) { 897 if (!rsv_is_empty(&my_rsv->rsv_window)) {
879 /* 898 /*
880 * if the old reservation is cross group boundary 899 * if the old reservation is cross group boundary
@@ -908,6 +927,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
908 my_rsv->rsv_goal_size= size; 927 my_rsv->rsv_goal_size= size;
909 } 928 }
910 } 929 }
930
931 spin_lock(rsv_lock);
911 /* 932 /*
912 * shift the search start to the window near the goal block 933 * shift the search start to the window near the goal block
913 */ 934 */
@@ -921,11 +942,16 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
921 * need to check the bitmap after we found a reservable window. 942 * need to check the bitmap after we found a reservable window.
922 */ 943 */
923retry: 944retry:
924 prev_rsv = find_next_reservable_window(search_head, size, 945 ret = find_next_reservable_window(search_head, my_rsv, sb,
925 &start_block, group_end_block); 946 start_block, group_end_block);
926 if (prev_rsv == NULL) 947
927 goto failed; 948 if (ret == -1) {
928 reservable_space_start = start_block; 949 if (!rsv_is_empty(&my_rsv->rsv_window))
950 rsv_window_remove(sb, my_rsv);
951 spin_unlock(rsv_lock);
952 return -1;
953 }
954
929 /* 955 /*
930 * On success, find_next_reservable_window() returns the 956 * On success, find_next_reservable_window() returns the
931 * reservation window where there is a reservable space after it. 957 * reservation window where there is a reservable space after it.
@@ -937,8 +963,9 @@ retry:
937 * block. Search start from the start block of the reservable space 963 * block. Search start from the start block of the reservable space
938 * we just found. 964 * we just found.
939 */ 965 */
966 spin_unlock(rsv_lock);
940 first_free_block = bitmap_search_next_usable_block( 967 first_free_block = bitmap_search_next_usable_block(
941 reservable_space_start - group_first_block, 968 my_rsv->rsv_start - group_first_block,
942 bitmap_bh, group_end_block - group_first_block + 1); 969 bitmap_bh, group_end_block - group_first_block + 1);
943 970
944 if (first_free_block < 0) { 971 if (first_free_block < 0) {
@@ -946,54 +973,29 @@ retry:
946 * no free block left on the bitmap, no point 973 * no free block left on the bitmap, no point
947 * to reserve the space. return failed. 974 * to reserve the space. return failed.
948 */ 975 */
949 goto failed; 976 spin_lock(rsv_lock);
977 if (!rsv_is_empty(&my_rsv->rsv_window))
978 rsv_window_remove(sb, my_rsv);
979 spin_unlock(rsv_lock);
980 return -1; /* failed */
950 } 981 }
982
951 start_block = first_free_block + group_first_block; 983 start_block = first_free_block + group_first_block;
952 /* 984 /*
953 * check if the first free block is within the 985 * check if the first free block is within the
954 * free space we just found 986 * free space we just reserved
955 */ 987 */
956 if ((start_block >= reservable_space_start) && 988 if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
957 (start_block < reservable_space_start + size)) 989 return 0; /* success */
958 goto found_rsv_window;
959 /* 990 /*
960 * if the first free bit we found is out of the reservable space 991 * if the first free bit we found is out of the reservable space
961 * this means there is no free block on the reservable space 992 * continue search for next reservable space,
962 * we should continue search for next reservable space,
963 * start from where the free block is, 993 * start from where the free block is,
964 * we also shift the list head to where we stopped last time 994 * we also shift the list head to where we stopped last time
965 */ 995 */
966 search_head = prev_rsv; 996 search_head = my_rsv;
997 spin_lock(rsv_lock);
967 goto retry; 998 goto retry;
968
969found_rsv_window:
970 /*
971 * great! the reservable space contains some free blocks.
972 * if the search returns that we should add the new
973 * window just next to where the old window, we don't
974 * need to remove the old window first then add it to the
975 * same place, just update the new start and new end.
976 */
977 if (my_rsv != prev_rsv) {
978 if (!rsv_is_empty(&my_rsv->rsv_window))
979 rsv_window_remove(sb, my_rsv);
980 }
981 my_rsv->rsv_start = reservable_space_start;
982 my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
983 my_rsv->rsv_alloc_hit = 0;
984 if (my_rsv != prev_rsv) {
985 ext3_rsv_window_add(sb, my_rsv);
986 }
987 return 0; /* succeed */
988failed:
989 /*
990 * failed to find a new reservation window in the current
991 * group, remove the current(stale) reservation window
992 * if there is any
993 */
994 if (!rsv_is_empty(&my_rsv->rsv_window))
995 rsv_window_remove(sb, my_rsv);
996 return -1; /* failed */
997} 999}
998 1000
999/* 1001/*
@@ -1023,7 +1025,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1023 int goal, struct ext3_reserve_window_node * my_rsv, 1025 int goal, struct ext3_reserve_window_node * my_rsv,
1024 int *errp) 1026 int *errp)
1025{ 1027{
1026 spinlock_t *rsv_lock;
1027 unsigned long group_first_block; 1028 unsigned long group_first_block;
1028 int ret = 0; 1029 int ret = 0;
1029 int fatal; 1030 int fatal;
@@ -1052,7 +1053,6 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1052 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); 1053 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
1053 goto out; 1054 goto out;
1054 } 1055 }
1055 rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
1056 /* 1056 /*
1057 * goal is a group relative block number (if there is a goal) 1057 * goal is a group relative block number (if there is a goal)
1058 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) 1058 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb)
@@ -1078,30 +1078,21 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1078 * then we could go to allocate from the reservation window directly. 1078 * then we could go to allocate from the reservation window directly.
1079 */ 1079 */
1080 while (1) { 1080 while (1) {
1081 struct ext3_reserve_window rsv_copy; 1081 if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1082 1082 !goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
1083 rsv_copy._rsv_start = my_rsv->rsv_start;
1084 rsv_copy._rsv_end = my_rsv->rsv_end;
1085
1086 if (rsv_is_empty(&rsv_copy) || (ret < 0) ||
1087 !goal_in_my_reservation(&rsv_copy, goal, group, sb)) {
1088 spin_lock(rsv_lock);
1089 ret = alloc_new_reservation(my_rsv, goal, sb, 1083 ret = alloc_new_reservation(my_rsv, goal, sb,
1090 group, bitmap_bh); 1084 group, bitmap_bh);
1091 rsv_copy._rsv_start = my_rsv->rsv_start;
1092 rsv_copy._rsv_end = my_rsv->rsv_end;
1093 spin_unlock(rsv_lock);
1094 if (ret < 0) 1085 if (ret < 0)
1095 break; /* failed */ 1086 break; /* failed */
1096 1087
1097 if (!goal_in_my_reservation(&rsv_copy, goal, group, sb)) 1088 if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
1098 goal = -1; 1089 goal = -1;
1099 } 1090 }
1100 if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb)) 1091 if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
1101 || (rsv_copy._rsv_end < group_first_block)) 1092 || (my_rsv->rsv_end < group_first_block))
1102 BUG(); 1093 BUG();
1103 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, 1094 ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
1104 &rsv_copy); 1095 &my_rsv->rsv_window);
1105 if (ret >= 0) { 1096 if (ret >= 0) {
1106 my_rsv->rsv_alloc_hit++; 1097 my_rsv->rsv_alloc_hit++;
1107 break; /* succeed */ 1098 break; /* succeed */
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5ad8cf0292df..98e78345ead9 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -36,7 +36,11 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
36 /* if we are the last writer on the inode, drop the block reservation */ 36 /* if we are the last writer on the inode, drop the block reservation */
37 if ((filp->f_mode & FMODE_WRITE) && 37 if ((filp->f_mode & FMODE_WRITE) &&
38 (atomic_read(&inode->i_writecount) == 1)) 38 (atomic_read(&inode->i_writecount) == 1))
39 {
40 down(&EXT3_I(inode)->truncate_sem);
39 ext3_discard_reservation(inode); 41 ext3_discard_reservation(inode);
42 up(&EXT3_I(inode)->truncate_sem);
43 }
40 if (is_dx(inode) && filp->private_data) 44 if (is_dx(inode) && filp->private_data)
41 ext3_htree_free_dir_info(filp->private_data); 45 ext3_htree_free_dir_info(filp->private_data);
42 46
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 1e6f3ea28713..6981bd014ede 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -604,12 +604,14 @@ got:
604 err = ext3_init_acl(handle, inode, dir); 604 err = ext3_init_acl(handle, inode, dir);
605 if (err) { 605 if (err) {
606 DQUOT_FREE_INODE(inode); 606 DQUOT_FREE_INODE(inode);
607 DQUOT_DROP(inode);
607 goto fail2; 608 goto fail2;
608 } 609 }
609 err = ext3_mark_inode_dirty(handle, inode); 610 err = ext3_mark_inode_dirty(handle, inode);
610 if (err) { 611 if (err) {
611 ext3_std_error(sb, err); 612 ext3_std_error(sb, err);
612 DQUOT_FREE_INODE(inode); 613 DQUOT_FREE_INODE(inode);
614 DQUOT_DROP(inode);
613 goto fail2; 615 goto fail2;
614 } 616 }
615 617
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0d5fa73b18dc..9989fdcf4d5a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
128 if (needed > EXT3_MAX_TRANS_DATA) 128 if (needed > EXT3_MAX_TRANS_DATA)
129 needed = EXT3_MAX_TRANS_DATA; 129 needed = EXT3_MAX_TRANS_DATA;
130 130
131 return EXT3_DATA_TRANS_BLOCKS + needed; 131 return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
132} 132}
133 133
134/* 134/*
@@ -2663,7 +2663,7 @@ static int ext3_do_update_inode(handle_t *handle,
2663 } else for (block = 0; block < EXT3_N_BLOCKS; block++) 2663 } else for (block = 0; block < EXT3_N_BLOCKS; block++)
2664 raw_inode->i_block[block] = ei->i_data[block]; 2664 raw_inode->i_block[block] = ei->i_data[block];
2665 2665
2666 if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) 2666 if (ei->i_extra_isize)
2667 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 2667 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
2668 2668
2669 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 2669 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
@@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
2763 2763
2764 /* (user+group)*(old+new) structure, inode write (sb, 2764 /* (user+group)*(old+new) structure, inode write (sb,
2765 * inode block, ? - but truncate inode update has it) */ 2765 * inode block, ? - but truncate inode update has it) */
2766 handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3); 2766 handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
2767 EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
2767 if (IS_ERR(handle)) { 2768 if (IS_ERR(handle)) {
2768 error = PTR_ERR(handle); 2769 error = PTR_ERR(handle);
2769 goto err_out; 2770 goto err_out;
@@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
2861#ifdef CONFIG_QUOTA 2862#ifdef CONFIG_QUOTA
2862 /* We know that structure was already allocated during DQUOT_INIT so 2863 /* We know that structure was already allocated during DQUOT_INIT so
2863 * we will be updating only the data blocks + inodes */ 2864 * we will be updating only the data blocks + inodes */
2864 ret += 2*EXT3_QUOTA_TRANS_BLOCKS; 2865 ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
2865#endif 2866#endif
2866 2867
2867 return ret; 2868 return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 79742d824a0a..50378d8ff84b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
932 struct inode *dir = dentry->d_parent->d_inode; 932 struct inode *dir = dentry->d_parent->d_inode;
933 933
934 sb = dir->i_sb; 934 sb = dir->i_sb;
935 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) 935 /* NFS may look up ".." - look at dx_root directory block */
936 return NULL; 936 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
937 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
938 return NULL;
939 } else {
940 frame = frames;
941 frame->bh = NULL; /* for dx_release() */
942 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
943 dx_set_block(frame->at, 0); /* dx_root block is 0 */
944 }
937 hash = hinfo.hash; 945 hash = hinfo.hash;
938 do { 946 do {
939 block = dx_get_block(frame->at); 947 block = dx_get_block(frame->at);
@@ -1637,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1637 int err, retries = 0; 1645 int err, retries = 0;
1638 1646
1639retry: 1647retry:
1640 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 1648 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1641 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1649 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1642 2*EXT3_QUOTA_INIT_BLOCKS); 1650 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1643 if (IS_ERR(handle)) 1651 if (IS_ERR(handle))
1644 return PTR_ERR(handle); 1652 return PTR_ERR(handle);
1645 1653
@@ -1671,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1671 return -EINVAL; 1679 return -EINVAL;
1672 1680
1673retry: 1681retry:
1674 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 1682 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1675 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1683 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1676 2*EXT3_QUOTA_INIT_BLOCKS); 1684 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1677 if (IS_ERR(handle)) 1685 if (IS_ERR(handle))
1678 return PTR_ERR(handle); 1686 return PTR_ERR(handle);
1679 1687
@@ -1707,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1707 return -EMLINK; 1715 return -EMLINK;
1708 1716
1709retry: 1717retry:
1710 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 1718 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1711 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1719 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1712 2*EXT3_QUOTA_INIT_BLOCKS); 1720 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1713 if (IS_ERR(handle)) 1721 if (IS_ERR(handle))
1714 return PTR_ERR(handle); 1722 return PTR_ERR(handle);
1715 1723
@@ -1998,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
1998 /* Initialize quotas before so that eventual writes go in 2006 /* Initialize quotas before so that eventual writes go in
1999 * separate transaction */ 2007 * separate transaction */
2000 DQUOT_INIT(dentry->d_inode); 2008 DQUOT_INIT(dentry->d_inode);
2001 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); 2009 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2002 if (IS_ERR(handle)) 2010 if (IS_ERR(handle))
2003 return PTR_ERR(handle); 2011 return PTR_ERR(handle);
2004 2012
@@ -2057,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2057 /* Initialize quotas before so that eventual writes go 2065 /* Initialize quotas before so that eventual writes go
2058 * in separate transaction */ 2066 * in separate transaction */
2059 DQUOT_INIT(dentry->d_inode); 2067 DQUOT_INIT(dentry->d_inode);
2060 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); 2068 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2061 if (IS_ERR(handle)) 2069 if (IS_ERR(handle))
2062 return PTR_ERR(handle); 2070 return PTR_ERR(handle);
2063 2071
@@ -2112,9 +2120,9 @@ static int ext3_symlink (struct inode * dir,
2112 return -ENAMETOOLONG; 2120 return -ENAMETOOLONG;
2113 2121
2114retry: 2122retry:
2115 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 2123 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2116 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2124 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2117 2*EXT3_QUOTA_INIT_BLOCKS); 2125 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
2118 if (IS_ERR(handle)) 2126 if (IS_ERR(handle))
2119 return PTR_ERR(handle); 2127 return PTR_ERR(handle);
2120 2128
@@ -2166,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry,
2166 return -EMLINK; 2174 return -EMLINK;
2167 2175
2168retry: 2176retry:
2169 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 2177 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2170 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2178 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
2171 if (IS_ERR(handle)) 2179 if (IS_ERR(handle))
2172 return PTR_ERR(handle); 2180 return PTR_ERR(handle);
@@ -2208,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2208 * in separate transaction */ 2216 * in separate transaction */
2209 if (new_dentry->d_inode) 2217 if (new_dentry->d_inode)
2210 DQUOT_INIT(new_dentry->d_inode); 2218 DQUOT_INIT(new_dentry->d_inode);
2211 handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2219 handle = ext3_journal_start(old_dir, 2 *
2220 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2212 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); 2221 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
2213 if (IS_ERR(handle)) 2222 if (IS_ERR(handle))
2214 return PTR_ERR(handle); 2223 return PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 981ccb233ef5..3c3c6e399fb3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -589,7 +589,7 @@ enum {
589 Opt_commit, Opt_journal_update, Opt_journal_inum, 589 Opt_commit, Opt_journal_update, Opt_journal_inum,
590 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 590 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
591 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 591 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
592 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, 592 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
593 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, 593 Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
594}; 594};
595 595
@@ -634,10 +634,10 @@ static match_table_t tokens = {
634 {Opt_grpjquota, "grpjquota=%s"}, 634 {Opt_grpjquota, "grpjquota=%s"},
635 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 635 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
636 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 636 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
637 {Opt_ignore, "grpquota"}, 637 {Opt_quota, "grpquota"},
638 {Opt_ignore, "noquota"}, 638 {Opt_noquota, "noquota"},
639 {Opt_ignore, "quota"}, 639 {Opt_quota, "quota"},
640 {Opt_ignore, "usrquota"}, 640 {Opt_quota, "usrquota"},
641 {Opt_barrier, "barrier=%u"}, 641 {Opt_barrier, "barrier=%u"},
642 {Opt_err, NULL}, 642 {Opt_err, NULL},
643 {Opt_resize, "resize"}, 643 {Opt_resize, "resize"},
@@ -876,6 +876,7 @@ set_qf_name:
876 sbi->s_qf_names[qtype] = NULL; 876 sbi->s_qf_names[qtype] = NULL;
877 return 0; 877 return 0;
878 } 878 }
879 set_opt(sbi->s_mount_opt, QUOTA);
879 break; 880 break;
880 case Opt_offusrjquota: 881 case Opt_offusrjquota:
881 qtype = USRQUOTA; 882 qtype = USRQUOTA;
@@ -889,7 +890,10 @@ clear_qf_name:
889 "quota turned on.\n"); 890 "quota turned on.\n");
890 return 0; 891 return 0;
891 } 892 }
892 kfree(sbi->s_qf_names[qtype]); 893 /*
894 * The space will be released later when all options
895 * are confirmed to be correct
896 */
893 sbi->s_qf_names[qtype] = NULL; 897 sbi->s_qf_names[qtype] = NULL;
894 break; 898 break;
895 case Opt_jqfmt_vfsold: 899 case Opt_jqfmt_vfsold:
@@ -898,6 +902,17 @@ clear_qf_name:
898 case Opt_jqfmt_vfsv0: 902 case Opt_jqfmt_vfsv0:
899 sbi->s_jquota_fmt = QFMT_VFS_V0; 903 sbi->s_jquota_fmt = QFMT_VFS_V0;
900 break; 904 break;
905 case Opt_quota:
906 set_opt(sbi->s_mount_opt, QUOTA);
907 break;
908 case Opt_noquota:
909 if (sb_any_quota_enabled(sb)) {
910 printk(KERN_ERR "EXT3-fs: Cannot change quota "
911 "options when quota turned on.\n");
912 return 0;
913 }
914 clear_opt(sbi->s_mount_opt, QUOTA);
915 break;
901#else 916#else
902 case Opt_usrjquota: 917 case Opt_usrjquota:
903 case Opt_grpjquota: 918 case Opt_grpjquota:
@@ -909,6 +924,9 @@ clear_qf_name:
909 "EXT3-fs: journalled quota options not " 924 "EXT3-fs: journalled quota options not "
910 "supported.\n"); 925 "supported.\n");
911 break; 926 break;
927 case Opt_quota:
928 case Opt_noquota:
929 break;
912#endif 930#endif
913 case Opt_abort: 931 case Opt_abort:
914 set_opt(sbi->s_mount_opt, ABORT); 932 set_opt(sbi->s_mount_opt, ABORT);
@@ -924,12 +942,13 @@ clear_qf_name:
924 case Opt_ignore: 942 case Opt_ignore:
925 break; 943 break;
926 case Opt_resize: 944 case Opt_resize:
927 if (!n_blocks_count) { 945 if (!is_remount) {
928 printk("EXT3-fs: resize option only available " 946 printk("EXT3-fs: resize option only available "
929 "for remount\n"); 947 "for remount\n");
930 return 0; 948 return 0;
931 } 949 }
932 match_int(&args[0], &option); 950 if (match_int(&args[0], &option) != 0)
951 return 0;
933 *n_blocks_count = option; 952 *n_blocks_count = option;
934 break; 953 break;
935 case Opt_nobh: 954 case Opt_nobh:
@@ -2093,14 +2112,33 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2093{ 2112{
2094 struct ext3_super_block * es; 2113 struct ext3_super_block * es;
2095 struct ext3_sb_info *sbi = EXT3_SB(sb); 2114 struct ext3_sb_info *sbi = EXT3_SB(sb);
2096 unsigned long tmp;
2097 unsigned long n_blocks_count = 0; 2115 unsigned long n_blocks_count = 0;
2116 unsigned long old_sb_flags;
2117 struct ext3_mount_options old_opts;
2118 int err;
2119#ifdef CONFIG_QUOTA
2120 int i;
2121#endif
2122
2123 /* Store the original options */
2124 old_sb_flags = sb->s_flags;
2125 old_opts.s_mount_opt = sbi->s_mount_opt;
2126 old_opts.s_resuid = sbi->s_resuid;
2127 old_opts.s_resgid = sbi->s_resgid;
2128 old_opts.s_commit_interval = sbi->s_commit_interval;
2129#ifdef CONFIG_QUOTA
2130 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2131 for (i = 0; i < MAXQUOTAS; i++)
2132 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
2133#endif
2098 2134
2099 /* 2135 /*
2100 * Allow the "check" option to be passed as a remount option. 2136 * Allow the "check" option to be passed as a remount option.
2101 */ 2137 */
2102 if (!parse_options(data, sb, &tmp, &n_blocks_count, 1)) 2138 if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
2103 return -EINVAL; 2139 err = -EINVAL;
2140 goto restore_opts;
2141 }
2104 2142
2105 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2143 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
2106 ext3_abort(sb, __FUNCTION__, "Abort forced by user"); 2144 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
@@ -2114,8 +2152,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2114 2152
2115 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || 2153 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2116 n_blocks_count > le32_to_cpu(es->s_blocks_count)) { 2154 n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
2117 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2155 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
2118 return -EROFS; 2156 err = -EROFS;
2157 goto restore_opts;
2158 }
2119 2159
2120 if (*flags & MS_RDONLY) { 2160 if (*flags & MS_RDONLY) {
2121 /* 2161 /*
@@ -2142,7 +2182,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2142 "remount RDWR because of unsupported " 2182 "remount RDWR because of unsupported "
2143 "optional features (%x).\n", 2183 "optional features (%x).\n",
2144 sb->s_id, le32_to_cpu(ret)); 2184 sb->s_id, le32_to_cpu(ret));
2145 return -EROFS; 2185 err = -EROFS;
2186 goto restore_opts;
2146 } 2187 }
2147 /* 2188 /*
2148 * Mounting a RDONLY partition read-write, so reread 2189 * Mounting a RDONLY partition read-write, so reread
@@ -2152,13 +2193,38 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2152 */ 2193 */
2153 ext3_clear_journal_err(sb, es); 2194 ext3_clear_journal_err(sb, es);
2154 sbi->s_mount_state = le16_to_cpu(es->s_state); 2195 sbi->s_mount_state = le16_to_cpu(es->s_state);
2155 if ((ret = ext3_group_extend(sb, es, n_blocks_count))) 2196 if ((ret = ext3_group_extend(sb, es, n_blocks_count))) {
2156 return ret; 2197 err = ret;
2198 goto restore_opts;
2199 }
2157 if (!ext3_setup_super (sb, es, 0)) 2200 if (!ext3_setup_super (sb, es, 0))
2158 sb->s_flags &= ~MS_RDONLY; 2201 sb->s_flags &= ~MS_RDONLY;
2159 } 2202 }
2160 } 2203 }
2204#ifdef CONFIG_QUOTA
2205 /* Release old quota file names */
2206 for (i = 0; i < MAXQUOTAS; i++)
2207 if (old_opts.s_qf_names[i] &&
2208 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2209 kfree(old_opts.s_qf_names[i]);
2210#endif
2161 return 0; 2211 return 0;
2212restore_opts:
2213 sb->s_flags = old_sb_flags;
2214 sbi->s_mount_opt = old_opts.s_mount_opt;
2215 sbi->s_resuid = old_opts.s_resuid;
2216 sbi->s_resgid = old_opts.s_resgid;
2217 sbi->s_commit_interval = old_opts.s_commit_interval;
2218#ifdef CONFIG_QUOTA
2219 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2220 for (i = 0; i < MAXQUOTAS; i++) {
2221 if (sbi->s_qf_names[i] &&
2222 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2223 kfree(sbi->s_qf_names[i]);
2224 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2225 }
2226#endif
2227 return err;
2162} 2228}
2163 2229
2164static int ext3_statfs (struct super_block * sb, struct kstatfs * buf) 2230static int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
@@ -2238,7 +2304,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type)
2238 int ret, err; 2304 int ret, err;
2239 2305
2240 /* We may create quota structure so we need to reserve enough blocks */ 2306 /* We may create quota structure so we need to reserve enough blocks */
2241 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); 2307 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
2242 if (IS_ERR(handle)) 2308 if (IS_ERR(handle))
2243 return PTR_ERR(handle); 2309 return PTR_ERR(handle);
2244 ret = dquot_initialize(inode, type); 2310 ret = dquot_initialize(inode, type);
@@ -2254,7 +2320,7 @@ static int ext3_dquot_drop(struct inode *inode)
2254 int ret, err; 2320 int ret, err;
2255 2321
2256 /* We may delete quota structure so we need to reserve enough blocks */ 2322 /* We may delete quota structure so we need to reserve enough blocks */
2257 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); 2323 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
2258 if (IS_ERR(handle)) 2324 if (IS_ERR(handle))
2259 return PTR_ERR(handle); 2325 return PTR_ERR(handle);
2260 ret = dquot_drop(inode); 2326 ret = dquot_drop(inode);
@@ -2272,7 +2338,7 @@ static int ext3_write_dquot(struct dquot *dquot)
2272 2338
2273 inode = dquot_to_inode(dquot); 2339 inode = dquot_to_inode(dquot);
2274 handle = ext3_journal_start(inode, 2340 handle = ext3_journal_start(inode,
2275 EXT3_QUOTA_TRANS_BLOCKS); 2341 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2276 if (IS_ERR(handle)) 2342 if (IS_ERR(handle))
2277 return PTR_ERR(handle); 2343 return PTR_ERR(handle);
2278 ret = dquot_commit(dquot); 2344 ret = dquot_commit(dquot);
@@ -2288,7 +2354,7 @@ static int ext3_acquire_dquot(struct dquot *dquot)
2288 handle_t *handle; 2354 handle_t *handle;
2289 2355
2290 handle = ext3_journal_start(dquot_to_inode(dquot), 2356 handle = ext3_journal_start(dquot_to_inode(dquot),
2291 EXT3_QUOTA_INIT_BLOCKS); 2357 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2292 if (IS_ERR(handle)) 2358 if (IS_ERR(handle))
2293 return PTR_ERR(handle); 2359 return PTR_ERR(handle);
2294 ret = dquot_acquire(dquot); 2360 ret = dquot_acquire(dquot);
@@ -2304,7 +2370,7 @@ static int ext3_release_dquot(struct dquot *dquot)
2304 handle_t *handle; 2370 handle_t *handle;
2305 2371
2306 handle = ext3_journal_start(dquot_to_inode(dquot), 2372 handle = ext3_journal_start(dquot_to_inode(dquot),
2307 EXT3_QUOTA_INIT_BLOCKS); 2373 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2308 if (IS_ERR(handle)) 2374 if (IS_ERR(handle))
2309 return PTR_ERR(handle); 2375 return PTR_ERR(handle);
2310 ret = dquot_release(dquot); 2376 ret = dquot_release(dquot);
@@ -2348,22 +2414,8 @@ static int ext3_write_info(struct super_block *sb, int type)
2348 */ 2414 */
2349static int ext3_quota_on_mount(struct super_block *sb, int type) 2415static int ext3_quota_on_mount(struct super_block *sb, int type)
2350{ 2416{
2351 int err; 2417 return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2352 struct dentry *dentry; 2418 EXT3_SB(sb)->s_jquota_fmt, type);
2353 struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type],
2354 .hash = 0,
2355 .len = strlen(EXT3_SB(sb)->s_qf_names[type])};
2356
2357 dentry = lookup_hash(&name, sb->s_root);
2358 if (IS_ERR(dentry))
2359 return PTR_ERR(dentry);
2360 err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry);
2361 /* Now invalidate and put the dentry - quota got its own reference
2362 * to inode and dentry has at least wrong hash so we had better
2363 * throw it away */
2364 d_invalidate(dentry);
2365 dput(dentry);
2366 return err;
2367} 2419}
2368 2420
2369/* 2421/*
@@ -2375,6 +2427,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2375 int err; 2427 int err;
2376 struct nameidata nd; 2428 struct nameidata nd;
2377 2429
2430 if (!test_opt(sb, QUOTA))
2431 return -EINVAL;
2378 /* Not journalling quota? */ 2432 /* Not journalling quota? */
2379 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2433 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
2380 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) 2434 !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 8c3e72818fb0..4f79122cde67 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -23,11 +23,11 @@
23#include <linux/namei.h> 23#include <linux/namei.h>
24#include "xattr.h" 24#include "xattr.h"
25 25
26static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
27{ 27{
28 struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); 28 struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
29 nd_set_link(nd, (char*)ei->i_data); 29 nd_set_link(nd, (char*)ei->i_data);
30 return 0; 30 return NULL;
31} 31}
32 32
33struct inode_operations ext3_symlink_inode_operations = { 33struct inode_operations ext3_symlink_inode_operations = {
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 4cbc6d0212d3..269c7b92db9a 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
1044 int error, retries = 0; 1044 int error, retries = 0;
1045 1045
1046retry: 1046retry:
1047 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); 1047 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
1048 if (IS_ERR(handle)) { 1048 if (IS_ERR(handle)) {
1049 error = PTR_ERR(handle); 1049 error = PTR_ERR(handle);
1050 } else { 1050 } else {
@@ -1106,7 +1106,7 @@ cleanup:
1106void 1106void
1107ext3_xattr_put_super(struct super_block *sb) 1107ext3_xattr_put_super(struct super_block *sb)
1108{ 1108{
1109 mb_cache_shrink(ext3_xattr_cache, sb->s_bdev); 1109 mb_cache_shrink(sb->s_bdev);
1110} 1110}
1111 1111
1112/* 1112/*
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 7c52e465a619..77c24fcf712a 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -56,7 +56,7 @@ int __init fat_cache_init(void)
56 return 0; 56 return 0;
57} 57}
58 58
59void __exit fat_cache_destroy(void) 59void fat_cache_destroy(void)
60{ 60{
61 if (kmem_cache_destroy(fat_cache_cachep)) 61 if (kmem_cache_destroy(fat_cache_cachep))
62 printk(KERN_INFO "fat_cache: not all structures were freed\n"); 62 printk(KERN_INFO "fat_cache: not all structures were freed\n");
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8ccee8415488..96ae85b67eba 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1327,16 +1327,25 @@ out_fail:
1327EXPORT_SYMBOL(fat_fill_super); 1327EXPORT_SYMBOL(fat_fill_super);
1328 1328
1329int __init fat_cache_init(void); 1329int __init fat_cache_init(void);
1330void __exit fat_cache_destroy(void); 1330void fat_cache_destroy(void);
1331 1331
1332static int __init init_fat_fs(void) 1332static int __init init_fat_fs(void)
1333{ 1333{
1334 int ret; 1334 int err;
1335 1335
1336 ret = fat_cache_init(); 1336 err = fat_cache_init();
1337 if (ret < 0) 1337 if (err)
1338 return ret; 1338 return err;
1339 return fat_init_inodecache(); 1339
1340 err = fat_init_inodecache();
1341 if (err)
1342 goto failed;
1343
1344 return 0;
1345
1346failed:
1347 fat_cache_destroy();
1348 return err;
1340} 1349}
1341 1350
1342static void __exit exit_fat_fs(void) 1351static void __exit exit_fat_fs(void)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 286a9f8f3d49..6fbc9d8fcc36 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -288,7 +288,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
288 break; 288 break;
289 case F_SETLK: 289 case F_SETLK:
290 case F_SETLKW: 290 case F_SETLKW:
291 err = fcntl_setlk(filp, cmd, (struct flock __user *) arg); 291 err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
292 break; 292 break;
293 case F_GETOWN: 293 case F_GETOWN:
294 /* 294 /*
@@ -376,7 +376,8 @@ asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg
376 break; 376 break;
377 case F_SETLK64: 377 case F_SETLK64:
378 case F_SETLKW64: 378 case F_SETLKW64:
379 err = fcntl_setlk64(filp, cmd, (struct flock64 __user *) arg); 379 err = fcntl_setlk64(fd, filp, cmd,
380 (struct flock64 __user *) arg);
380 break; 381 break;
381 default: 382 default:
382 err = do_fcntl(fd, cmd, arg, filp); 383 err = do_fcntl(fd, cmd, arg, filp);
diff --git a/fs/file_table.c b/fs/file_table.c
index 03d83cb686b1..1d3de78e6bc9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -16,6 +16,7 @@
16#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
17#include <linux/mount.h> 17#include <linux/mount.h>
18#include <linux/cdev.h> 18#include <linux/cdev.h>
19#include <linux/fsnotify.h>
19 20
20/* sysctl tunables... */ 21/* sysctl tunables... */
21struct files_stat_struct files_stat = { 22struct files_stat_struct files_stat = {
@@ -63,42 +64,45 @@ static inline void file_free(struct file *f)
63 */ 64 */
64struct file *get_empty_filp(void) 65struct file *get_empty_filp(void)
65{ 66{
66static int old_max; 67 static int old_max;
67 struct file * f; 68 struct file * f;
68 69
69 /* 70 /*
70 * Privileged users can go above max_files 71 * Privileged users can go above max_files
71 */ 72 */
72 if (files_stat.nr_files < files_stat.max_files || 73 if (files_stat.nr_files >= files_stat.max_files &&
73 capable(CAP_SYS_ADMIN)) { 74 !capable(CAP_SYS_ADMIN))
74 f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); 75 goto over;
75 if (f) { 76
76 memset(f, 0, sizeof(*f)); 77 f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
77 if (security_file_alloc(f)) { 78 if (f == NULL)
78 file_free(f); 79 goto fail;
79 goto fail; 80
80 } 81 memset(f, 0, sizeof(*f));
81 eventpoll_init_file(f); 82 if (security_file_alloc(f))
82 atomic_set(&f->f_count, 1); 83 goto fail_sec;
83 f->f_uid = current->fsuid; 84
84 f->f_gid = current->fsgid; 85 eventpoll_init_file(f);
85 rwlock_init(&f->f_owner.lock); 86 atomic_set(&f->f_count, 1);
86 /* f->f_version: 0 */ 87 f->f_uid = current->fsuid;
87 INIT_LIST_HEAD(&f->f_list); 88 f->f_gid = current->fsgid;
88 f->f_maxcount = INT_MAX; 89 rwlock_init(&f->f_owner.lock);
89 return f; 90 /* f->f_version: 0 */
90 } 91 INIT_LIST_HEAD(&f->f_list);
91 } 92 f->f_maxcount = INT_MAX;
92 93 return f;
94
95over:
93 /* Ran out of filps - report that */ 96 /* Ran out of filps - report that */
94 if (files_stat.max_files >= old_max) { 97 if (files_stat.nr_files > old_max) {
95 printk(KERN_INFO "VFS: file-max limit %d reached\n", 98 printk(KERN_INFO "VFS: file-max limit %d reached\n",
96 files_stat.max_files); 99 files_stat.max_files);
97 old_max = files_stat.max_files; 100 old_max = files_stat.nr_files;
98 } else {
99 /* Big problems... */
100 printk(KERN_WARNING "VFS: filp allocation failed\n");
101 } 101 }
102 goto fail;
103
104fail_sec:
105 file_free(f);
102fail: 106fail:
103 return NULL; 107 return NULL;
104} 108}
@@ -123,6 +127,8 @@ void fastcall __fput(struct file *file)
123 struct inode *inode = dentry->d_inode; 127 struct inode *inode = dentry->d_inode;
124 128
125 might_sleep(); 129 might_sleep();
130
131 fsnotify_close(file);
126 /* 132 /*
127 * The function eventpoll_release() should be the first called 133 * The function eventpoll_release() should be the first called
128 * in the file cleanup chain. 134 * in the file cleanup chain.
diff --git a/fs/freevxfs/vxfs.h b/fs/freevxfs/vxfs.h
index 8da0252642a4..583bd78086d8 100644
--- a/fs/freevxfs/vxfs.h
+++ b/fs/freevxfs/vxfs.h
@@ -37,7 +37,6 @@
37 * superblocks of the Veritas Filesystem. 37 * superblocks of the Veritas Filesystem.
38 */ 38 */
39#include <linux/types.h> 39#include <linux/types.h>
40#include "vxfs_kcompat.h"
41 40
42 41
43/* 42/*
diff --git a/fs/freevxfs/vxfs_bmap.c b/fs/freevxfs/vxfs_bmap.c
index bc4b57da306a..d3f6b2835bc8 100644
--- a/fs/freevxfs/vxfs_bmap.c
+++ b/fs/freevxfs/vxfs_bmap.c
@@ -101,7 +101,7 @@ vxfs_bmap_ext4(struct inode *ip, long bn)
101 return 0; 101 return 0;
102 102
103fail_size: 103fail_size:
104 printk("vxfs: indirect extent to big!\n"); 104 printk("vxfs: indirect extent too big!\n");
105fail_buf: 105fail_buf:
106 return 0; 106 return 0;
107} 107}
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 05b19f70bf97..6dee109aeea4 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -78,17 +78,18 @@ vxfs_getfsh(struct inode *ip, int which)
78 struct buffer_head *bp; 78 struct buffer_head *bp;
79 79
80 bp = vxfs_bread(ip, which); 80 bp = vxfs_bread(ip, which);
81 if (buffer_mapped(bp)) { 81 if (bp) {
82 struct vxfs_fsh *fhp; 82 struct vxfs_fsh *fhp;
83 83
84 if (!(fhp = kmalloc(sizeof(*fhp), SLAB_KERNEL))) 84 if (!(fhp = kmalloc(sizeof(*fhp), GFP_KERNEL)))
85 return NULL; 85 goto out;
86 memcpy(fhp, bp->b_data, sizeof(*fhp)); 86 memcpy(fhp, bp->b_data, sizeof(*fhp));
87 87
88 brelse(bp); 88 put_bh(bp);
89 return (fhp); 89 return (fhp);
90 } 90 }
91 91out:
92 brelse(bp);
92 return NULL; 93 return NULL;
93} 94}
94 95
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index ac677ab262b2..d0401dc68d41 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -38,7 +38,7 @@
38#include "vxfs_inode.h" 38#include "vxfs_inode.h"
39 39
40 40
41static int vxfs_immed_follow_link(struct dentry *, struct nameidata *); 41static void * vxfs_immed_follow_link(struct dentry *, struct nameidata *);
42 42
43static int vxfs_immed_readpage(struct file *, struct page *); 43static int vxfs_immed_readpage(struct file *, struct page *);
44 44
@@ -72,12 +72,12 @@ struct address_space_operations vxfs_immed_aops = {
72 * Returns: 72 * Returns:
73 * Zero on success, else a negative error code. 73 * Zero on success, else a negative error code.
74 */ 74 */
75static int 75static void *
76vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np) 76vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
77{ 77{
78 struct vxfs_inode_info *vip = VXFS_INO(dp->d_inode); 78 struct vxfs_inode_info *vip = VXFS_INO(dp->d_inode);
79 nd_set_link(np, vip->vii_immed.vi_immed); 79 nd_set_link(np, vip->vii_immed.vi_immed);
80 return 0; 80 return NULL;
81} 81}
82 82
83/** 83/**
diff --git a/fs/freevxfs/vxfs_kcompat.h b/fs/freevxfs/vxfs_kcompat.h
deleted file mode 100644
index 342a4cc860f4..000000000000
--- a/fs/freevxfs/vxfs_kcompat.h
+++ /dev/null
@@ -1,49 +0,0 @@
1#ifndef _VXFS_KCOMPAT_H
2#define _VXFS_KCOMPAT_H
3
4#include <linux/version.h>
5
6#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
7
8#include <linux/blkdev.h>
9
10typedef long sector_t;
11
12/* From include/linux/fs.h (Linux 2.5.2-pre3) */
13static inline struct buffer_head * sb_bread(struct super_block *sb, int block)
14{
15 return bread(sb->s_dev, block, sb->s_blocksize);
16}
17
18/* Dito. */
19static inline void map_bh(struct buffer_head *bh, struct super_block *sb, int block)
20{
21 bh->b_state |= 1 << BH_Mapped;
22 bh->b_dev = sb->s_dev;
23 bh->b_blocknr = block;
24}
25
26/* From fs/block_dev.c (Linux 2.5.2-pre2) */
27static inline int sb_set_blocksize(struct super_block *sb, int size)
28{
29 int bits;
30 if (set_blocksize(sb->s_dev, size) < 0)
31 return 0;
32 sb->s_blocksize = size;
33 for (bits = 9, size >>= 9; size >>= 1; bits++)
34 ;
35 sb->s_blocksize_bits = bits;
36 return sb->s_blocksize;
37}
38
39/* Dito. */
40static inline int sb_min_blocksize(struct super_block *sb, int size)
41{
42 int minsize = get_hardsect_size(sb->s_dev);
43 if (size < minsize)
44 size = minsize;
45 return sb_set_blocksize(sb, size);
46}
47
48#endif /* Kernel 2.4 */
49#endif /* _VXFS_KCOMPAT_H */
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 506ae251d2c0..554eb455722c 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -61,13 +61,13 @@ struct file_operations vxfs_dir_operations = {
61}; 61};
62 62
63 63
64static __inline__ u_long 64static inline u_long
65dir_pages(struct inode *inode) 65dir_pages(struct inode *inode)
66{ 66{
67 return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 67 return (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
68} 68}
69 69
70static __inline__ u_long 70static inline u_long
71dir_blocks(struct inode *ip) 71dir_blocks(struct inode *ip)
72{ 72{
73 u_long bsize = ip->i_sb->s_blocksize; 73 u_long bsize = ip->i_sb->s_blocksize;
@@ -79,7 +79,7 @@ dir_blocks(struct inode *ip)
79 * 79 *
80 * len <= VXFS_NAMELEN and de != NULL are guaranteed by caller. 80 * len <= VXFS_NAMELEN and de != NULL are guaranteed by caller.
81 */ 81 */
82static __inline__ int 82static inline int
83vxfs_match(int len, const char * const name, struct vxfs_direct *de) 83vxfs_match(int len, const char * const name, struct vxfs_direct *de)
84{ 84{
85 if (len != de->d_namelen) 85 if (len != de->d_namelen)
@@ -89,7 +89,7 @@ vxfs_match(int len, const char * const name, struct vxfs_direct *de)
89 return !memcmp(name, de->d_name, len); 89 return !memcmp(name, de->d_name, len);
90} 90}
91 91
92static __inline__ struct vxfs_direct * 92static inline struct vxfs_direct *
93vxfs_next_entry(struct vxfs_direct *de) 93vxfs_next_entry(struct vxfs_direct *de)
94{ 94{
95 return ((struct vxfs_direct *)((char*)de + de->d_reclen)); 95 return ((struct vxfs_direct *)((char*)de + de->d_reclen));
diff --git a/fs/freevxfs/vxfs_olt.c b/fs/freevxfs/vxfs_olt.c
index 7a204e31aad9..133476201d84 100644
--- a/fs/freevxfs/vxfs_olt.c
+++ b/fs/freevxfs/vxfs_olt.c
@@ -38,7 +38,7 @@
38#include "vxfs_olt.h" 38#include "vxfs_olt.h"
39 39
40 40
41static __inline__ void 41static inline void
42vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp) 42vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
43{ 43{
44 if (infp->vsi_fshino) 44 if (infp->vsi_fshino)
@@ -46,7 +46,7 @@ vxfs_get_fshead(struct vxfs_oltfshead *fshp, struct vxfs_sb_info *infp)
46 infp->vsi_fshino = fshp->olt_fsino[0]; 46 infp->vsi_fshino = fshp->olt_fsino[0];
47} 47}
48 48
49static __inline__ void 49static inline void
50vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp) 50vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
51{ 51{
52 if (infp->vsi_iext) 52 if (infp->vsi_iext)
@@ -54,7 +54,7 @@ vxfs_get_ilist(struct vxfs_oltilist *ilistp, struct vxfs_sb_info *infp)
54 infp->vsi_iext = ilistp->olt_iext[0]; 54 infp->vsi_iext = ilistp->olt_iext[0];
55} 55}
56 56
57static __inline__ u_long 57static inline u_long
58vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize) 58vxfs_oblock(struct super_block *sbp, daddr_t block, u_long bsize)
59{ 59{
60 if (sbp->s_blocksize % bsize) 60 if (sbp->s_blocksize % bsize)
@@ -104,8 +104,8 @@ vxfs_read_olt(struct super_block *sbp, u_long bsize)
104 goto fail; 104 goto fail;
105 } 105 }
106 106
107 oaddr = (char *)bp->b_data + op->olt_size; 107 oaddr = bp->b_data + op->olt_size;
108 eaddr = (char *)bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize); 108 eaddr = bp->b_data + (infp->vsi_oltsize * sbp->s_blocksize);
109 109
110 while (oaddr < eaddr) { 110 while (oaddr < eaddr) {
111 struct vxfs_oltcommon *ocp = 111 struct vxfs_oltcommon *ocp =
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 5e305612054a..50aae77651b2 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -36,7 +36,6 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/pagemap.h> 37#include <linux/pagemap.h>
38 38
39#include "vxfs_kcompat.h"
40#include "vxfs_extern.h" 39#include "vxfs_extern.h"
41 40
42 41
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 0ae2c7b8182a..27f66d3e8a04 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -155,12 +155,11 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
155 155
156 sbp->s_flags |= MS_RDONLY; 156 sbp->s_flags |= MS_RDONLY;
157 157
158 infp = kmalloc(sizeof(*infp), GFP_KERNEL); 158 infp = kcalloc(1, sizeof(*infp), GFP_KERNEL);
159 if (!infp) { 159 if (!infp) {
160 printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n"); 160 printk(KERN_WARNING "vxfs: unable to allocate incore superblock\n");
161 return -ENOMEM; 161 return -ENOMEM;
162 } 162 }
163 memset(infp, 0, sizeof(*infp));
164 163
165 bsize = sb_min_blocksize(sbp, BLOCK_SIZE); 164 bsize = sb_min_blocksize(sbp, BLOCK_SIZE);
166 if (!bsize) { 165 if (!bsize) {
@@ -196,7 +195,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent)
196#endif 195#endif
197 196
198 sbp->s_magic = rsbp->vs_magic; 197 sbp->s_magic = rsbp->vs_magic;
199 sbp->s_fs_info = (void *)infp; 198 sbp->s_fs_info = infp;
200 199
201 infp->vsi_raw = rsbp; 200 infp->vsi_raw = rsbp;
202 infp->vsi_bp = bp; 201 infp->vsi_bp = bp;
@@ -263,7 +262,7 @@ vxfs_init(void)
263 sizeof(struct vxfs_inode_info), 0, 262 sizeof(struct vxfs_inode_info), 0,
264 SLAB_RECLAIM_ACCOUNT, NULL, NULL); 263 SLAB_RECLAIM_ACCOUNT, NULL, NULL);
265 if (vxfs_inode_cachep) 264 if (vxfs_inode_cachep)
266 return (register_filesystem(&vxfs_fs_type)); 265 return register_filesystem(&vxfs_fs_type);
267 return -ENOMEM; 266 return -ENOMEM;
268} 267}
269 268
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e050fa58218..e94ab398b717 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
485 spin_unlock(&sb_lock); 485 spin_unlock(&sb_lock);
486} 486}
487 487
488/*
489 * Find a superblock with inodes that need to be synced
490 */
491static struct super_block *get_super_to_sync(void)
492{
493 struct super_block *sb;
494restart:
495 spin_lock(&sb_lock);
496 sb = sb_entry(super_blocks.prev);
497 for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
498 if (sb->s_syncing)
499 continue;
500 sb->s_syncing = 1;
501 sb->s_count++;
502 spin_unlock(&sb_lock);
503 down_read(&sb->s_umount);
504 if (!sb->s_root) {
505 drop_super(sb);
506 goto restart;
507 }
508 return sb;
509 }
510 spin_unlock(&sb_lock);
511 return NULL;
512}
513
514/** 488/**
515 * sync_inodes - writes all inodes to disk 489 * sync_inodes - writes all inodes to disk
516 * @wait: wait for completion 490 * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
530 * outstanding dirty inodes, the writeback goes block-at-a-time within the 504 * outstanding dirty inodes, the writeback goes block-at-a-time within the
531 * filesystem's write_inode(). This is extremely slow. 505 * filesystem's write_inode(). This is extremely slow.
532 */ 506 */
533void sync_inodes(int wait) 507static void __sync_inodes(int wait)
534{ 508{
535 struct super_block *sb; 509 struct super_block *sb;
536 510
537 set_sb_syncing(0); 511 spin_lock(&sb_lock);
538 while ((sb = get_super_to_sync()) != NULL) { 512restart:
539 sync_inodes_sb(sb, 0); 513 list_for_each_entry(sb, &super_blocks, s_list) {
540 sync_blockdev(sb->s_bdev); 514 if (sb->s_syncing)
541 drop_super(sb); 515 continue;
516 sb->s_syncing = 1;
517 sb->s_count++;
518 spin_unlock(&sb_lock);
519 down_read(&sb->s_umount);
520 if (sb->s_root) {
521 sync_inodes_sb(sb, wait);
522 sync_blockdev(sb->s_bdev);
523 }
524 up_read(&sb->s_umount);
525 spin_lock(&sb_lock);
526 if (__put_super_and_need_restart(sb))
527 goto restart;
542 } 528 }
529 spin_unlock(&sb_lock);
530}
531
532void sync_inodes(int wait)
533{
534 set_sb_syncing(0);
535 __sync_inodes(0);
536
543 if (wait) { 537 if (wait) {
544 set_sb_syncing(0); 538 set_sb_syncing(0);
545 while ((sb = get_super_to_sync()) != NULL) { 539 __sync_inodes(1);
546 sync_inodes_sb(sb, 1);
547 sync_blockdev(sb->s_bdev);
548 drop_super(sb);
549 }
550 } 540 }
551} 541}
552 542
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 6ad1211f84ed..a096c5a56664 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -480,6 +480,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
480 return; 480 return;
481 } 481 }
482 for (i = 0; i < tree->pages_per_bnode; i++) { 482 for (i = 0; i < tree->pages_per_bnode; i++) {
483 if (!node->page[i])
484 continue;
483 mark_page_accessed(node->page[i]); 485 mark_page_accessed(node->page[i]);
484#if REF_PAGES 486#if REF_PAGES
485 put_page(node->page[i]); 487 put_page(node->page[i]);
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index cbc8510ad222..5ea6b3d45eaa 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -482,7 +482,8 @@ void hfs_file_truncate(struct inode *inode)
482 page_cache_release(page); 482 page_cache_release(page);
483 mark_inode_dirty(inode); 483 mark_inode_dirty(inode);
484 return; 484 return;
485 } 485 } else if (inode->i_size == HFS_I(inode)->phys_size)
486 return;
486 size = inode->i_size + HFS_SB(sb)->alloc_blksz - 1; 487 size = inode->i_size + HFS_SB(sb)->alloc_blksz - 1;
487 blk_cnt = size / HFS_SB(sb)->alloc_blksz; 488 blk_cnt = size / HFS_SB(sb)->alloc_blksz;
488 alloc_cnt = HFS_I(inode)->alloc_blocks; 489 alloc_cnt = HFS_I(inode)->alloc_blocks;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 267872e84d71..8868d3b766fd 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -643,6 +643,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
643 return; 643 return;
644 } 644 }
645 for (i = 0; i < tree->pages_per_bnode; i++) { 645 for (i = 0; i < tree->pages_per_bnode; i++) {
646 if (!node->page[i])
647 continue;
646 mark_page_accessed(node->page[i]); 648 mark_page_accessed(node->page[i]);
647#if REF_PAGES 649#if REF_PAGES
648 put_page(node->page[i]); 650 put_page(node->page[i]);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 376498cc64fd..e7235ca79a95 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -461,7 +461,9 @@ void hfsplus_file_truncate(struct inode *inode)
461 page_cache_release(page); 461 page_cache_release(page);
462 mark_inode_dirty(inode); 462 mark_inode_dirty(inode);
463 return; 463 return;
464 } 464 } else if (inode->i_size == HFSPLUS_I(inode).phys_size)
465 return;
466
465 blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift; 467 blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift;
466 alloc_cnt = HFSPLUS_I(inode).alloc_blocks; 468 alloc_cnt = HFSPLUS_I(inode).alloc_blocks;
467 if (blk_cnt == alloc_cnt) 469 if (blk_cnt == alloc_cnt)
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index c1516d013bf6..67bca0d4a33b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -69,6 +69,7 @@ extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
69extern int write_file(int fd, unsigned long long *offset, const char *buf, 69extern int write_file(int fd, unsigned long long *offset, const char *buf,
70 int len); 70 int len);
71extern int lseek_file(int fd, long long offset, int whence); 71extern int lseek_file(int fd, long long offset, int whence);
72extern int fsync_file(int fd, int datasync);
72extern int file_create(char *name, int ur, int uw, int ux, int gr, 73extern int file_create(char *name, int ur, int uw, int ux, int gr,
73 int gw, int gx, int or, int ow, int ox); 74 int gw, int gx, int or, int ow, int ox);
74extern int set_attr(const char *file, struct hostfs_iattr *attrs); 75extern int set_attr(const char *file, struct hostfs_iattr *attrs);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 4bf43ea87c46..b2d18200a003 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -15,7 +15,6 @@
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/root_dev.h>
19#include <linux/statfs.h> 18#include <linux/statfs.h>
20#include <linux/kdev_t.h> 19#include <linux/kdev_t.h>
21#include <asm/uaccess.h> 20#include <asm/uaccess.h>
@@ -160,8 +159,6 @@ static int read_name(struct inode *ino, char *name)
160 ino->i_size = i_size; 159 ino->i_size = i_size;
161 ino->i_blksize = i_blksize; 160 ino->i_blksize = i_blksize;
162 ino->i_blocks = i_blocks; 161 ino->i_blocks = i_blocks;
163 if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
164 ino->i_uid = 0;
165 return(0); 162 return(0);
166} 163}
167 164
@@ -385,7 +382,7 @@ int hostfs_file_open(struct inode *ino, struct file *file)
385 382
386int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) 383int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
387{ 384{
388 return(0); 385 return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync);
389} 386}
390 387
391static struct file_operations hostfs_file_fops = { 388static struct file_operations hostfs_file_fops = {
@@ -841,16 +838,10 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
841 attrs.ia_mode = attr->ia_mode; 838 attrs.ia_mode = attr->ia_mode;
842 } 839 }
843 if(attr->ia_valid & ATTR_UID){ 840 if(attr->ia_valid & ATTR_UID){
844 if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
845 (attr->ia_uid == 0))
846 attr->ia_uid = getuid();
847 attrs.ia_valid |= HOSTFS_ATTR_UID; 841 attrs.ia_valid |= HOSTFS_ATTR_UID;
848 attrs.ia_uid = attr->ia_uid; 842 attrs.ia_uid = attr->ia_uid;
849 } 843 }
850 if(attr->ia_valid & ATTR_GID){ 844 if(attr->ia_valid & ATTR_GID){
851 if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
852 (attr->ia_gid == 0))
853 attr->ia_gid = getgid();
854 attrs.ia_valid |= HOSTFS_ATTR_GID; 845 attrs.ia_valid |= HOSTFS_ATTR_GID;
855 attrs.ia_gid = attr->ia_gid; 846 attrs.ia_gid = attr->ia_gid;
856 } 847 }
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 4796e8490f7d..b97809deba66 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -153,10 +153,24 @@ int lseek_file(int fd, long long offset, int whence)
153 int ret; 153 int ret;
154 154
155 ret = lseek64(fd, offset, whence); 155 ret = lseek64(fd, offset, whence);
156 if(ret < 0) return(-errno); 156 if(ret < 0)
157 return(-errno);
157 return(0); 158 return(0);
158} 159}
159 160
161int fsync_file(int fd, int datasync)
162{
163 int ret;
164 if (datasync)
165 ret = fdatasync(fd);
166 else
167 ret = fsync(fd);
168
169 if (ret < 0)
170 return -errno;
171 return 0;
172}
173
160void close_file(void *stream) 174void close_file(void *stream)
161{ 175{
162 close(*((int *) stream)); 176 close(*((int *) stream));
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index f8e0cbd0cb60..52930915bad8 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/file.h>
7#include <linux/module.h> 8#include <linux/module.h>
8#include <linux/init.h> 9#include <linux/init.h>
9#include <linux/slab.h> 10#include <linux/slab.h>
@@ -37,7 +38,7 @@ struct hppfs_inode_info {
37 38
38static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) 39static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
39{ 40{
40 return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); 41 return container_of(inode, struct hppfs_inode_info, vfs_inode);
41} 42}
42 43
43#define HPPFS_SUPER_MAGIC 0xb00000ee 44#define HPPFS_SUPER_MAGIC 0xb00000ee
@@ -232,7 +233,7 @@ static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
232 set_fs(USER_DS); 233 set_fs(USER_DS);
233 234
234 if(ppos) *ppos = file->f_pos; 235 if(ppos) *ppos = file->f_pos;
235 return(n); 236 return n;
236} 237}
237 238
238static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) 239static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
@@ -253,7 +254,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
253 err = os_read_file(fd, new_buf, cur); 254 err = os_read_file(fd, new_buf, cur);
254 if(err < 0){ 255 if(err < 0){
255 printk("hppfs_read : read failed, errno = %d\n", 256 printk("hppfs_read : read failed, errno = %d\n",
256 count); 257 err);
257 n = err; 258 n = err;
258 goto out_free; 259 goto out_free;
259 } 260 }
@@ -270,7 +271,7 @@ static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
270 out_free: 271 out_free:
271 kfree(new_buf); 272 kfree(new_buf);
272 out: 273 out:
273 return(n); 274 return n;
274} 275}
275 276
276static ssize_t hppfs_read(struct file *file, char *buf, size_t count, 277static ssize_t hppfs_read(struct file *file, char *buf, size_t count,
@@ -491,7 +492,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
491 fd = open_host_sock(host_file, &filter); 492 fd = open_host_sock(host_file, &filter);
492 if(fd > 0){ 493 if(fd > 0){
493 data->contents = hppfs_get_data(fd, filter, 494 data->contents = hppfs_get_data(fd, filter,
494 &data->proc_file, 495 data->proc_file,
495 file, &data->len); 496 file, &data->len);
496 if(!IS_ERR(data->contents)) 497 if(!IS_ERR(data->contents))
497 data->host_fd = fd; 498 data->host_fd = fd;
@@ -543,7 +544,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
543static loff_t hppfs_llseek(struct file *file, loff_t off, int where) 544static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
544{ 545{
545 struct hppfs_private *data = file->private_data; 546 struct hppfs_private *data = file->private_data;
546 struct file *proc_file = &data->proc_file; 547 struct file *proc_file = data->proc_file;
547 loff_t (*llseek)(struct file *, loff_t, int); 548 loff_t (*llseek)(struct file *, loff_t, int);
548 loff_t ret; 549 loff_t ret;
549 550
@@ -586,7 +587,7 @@ static int hppfs_filldir(void *d, const char *name, int size,
586static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) 587static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
587{ 588{
588 struct hppfs_private *data = file->private_data; 589 struct hppfs_private *data = file->private_data;
589 struct file *proc_file = &data->proc_file; 590 struct file *proc_file = data->proc_file;
590 int (*readdir)(struct file *, void *, filldir_t); 591 int (*readdir)(struct file *, void *, filldir_t);
591 struct hppfs_dirent dirent = ((struct hppfs_dirent) 592 struct hppfs_dirent dirent = ((struct hppfs_dirent)
592 { .vfs_dirent = ent, 593 { .vfs_dirent = ent,
@@ -661,42 +662,36 @@ static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
661{ 662{
662 struct file *proc_file; 663 struct file *proc_file;
663 struct dentry *proc_dentry; 664 struct dentry *proc_dentry;
664 int (*readlink)(struct dentry *, char *, int); 665 int ret;
665 int err, n;
666 666
667 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 667 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
668 proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); 668 proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY);
669 err = PTR_ERR(proc_dentry); 669 if (IS_ERR(proc_file))
670 if(IS_ERR(proc_dentry)) 670 return PTR_ERR(proc_file);
671 return(err);
672 671
673 readlink = proc_dentry->d_inode->i_op->readlink; 672 ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen);
674 n = (*readlink)(proc_dentry, buffer, buflen);
675 673
676 fput(proc_file); 674 fput(proc_file);
677 675
678 return(n); 676 return ret;
679} 677}
680 678
681static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 679static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
682{ 680{
683 struct file *proc_file; 681 struct file *proc_file;
684 struct dentry *proc_dentry; 682 struct dentry *proc_dentry;
685 int (*follow_link)(struct dentry *, struct nameidata *); 683 void *ret;
686 int err, n;
687 684
688 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 685 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
689 proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY); 686 proc_file = dentry_open(dget(proc_dentry), NULL, O_RDONLY);
690 err = PTR_ERR(proc_dentry); 687 if (IS_ERR(proc_file))
691 if(IS_ERR(proc_dentry)) 688 return proc_file;
692 return(err);
693 689
694 follow_link = proc_dentry->d_inode->i_op->follow_link; 690 ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
695 n = (*follow_link)(proc_dentry, nd);
696 691
697 fput(proc_file); 692 fput(proc_file);
698 693
699 return(n); 694 return ret;
700} 695}
701 696
702static struct inode_operations hppfs_dir_iops = { 697static struct inode_operations hppfs_dir_iops = {
diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f36280..e57f1724db3e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,7 @@
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/cdev.h> 22#include <linux/cdev.h>
23#include <linux/bootmem.h> 23#include <linux/bootmem.h>
24#include <linux/inotify.h>
24 25
25/* 26/*
26 * This is needed for the following functions: 27 * This is needed for the following functions:
@@ -202,6 +203,10 @@ void inode_init_once(struct inode *inode)
202 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); 203 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
203 spin_lock_init(&inode->i_lock); 204 spin_lock_init(&inode->i_lock);
204 i_size_ordered_init(inode); 205 i_size_ordered_init(inode);
206#ifdef CONFIG_INOTIFY
207 INIT_LIST_HEAD(&inode->inotify_watches);
208 sema_init(&inode->inotify_sem, 1);
209#endif
205} 210}
206 211
207EXPORT_SYMBOL(inode_init_once); 212EXPORT_SYMBOL(inode_init_once);
@@ -282,6 +287,13 @@ static void dispose_list(struct list_head *head)
282 if (inode->i_data.nrpages) 287 if (inode->i_data.nrpages)
283 truncate_inode_pages(&inode->i_data, 0); 288 truncate_inode_pages(&inode->i_data, 0);
284 clear_inode(inode); 289 clear_inode(inode);
290
291 spin_lock(&inode_lock);
292 hlist_del_init(&inode->i_hash);
293 list_del_init(&inode->i_sb_list);
294 spin_unlock(&inode_lock);
295
296 wake_up_inode(inode);
285 destroy_inode(inode); 297 destroy_inode(inode);
286 nr_disposed++; 298 nr_disposed++;
287 } 299 }
@@ -317,8 +329,6 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
317 inode = list_entry(tmp, struct inode, i_sb_list); 329 inode = list_entry(tmp, struct inode, i_sb_list);
318 invalidate_inode_buffers(inode); 330 invalidate_inode_buffers(inode);
319 if (!atomic_read(&inode->i_count)) { 331 if (!atomic_read(&inode->i_count)) {
320 hlist_del_init(&inode->i_hash);
321 list_del(&inode->i_sb_list);
322 list_move(&inode->i_list, dispose); 332 list_move(&inode->i_list, dispose);
323 inode->i_state |= I_FREEING; 333 inode->i_state |= I_FREEING;
324 count++; 334 count++;
@@ -346,6 +356,7 @@ int invalidate_inodes(struct super_block * sb)
346 356
347 down(&iprune_sem); 357 down(&iprune_sem);
348 spin_lock(&inode_lock); 358 spin_lock(&inode_lock);
359 inotify_unmount_inodes(&sb->s_inodes);
349 busy = invalidate_list(&sb->s_inodes, &throw_away); 360 busy = invalidate_list(&sb->s_inodes, &throw_away);
350 spin_unlock(&inode_lock); 361 spin_unlock(&inode_lock);
351 362
@@ -439,8 +450,6 @@ static void prune_icache(int nr_to_scan)
439 if (!can_unuse(inode)) 450 if (!can_unuse(inode))
440 continue; 451 continue;
441 } 452 }
442 hlist_del_init(&inode->i_hash);
443 list_del_init(&inode->i_sb_list);
444 list_move(&inode->i_list, &freeable); 453 list_move(&inode->i_list, &freeable);
445 inode->i_state |= I_FREEING; 454 inode->i_state |= I_FREEING;
446 nr_pruned++; 455 nr_pruned++;
@@ -500,7 +509,7 @@ repeat:
500 continue; 509 continue;
501 if (!test(inode, data)) 510 if (!test(inode, data))
502 continue; 511 continue;
503 if (inode->i_state & (I_FREEING|I_CLEAR)) { 512 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
504 __wait_on_freeing_inode(inode); 513 __wait_on_freeing_inode(inode);
505 goto repeat; 514 goto repeat;
506 } 515 }
@@ -525,7 +534,7 @@ repeat:
525 continue; 534 continue;
526 if (inode->i_sb != sb) 535 if (inode->i_sb != sb)
527 continue; 536 continue;
528 if (inode->i_state & (I_FREEING|I_CLEAR)) { 537 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
529 __wait_on_freeing_inode(inode); 538 __wait_on_freeing_inode(inode);
530 goto repeat; 539 goto repeat;
531 } 540 }
@@ -727,7 +736,7 @@ EXPORT_SYMBOL(iunique);
727struct inode *igrab(struct inode *inode) 736struct inode *igrab(struct inode *inode)
728{ 737{
729 spin_lock(&inode_lock); 738 spin_lock(&inode_lock);
730 if (!(inode->i_state & I_FREEING)) 739 if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
731 __iget(inode); 740 __iget(inode);
732 else 741 else
733 /* 742 /*
@@ -748,6 +757,7 @@ EXPORT_SYMBOL(igrab);
748 * @head: the head of the list to search 757 * @head: the head of the list to search
749 * @test: callback used for comparisons between inodes 758 * @test: callback used for comparisons between inodes
750 * @data: opaque data pointer to pass to @test 759 * @data: opaque data pointer to pass to @test
760 * @wait: if true wait for the inode to be unlocked, if false do not
751 * 761 *
752 * ifind() searches for the inode specified by @data in the inode 762 * ifind() searches for the inode specified by @data in the inode
753 * cache. This is a generalized version of ifind_fast() for file systems where 763 * cache. This is a generalized version of ifind_fast() for file systems where
@@ -762,7 +772,7 @@ EXPORT_SYMBOL(igrab);
762 */ 772 */
763static inline struct inode *ifind(struct super_block *sb, 773static inline struct inode *ifind(struct super_block *sb,
764 struct hlist_head *head, int (*test)(struct inode *, void *), 774 struct hlist_head *head, int (*test)(struct inode *, void *),
765 void *data) 775 void *data, const int wait)
766{ 776{
767 struct inode *inode; 777 struct inode *inode;
768 778
@@ -771,7 +781,8 @@ static inline struct inode *ifind(struct super_block *sb,
771 if (inode) { 781 if (inode) {
772 __iget(inode); 782 __iget(inode);
773 spin_unlock(&inode_lock); 783 spin_unlock(&inode_lock);
774 wait_on_inode(inode); 784 if (likely(wait))
785 wait_on_inode(inode);
775 return inode; 786 return inode;
776 } 787 }
777 spin_unlock(&inode_lock); 788 spin_unlock(&inode_lock);
@@ -811,7 +822,7 @@ static inline struct inode *ifind_fast(struct super_block *sb,
811} 822}
812 823
813/** 824/**
814 * ilookup5 - search for an inode in the inode cache 825 * ilookup5_nowait - search for an inode in the inode cache
815 * @sb: super block of file system to search 826 * @sb: super block of file system to search
816 * @hashval: hash value (usually inode number) to search for 827 * @hashval: hash value (usually inode number) to search for
817 * @test: callback used for comparisons between inodes 828 * @test: callback used for comparisons between inodes
@@ -823,7 +834,38 @@ static inline struct inode *ifind_fast(struct super_block *sb,
823 * identification of an inode. 834 * identification of an inode.
824 * 835 *
825 * If the inode is in the cache, the inode is returned with an incremented 836 * If the inode is in the cache, the inode is returned with an incremented
826 * reference count. 837 * reference count. Note, the inode lock is not waited upon so you have to be
838 * very careful what you do with the returned inode. You probably should be
839 * using ilookup5() instead.
840 *
841 * Otherwise NULL is returned.
842 *
843 * Note, @test is called with the inode_lock held, so can't sleep.
844 */
845struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
846 int (*test)(struct inode *, void *), void *data)
847{
848 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
849
850 return ifind(sb, head, test, data, 0);
851}
852
853EXPORT_SYMBOL(ilookup5_nowait);
854
855/**
856 * ilookup5 - search for an inode in the inode cache
857 * @sb: super block of file system to search
858 * @hashval: hash value (usually inode number) to search for
859 * @test: callback used for comparisons between inodes
860 * @data: opaque data pointer to pass to @test
861 *
862 * ilookup5() uses ifind() to search for the inode specified by @hashval and
863 * @data in the inode cache. This is a generalized version of ilookup() for
864 * file systems where the inode number is not sufficient for unique
865 * identification of an inode.
866 *
867 * If the inode is in the cache, the inode lock is waited upon and the inode is
868 * returned with an incremented reference count.
827 * 869 *
828 * Otherwise NULL is returned. 870 * Otherwise NULL is returned.
829 * 871 *
@@ -834,7 +876,7 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
834{ 876{
835 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 877 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
836 878
837 return ifind(sb, head, test, data); 879 return ifind(sb, head, test, data, 1);
838} 880}
839 881
840EXPORT_SYMBOL(ilookup5); 882EXPORT_SYMBOL(ilookup5);
@@ -891,7 +933,7 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
891 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 933 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
892 struct inode *inode; 934 struct inode *inode;
893 935
894 inode = ifind(sb, head, test, data); 936 inode = ifind(sb, head, test, data, 1);
895 if (inode) 937 if (inode)
896 return inode; 938 return inode;
897 /* 939 /*
@@ -1024,17 +1066,21 @@ static void generic_forget_inode(struct inode *inode)
1024 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 1066 if (!(inode->i_state & (I_DIRTY|I_LOCK)))
1025 list_move(&inode->i_list, &inode_unused); 1067 list_move(&inode->i_list, &inode_unused);
1026 inodes_stat.nr_unused++; 1068 inodes_stat.nr_unused++;
1027 spin_unlock(&inode_lock); 1069 if (!sb || (sb->s_flags & MS_ACTIVE)) {
1028 if (!sb || (sb->s_flags & MS_ACTIVE)) 1070 spin_unlock(&inode_lock);
1029 return; 1071 return;
1072 }
1073 inode->i_state |= I_WILL_FREE;
1074 spin_unlock(&inode_lock);
1030 write_inode_now(inode, 1); 1075 write_inode_now(inode, 1);
1031 spin_lock(&inode_lock); 1076 spin_lock(&inode_lock);
1077 inode->i_state &= ~I_WILL_FREE;
1032 inodes_stat.nr_unused--; 1078 inodes_stat.nr_unused--;
1033 hlist_del_init(&inode->i_hash); 1079 hlist_del_init(&inode->i_hash);
1034 } 1080 }
1035 list_del_init(&inode->i_list); 1081 list_del_init(&inode->i_list);
1036 list_del_init(&inode->i_sb_list); 1082 list_del_init(&inode->i_sb_list);
1037 inode->i_state|=I_FREEING; 1083 inode->i_state |= I_FREEING;
1038 inodes_stat.nr_inodes--; 1084 inodes_stat.nr_inodes--;
1039 spin_unlock(&inode_lock); 1085 spin_unlock(&inode_lock);
1040 if (inode->i_data.nrpages) 1086 if (inode->i_data.nrpages)
@@ -1048,7 +1094,7 @@ static void generic_forget_inode(struct inode *inode)
1048 * inode when the usage count drops to zero, and 1094 * inode when the usage count drops to zero, and
1049 * i_nlink is zero. 1095 * i_nlink is zero.
1050 */ 1096 */
1051static void generic_drop_inode(struct inode *inode) 1097void generic_drop_inode(struct inode *inode)
1052{ 1098{
1053 if (!inode->i_nlink) 1099 if (!inode->i_nlink)
1054 generic_delete_inode(inode); 1100 generic_delete_inode(inode);
@@ -1056,6 +1102,8 @@ static void generic_drop_inode(struct inode *inode)
1056 generic_forget_inode(inode); 1102 generic_forget_inode(inode);
1057} 1103}
1058 1104
1105EXPORT_SYMBOL_GPL(generic_drop_inode);
1106
1059/* 1107/*
1060 * Called when we're dropping the last reference 1108 * Called when we're dropping the last reference
1061 * to an inode. 1109 * to an inode.
@@ -1238,29 +1286,21 @@ int inode_wait(void *word)
1238} 1286}
1239 1287
1240/* 1288/*
1241 * If we try to find an inode in the inode hash while it is being deleted, we 1289 * If we try to find an inode in the inode hash while it is being
1242 * have to wait until the filesystem completes its deletion before reporting 1290 * deleted, we have to wait until the filesystem completes its
1243 * that it isn't found. This is because iget will immediately call 1291 * deletion before reporting that it isn't found. This function waits
1244 * ->read_inode, and we want to be sure that evidence of the deletion is found 1292 * until the deletion _might_ have completed. Callers are responsible
1245 * by ->read_inode. 1293 * to recheck inode state.
1294 *
1295 * It doesn't matter if I_LOCK is not set initially, a call to
1296 * wake_up_inode() after removing from the hash list will DTRT.
1297 *
1246 * This is called with inode_lock held. 1298 * This is called with inode_lock held.
1247 */ 1299 */
1248static void __wait_on_freeing_inode(struct inode *inode) 1300static void __wait_on_freeing_inode(struct inode *inode)
1249{ 1301{
1250 wait_queue_head_t *wq; 1302 wait_queue_head_t *wq;
1251 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); 1303 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
1252
1253 /*
1254 * I_FREEING and I_CLEAR are cleared in process context under
1255 * inode_lock, so we have to give the tasks who would clear them
1256 * a chance to run and acquire inode_lock.
1257 */
1258 if (!(inode->i_state & I_LOCK)) {
1259 spin_unlock(&inode_lock);
1260 yield();
1261 spin_lock(&inode_lock);
1262 return;
1263 }
1264 wq = bit_waitqueue(&inode->i_state, __I_LOCK); 1304 wq = bit_waitqueue(&inode->i_state, __I_LOCK);
1265 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1305 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1266 spin_unlock(&inode_lock); 1306 spin_unlock(&inode_lock);
diff --git a/fs/inotify.c b/fs/inotify.c
new file mode 100644
index 000000000000..2e4e2a57708c
--- /dev/null
+++ b/fs/inotify.c
@@ -0,0 +1,1057 @@
1/*
2 * fs/inotify.c - inode-based file event notifications
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Copyright (C) 2005 John McCutchan
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2, or (at your option) any
13 * later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 */
20
21#include <linux/module.h>
22#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/spinlock.h>
25#include <linux/idr.h>
26#include <linux/slab.h>
27#include <linux/fs.h>
28#include <linux/file.h>
29#include <linux/mount.h>
30#include <linux/namei.h>
31#include <linux/poll.h>
32#include <linux/init.h>
33#include <linux/list.h>
34#include <linux/writeback.h>
35#include <linux/inotify.h>
36
37#include <asm/ioctls.h>
38
39static atomic_t inotify_cookie;
40
41static kmem_cache_t *watch_cachep;
42static kmem_cache_t *event_cachep;
43
44static struct vfsmount *inotify_mnt;
45
46/* these are configurable via /proc/sys/fs/inotify/ */
47int inotify_max_user_instances;
48int inotify_max_user_watches;
49int inotify_max_queued_events;
50
51/*
52 * Lock ordering:
53 *
54 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
55 * iprune_sem (synchronize shrink_icache_memory())
56 * inode_lock (protects the super_block->s_inodes list)
57 * inode->inotify_sem (protects inode->inotify_watches and watches->i_list)
58 * inotify_dev->sem (protects inotify_device and watches->d_list)
59 */
60
61/*
62 * Lifetimes of the three main data structures--inotify_device, inode, and
63 * inotify_watch--are managed by reference count.
64 *
65 * inotify_device: Lifetime is from inotify_init() until release. Additional
66 * references can bump the count via get_inotify_dev() and drop the count via
67 * put_inotify_dev().
68 *
69 * inotify_watch: Lifetime is from create_watch() to destory_watch().
70 * Additional references can bump the count via get_inotify_watch() and drop
71 * the count via put_inotify_watch().
72 *
73 * inode: Pinned so long as the inode is associated with a watch, from
74 * create_watch() to put_inotify_watch().
75 */
76
77/*
78 * struct inotify_device - represents an inotify instance
79 *
80 * This structure is protected by the semaphore 'sem'.
81 */
82struct inotify_device {
83 wait_queue_head_t wq; /* wait queue for i/o */
84 struct idr idr; /* idr mapping wd -> watch */
85 struct semaphore sem; /* protects this bad boy */
86 struct list_head events; /* list of queued events */
87 struct list_head watches; /* list of watches */
88 atomic_t count; /* reference count */
89 struct user_struct *user; /* user who opened this dev */
90 unsigned int queue_size; /* size of the queue (bytes) */
91 unsigned int event_count; /* number of pending events */
92 unsigned int max_events; /* maximum number of events */
93 u32 last_wd; /* the last wd allocated */
94};
95
96/*
97 * struct inotify_kernel_event - An inotify event, originating from a watch and
98 * queued for user-space. A list of these is attached to each instance of the
99 * device. In read(), this list is walked and all events that can fit in the
100 * buffer are returned.
101 *
102 * Protected by dev->sem of the device in which we are queued.
103 */
104struct inotify_kernel_event {
105 struct inotify_event event; /* the user-space event */
106 struct list_head list; /* entry in inotify_device's list */
107 char *name; /* filename, if any */
108};
109
110/*
111 * struct inotify_watch - represents a watch request on a specific inode
112 *
113 * d_list is protected by dev->sem of the associated watch->dev.
114 * i_list and mask are protected by inode->inotify_sem of the associated inode.
115 * dev, inode, and wd are never written to once the watch is created.
116 */
117struct inotify_watch {
118 struct list_head d_list; /* entry in inotify_device's list */
119 struct list_head i_list; /* entry in inode's list */
120 atomic_t count; /* reference count */
121 struct inotify_device *dev; /* associated device */
122 struct inode *inode; /* associated inode */
123 s32 wd; /* watch descriptor */
124 u32 mask; /* event mask for this watch */
125};
126
127#ifdef CONFIG_SYSCTL
128
129#include <linux/sysctl.h>
130
static int zero;	/* shared lower bound for the minmax handlers below */

/* /proc/sys/fs/inotify/ tunables; all are ints clamped to be non-negative */
ctl_table inotify_table[] = {
	{
		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
		.procname	= "max_user_instances",
		.data		= &inotify_max_user_instances,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
	},
	{
		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
		.procname	= "max_user_watches",
		.data		= &inotify_max_user_watches,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
	},
	{
		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
		.procname	= "max_queued_events",
		.data		= &inotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero
	},
	{ .ctl_name = 0 }	/* sentinel */
};
166#endif /* CONFIG_SYSCTL */
167
/* get_inotify_dev - take a reference on an inotify device */
static inline void get_inotify_dev(struct inotify_device *dev)
{
	atomic_inc(&dev->count);
}
172
/*
 * put_inotify_dev - drop a reference on an inotify device.  On the final
 * put, release the device's slot in its user's instance quota, drop the
 * user_struct reference taken at init, and free the device.
 */
static inline void put_inotify_dev(struct inotify_device *dev)
{
	if (atomic_dec_and_test(&dev->count)) {
		atomic_dec(&dev->user->inotify_devs);
		free_uid(dev->user);
		kfree(dev);
	}
}
181
/* get_inotify_watch - take a reference on a watch */
static inline void get_inotify_watch(struct inotify_watch *watch)
{
	atomic_inc(&watch->count);
}
186
/*
 * put_inotify_watch - decrements the ref count on a given watch. cleans up
 * the watch and its references if the count reaches zero.  Drops the
 * device and inode references taken in create_watch().
 */
static inline void put_inotify_watch(struct inotify_watch *watch)
{
	if (atomic_dec_and_test(&watch->count)) {
		put_inotify_dev(watch->dev);
		iput(watch->inode);
		kmem_cache_free(watch_cachep, watch);
	}
}
199
200/*
201 * kernel_event - create a new kernel event with the given parameters
202 *
203 * This function can sleep.
204 */
205static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
206 const char *name)
207{
208 struct inotify_kernel_event *kevent;
209
210 kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
211 if (unlikely(!kevent))
212 return NULL;
213
214 /* we hand this out to user-space, so zero it just in case */
215 memset(&kevent->event, 0, sizeof(struct inotify_event));
216
217 kevent->event.wd = wd;
218 kevent->event.mask = mask;
219 kevent->event.cookie = cookie;
220
221 INIT_LIST_HEAD(&kevent->list);
222
223 if (name) {
224 size_t len, rem, event_size = sizeof(struct inotify_event);
225
226 /*
227 * We need to pad the filename so as to properly align an
228 * array of inotify_event structures. Because the structure is
229 * small and the common case is a small filename, we just round
230 * up to the next multiple of the structure's sizeof. This is
231 * simple and safe for all architectures.
232 */
233 len = strlen(name) + 1;
234 rem = event_size - len;
235 if (len > event_size) {
236 rem = event_size - (len % event_size);
237 if (len % event_size == 0)
238 rem = 0;
239 }
240
241 kevent->name = kmalloc(len + rem, GFP_KERNEL);
242 if (unlikely(!kevent->name)) {
243 kmem_cache_free(event_cachep, kevent);
244 return NULL;
245 }
246 memcpy(kevent->name, name, len);
247 if (rem)
248 memset(kevent->name + len, 0, rem);
249 kevent->event.len = len + rem;
250 } else {
251 kevent->event.len = 0;
252 kevent->name = NULL;
253 }
254
255 return kevent;
256}
257
258/*
259 * inotify_dev_get_event - return the next event in the given dev's queue
260 *
261 * Caller must hold dev->sem.
262 */
263static inline struct inotify_kernel_event *
264inotify_dev_get_event(struct inotify_device *dev)
265{
266 return list_entry(dev->events.next, struct inotify_kernel_event, list);
267}
268
269/*
270 * inotify_dev_queue_event - add a new event to the given device
271 *
272 * Caller must hold dev->sem. Can sleep (calls kernel_event()).
273 */
274static void inotify_dev_queue_event(struct inotify_device *dev,
275 struct inotify_watch *watch, u32 mask,
276 u32 cookie, const char *name)
277{
278 struct inotify_kernel_event *kevent, *last;
279
280 /* coalescing: drop this event if it is a dupe of the previous */
281 last = inotify_dev_get_event(dev);
282 if (last && last->event.mask == mask && last->event.wd == watch->wd &&
283 last->event.cookie == cookie) {
284 const char *lastname = last->name;
285
286 if (!name && !lastname)
287 return;
288 if (name && lastname && !strcmp(lastname, name))
289 return;
290 }
291
292 /* the queue overflowed and we already sent the Q_OVERFLOW event */
293 if (unlikely(dev->event_count > dev->max_events))
294 return;
295
296 /* if the queue overflows, we need to notify user space */
297 if (unlikely(dev->event_count == dev->max_events))
298 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
299 else
300 kevent = kernel_event(watch->wd, mask, cookie, name);
301
302 if (unlikely(!kevent))
303 return;
304
305 /* queue the event and wake up anyone waiting */
306 dev->event_count++;
307 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
308 list_add_tail(&kevent->list, &dev->events);
309 wake_up_interruptible(&dev->wq);
310}
311
/*
 * remove_kevent - cleans up and ultimately frees the given kevent
 *
 * Unlinks the event from the device queue, updates the device's event
 * and byte accounting, and frees both the name buffer and the event.
 *
 * Caller must hold dev->sem.
 */
static void remove_kevent(struct inotify_device *dev,
			  struct inotify_kernel_event *kevent)
{
	list_del(&kevent->list);

	dev->event_count--;
	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;

	kfree(kevent->name);	/* kfree(NULL) is a no-op */
	kmem_cache_free(event_cachep, kevent);
}
328
329/*
330 * inotify_dev_event_dequeue - destroy an event on the given device
331 *
332 * Caller must hold dev->sem.
333 */
334static void inotify_dev_event_dequeue(struct inotify_device *dev)
335{
336 if (!list_empty(&dev->events)) {
337 struct inotify_kernel_event *kevent;
338 kevent = inotify_dev_get_event(dev);
339 remove_kevent(dev, kevent);
340 }
341}
342
343/*
344 * inotify_dev_get_wd - returns the next WD for use by the given dev
345 *
346 * Callers must hold dev->sem. This function can sleep.
347 */
348static int inotify_dev_get_wd(struct inotify_device *dev,
349 struct inotify_watch *watch)
350{
351 int ret;
352
353 do {
354 if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
355 return -ENOSPC;
356 ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd);
357 } while (ret == -EAGAIN);
358
359 return ret;
360}
361
/*
 * find_inode - resolve a user-given path to a specific inode and return a nd
 *
 * On success the nameidata holds references (released by the caller via
 * path_release()).  On failure the references are already dropped and the
 * error is returned.
 */
static int find_inode(const char __user *dirname, struct nameidata *nd)
{
	int error;

	error = __user_walk(dirname, LOOKUP_FOLLOW, nd);
	if (error)
		return error;
	/* you can only watch an inode if you have read permissions on it */
	error = permission(nd->dentry->d_inode, MAY_READ, NULL);
	if (error)
		path_release(nd);
	return error;
}
378
/*
 * create_watch - creates a watch on the given device.
 *
 * Callers must hold dev->sem.  Calls inotify_dev_get_wd() so may sleep.
 * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
 *
 * Returns the new watch (refcount 1, owned by dev->watches) or an
 * ERR_PTR on quota exhaustion / allocation failure.
 */
static struct inotify_watch *create_watch(struct inotify_device *dev,
					  u32 mask, struct inode *inode)
{
	struct inotify_watch *watch;
	int ret;

	/* enforce the per-user watch quota */
	if (atomic_read(&dev->user->inotify_watches) >=
			inotify_max_user_watches)
		return ERR_PTR(-ENOSPC);

	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
	if (unlikely(!watch))
		return ERR_PTR(-ENOMEM);

	ret = inotify_dev_get_wd(dev, watch);
	if (unlikely(ret)) {
		kmem_cache_free(watch_cachep, watch);
		return ERR_PTR(ret);
	}

	/* remember the highest wd handed out so allocation keeps climbing */
	dev->last_wd = watch->wd;
	watch->mask = mask;
	/* count starts at 0; get_inotify_watch() below makes it 1 */
	atomic_set(&watch->count, 0);
	INIT_LIST_HEAD(&watch->d_list);
	INIT_LIST_HEAD(&watch->i_list);

	/* save a reference to device and bump the count to make it official */
	get_inotify_dev(dev);
	watch->dev = dev;

	/*
	 * Save a reference to the inode and bump the ref count to make it
	 * official.  We hold a reference to nameidata, which makes this safe.
	 * (NOTE(review): igrab() can return NULL for an inode being freed;
	 * the nameidata reference is assumed to preclude that here.)
	 */
	watch->inode = igrab(inode);

	/* bump our own count, corresponding to our entry in dev->watches */
	get_inotify_watch(watch);

	atomic_inc(&dev->user->inotify_watches);

	return watch;
}
428
429/*
430 * inotify_find_dev - find the watch associated with the given inode and dev
431 *
432 * Callers must hold inode->inotify_sem.
433 */
434static struct inotify_watch *inode_find_dev(struct inode *inode,
435 struct inotify_device *dev)
436{
437 struct inotify_watch *watch;
438
439 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
440 if (watch->dev == dev)
441 return watch;
442 }
443
444 return NULL;
445}
446
/*
 * remove_watch_no_event - remove_watch() without the IN_IGNORED event.
 *
 * Unlinks the watch from both the inode's and the device's lists, releases
 * its quota slot and wd, and drops the list's reference (which may free
 * the watch and in turn drop its dev/inode references).
 */
static void remove_watch_no_event(struct inotify_watch *watch,
				  struct inotify_device *dev)
{
	list_del(&watch->i_list);
	list_del(&watch->d_list);

	atomic_dec(&dev->user->inotify_watches);
	idr_remove(&dev->idr, watch->wd);
	put_inotify_watch(watch);
}
460
/*
 * remove_watch - Remove a watch from both the device and the inode.  Sends
 * the IN_IGNORED event to the given device signifying that the inode is no
 * longer watched.
 *
 * Callers must hold both inode->inotify_sem and dev->sem.  We drop a
 * reference to the inode before returning.
 *
 * The inode is not iput() directly so the call remains atomic; the final
 * iput() happens from put_inotify_watch() once the last reference is gone.
 * (The old comment about a one/zero return value was stale: this returns
 * void.)
 */
static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
{
	inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
	remove_watch_no_event(watch, dev);
}
477
/*
 * inotify_inode_watched - returns nonzero if there are watches on this inode
 * and zero otherwise. We call this lockless, we do not care if we race.
 */
static inline int inotify_inode_watched(struct inode *inode)
{
	return !list_empty(&inode->inotify_watches);
}
486
487/* Kernel API */
488
/**
 * inotify_inode_queue_event - queue an event to all watches on this inode
 * @inode: inode event is originating from
 * @mask: event mask describing this event
 * @cookie: cookie for synchronization, or zero
 * @name: filename, if any
 *
 * Lock ordering: inode->inotify_sem is taken before each watch's dev->sem.
 */
void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
			       const char *name)
{
	struct inotify_watch *watch, *next;

	/* lockless fast path: nothing to do if nobody is watching */
	if (!inotify_inode_watched(inode))
		return;

	down(&inode->inotify_sem);
	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
		u32 watch_mask = watch->mask;
		if (watch_mask & mask) {
			struct inotify_device *dev = watch->dev;
			/* pin the watch: the one-shot removal below may drop
			 * the list's reference */
			get_inotify_watch(watch);
			down(&dev->sem);
			inotify_dev_queue_event(dev, watch, mask, cookie, name);
			/* one-shot watches go away after their first event */
			if (watch_mask & IN_ONESHOT)
				remove_watch_no_event(watch, dev);
			up(&dev->sem);
			put_inotify_watch(watch);
		}
	}
	up(&inode->inotify_sem);
}
EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
521
/**
 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
 * @dentry: the dentry in question, we queue against this dentry's parent
 * @mask: event mask describing this event
 * @cookie: cookie for synchronization, or zero
 * @name: filename, if any
 *
 * d_lock pins d_parent while we peek at it; if the parent inode is watched
 * we take a dentry reference so it stays valid across the (sleeping) queue
 * call after the spinlock is dropped.
 */
void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
				       u32 cookie, const char *name)
{
	struct dentry *parent;
	struct inode *inode;

	spin_lock(&dentry->d_lock);
	parent = dentry->d_parent;
	inode = parent->d_inode;

	if (inotify_inode_watched(inode)) {
		dget(parent);
		spin_unlock(&dentry->d_lock);
		inotify_inode_queue_event(inode, mask, cookie, name);
		dput(parent);
	} else
		spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
548
/**
 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
 *
 * Backed by a global atomic counter, so callers get distinct values
 * (modulo u32 wraparound).
 */
u32 inotify_get_cookie(void)
{
	return atomic_inc_return(&inotify_cookie);
}
EXPORT_SYMBOL_GPL(inotify_get_cookie);
557
/**
 * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
 * @list: list of inodes being unmounted (sb->s_inodes)
 *
 * Called with inode_lock held, protecting the unmounting super block's list
 * of inodes, and with iprune_sem held, keeping shrink_icache_memory() at bay.
 * We temporarily drop inode_lock, however, and CAN block.
 *
 * Each watched inode gets an IN_UNMOUNT event (plus the IN_IGNORED that
 * remove_watch() sends) before its watches are torn down.
 */
void inotify_unmount_inodes(struct list_head *list)
{
	struct inode *inode, *next_i, *need_iput = NULL;

	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
		struct inotify_watch *watch, *next_w;
		struct inode *need_iput_tmp;
		struct list_head *watches;

		/*
		 * If i_count is zero, the inode cannot have any watches and
		 * doing an __iget/iput with MS_ACTIVE clear would actually
		 * evict all inodes with zero i_count from icache which is
		 * unnecessarily violent and may in fact be illegal to do.
		 */
		if (!atomic_read(&inode->i_count))
			continue;

		/*
		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
		 * I_WILL_FREE which is fine because by that point the inode
		 * cannot have any associated watches.
		 */
		if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
			continue;

		/* defer the previous iteration's iput until after we've
		 * pinned this inode */
		need_iput_tmp = need_iput;
		need_iput = NULL;
		/* In case the remove_watch() drops a reference. */
		if (inode != need_iput_tmp)
			__iget(inode);
		else
			need_iput_tmp = NULL;
		/* In case the dropping of a reference would nuke next_i. */
		if ((&next_i->i_sb_list != list) &&
				atomic_read(&next_i->i_count) &&
				!(next_i->i_state & (I_CLEAR | I_FREEING |
					I_WILL_FREE))) {
			__iget(next_i);
			need_iput = next_i;
		}

		/*
		 * We can safely drop inode_lock here because we hold
		 * references on both inode and next_i.  Also no new inodes
		 * will be added since the umount has begun.  Finally,
		 * iprune_sem keeps shrink_icache_memory() away.
		 */
		spin_unlock(&inode_lock);

		if (need_iput_tmp)
			iput(need_iput_tmp);

		/* for each watch, send IN_UNMOUNT and then remove it */
		down(&inode->inotify_sem);
		watches = &inode->inotify_watches;
		list_for_each_entry_safe(watch, next_w, watches, i_list) {
			struct inotify_device *dev = watch->dev;
			down(&dev->sem);
			inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
			remove_watch(watch, dev);
			up(&dev->sem);
		}
		up(&inode->inotify_sem);
		iput(inode);		

		spin_lock(&inode_lock);
	}
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
636
/**
 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
 * @inode: inode that is about to be removed
 *
 * Tears down every watch on the inode; remove_watch() queues IN_IGNORED
 * to each watching device.  Lock order: inode->inotify_sem, then dev->sem.
 */
void inotify_inode_is_dead(struct inode *inode)
{
	struct inotify_watch *watch, *next;

	down(&inode->inotify_sem);
	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
		struct inotify_device *dev = watch->dev;
		down(&dev->sem);
		remove_watch(watch, dev);
		up(&dev->sem);
	}
	up(&inode->inotify_sem);
}
EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
655
656/* Device Interface */
657
658static unsigned int inotify_poll(struct file *file, poll_table *wait)
659{
660 struct inotify_device *dev = file->private_data;
661 int ret = 0;
662
663 poll_wait(file, &dev->wq, wait);
664 down(&dev->sem);
665 if (!list_empty(&dev->events))
666 ret = POLLIN | POLLRDNORM;
667 up(&dev->sem);
668
669 return ret;
670}
671
/*
 * inotify_read - read() on an inotify fd: block (unless O_NONBLOCK) until
 * at least one event is queued, then copy out as many complete events as
 * fit in the user buffer.  Returns the number of bytes copied, or a
 * negative errno.
 */
static ssize_t inotify_read(struct file *file, char __user *buf,
			    size_t count, loff_t *pos)
{
	size_t event_size = sizeof (struct inotify_event);
	struct inotify_device *dev;
	char __user *start;
	int ret;
	DEFINE_WAIT(wait);

	start = buf;
	dev = file->private_data;

	/* wait interruptibly until the queue is non-empty */
	while (1) {
		int events;

		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);

		down(&dev->sem);
		events = !list_empty(&dev->events);
		up(&dev->sem);
		if (events) {
			ret = 0;
			break;
		}

		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		schedule();
	}

	finish_wait(&dev->wq, &wait);
	if (ret)
		return ret;

	down(&dev->sem);
	while (1) {
		struct inotify_kernel_event *kevent;

		ret = buf - start;	/* bytes copied so far */
		if (list_empty(&dev->events))
			break;

		kevent = inotify_dev_get_event(dev);
		/* never split an event: stop when the next one won't fit */
		if (event_size + kevent->event.len > count)
			break;

		if (copy_to_user(buf, &kevent->event, event_size)) {
			ret = -EFAULT;
			break;
		}
		buf += event_size;
		count -= event_size;

		if (kevent->name) {
			/* name buffer is already NUL-padded to event.len */
			if (copy_to_user(buf, kevent->name, kevent->event.len)){
				ret = -EFAULT;
				break;
			}
			buf += kevent->event.len;
			count -= kevent->event.len;
		}

		/* event fully delivered; unlink and free it */
		remove_kevent(dev, kevent);
	}
	up(&dev->sem);

	return ret;
}
748
/*
 * inotify_release - final fput of an inotify fd: tear down every watch and
 * pending event, then drop the device's base reference (freeing it).
 */
static int inotify_release(struct inode *ignored, struct file *file)
{
	struct inotify_device *dev = file->private_data;

	/*
	 * Destroy all of the watches on this device.  Unfortunately, not very
	 * pretty.  We cannot do a simple iteration over the list, because we
	 * do not know the inode until we iterate to the watch.  But we need to
	 * hold inode->inotify_sem before dev->sem.  The following works.
	 */
	while (1) {
		struct inotify_watch *watch;
		struct list_head *watches;
		struct inode *inode;

		/* peek at the first watch under dev->sem only, pin it ... */
		down(&dev->sem);
		watches = &dev->watches;
		if (list_empty(watches)) {
			up(&dev->sem);
			break;
		}
		watch = list_entry(watches->next, struct inotify_watch, d_list);
		get_inotify_watch(watch);
		up(&dev->sem);

		/* ... then re-acquire in the correct inode-first order */
		inode = watch->inode;
		down(&inode->inotify_sem);
		down(&dev->sem);
		remove_watch_no_event(watch, dev);
		up(&dev->sem);
		up(&inode->inotify_sem);
		put_inotify_watch(watch);
	}

	/* destroy all of the events on this device */
	down(&dev->sem);
	while (!list_empty(&dev->events))
		inotify_dev_event_dequeue(dev);
	up(&dev->sem);

	/* free this device: the put matching the get in inotify_init() */
	put_inotify_dev(dev);

	return 0;
}
794
/*
 * inotify_ignore - remove a given wd from this inotify instance.
 *
 * Looks the watch up twice: once under dev->sem alone to learn (and pin)
 * its inode, then again under the proper inode->inotify_sem + dev->sem
 * ordering, since the watch may have been removed in between.
 *
 * Can sleep.
 */
static int inotify_ignore(struct inotify_device *dev, s32 wd)
{
	struct inotify_watch *watch;
	struct inode *inode;

	down(&dev->sem);
	watch = idr_find(&dev->idr, wd);
	if (unlikely(!watch)) {
		up(&dev->sem);
		return -EINVAL;
	}
	/* pin the watch so 'watch' and 'inode' stay valid across the unlock */
	get_inotify_watch(watch);
	inode = watch->inode;
	up(&dev->sem);

	down(&inode->inotify_sem);
	down(&dev->sem);

	/* make sure that we did not race */
	watch = idr_find(&dev->idr, wd);
	if (likely(watch))
		remove_watch(watch, dev);

	up(&dev->sem);
	up(&inode->inotify_sem);
	put_inotify_watch(watch);

	return 0;
}
829
830static long inotify_ioctl(struct file *file, unsigned int cmd,
831 unsigned long arg)
832{
833 struct inotify_device *dev;
834 void __user *p;
835 int ret = -ENOTTY;
836
837 dev = file->private_data;
838 p = (void __user *) arg;
839
840 switch (cmd) {
841 case FIONREAD:
842 ret = put_user(dev->queue_size, (int __user *) p);
843 break;
844 }
845
846 return ret;
847}
848
/*
 * File operations for an inotify instance fd: read-only event stream.
 * The same ioctl handler serves native and compat paths (FIONREAD only).
 */
static struct file_operations inotify_fops = {
	.poll           = inotify_poll,
	.read           = inotify_read,
	.release        = inotify_release,
	.unlocked_ioctl = inotify_ioctl,
	.compat_ioctl   = inotify_ioctl,
};
856
/*
 * sys_inotify_init - create a new inotify instance and return an fd for it.
 *
 * Allocates an fd, a file backed by the internal inotifyfs mount, and the
 * inotify_device, wiring them together.  Enforces the per-user instance
 * quota.  Returns the fd or a negative errno (cleanup via the goto chain).
 */
asmlinkage long sys_inotify_init(void)
{
	struct inotify_device *dev;
	struct user_struct *user;
	struct file *filp;
	int fd, ret;

	fd = get_unused_fd();
	if (fd < 0)
		return fd;

	filp = get_empty_filp();
	if (!filp) {
		ret = -ENFILE;
		goto out_put_fd;
	}

	/* per-user instance quota check; get_uid pins the user_struct */
	user = get_uid(current->user);
	if (unlikely(atomic_read(&user->inotify_devs) >=
			inotify_max_user_instances)) {
		ret = -EMFILE;
		goto out_free_uid;
	}

	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
	if (unlikely(!dev)) {
		ret = -ENOMEM;
		goto out_free_uid;
	}

	/* the file lives on the internal inotifyfs pseudo mount */
	filp->f_op = &inotify_fops;
	filp->f_vfsmnt = mntget(inotify_mnt);
	filp->f_dentry = dget(inotify_mnt->mnt_root);
	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
	filp->f_mode = FMODE_READ;
	filp->f_flags = O_RDONLY;
	filp->private_data = dev;

	idr_init(&dev->idr);
	INIT_LIST_HEAD(&dev->events);
	INIT_LIST_HEAD(&dev->watches);
	init_waitqueue_head(&dev->wq);
	sema_init(&dev->sem, 1);
	dev->event_count = 0;
	dev->queue_size = 0;
	dev->max_events = inotify_max_queued_events;
	dev->user = user;
	dev->last_wd = 0;
	atomic_set(&dev->count, 0);

	/* base reference; dropped by inotify_release() */
	get_inotify_dev(dev);
	atomic_inc(&user->inotify_devs);
	fd_install(fd, filp);

	return fd;
out_free_uid:
	free_uid(user);
	put_filp(filp);
out_put_fd:
	put_unused_fd(fd);
	return ret;
}
919
/*
 * sys_inotify_add_watch - add (or update) a watch on @path for the inotify
 * instance behind @fd, with event mask @mask.  Returns the watch descriptor
 * or a negative errno.
 */
asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
{
	struct inotify_watch *watch, *old;
	struct inode *inode;
	struct inotify_device *dev;
	struct nameidata nd;
	struct file *filp;
	int ret, fput_needed;

	filp = fget_light(fd, &fput_needed);
	if (unlikely(!filp))
		return -EBADF;

	/* verify that this is indeed an inotify instance */
	if (unlikely(filp->f_op != &inotify_fops)) {
		ret = -EINVAL;
		goto fput_and_out;
	}

	/* resolves the path and checks MAY_READ on the target */
	ret = find_inode(path, &nd);
	if (unlikely(ret))
		goto fput_and_out;

	/* inode held in place by reference to nd; dev by fget on fd */
	inode = nd.dentry->d_inode;
	dev = filp->private_data;

	/* lock order: inode->inotify_sem before dev->sem */
	down(&inode->inotify_sem);
	down(&dev->sem);

	/* don't let user-space set invalid bits: we don't want flags set */
	mask &= IN_ALL_EVENTS;
	if (unlikely(!mask)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Handle the case of re-adding a watch on an (inode,dev) pair that we
	 * are already watching.  We just update the mask and return its wd.
	 */
	old = inode_find_dev(inode, dev);
	if (unlikely(old)) {
		old->mask = mask;
		ret = old->wd;
		goto out;
	}

	watch = create_watch(dev, mask, inode);
	if (unlikely(IS_ERR(watch))) {
		ret = PTR_ERR(watch);
		goto out;
	}

	/* Add the watch to the device's and the inode's list */
	list_add(&watch->d_list, &dev->watches);
	list_add(&watch->i_list, &inode->inotify_watches);
	ret = watch->wd;
out:
	up(&dev->sem);
	up(&inode->inotify_sem);
	path_release(&nd);
fput_and_out:
	fput_light(filp, fput_needed);
	return ret;
}
986
987asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
988{
989 struct file *filp;
990 struct inotify_device *dev;
991 int ret, fput_needed;
992
993 filp = fget_light(fd, &fput_needed);
994 if (unlikely(!filp))
995 return -EBADF;
996
997 /* verify that this is indeed an inotify instance */
998 if (unlikely(filp->f_op != &inotify_fops)) {
999 ret = -EINVAL;
1000 goto out;
1001 }
1002
1003 dev = filp->private_data;
1004 ret = inotify_ignore(dev, wd);
1005
1006out:
1007 fput_light(filp, fput_needed);
1008 return ret;
1009}
1010
/*
 * inotify_get_sb - mount callback for the internal inotifyfs pseudo
 * filesystem backing inotify fds.  0xBAD1DEA is the arbitrary sb magic.
 */
static struct super_block *
inotify_get_sb(struct file_system_type *fs_type, int flags,
	       const char *dev_name, void *data)
{
    return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
}
1017
/* internal, kernel-mounted-only filesystem backing inotify file objects */
static struct file_system_type inotify_fs_type = {
    .name           = "inotifyfs",
    .get_sb         = inotify_get_sb,
    .kill_sb        = kill_anon_super,
};
1023
/*
 * inotify_setup - Our initialization function.  Note that we cannot return
 * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
 * must result in panic().
 *
 * Registers and kern-mounts inotifyfs, sets the default sysctl limits, and
 * creates the watch/event slab caches (SLAB_PANIC: allocation failure here
 * also panics).
 */
static int __init inotify_setup(void)
{
	int ret;

	ret = register_filesystem(&inotify_fs_type);
	if (unlikely(ret))
		panic("inotify: register_filesystem returned %d!\n", ret);

	inotify_mnt = kern_mount(&inotify_fs_type);
	if (IS_ERR(inotify_mnt))
		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));

	/* default tunables, adjustable via /proc/sys/fs/inotify/ */
	inotify_max_queued_events = 16384;
	inotify_max_user_instances = 128;
	inotify_max_user_watches = 8192;

	atomic_set(&inotify_cookie, 0);

	watch_cachep = kmem_cache_create("inotify_watch_cache",
					 sizeof(struct inotify_watch),
					 0, SLAB_PANIC, NULL, NULL);
	event_cachep = kmem_cache_create("inotify_event_cache",
					 sizeof(struct inotify_kernel_event),
					 0, SLAB_PANIC, NULL, NULL);

	return 0;
}
1056
1057module_init(inotify_setup);
diff --git a/fs/ioprio.c b/fs/ioprio.c
new file mode 100644
index 000000000000..d1c1f2b2c9da
--- /dev/null
+++ b/fs/ioprio.c
@@ -0,0 +1,174 @@
1/*
2 * fs/ioprio.c
3 *
4 * Copyright (C) 2004 Jens Axboe <axboe@suse.de>
5 *
6 * Helper functions for setting/querying io priorities of processes. The
 * system calls closely mimic getpriority/setpriority, see the man page for
8 * those. The prio argument is a composite of prio class and prio data, where
9 * the data argument has meaning within that class. The standard scheduling
10 * classes have 8 distinct prio levels, with 0 being the highest prio and 7
11 * being the lowest.
12 *
13 * IOW, setting BE scheduling class with prio 2 is done ala:
14 *
15 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
16 *
17 * ioprio_set(PRIO_PROCESS, pid, prio);
18 *
19 * See also Documentation/block/ioprio.txt
20 *
21 */
22#include <linux/kernel.h>
23#include <linux/ioprio.h>
24#include <linux/blkdev.h>
25
/*
 * set_task_ioprio - set @task's io priority to @ioprio.
 *
 * Permitted only when @task's uid matches the caller's euid or uid, or the
 * caller has CAP_SYS_NICE; returns -EPERM otherwise, 0 on success.  Takes
 * task_lock() to update ioprio and to notify the io_context, if any, via
 * its set_ioprio hook.  Called under tasklist_lock from sys_ioprio_set().
 */
static int set_task_ioprio(struct task_struct *task, int ioprio)
{
	struct io_context *ioc;

	if (task->uid != current->euid &&
	    task->uid != current->uid && !capable(CAP_SYS_NICE))
		return -EPERM;

	task_lock(task);

	task->ioprio = ioprio;

	ioc = task->io_context;
	if (ioc && ioc->set_ioprio)
		ioc->set_ioprio(ioc, ioprio);

	task_unlock(task);
	return 0;
}
45
46asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
47{
48 int class = IOPRIO_PRIO_CLASS(ioprio);
49 int data = IOPRIO_PRIO_DATA(ioprio);
50 struct task_struct *p, *g;
51 struct user_struct *user;
52 int ret;
53
54 switch (class) {
55 case IOPRIO_CLASS_RT:
56 if (!capable(CAP_SYS_ADMIN))
57 return -EPERM;
58 /* fall through, rt has prio field too */
59 case IOPRIO_CLASS_BE:
60 if (data >= IOPRIO_BE_NR || data < 0)
61 return -EINVAL;
62
63 break;
64 case IOPRIO_CLASS_IDLE:
65 if (!capable(CAP_SYS_ADMIN))
66 return -EPERM;
67 break;
68 default:
69 return -EINVAL;
70 }
71
72 ret = -ESRCH;
73 read_lock_irq(&tasklist_lock);
74 switch (which) {
75 case IOPRIO_WHO_PROCESS:
76 if (!who)
77 p = current;
78 else
79 p = find_task_by_pid(who);
80 if (p)
81 ret = set_task_ioprio(p, ioprio);
82 break;
83 case IOPRIO_WHO_PGRP:
84 if (!who)
85 who = process_group(current);
86 do_each_task_pid(who, PIDTYPE_PGID, p) {
87 ret = set_task_ioprio(p, ioprio);
88 if (ret)
89 break;
90 } while_each_task_pid(who, PIDTYPE_PGID, p);
91 break;
92 case IOPRIO_WHO_USER:
93 if (!who)
94 user = current->user;
95 else
96 user = find_user(who);
97
98 if (!user)
99 break;
100
101 do_each_thread(g, p) {
102 if (p->uid != who)
103 continue;
104 ret = set_task_ioprio(p, ioprio);
105 if (ret)
106 break;
107 } while_each_thread(g, p);
108
109 if (who)
110 free_uid(user);
111 break;
112 default:
113 ret = -EINVAL;
114 }
115
116 read_unlock_irq(&tasklist_lock);
117 return ret;
118}
119
/*
 * sys_ioprio_get - return the io priority for a process, process group, or
 * user (selected by @which/@who, as in getpriority(2)).  For groups/users,
 * the result is the aggregate of member ioprios via ioprio_best().
 * Returns the ioprio, or -ESRCH / -EINVAL.
 */
asmlinkage long sys_ioprio_get(int which, int who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	int ret = -ESRCH;

	read_lock_irq(&tasklist_lock);
	switch (which) {
		case IOPRIO_WHO_PROCESS:
			if (!who)
				p = current;
			else
				p = find_task_by_pid(who);
			if (p)
				ret = p->ioprio;
			break;
		case IOPRIO_WHO_PGRP:
			if (!who)
				who = process_group(current);
			do_each_task_pid(who, PIDTYPE_PGID, p) {
				/* first hit seeds ret; later hits fold in */
				if (ret == -ESRCH)
					ret = p->ioprio;
				else
					ret = ioprio_best(ret, p->ioprio);
			} while_each_task_pid(who, PIDTYPE_PGID, p);
			break;
		case IOPRIO_WHO_USER:
			if (!who)
				user = current->user;
			else
				user = find_user(who);

			if (!user)
				break;

			do_each_thread(g, p) {
				if (p->uid != user->uid)
					continue;
				if (ret == -ESRCH)
					ret = p->ioprio;
				else
					ret = ioprio_best(ret, p->ioprio);
			} while_each_thread(g, p);

			/* find_user() took a reference; current->user did not */
			if (who)
				free_uid(user);
			break;
		default:
			ret = -EINVAL;
	}

	read_unlock_irq(&tasklist_lock);
	return ret;
}
174
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 34a44e451689..4917315db732 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -129,8 +129,14 @@ static int zisofs_readpage(struct file *file, struct page *page)
129 cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask))); 129 cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
130 brelse(bh); 130 brelse(bh);
131 131
132 if (cstart > cend)
133 goto eio;
134
132 csize = cend-cstart; 135 csize = cend-cstart;
133 136
137 if (csize > deflateBound(1UL << zisofs_block_shift))
138 goto eio;
139
134 /* Now page[] contains an array of pages, any of which can be NULL, 140 /* Now page[] contains an array of pages, any of which can be NULL,
135 and the locks on which we hold. We should now read the data and 141 and the locks on which we hold. We should now read the data and
136 release the pages. If the pages are NULL the decompressed data 142 release the pages. If the pages are NULL the decompressed data
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 1e6f2e2ad4a3..5e7b43949517 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -167,7 +167,7 @@ loop:
167 } 167 }
168 168
169 wake_up(&journal->j_wait_done_commit); 169 wake_up(&journal->j_wait_done_commit);
170 if (current->flags & PF_FREEZE) { 170 if (freezing(current)) {
171 /* 171 /*
172 * The simpler the better. Flushing journal isn't a 172 * The simpler the better. Flushing journal isn't a
173 * good idea, because that depends on threads that may 173 * good idea, because that depends on threads that may
@@ -175,7 +175,7 @@ loop:
175 */ 175 */
176 jbd_debug(1, "Now suspending kjournald\n"); 176 jbd_debug(1, "Now suspending kjournald\n");
177 spin_unlock(&journal->j_state_lock); 177 spin_unlock(&journal->j_state_lock);
178 refrigerator(PF_FREEZE); 178 refrigerator();
179 spin_lock(&journal->j_state_lock); 179 spin_lock(&journal->j_state_lock);
180 } else { 180 } else {
181 /* 181 /*
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 8cc6893fc56c..456d7e6e29c2 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -175,8 +175,64 @@ jffs_hexdump(struct mtd_info *mtd, loff_t pos, int size)
175 } 175 }
176} 176}
177 177
178/* Print the contents of a node. */
179static void
180jffs_print_node(struct jffs_node *n)
181{
182 D(printk("jffs_node: 0x%p\n", n));
183 D(printk("{\n"));
184 D(printk(" 0x%08x, /* version */\n", n->version));
185 D(printk(" 0x%08x, /* data_offset */\n", n->data_offset));
186 D(printk(" 0x%08x, /* data_size */\n", n->data_size));
187 D(printk(" 0x%08x, /* removed_size */\n", n->removed_size));
188 D(printk(" 0x%08x, /* fm_offset */\n", n->fm_offset));
189 D(printk(" 0x%02x, /* name_size */\n", n->name_size));
190 D(printk(" 0x%p, /* fm, fm->offset: %u */\n",
191 n->fm, (n->fm ? n->fm->offset : 0)));
192 D(printk(" 0x%p, /* version_prev */\n", n->version_prev));
193 D(printk(" 0x%p, /* version_next */\n", n->version_next));
194 D(printk(" 0x%p, /* range_prev */\n", n->range_prev));
195 D(printk(" 0x%p, /* range_next */\n", n->range_next));
196 D(printk("}\n"));
197}
198
178#endif 199#endif
179 200
201/* Print the contents of a raw inode. */
202static void
203jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
204{
205 D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
206 D(printk("{\n"));
207 D(printk(" 0x%08x, /* magic */\n", raw_inode->magic));
208 D(printk(" 0x%08x, /* ino */\n", raw_inode->ino));
209 D(printk(" 0x%08x, /* pino */\n", raw_inode->pino));
210 D(printk(" 0x%08x, /* version */\n", raw_inode->version));
211 D(printk(" 0x%08x, /* mode */\n", raw_inode->mode));
212 D(printk(" 0x%04x, /* uid */\n", raw_inode->uid));
213 D(printk(" 0x%04x, /* gid */\n", raw_inode->gid));
214 D(printk(" 0x%08x, /* atime */\n", raw_inode->atime));
215 D(printk(" 0x%08x, /* mtime */\n", raw_inode->mtime));
216 D(printk(" 0x%08x, /* ctime */\n", raw_inode->ctime));
217 D(printk(" 0x%08x, /* offset */\n", raw_inode->offset));
218 D(printk(" 0x%08x, /* dsize */\n", raw_inode->dsize));
219 D(printk(" 0x%08x, /* rsize */\n", raw_inode->rsize));
220 D(printk(" 0x%02x, /* nsize */\n", raw_inode->nsize));
221 D(printk(" 0x%02x, /* nlink */\n", raw_inode->nlink));
222 D(printk(" 0x%02x, /* spare */\n",
223 raw_inode->spare));
224 D(printk(" %u, /* rename */\n",
225 raw_inode->rename));
226 D(printk(" %u, /* deleted */\n",
227 raw_inode->deleted));
228 D(printk(" 0x%02x, /* accurate */\n",
229 raw_inode->accurate));
230 D(printk(" 0x%08x, /* dchksum */\n", raw_inode->dchksum));
231 D(printk(" 0x%04x, /* nchksum */\n", raw_inode->nchksum));
232 D(printk(" 0x%04x, /* chksum */\n", raw_inode->chksum));
233 D(printk("}\n"));
234}
235
180#define flash_safe_acquire(arg) 236#define flash_safe_acquire(arg)
181#define flash_safe_release(arg) 237#define flash_safe_release(arg)
182 238
@@ -2507,64 +2563,6 @@ jffs_update_file(struct jffs_file *f, struct jffs_node *node)
2507 return 0; 2563 return 0;
2508} 2564}
2509 2565
2510/* Print the contents of a node. */
2511void
2512jffs_print_node(struct jffs_node *n)
2513{
2514 D(printk("jffs_node: 0x%p\n", n));
2515 D(printk("{\n"));
2516 D(printk(" 0x%08x, /* version */\n", n->version));
2517 D(printk(" 0x%08x, /* data_offset */\n", n->data_offset));
2518 D(printk(" 0x%08x, /* data_size */\n", n->data_size));
2519 D(printk(" 0x%08x, /* removed_size */\n", n->removed_size));
2520 D(printk(" 0x%08x, /* fm_offset */\n", n->fm_offset));
2521 D(printk(" 0x%02x, /* name_size */\n", n->name_size));
2522 D(printk(" 0x%p, /* fm, fm->offset: %u */\n",
2523 n->fm, (n->fm ? n->fm->offset : 0)));
2524 D(printk(" 0x%p, /* version_prev */\n", n->version_prev));
2525 D(printk(" 0x%p, /* version_next */\n", n->version_next));
2526 D(printk(" 0x%p, /* range_prev */\n", n->range_prev));
2527 D(printk(" 0x%p, /* range_next */\n", n->range_next));
2528 D(printk("}\n"));
2529}
2530
2531
2532/* Print the contents of a raw inode. */
2533void
2534jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
2535{
2536 D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
2537 D(printk("{\n"));
2538 D(printk(" 0x%08x, /* magic */\n", raw_inode->magic));
2539 D(printk(" 0x%08x, /* ino */\n", raw_inode->ino));
2540 D(printk(" 0x%08x, /* pino */\n", raw_inode->pino));
2541 D(printk(" 0x%08x, /* version */\n", raw_inode->version));
2542 D(printk(" 0x%08x, /* mode */\n", raw_inode->mode));
2543 D(printk(" 0x%04x, /* uid */\n", raw_inode->uid));
2544 D(printk(" 0x%04x, /* gid */\n", raw_inode->gid));
2545 D(printk(" 0x%08x, /* atime */\n", raw_inode->atime));
2546 D(printk(" 0x%08x, /* mtime */\n", raw_inode->mtime));
2547 D(printk(" 0x%08x, /* ctime */\n", raw_inode->ctime));
2548 D(printk(" 0x%08x, /* offset */\n", raw_inode->offset));
2549 D(printk(" 0x%08x, /* dsize */\n", raw_inode->dsize));
2550 D(printk(" 0x%08x, /* rsize */\n", raw_inode->rsize));
2551 D(printk(" 0x%02x, /* nsize */\n", raw_inode->nsize));
2552 D(printk(" 0x%02x, /* nlink */\n", raw_inode->nlink));
2553 D(printk(" 0x%02x, /* spare */\n",
2554 raw_inode->spare));
2555 D(printk(" %u, /* rename */\n",
2556 raw_inode->rename));
2557 D(printk(" %u, /* deleted */\n",
2558 raw_inode->deleted));
2559 D(printk(" 0x%02x, /* accurate */\n",
2560 raw_inode->accurate));
2561 D(printk(" 0x%08x, /* dchksum */\n", raw_inode->dchksum));
2562 D(printk(" 0x%04x, /* nchksum */\n", raw_inode->nchksum));
2563 D(printk(" 0x%04x, /* chksum */\n", raw_inode->chksum));
2564 D(printk("}\n"));
2565}
2566
2567
2568/* Print the contents of a file. */ 2566/* Print the contents of a file. */
2569#if 0 2567#if 0
2570int 2568int
@@ -3399,6 +3397,9 @@ jffs_garbage_collect_thread(void *ptr)
3399 siginfo_t info; 3397 siginfo_t info;
3400 unsigned long signr = 0; 3398 unsigned long signr = 0;
3401 3399
3400 if (try_to_freeze())
3401 continue;
3402
3402 spin_lock_irq(&current->sighand->siglock); 3403 spin_lock_irq(&current->sighand->siglock);
3403 signr = dequeue_signal(current, &current->blocked, &info); 3404 signr = dequeue_signal(current, &current->blocked, &info);
3404 spin_unlock_irq(&current->sighand->siglock); 3405 spin_unlock_irq(&current->sighand->siglock);
diff --git a/fs/jffs/intrep.h b/fs/jffs/intrep.h
index 4ae97b17911c..5c7abe0e2695 100644
--- a/fs/jffs/intrep.h
+++ b/fs/jffs/intrep.h
@@ -49,8 +49,6 @@ int jffs_garbage_collect_thread(void *c);
49void jffs_garbage_collect_trigger(struct jffs_control *c); 49void jffs_garbage_collect_trigger(struct jffs_control *c);
50 50
51/* For debugging purposes. */ 51/* For debugging purposes. */
52void jffs_print_node(struct jffs_node *n);
53void jffs_print_raw_inode(struct jffs_raw_inode *raw_inode);
54#if 0 52#if 0
55int jffs_print_file(struct jffs_file *f); 53int jffs_print_file(struct jffs_file *f);
56#endif /* 0 */ 54#endif /* 0 */
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 0cab8da49d3c..053e3a98a276 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -31,6 +31,60 @@ static void jffs_free_fm(struct jffs_fm *n);
31extern kmem_cache_t *fm_cache; 31extern kmem_cache_t *fm_cache;
32extern kmem_cache_t *node_cache; 32extern kmem_cache_t *node_cache;
33 33
34#if CONFIG_JFFS_FS_VERBOSE > 0
35void
36jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
37{
38 D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
39 D(printk("{\n"));
40 D(printk(" %u, /* flash_size */\n", fmc->flash_size));
41 D(printk(" %u, /* used_size */\n", fmc->used_size));
42 D(printk(" %u, /* dirty_size */\n", fmc->dirty_size));
43 D(printk(" %u, /* free_size */\n", fmc->free_size));
44 D(printk(" %u, /* sector_size */\n", fmc->sector_size));
45 D(printk(" %u, /* min_free_size */\n", fmc->min_free_size));
46 D(printk(" %u, /* max_chunk_size */\n", fmc->max_chunk_size));
47 D(printk(" 0x%p, /* mtd */\n", fmc->mtd));
48 D(printk(" 0x%p, /* head */ "
49 "(head->offset = 0x%08x)\n",
50 fmc->head, (fmc->head ? fmc->head->offset : 0)));
51 D(printk(" 0x%p, /* tail */ "
52 "(tail->offset + tail->size = 0x%08x)\n",
53 fmc->tail,
54 (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
55 D(printk(" 0x%p, /* head_extra */\n", fmc->head_extra));
56 D(printk(" 0x%p, /* tail_extra */\n", fmc->tail_extra));
57 D(printk("}\n"));
58}
59#endif /* CONFIG_JFFS_FS_VERBOSE > 0 */
60
61#if CONFIG_JFFS_FS_VERBOSE > 2
62static void
63jffs_print_fm(struct jffs_fm *fm)
64{
65 D(printk("struct jffs_fm: 0x%p\n", fm));
66 D(printk("{\n"));
67 D(printk(" 0x%08x, /* offset */\n", fm->offset));
68 D(printk(" %u, /* size */\n", fm->size));
69 D(printk(" 0x%p, /* prev */\n", fm->prev));
70 D(printk(" 0x%p, /* next */\n", fm->next));
71 D(printk(" 0x%p, /* nodes */\n", fm->nodes));
72 D(printk("}\n"));
73}
74#endif /* CONFIG_JFFS_FS_VERBOSE > 2 */
75
76#if 0
77void
78jffs_print_node_ref(struct jffs_node_ref *ref)
79{
80 D(printk("struct jffs_node_ref: 0x%p\n", ref));
81 D(printk("{\n"));
82 D(printk(" 0x%p, /* node */\n", ref->node));
83 D(printk(" 0x%p, /* next */\n", ref->next));
84 D(printk("}\n"));
85}
86#endif /* 0 */
87
34/* This function creates a new shiny flash memory control structure. */ 88/* This function creates a new shiny flash memory control structure. */
35struct jffs_fmcontrol * 89struct jffs_fmcontrol *
36jffs_build_begin(struct jffs_control *c, int unit) 90jffs_build_begin(struct jffs_control *c, int unit)
@@ -742,54 +796,3 @@ int jffs_get_node_inuse(void)
742{ 796{
743 return no_jffs_node; 797 return no_jffs_node;
744} 798}
745
746void
747jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
748{
749 D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
750 D(printk("{\n"));
751 D(printk(" %u, /* flash_size */\n", fmc->flash_size));
752 D(printk(" %u, /* used_size */\n", fmc->used_size));
753 D(printk(" %u, /* dirty_size */\n", fmc->dirty_size));
754 D(printk(" %u, /* free_size */\n", fmc->free_size));
755 D(printk(" %u, /* sector_size */\n", fmc->sector_size));
756 D(printk(" %u, /* min_free_size */\n", fmc->min_free_size));
757 D(printk(" %u, /* max_chunk_size */\n", fmc->max_chunk_size));
758 D(printk(" 0x%p, /* mtd */\n", fmc->mtd));
759 D(printk(" 0x%p, /* head */ "
760 "(head->offset = 0x%08x)\n",
761 fmc->head, (fmc->head ? fmc->head->offset : 0)));
762 D(printk(" 0x%p, /* tail */ "
763 "(tail->offset + tail->size = 0x%08x)\n",
764 fmc->tail,
765 (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
766 D(printk(" 0x%p, /* head_extra */\n", fmc->head_extra));
767 D(printk(" 0x%p, /* tail_extra */\n", fmc->tail_extra));
768 D(printk("}\n"));
769}
770
771void
772jffs_print_fm(struct jffs_fm *fm)
773{
774 D(printk("struct jffs_fm: 0x%p\n", fm));
775 D(printk("{\n"));
776 D(printk(" 0x%08x, /* offset */\n", fm->offset));
777 D(printk(" %u, /* size */\n", fm->size));
778 D(printk(" 0x%p, /* prev */\n", fm->prev));
779 D(printk(" 0x%p, /* next */\n", fm->next));
780 D(printk(" 0x%p, /* nodes */\n", fm->nodes));
781 D(printk("}\n"));
782}
783
784#if 0
785void
786jffs_print_node_ref(struct jffs_node_ref *ref)
787{
788 D(printk("struct jffs_node_ref: 0x%p\n", ref));
789 D(printk("{\n"));
790 D(printk(" 0x%p, /* node */\n", ref->node));
791 D(printk(" 0x%p, /* next */\n", ref->next));
792 D(printk("}\n"));
793}
794#endif /* 0 */
795
diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h
index bc291c431822..f64151e74122 100644
--- a/fs/jffs/jffs_fm.h
+++ b/fs/jffs/jffs_fm.h
@@ -139,8 +139,9 @@ int jffs_add_node(struct jffs_node *node);
139void jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm, 139void jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm,
140 __u32 size); 140 __u32 size);
141 141
142#if CONFIG_JFFS_FS_VERBOSE > 0
142void jffs_print_fmcontrol(struct jffs_fmcontrol *fmc); 143void jffs_print_fmcontrol(struct jffs_fmcontrol *fmc);
143void jffs_print_fm(struct jffs_fm *fm); 144#endif
144#if 0 145#if 0
145void jffs_print_node_ref(struct jffs_node_ref *ref); 146void jffs_print_node_ref(struct jffs_node_ref *ref);
146#endif /* 0 */ 147#endif /* 0 */
diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile
index e3c38ccf9c7d..f1afe681ecd6 100644
--- a/fs/jffs2/Makefile
+++ b/fs/jffs2/Makefile
@@ -1,7 +1,7 @@
1# 1#
2# Makefile for the Linux Journalling Flash File System v2 (JFFS2) 2# Makefile for the Linux Journalling Flash File System v2 (JFFS2)
3# 3#
4# $Id: Makefile.common,v 1.7 2004/11/03 12:57:38 jwboyer Exp $ 4# $Id: Makefile.common,v 1.9 2005/02/09 09:23:53 pavlov Exp $
5# 5#
6 6
7obj-$(CONFIG_JFFS2_FS) += jffs2.o 7obj-$(CONFIG_JFFS2_FS) += jffs2.o
@@ -11,8 +11,7 @@ jffs2-y += read.o nodemgmt.o readinode.o write.o scan.o gc.o
11jffs2-y += symlink.o build.o erase.o background.o fs.o writev.o 11jffs2-y += symlink.o build.o erase.o background.o fs.o writev.o
12jffs2-y += super.o 12jffs2-y += super.o
13 13
14jffs2-$(CONFIG_JFFS2_FS_NAND) += wbuf.o 14jffs2-$(CONFIG_JFFS2_FS_WRITEBUFFER) += wbuf.o
15jffs2-$(CONFIG_JFFS2_FS_NOR_ECC) += wbuf.o
16jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o 15jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o
17jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o 16jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o
18jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o 17jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index 49771cf8513a..b7943439b6ec 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -1,4 +1,4 @@
1 $Id: README.Locking,v 1.9 2004/11/20 10:35:40 dwmw2 Exp $ 1 $Id: README.Locking,v 1.12 2005/04/13 13:22:35 dwmw2 Exp $
2 2
3 JFFS2 LOCKING DOCUMENTATION 3 JFFS2 LOCKING DOCUMENTATION
4 --------------------------- 4 ---------------------------
@@ -108,6 +108,10 @@ in-core jffs2_inode_cache objects (each inode in JFFS2 has the
108correspondent jffs2_inode_cache object). So, the inocache_lock 108correspondent jffs2_inode_cache object). So, the inocache_lock
109has to be locked while walking the c->inocache_list hash buckets. 109has to be locked while walking the c->inocache_list hash buckets.
110 110
111This spinlock also covers allocation of new inode numbers, which is
112currently just '++->highest_ino++', but might one day get more complicated
113if we need to deal with wrapping after 4 milliard inode numbers are used.
114
111Note, the f->sem guarantees that the correspondent jffs2_inode_cache 115Note, the f->sem guarantees that the correspondent jffs2_inode_cache
112will not be removed. So, it is allowed to access it without locking 116will not be removed. So, it is allowed to access it without locking
113the inocache_lock spinlock. 117the inocache_lock spinlock.
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 1be6de27dd81..0f224384f176 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: background.c,v 1.50 2004/11/16 20:36:10 dwmw2 Exp $ 10 * $Id: background.c,v 1.54 2005/05/20 21:37:12 gleixner Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -37,7 +37,7 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
37 if (c->gc_task) 37 if (c->gc_task)
38 BUG(); 38 BUG();
39 39
40 init_MUTEX_LOCKED(&c->gc_thread_start); 40 init_completion(&c->gc_thread_start);
41 init_completion(&c->gc_thread_exit); 41 init_completion(&c->gc_thread_exit);
42 42
43 pid = kernel_thread(jffs2_garbage_collect_thread, c, CLONE_FS|CLONE_FILES); 43 pid = kernel_thread(jffs2_garbage_collect_thread, c, CLONE_FS|CLONE_FILES);
@@ -48,7 +48,7 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
48 } else { 48 } else {
49 /* Wait for it... */ 49 /* Wait for it... */
50 D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", pid)); 50 D1(printk(KERN_DEBUG "JFFS2: Garbage collect thread is pid %d\n", pid));
51 down(&c->gc_thread_start); 51 wait_for_completion(&c->gc_thread_start);
52 } 52 }
53 53
54 return ret; 54 return ret;
@@ -56,13 +56,16 @@ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c)
56 56
57void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c) 57void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c)
58{ 58{
59 int wait = 0;
59 spin_lock(&c->erase_completion_lock); 60 spin_lock(&c->erase_completion_lock);
60 if (c->gc_task) { 61 if (c->gc_task) {
61 D1(printk(KERN_DEBUG "jffs2: Killing GC task %d\n", c->gc_task->pid)); 62 D1(printk(KERN_DEBUG "jffs2: Killing GC task %d\n", c->gc_task->pid));
62 send_sig(SIGKILL, c->gc_task, 1); 63 send_sig(SIGKILL, c->gc_task, 1);
64 wait = 1;
63 } 65 }
64 spin_unlock(&c->erase_completion_lock); 66 spin_unlock(&c->erase_completion_lock);
65 wait_for_completion(&c->gc_thread_exit); 67 if (wait)
68 wait_for_completion(&c->gc_thread_exit);
66} 69}
67 70
68static int jffs2_garbage_collect_thread(void *_c) 71static int jffs2_garbage_collect_thread(void *_c)
@@ -75,7 +78,7 @@ static int jffs2_garbage_collect_thread(void *_c)
75 allow_signal(SIGCONT); 78 allow_signal(SIGCONT);
76 79
77 c->gc_task = current; 80 c->gc_task = current;
78 up(&c->gc_thread_start); 81 complete(&c->gc_thread_start);
79 82
80 set_user_nice(current, 10); 83 set_user_nice(current, 10);
81 84
@@ -92,7 +95,7 @@ static int jffs2_garbage_collect_thread(void *_c)
92 schedule(); 95 schedule();
93 } 96 }
94 97
95 if (try_to_freeze(0)) 98 if (try_to_freeze())
96 continue; 99 continue;
97 100
98 cond_resched(); 101 cond_resched();
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a01dd5fdbb95..97dc39796e2c 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: build.c,v 1.69 2004/12/16 20:22:18 dmarlin Exp $ 10 * $Id: build.c,v 1.71 2005/07/12 16:37:08 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -97,14 +97,16 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
97 /* First, scan the medium and build all the inode caches with 97 /* First, scan the medium and build all the inode caches with
98 lists of physical nodes */ 98 lists of physical nodes */
99 99
100 c->flags |= JFFS2_SB_FLAG_MOUNTING; 100 c->flags |= JFFS2_SB_FLAG_SCANNING;
101 ret = jffs2_scan_medium(c); 101 ret = jffs2_scan_medium(c);
102 c->flags &= ~JFFS2_SB_FLAG_SCANNING;
102 if (ret) 103 if (ret)
103 goto exit; 104 goto exit;
104 105
105 D1(printk(KERN_DEBUG "Scanned flash completely\n")); 106 D1(printk(KERN_DEBUG "Scanned flash completely\n"));
106 D2(jffs2_dump_block_lists(c)); 107 D2(jffs2_dump_block_lists(c));
107 108
109 c->flags |= JFFS2_SB_FLAG_BUILDING;
108 /* Now scan the directory tree, increasing nlink according to every dirent found. */ 110 /* Now scan the directory tree, increasing nlink according to every dirent found. */
109 for_each_inode(i, c, ic) { 111 for_each_inode(i, c, ic) {
110 D1(printk(KERN_DEBUG "Pass 1: ino #%u\n", ic->ino)); 112 D1(printk(KERN_DEBUG "Pass 1: ino #%u\n", ic->ino));
@@ -116,7 +118,6 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
116 cond_resched(); 118 cond_resched();
117 } 119 }
118 } 120 }
119 c->flags &= ~JFFS2_SB_FLAG_MOUNTING;
120 121
121 D1(printk(KERN_DEBUG "Pass 1 complete\n")); 122 D1(printk(KERN_DEBUG "Pass 1 complete\n"));
122 123
@@ -164,6 +165,8 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
164 ic->scan_dents = NULL; 165 ic->scan_dents = NULL;
165 cond_resched(); 166 cond_resched();
166 } 167 }
168 c->flags &= ~JFFS2_SB_FLAG_BUILDING;
169
167 D1(printk(KERN_DEBUG "Pass 3 complete\n")); 170 D1(printk(KERN_DEBUG "Pass 3 complete\n"));
168 D2(jffs2_dump_block_lists(c)); 171 D2(jffs2_dump_block_lists(c));
169 172
@@ -333,13 +336,6 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
333 c->blocks[i].bad_count = 0; 336 c->blocks[i].bad_count = 0;
334 } 337 }
335 338
336 init_MUTEX(&c->alloc_sem);
337 init_MUTEX(&c->erase_free_sem);
338 init_waitqueue_head(&c->erase_wait);
339 init_waitqueue_head(&c->inocache_wq);
340 spin_lock_init(&c->erase_completion_lock);
341 spin_lock_init(&c->inocache_lock);
342
343 INIT_LIST_HEAD(&c->clean_list); 339 INIT_LIST_HEAD(&c->clean_list);
344 INIT_LIST_HEAD(&c->very_dirty_list); 340 INIT_LIST_HEAD(&c->very_dirty_list);
345 INIT_LIST_HEAD(&c->dirty_list); 341 INIT_LIST_HEAD(&c->dirty_list);
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 078a30e406b5..83f7e0788fd0 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: compr_zlib.c,v 1.29 2004/11/16 20:36:11 dwmw2 Exp $ 10 * $Id: compr_zlib.c,v 1.31 2005/05/20 19:30:06 gleixner Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -17,10 +17,10 @@
17 17
18#include <linux/config.h> 18#include <linux/config.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/sched.h>
20#include <linux/slab.h> 21#include <linux/slab.h>
21#include <linux/zlib.h> 22#include <linux/zlib.h>
22#include <linux/zutil.h> 23#include <linux/zutil.h>
23#include <asm/semaphore.h>
24#include "nodelist.h" 24#include "nodelist.h"
25#include "compr.h" 25#include "compr.h"
26 26
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 757306fa3ff4..3ca0d25eef1d 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: dir.c,v 1.84 2004/11/16 20:36:11 dwmw2 Exp $ 10 * $Id: dir.c,v 1.86 2005/07/06 12:13:09 dwmw2 Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -22,16 +22,6 @@
22#include <linux/time.h> 22#include <linux/time.h>
23#include "nodelist.h" 23#include "nodelist.h"
24 24
25/* Urgh. Please tell me there's a nicer way of doing these. */
26#include <linux/version.h>
27#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,48)
28typedef int mknod_arg_t;
29#define NAMEI_COMPAT(x) ((void *)x)
30#else
31typedef dev_t mknod_arg_t;
32#define NAMEI_COMPAT(x) (x)
33#endif
34
35static int jffs2_readdir (struct file *, void *, filldir_t); 25static int jffs2_readdir (struct file *, void *, filldir_t);
36 26
37static int jffs2_create (struct inode *,struct dentry *,int, 27static int jffs2_create (struct inode *,struct dentry *,int,
@@ -43,7 +33,7 @@ static int jffs2_unlink (struct inode *,struct dentry *);
43static int jffs2_symlink (struct inode *,struct dentry *,const char *); 33static int jffs2_symlink (struct inode *,struct dentry *,const char *);
44static int jffs2_mkdir (struct inode *,struct dentry *,int); 34static int jffs2_mkdir (struct inode *,struct dentry *,int);
45static int jffs2_rmdir (struct inode *,struct dentry *); 35static int jffs2_rmdir (struct inode *,struct dentry *);
46static int jffs2_mknod (struct inode *,struct dentry *,int,mknod_arg_t); 36static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
47static int jffs2_rename (struct inode *, struct dentry *, 37static int jffs2_rename (struct inode *, struct dentry *,
48 struct inode *, struct dentry *); 38 struct inode *, struct dentry *);
49 39
@@ -58,8 +48,8 @@ struct file_operations jffs2_dir_operations =
58 48
59struct inode_operations jffs2_dir_inode_operations = 49struct inode_operations jffs2_dir_inode_operations =
60{ 50{
61 .create = NAMEI_COMPAT(jffs2_create), 51 .create = jffs2_create,
62 .lookup = NAMEI_COMPAT(jffs2_lookup), 52 .lookup = jffs2_lookup,
63 .link = jffs2_link, 53 .link = jffs2_link,
64 .unlink = jffs2_unlink, 54 .unlink = jffs2_unlink,
65 .symlink = jffs2_symlink, 55 .symlink = jffs2_symlink,
@@ -296,11 +286,11 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
296 struct jffs2_full_dirent *fd; 286 struct jffs2_full_dirent *fd;
297 int namelen; 287 int namelen;
298 uint32_t alloclen, phys_ofs; 288 uint32_t alloclen, phys_ofs;
299 int ret; 289 int ret, targetlen = strlen(target);
300 290
301 /* FIXME: If you care. We'd need to use frags for the target 291 /* FIXME: If you care. We'd need to use frags for the target
302 if it grows much more than this */ 292 if it grows much more than this */
303 if (strlen(target) > 254) 293 if (targetlen > 254)
304 return -EINVAL; 294 return -EINVAL;
305 295
306 ri = jffs2_alloc_raw_inode(); 296 ri = jffs2_alloc_raw_inode();
@@ -314,7 +304,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
314 * Just the node will do for now, though 304 * Just the node will do for now, though
315 */ 305 */
316 namelen = dentry->d_name.len; 306 namelen = dentry->d_name.len;
317 ret = jffs2_reserve_space(c, sizeof(*ri) + strlen(target), &phys_ofs, &alloclen, ALLOC_NORMAL); 307 ret = jffs2_reserve_space(c, sizeof(*ri) + targetlen, &phys_ofs, &alloclen, ALLOC_NORMAL);
318 308
319 if (ret) { 309 if (ret) {
320 jffs2_free_raw_inode(ri); 310 jffs2_free_raw_inode(ri);
@@ -333,16 +323,16 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
333 323
334 f = JFFS2_INODE_INFO(inode); 324 f = JFFS2_INODE_INFO(inode);
335 325
336 inode->i_size = strlen(target); 326 inode->i_size = targetlen;
337 ri->isize = ri->dsize = ri->csize = cpu_to_je32(inode->i_size); 327 ri->isize = ri->dsize = ri->csize = cpu_to_je32(inode->i_size);
338 ri->totlen = cpu_to_je32(sizeof(*ri) + inode->i_size); 328 ri->totlen = cpu_to_je32(sizeof(*ri) + inode->i_size);
339 ri->hdr_crc = cpu_to_je32(crc32(0, ri, sizeof(struct jffs2_unknown_node)-4)); 329 ri->hdr_crc = cpu_to_je32(crc32(0, ri, sizeof(struct jffs2_unknown_node)-4));
340 330
341 ri->compr = JFFS2_COMPR_NONE; 331 ri->compr = JFFS2_COMPR_NONE;
342 ri->data_crc = cpu_to_je32(crc32(0, target, strlen(target))); 332 ri->data_crc = cpu_to_je32(crc32(0, target, targetlen));
343 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 333 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
344 334
345 fn = jffs2_write_dnode(c, f, ri, target, strlen(target), phys_ofs, ALLOC_NORMAL); 335 fn = jffs2_write_dnode(c, f, ri, target, targetlen, phys_ofs, ALLOC_NORMAL);
346 336
347 jffs2_free_raw_inode(ri); 337 jffs2_free_raw_inode(ri);
348 338
@@ -353,6 +343,20 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
353 jffs2_clear_inode(inode); 343 jffs2_clear_inode(inode);
354 return PTR_ERR(fn); 344 return PTR_ERR(fn);
355 } 345 }
346
347 /* We use f->dents field to store the target path. */
348 f->dents = kmalloc(targetlen + 1, GFP_KERNEL);
349 if (!f->dents) {
350 printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
351 up(&f->sem);
352 jffs2_complete_reservation(c);
353 jffs2_clear_inode(inode);
354 return -ENOMEM;
355 }
356
357 memcpy(f->dents, target, targetlen + 1);
358 D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->dents));
359
356 /* No data here. Only a metadata node, which will be 360 /* No data here. Only a metadata node, which will be
357 obsoleted by the first data write 361 obsoleted by the first data write
358 */ 362 */
@@ -564,7 +568,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
564 return ret; 568 return ret;
565} 569}
566 570
567static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, mknod_arg_t rdev) 571static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, dev_t rdev)
568{ 572{
569 struct jffs2_inode_info *f, *dir_f; 573 struct jffs2_inode_info *f, *dir_f;
570 struct jffs2_sb_info *c; 574 struct jffs2_sb_info *c;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 41451e8bf361..787d84ac2bcd 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: erase.c,v 1.66 2004/11/16 20:36:11 dwmw2 Exp $ 10 * $Id: erase.c,v 1.80 2005/07/14 19:46:24 joern Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -48,6 +48,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
48#else /* Linux */ 48#else /* Linux */
49 struct erase_info *instr; 49 struct erase_info *instr;
50 50
51 D1(printk(KERN_DEBUG "jffs2_erase_block(): erase block %#x (range %#x-%#x)\n", jeb->offset, jeb->offset, jeb->offset + c->sector_size));
51 instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL); 52 instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL);
52 if (!instr) { 53 if (!instr) {
53 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 54 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
@@ -233,7 +234,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
233 continue; 234 continue;
234 } 235 }
235 236
236 if (((*prev)->flash_offset & ~(c->sector_size -1)) == jeb->offset) { 237 if (SECTOR_ADDR((*prev)->flash_offset) == jeb->offset) {
237 /* It's in the block we're erasing */ 238 /* It's in the block we're erasing */
238 struct jffs2_raw_node_ref *this; 239 struct jffs2_raw_node_ref *this;
239 240
@@ -277,11 +278,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
277 printk("\n"); 278 printk("\n");
278 }); 279 });
279 280
280 if (ic->nodes == (void *)ic) { 281 if (ic->nodes == (void *)ic && ic->nlink == 0)
281 D1(printk(KERN_DEBUG "inocache for ino #%u is all gone now. Freeing\n", ic->ino));
282 jffs2_del_ino_cache(c, ic); 282 jffs2_del_ino_cache(c, ic);
283 jffs2_free_inode_cache(ic);
284 }
285} 283}
286 284
287static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 285static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
@@ -302,92 +300,86 @@ static void jffs2_free_all_node_refs(struct jffs2_sb_info *c, struct jffs2_erase
302 jeb->last_node = NULL; 300 jeb->last_node = NULL;
303} 301}
304 302
305static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 303static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t *bad_offset)
306{ 304{
307 struct jffs2_raw_node_ref *marker_ref = NULL; 305 void *ebuf;
308 unsigned char *ebuf; 306 uint32_t ofs;
309 size_t retlen; 307 size_t retlen;
310 int ret; 308 int ret = -EIO;
311 uint32_t bad_offset; 309
312
313 if (!jffs2_cleanmarker_oob(c)) {
314 marker_ref = jffs2_alloc_raw_node_ref();
315 if (!marker_ref) {
316 printk(KERN_WARNING "Failed to allocate raw node ref for clean marker\n");
317 /* Stick it back on the list from whence it came and come back later */
318 jffs2_erase_pending_trigger(c);
319 spin_lock(&c->erase_completion_lock);
320 list_add(&jeb->list, &c->erase_complete_list);
321 spin_unlock(&c->erase_completion_lock);
322 return;
323 }
324 }
325 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL); 310 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
326 if (!ebuf) { 311 if (!ebuf) {
327 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Assuming it worked\n", jeb->offset); 312 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n", jeb->offset);
328 } else { 313 return -EAGAIN;
329 uint32_t ofs = jeb->offset; 314 }
330 315
331 D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset)); 316 D1(printk(KERN_DEBUG "Verifying erase at 0x%08x\n", jeb->offset));
332 while(ofs < jeb->offset + c->sector_size) {
333 uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
334 int i;
335 317
336 bad_offset = ofs; 318 for (ofs = jeb->offset; ofs < jeb->offset + c->sector_size; ) {
319 uint32_t readlen = min((uint32_t)PAGE_SIZE, jeb->offset + c->sector_size - ofs);
320 int i;
337 321
338 ret = jffs2_flash_read(c, ofs, readlen, &retlen, ebuf); 322 *bad_offset = ofs;
339 if (ret) { 323
340 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret); 324 ret = jffs2_flash_read(c, ofs, readlen, &retlen, ebuf);
341 goto bad; 325 if (ret) {
342 } 326 printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
343 if (retlen != readlen) { 327 goto fail;
344 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen); 328 }
345 goto bad; 329 if (retlen != readlen) {
346 } 330 printk(KERN_WARNING "Short read from newly-erased block at 0x%08x. Wanted %d, got %zd\n", ofs, readlen, retlen);
347 for (i=0; i<readlen; i += sizeof(unsigned long)) { 331 goto fail;
348 /* It's OK. We know it's properly aligned */ 332 }
349 unsigned long datum = *(unsigned long *)(&ebuf[i]); 333 for (i=0; i<readlen; i += sizeof(unsigned long)) {
350 if (datum + 1) { 334 /* It's OK. We know it's properly aligned */
351 bad_offset += i; 335 unsigned long *datum = ebuf + i;
352 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", datum, bad_offset); 336 if (*datum + 1) {
353 bad: 337 *bad_offset += i;
354 if (!jffs2_cleanmarker_oob(c)) 338 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08x\n", *datum, *bad_offset);
355 jffs2_free_raw_node_ref(marker_ref); 339 goto fail;
356 kfree(ebuf);
357 bad2:
358 spin_lock(&c->erase_completion_lock);
359 /* Stick it on a list (any list) so
360 erase_failed can take it right off
361 again. Silly, but shouldn't happen
362 often. */
363 list_add(&jeb->list, &c->erasing_list);
364 spin_unlock(&c->erase_completion_lock);
365 jffs2_erase_failed(c, jeb, bad_offset);
366 return;
367 }
368 } 340 }
369 ofs += readlen;
370 cond_resched();
371 } 341 }
372 kfree(ebuf); 342 ofs += readlen;
343 cond_resched();
373 } 344 }
345 ret = 0;
346fail:
347 kfree(ebuf);
348 return ret;
349}
374 350
375 bad_offset = jeb->offset; 351static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
352{
353 struct jffs2_raw_node_ref *marker_ref = NULL;
354 size_t retlen;
355 int ret;
356 uint32_t bad_offset;
357
358 switch (jffs2_block_check_erase(c, jeb, &bad_offset)) {
359 case -EAGAIN: goto refile;
360 case -EIO: goto filebad;
361 }
376 362
377 /* Write the erase complete marker */ 363 /* Write the erase complete marker */
378 D1(printk(KERN_DEBUG "Writing erased marker to block at 0x%08x\n", jeb->offset)); 364 D1(printk(KERN_DEBUG "Writing erased marker to block at 0x%08x\n", jeb->offset));
379 if (jffs2_cleanmarker_oob(c)) { 365 bad_offset = jeb->offset;
380 366
381 if (jffs2_write_nand_cleanmarker(c, jeb)) 367 /* Cleanmarker in oob area or no cleanmarker at all ? */
382 goto bad2; 368 if (jffs2_cleanmarker_oob(c) || c->cleanmarker_size == 0) {
383 369
384 jeb->first_node = jeb->last_node = NULL; 370 if (jffs2_cleanmarker_oob(c)) {
371 if (jffs2_write_nand_cleanmarker(c, jeb))
372 goto filebad;
373 }
385 374
375 jeb->first_node = jeb->last_node = NULL;
386 jeb->free_size = c->sector_size; 376 jeb->free_size = c->sector_size;
387 jeb->used_size = 0; 377 jeb->used_size = 0;
388 jeb->dirty_size = 0; 378 jeb->dirty_size = 0;
389 jeb->wasted_size = 0; 379 jeb->wasted_size = 0;
380
390 } else { 381 } else {
382
391 struct kvec vecs[1]; 383 struct kvec vecs[1];
392 struct jffs2_unknown_node marker = { 384 struct jffs2_unknown_node marker = {
393 .magic = cpu_to_je16(JFFS2_MAGIC_BITMASK), 385 .magic = cpu_to_je16(JFFS2_MAGIC_BITMASK),
@@ -395,21 +387,28 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
395 .totlen = cpu_to_je32(c->cleanmarker_size) 387 .totlen = cpu_to_je32(c->cleanmarker_size)
396 }; 388 };
397 389
390 marker_ref = jffs2_alloc_raw_node_ref();
391 if (!marker_ref) {
392 printk(KERN_WARNING "Failed to allocate raw node ref for clean marker. Refiling\n");
393 goto refile;
394 }
395
398 marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4)); 396 marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4));
399 397
400 vecs[0].iov_base = (unsigned char *) &marker; 398 vecs[0].iov_base = (unsigned char *) &marker;
401 vecs[0].iov_len = sizeof(marker); 399 vecs[0].iov_len = sizeof(marker);
402 ret = jffs2_flash_direct_writev(c, vecs, 1, jeb->offset, &retlen); 400 ret = jffs2_flash_direct_writev(c, vecs, 1, jeb->offset, &retlen);
403 401
404 if (ret) { 402 if (ret || retlen != sizeof(marker)) {
405 printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n", 403 if (ret)
406 jeb->offset, ret); 404 printk(KERN_WARNING "Write clean marker to block at 0x%08x failed: %d\n",
407 goto bad2; 405 jeb->offset, ret);
408 } 406 else
409 if (retlen != sizeof(marker)) { 407 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n",
410 printk(KERN_WARNING "Short write to newly-erased block at 0x%08x: Wanted %zd, got %zd\n", 408 jeb->offset, sizeof(marker), retlen);
411 jeb->offset, sizeof(marker), retlen); 409
412 goto bad2; 410 jffs2_free_raw_node_ref(marker_ref);
411 goto filebad;
413 } 412 }
414 413
415 marker_ref->next_in_ino = NULL; 414 marker_ref->next_in_ino = NULL;
@@ -438,5 +437,22 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
438 c->nr_free_blocks++; 437 c->nr_free_blocks++;
439 spin_unlock(&c->erase_completion_lock); 438 spin_unlock(&c->erase_completion_lock);
440 wake_up(&c->erase_wait); 439 wake_up(&c->erase_wait);
441} 440 return;
441
442filebad:
443 spin_lock(&c->erase_completion_lock);
444 /* Stick it on a list (any list) so erase_failed can take it
445 right off again. Silly, but shouldn't happen often. */
446 list_add(&jeb->list, &c->erasing_list);
447 spin_unlock(&c->erase_completion_lock);
448 jffs2_erase_failed(c, jeb, bad_offset);
449 return;
442 450
451refile:
452 /* Stick it back on the list from whence it came and come back later */
453 jffs2_erase_pending_trigger(c);
454 spin_lock(&c->erase_completion_lock);
455 list_add(&jeb->list, &c->erase_complete_list);
456 spin_unlock(&c->erase_completion_lock);
457 return;
458}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 771a554701d6..bd9ed9b0247b 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -7,11 +7,10 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: file.c,v 1.99 2004/11/16 20:36:11 dwmw2 Exp $ 10 * $Id: file.c,v 1.102 2005/07/06 12:13:09 dwmw2 Exp $
11 * 11 *
12 */ 12 */
13 13
14#include <linux/version.h>
15#include <linux/kernel.h> 14#include <linux/kernel.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include <linux/fs.h> 16#include <linux/fs.h>
@@ -51,9 +50,7 @@ struct file_operations jffs2_file_operations =
51 .ioctl = jffs2_ioctl, 50 .ioctl = jffs2_ioctl,
52 .mmap = generic_file_readonly_mmap, 51 .mmap = generic_file_readonly_mmap,
53 .fsync = jffs2_fsync, 52 .fsync = jffs2_fsync,
54#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,29)
55 .sendfile = generic_file_sendfile 53 .sendfile = generic_file_sendfile
56#endif
57}; 54};
58 55
59/* jffs2_file_inode_operations */ 56/* jffs2_file_inode_operations */
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 30ab233fe423..5687c3f42002 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -7,11 +7,10 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: fs.c,v 1.51 2004/11/28 12:19:37 dedekind Exp $ 10 * $Id: fs.c,v 1.56 2005/07/06 12:13:09 dwmw2 Exp $
11 * 11 *
12 */ 12 */
13 13
14#include <linux/version.h>
15#include <linux/config.h> 14#include <linux/config.h>
16#include <linux/kernel.h> 15#include <linux/kernel.h>
17#include <linux/sched.h> 16#include <linux/sched.h>
@@ -450,11 +449,15 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
450 449
451 c = JFFS2_SB_INFO(sb); 450 c = JFFS2_SB_INFO(sb);
452 451
453#ifndef CONFIG_JFFS2_FS_NAND 452#ifndef CONFIG_JFFS2_FS_WRITEBUFFER
454 if (c->mtd->type == MTD_NANDFLASH) { 453 if (c->mtd->type == MTD_NANDFLASH) {
455 printk(KERN_ERR "jffs2: Cannot operate on NAND flash unless jffs2 NAND support is compiled in.\n"); 454 printk(KERN_ERR "jffs2: Cannot operate on NAND flash unless jffs2 NAND support is compiled in.\n");
456 return -EINVAL; 455 return -EINVAL;
457 } 456 }
457 if (c->mtd->type == MTD_DATAFLASH) {
458 printk(KERN_ERR "jffs2: Cannot operate on DataFlash unless jffs2 DataFlash support is compiled in.\n");
459 return -EINVAL;
460 }
458#endif 461#endif
459 462
460 c->flash_size = c->mtd->size; 463 c->flash_size = c->mtd->size;
@@ -522,9 +525,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
522 if (!sb->s_root) 525 if (!sb->s_root)
523 goto out_root_i; 526 goto out_root_i;
524 527
525#if LINUX_VERSION_CODE >= 0x20403
526 sb->s_maxbytes = 0xFFFFFFFF; 528 sb->s_maxbytes = 0xFFFFFFFF;
527#endif
528 sb->s_blocksize = PAGE_CACHE_SIZE; 529 sb->s_blocksize = PAGE_CACHE_SIZE;
529 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 530 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
530 sb->s_magic = JFFS2_SUPER_MAGIC; 531 sb->s_magic = JFFS2_SUPER_MAGIC;
@@ -661,6 +662,14 @@ static int jffs2_flash_setup(struct jffs2_sb_info *c) {
661 if (ret) 662 if (ret)
662 return ret; 663 return ret;
663 } 664 }
665
666 /* and Dataflash */
667 if (jffs2_dataflash(c)) {
668 ret = jffs2_dataflash_setup(c);
669 if (ret)
670 return ret;
671 }
672
664 return ret; 673 return ret;
665} 674}
666 675
@@ -674,4 +683,9 @@ void jffs2_flash_cleanup(struct jffs2_sb_info *c) {
674 if (jffs2_nor_ecc(c)) { 683 if (jffs2_nor_ecc(c)) {
675 jffs2_nor_ecc_flash_cleanup(c); 684 jffs2_nor_ecc_flash_cleanup(c);
676 } 685 }
686
687 /* and DataFlash */
688 if (jffs2_dataflash(c)) {
689 jffs2_dataflash_cleanup(c);
690 }
677} 691}
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 87ec74ff5930..7086cd634503 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: gc.c,v 1.144 2004/12/21 11:18:50 dwmw2 Exp $ 10 * $Id: gc.c,v 1.148 2005/04/09 10:47:00 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -50,6 +50,7 @@ static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
50 put the clever wear-levelling algorithms. Eventually. */ 50 put the clever wear-levelling algorithms. Eventually. */
51 /* We possibly want to favour the dirtier blocks more when the 51 /* We possibly want to favour the dirtier blocks more when the
52 number of free blocks is low. */ 52 number of free blocks is low. */
53again:
53 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) { 54 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
54 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n")); 55 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
55 nextlist = &c->bad_used_list; 56 nextlist = &c->bad_used_list;
@@ -79,6 +80,13 @@ static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
79 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n")); 80 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
80 81
81 nextlist = &c->erasable_list; 82 nextlist = &c->erasable_list;
83 } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 /* There are blocks are wating for the wbuf sync */
85 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86 spin_unlock(&c->erase_completion_lock);
87 jffs2_flush_wbuf_pad(c);
88 spin_lock(&c->erase_completion_lock);
89 goto again;
82 } else { 90 } else {
83 /* Eep. All were empty */ 91 /* Eep. All were empty */
84 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n")); 92 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
@@ -661,9 +669,10 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
661{ 669{
662 struct jffs2_full_dnode *new_fn; 670 struct jffs2_full_dnode *new_fn;
663 struct jffs2_raw_inode ri; 671 struct jffs2_raw_inode ri;
672 struct jffs2_node_frag *last_frag;
664 jint16_t dev; 673 jint16_t dev;
665 char *mdata = NULL, mdatalen = 0; 674 char *mdata = NULL, mdatalen = 0;
666 uint32_t alloclen, phys_ofs; 675 uint32_t alloclen, phys_ofs, ilen;
667 int ret; 676 int ret;
668 677
669 if (S_ISBLK(JFFS2_F_I_MODE(f)) || 678 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
@@ -699,6 +708,14 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
699 goto out; 708 goto out;
700 } 709 }
701 710
711 last_frag = frag_last(&f->fragtree);
712 if (last_frag)
713 /* Fetch the inode length from the fragtree rather then
714 * from i_size since i_size may have not been updated yet */
715 ilen = last_frag->ofs + last_frag->size;
716 else
717 ilen = JFFS2_F_I_SIZE(f);
718
702 memset(&ri, 0, sizeof(ri)); 719 memset(&ri, 0, sizeof(ri));
703 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 720 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
704 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE); 721 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
@@ -710,7 +727,7 @@ static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_
710 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f)); 727 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
711 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f)); 728 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
712 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f)); 729 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
713 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f)); 730 ri.isize = cpu_to_je32(ilen);
714 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f)); 731 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
715 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f)); 732 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
716 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f)); 733 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
@@ -816,8 +833,7 @@ static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct
816 833
817 /* Doesn't matter if there's one in the same erase block. We're going to 834 /* Doesn't matter if there's one in the same erase block. We're going to
818 delete it too at the same time. */ 835 delete it too at the same time. */
819 if ((raw->flash_offset & ~(c->sector_size-1)) == 836 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
820 (fd->raw->flash_offset & ~(c->sector_size-1)))
821 continue; 837 continue;
822 838
823 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw))); 839 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
@@ -891,7 +907,7 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
891 struct jffs2_raw_inode ri; 907 struct jffs2_raw_inode ri;
892 struct jffs2_node_frag *frag; 908 struct jffs2_node_frag *frag;
893 struct jffs2_full_dnode *new_fn; 909 struct jffs2_full_dnode *new_fn;
894 uint32_t alloclen, phys_ofs; 910 uint32_t alloclen, phys_ofs, ilen;
895 int ret; 911 int ret;
896 912
897 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n", 913 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
@@ -951,10 +967,19 @@ static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eras
951 ri.csize = cpu_to_je32(0); 967 ri.csize = cpu_to_je32(0);
952 ri.compr = JFFS2_COMPR_ZERO; 968 ri.compr = JFFS2_COMPR_ZERO;
953 } 969 }
970
971 frag = frag_last(&f->fragtree);
972 if (frag)
973 /* Fetch the inode length from the fragtree rather then
974 * from i_size since i_size may have not been updated yet */
975 ilen = frag->ofs + frag->size;
976 else
977 ilen = JFFS2_F_I_SIZE(f);
978
954 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f)); 979 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
955 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f)); 980 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
956 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f)); 981 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
957 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f)); 982 ri.isize = cpu_to_je32(ilen);
958 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f)); 983 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
959 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f)); 984 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
960 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f)); 985 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
@@ -1161,7 +1186,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1161 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n", 1186 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1162 orig_start, orig_end, start, end)); 1187 orig_start, orig_end, start, end));
1163 1188
1164 BUG_ON(end > JFFS2_F_I_SIZE(f)); 1189 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1165 BUG_ON(end < orig_end); 1190 BUG_ON(end < orig_end);
1166 BUG_ON(start > orig_start); 1191 BUG_ON(start > orig_start);
1167 } 1192 }
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index cd6a8bd13e0b..4991c348f6ec 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: nodelist.c,v 1.90 2004/12/08 17:59:20 dwmw2 Exp $ 10 * $Id: nodelist.c,v 1.98 2005/07/10 15:15:32 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -55,30 +55,63 @@ void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new
55 }); 55 });
56} 56}
57 57
58/* Put a new tmp_dnode_info into the list, keeping the list in 58/*
59 order of increasing version 59 * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
60*/ 60 * order of increasing version.
61static void jffs2_add_tn_to_list(struct jffs2_tmp_dnode_info *tn, struct jffs2_tmp_dnode_info **list) 61 */
62static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
62{ 63{
63 struct jffs2_tmp_dnode_info **prev = list; 64 struct rb_node **p = &list->rb_node;
64 65 struct rb_node * parent = NULL;
65 while ((*prev) && (*prev)->version < tn->version) { 66 struct jffs2_tmp_dnode_info *this;
66 prev = &((*prev)->next); 67
67 } 68 while (*p) {
68 tn->next = (*prev); 69 parent = *p;
69 *prev = tn; 70 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
71
72 /* There may actually be a collision here, but it doesn't
73 actually matter. As long as the two nodes with the same
74 version are together, it's all fine. */
75 if (tn->version < this->version)
76 p = &(*p)->rb_left;
77 else
78 p = &(*p)->rb_right;
79 }
80
81 rb_link_node(&tn->rb, parent, p);
82 rb_insert_color(&tn->rb, list);
70} 83}
71 84
72static void jffs2_free_tmp_dnode_info_list(struct jffs2_tmp_dnode_info *tn) 85static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
73{ 86{
74 struct jffs2_tmp_dnode_info *next; 87 struct rb_node *this;
88 struct jffs2_tmp_dnode_info *tn;
89
90 this = list->rb_node;
91
92 /* Now at bottom of tree */
93 while (this) {
94 if (this->rb_left)
95 this = this->rb_left;
96 else if (this->rb_right)
97 this = this->rb_right;
98 else {
99 tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
100 jffs2_free_full_dnode(tn->fn);
101 jffs2_free_tmp_dnode_info(tn);
102
103 this = this->rb_parent;
104 if (!this)
105 break;
75 106
76 while (tn) { 107 if (this->rb_left == &tn->rb)
77 next = tn; 108 this->rb_left = NULL;
78 tn = tn->next; 109 else if (this->rb_right == &tn->rb)
79 jffs2_free_full_dnode(next->fn); 110 this->rb_right = NULL;
80 jffs2_free_tmp_dnode_info(next); 111 else BUG();
112 }
81 } 113 }
114 list->rb_node = NULL;
82} 115}
83 116
84static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd) 117static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
@@ -108,12 +141,13 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
108 with this ino, returning the former in order of version */ 141 with this ino, returning the former in order of version */
109 142
110int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 143int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
111 struct jffs2_tmp_dnode_info **tnp, struct jffs2_full_dirent **fdp, 144 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
112 uint32_t *highest_version, uint32_t *latest_mctime, 145 uint32_t *highest_version, uint32_t *latest_mctime,
113 uint32_t *mctime_ver) 146 uint32_t *mctime_ver)
114{ 147{
115 struct jffs2_raw_node_ref *ref, *valid_ref; 148 struct jffs2_raw_node_ref *ref, *valid_ref;
116 struct jffs2_tmp_dnode_info *tn, *ret_tn = NULL; 149 struct jffs2_tmp_dnode_info *tn;
150 struct rb_root ret_tn = RB_ROOT;
117 struct jffs2_full_dirent *fd, *ret_fd = NULL; 151 struct jffs2_full_dirent *fd, *ret_fd = NULL;
118 union jffs2_node_union node; 152 union jffs2_node_union node;
119 size_t retlen; 153 size_t retlen;
@@ -127,7 +161,7 @@ int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
127 161
128 valid_ref = jffs2_first_valid_node(f->inocache->nodes); 162 valid_ref = jffs2_first_valid_node(f->inocache->nodes);
129 163
130 if (!valid_ref) 164 if (!valid_ref && (f->inocache->ino != 1))
131 printk(KERN_WARNING "Eep. No valid nodes for ino #%u\n", f->inocache->ino); 165 printk(KERN_WARNING "Eep. No valid nodes for ino #%u\n", f->inocache->ino);
132 166
133 while (valid_ref) { 167 while (valid_ref) {
@@ -386,7 +420,7 @@ int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
386 D1(printk(KERN_DEBUG "dnode @%08x: ver %u, offset %04x, dsize %04x\n", 420 D1(printk(KERN_DEBUG "dnode @%08x: ver %u, offset %04x, dsize %04x\n",
387 ref_offset(ref), je32_to_cpu(node.i.version), 421 ref_offset(ref), je32_to_cpu(node.i.version),
388 je32_to_cpu(node.i.offset), je32_to_cpu(node.i.dsize))); 422 je32_to_cpu(node.i.offset), je32_to_cpu(node.i.dsize)));
389 jffs2_add_tn_to_list(tn, &ret_tn); 423 jffs2_add_tn_to_tree(tn, &ret_tn);
390 break; 424 break;
391 425
392 default: 426 default:
@@ -450,7 +484,7 @@ int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
450 return 0; 484 return 0;
451 485
452 free_out: 486 free_out:
453 jffs2_free_tmp_dnode_info_list(ret_tn); 487 jffs2_free_tmp_dnode_info_list(&ret_tn);
454 jffs2_free_full_dirent_list(ret_fd); 488 jffs2_free_full_dirent_list(ret_fd);
455 return err; 489 return err;
456} 490}
@@ -489,9 +523,13 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
489void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new) 523void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new)
490{ 524{
491 struct jffs2_inode_cache **prev; 525 struct jffs2_inode_cache **prev;
492 D2(printk(KERN_DEBUG "jffs2_add_ino_cache: Add %p (ino #%u)\n", new, new->ino)); 526
493 spin_lock(&c->inocache_lock); 527 spin_lock(&c->inocache_lock);
494 528 if (!new->ino)
529 new->ino = ++c->highest_ino;
530
531 D2(printk(KERN_DEBUG "jffs2_add_ino_cache: Add %p (ino #%u)\n", new, new->ino));
532
495 prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE]; 533 prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE];
496 534
497 while ((*prev) && (*prev)->ino < new->ino) { 535 while ((*prev) && (*prev)->ino < new->ino) {
@@ -506,7 +544,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
506void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old) 544void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
507{ 545{
508 struct jffs2_inode_cache **prev; 546 struct jffs2_inode_cache **prev;
509 D2(printk(KERN_DEBUG "jffs2_del_ino_cache: Del %p (ino #%u)\n", old, old->ino)); 547 D1(printk(KERN_DEBUG "jffs2_del_ino_cache: Del %p (ino #%u)\n", old, old->ino));
510 spin_lock(&c->inocache_lock); 548 spin_lock(&c->inocache_lock);
511 549
512 prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE]; 550 prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE];
@@ -518,6 +556,14 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
518 *prev = old->next; 556 *prev = old->next;
519 } 557 }
520 558
559 /* Free it now unless it's in READING or CLEARING state, which
560 are the transitions upon read_inode() and clear_inode(). The
561 rest of the time we know nobody else is looking at it, and
562 if it's held by read_inode() or clear_inode() they'll free it
563 for themselves. */
564 if (old->state != INO_STATE_READING && old->state != INO_STATE_CLEARING)
565 jffs2_free_inode_cache(old);
566
521 spin_unlock(&c->inocache_lock); 567 spin_unlock(&c->inocache_lock);
522} 568}
523 569
@@ -530,7 +576,6 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
530 this = c->inocache_list[i]; 576 this = c->inocache_list[i];
531 while (this) { 577 while (this) {
532 next = this->next; 578 next = this->next;
533 D2(printk(KERN_DEBUG "jffs2_free_ino_caches: Freeing ino #%u at %p\n", this->ino, this));
534 jffs2_free_inode_cache(this); 579 jffs2_free_inode_cache(this);
535 this = next; 580 this = next;
536 } 581 }
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index a4864d05ea92..b34c397909ef 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: nodelist.h,v 1.126 2004/11/19 15:06:29 dedekind Exp $ 10 * $Id: nodelist.h,v 1.131 2005/07/05 21:03:07 dwmw2 Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -135,6 +135,7 @@ struct jffs2_inode_cache {
135#define INO_STATE_CHECKEDABSENT 3 /* Checked, cleared again */ 135#define INO_STATE_CHECKEDABSENT 3 /* Checked, cleared again */
136#define INO_STATE_GC 4 /* GCing a 'pristine' node */ 136#define INO_STATE_GC 4 /* GCing a 'pristine' node */
137#define INO_STATE_READING 5 /* In read_inode() */ 137#define INO_STATE_READING 5 /* In read_inode() */
138#define INO_STATE_CLEARING 6 /* In clear_inode() */
138 139
139#define INOCACHE_HASHSIZE 128 140#define INOCACHE_HASHSIZE 128
140 141
@@ -160,7 +161,7 @@ struct jffs2_full_dnode
160*/ 161*/
161struct jffs2_tmp_dnode_info 162struct jffs2_tmp_dnode_info
162{ 163{
163 struct jffs2_tmp_dnode_info *next; 164 struct rb_node rb;
164 struct jffs2_full_dnode *fn; 165 struct jffs2_full_dnode *fn;
165 uint32_t version; 166 uint32_t version;
166}; 167};
@@ -362,6 +363,18 @@ static inline struct jffs2_node_frag *frag_first(struct rb_root *root)
362 node = node->rb_left; 363 node = node->rb_left;
363 return rb_entry(node, struct jffs2_node_frag, rb); 364 return rb_entry(node, struct jffs2_node_frag, rb);
364} 365}
366
367static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
368{
369 struct rb_node *node = root->rb_node;
370
371 if (!node)
372 return NULL;
373 while(node->rb_right)
374 node = node->rb_right;
375 return rb_entry(node, struct jffs2_node_frag, rb);
376}
377
365#define rb_parent(rb) ((rb)->rb_parent) 378#define rb_parent(rb) ((rb)->rb_parent)
366#define frag_next(frag) rb_entry(rb_next(&(frag)->rb), struct jffs2_node_frag, rb) 379#define frag_next(frag) rb_entry(rb_next(&(frag)->rb), struct jffs2_node_frag, rb)
367#define frag_prev(frag) rb_entry(rb_prev(&(frag)->rb), struct jffs2_node_frag, rb) 380#define frag_prev(frag) rb_entry(rb_prev(&(frag)->rb), struct jffs2_node_frag, rb)
@@ -374,7 +387,7 @@ static inline struct jffs2_node_frag *frag_first(struct rb_root *root)
374D2(void jffs2_print_frag_list(struct jffs2_inode_info *f)); 387D2(void jffs2_print_frag_list(struct jffs2_inode_info *f));
375void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list); 388void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list);
376int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 389int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
377 struct jffs2_tmp_dnode_info **tnp, struct jffs2_full_dirent **fdp, 390 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
378 uint32_t *highest_version, uint32_t *latest_mctime, 391 uint32_t *highest_version, uint32_t *latest_mctime,
379 uint32_t *mctime_ver); 392 uint32_t *mctime_ver);
380void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state); 393void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state);
@@ -462,7 +475,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c);
462/* erase.c */ 475/* erase.c */
463void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count); 476void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count);
464 477
465#ifdef CONFIG_JFFS2_FS_NAND 478#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
466/* wbuf.c */ 479/* wbuf.c */
467int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino); 480int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino);
468int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c); 481int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c);
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 2651135bdf42..c1d8b5ed9ab9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: nodemgmt.c,v 1.115 2004/11/22 11:07:21 dwmw2 Exp $ 10 * $Id: nodemgmt.c,v 1.122 2005/05/06 09:30:27 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -75,7 +75,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs
75 dirty = c->dirty_size + c->erasing_size - c->nr_erasing_blocks * c->sector_size + c->unchecked_size; 75 dirty = c->dirty_size + c->erasing_size - c->nr_erasing_blocks * c->sector_size + c->unchecked_size;
76 if (dirty < c->nospc_dirty_size) { 76 if (dirty < c->nospc_dirty_size) {
77 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) { 77 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) {
78 printk(KERN_NOTICE "jffs2_reserve_space(): Low on dirty space to GC, but it's a deletion. Allowing...\n"); 78 D1(printk(KERN_NOTICE "jffs2_reserve_space(): Low on dirty space to GC, but it's a deletion. Allowing...\n"));
79 break; 79 break;
80 } 80 }
81 D1(printk(KERN_DEBUG "dirty size 0x%08x + unchecked_size 0x%08x < nospc_dirty_size 0x%08x, returning -ENOSPC\n", 81 D1(printk(KERN_DEBUG "dirty size 0x%08x + unchecked_size 0x%08x < nospc_dirty_size 0x%08x, returning -ENOSPC\n",
@@ -98,7 +98,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, uint32_t *ofs
98 avail = c->free_size + c->dirty_size + c->erasing_size + c->unchecked_size; 98 avail = c->free_size + c->dirty_size + c->erasing_size + c->unchecked_size;
99 if ( (avail / c->sector_size) <= blocksneeded) { 99 if ( (avail / c->sector_size) <= blocksneeded) {
100 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) { 100 if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) {
101 printk(KERN_NOTICE "jffs2_reserve_space(): Low on possibly available space, but it's a deletion. Allowing...\n"); 101 D1(printk(KERN_NOTICE "jffs2_reserve_space(): Low on possibly available space, but it's a deletion. Allowing...\n"));
102 break; 102 break;
103 } 103 }
104 104
@@ -308,7 +308,10 @@ int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_r
308 308
309 D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n", ref_offset(new), ref_flags(new), len)); 309 D1(printk(KERN_DEBUG "jffs2_add_physical_node_ref(): Node at 0x%x(%d), size 0x%x\n", ref_offset(new), ref_flags(new), len));
310#if 1 310#if 1
311 if (jeb != c->nextblock || (ref_offset(new)) != jeb->offset + (c->sector_size - jeb->free_size)) { 311 /* we could get some obsolete nodes after nextblock was refiled
312 in wbuf.c */
313 if ((c->nextblock || !ref_obsolete(new))
314 &&(jeb != c->nextblock || ref_offset(new) != jeb->offset + (c->sector_size - jeb->free_size))) {
312 printk(KERN_WARNING "argh. node added in wrong place\n"); 315 printk(KERN_WARNING "argh. node added in wrong place\n");
313 jffs2_free_raw_node_ref(new); 316 jffs2_free_raw_node_ref(new);
314 return -EINVAL; 317 return -EINVAL;
@@ -332,7 +335,7 @@ int jffs2_add_physical_node_ref(struct jffs2_sb_info *c, struct jffs2_raw_node_r
332 c->used_size += len; 335 c->used_size += len;
333 } 336 }
334 337
335 if (!jeb->free_size && !jeb->dirty_size) { 338 if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) {
336 /* If it lives on the dirty_list, jffs2_reserve_space will put it there */ 339 /* If it lives on the dirty_list, jffs2_reserve_space will put it there */
337 D1(printk(KERN_DEBUG "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n", 340 D1(printk(KERN_DEBUG "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
338 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size)); 341 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size));
@@ -400,7 +403,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
400 jeb = &c->blocks[blocknr]; 403 jeb = &c->blocks[blocknr];
401 404
402 if (jffs2_can_mark_obsolete(c) && !jffs2_is_readonly(c) && 405 if (jffs2_can_mark_obsolete(c) && !jffs2_is_readonly(c) &&
403 !(c->flags & JFFS2_SB_FLAG_MOUNTING)) { 406 !(c->flags & (JFFS2_SB_FLAG_SCANNING | JFFS2_SB_FLAG_BUILDING))) {
404 /* Hm. This may confuse static lock analysis. If any of the above 407 /* Hm. This may confuse static lock analysis. If any of the above
405 three conditions is false, we're going to return from this 408 three conditions is false, we're going to return from this
406 function without actually obliterating any nodes or freeing 409 function without actually obliterating any nodes or freeing
@@ -434,7 +437,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
434 437
435 // Take care, that wasted size is taken into concern 438 // Take care, that wasted size is taken into concern
436 if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + ref_totlen(c, jeb, ref))) && jeb != c->nextblock) { 439 if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + ref_totlen(c, jeb, ref))) && jeb != c->nextblock) {
437 D1(printk("Dirtying\n")); 440 D1(printk(KERN_DEBUG "Dirtying\n"));
438 addedsize = ref_totlen(c, jeb, ref); 441 addedsize = ref_totlen(c, jeb, ref);
439 jeb->dirty_size += ref_totlen(c, jeb, ref); 442 jeb->dirty_size += ref_totlen(c, jeb, ref);
440 c->dirty_size += ref_totlen(c, jeb, ref); 443 c->dirty_size += ref_totlen(c, jeb, ref);
@@ -456,7 +459,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
456 } 459 }
457 } 460 }
458 } else { 461 } else {
459 D1(printk("Wasting\n")); 462 D1(printk(KERN_DEBUG "Wasting\n"));
460 addedsize = 0; 463 addedsize = 0;
461 jeb->wasted_size += ref_totlen(c, jeb, ref); 464 jeb->wasted_size += ref_totlen(c, jeb, ref);
462 c->wasted_size += ref_totlen(c, jeb, ref); 465 c->wasted_size += ref_totlen(c, jeb, ref);
@@ -467,8 +470,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
467 470
468 D1(ACCT_PARANOIA_CHECK(jeb)); 471 D1(ACCT_PARANOIA_CHECK(jeb));
469 472
470 if (c->flags & JFFS2_SB_FLAG_MOUNTING) { 473 if (c->flags & JFFS2_SB_FLAG_SCANNING) {
471 /* Mount in progress. Don't muck about with the block 474 /* Flash scanning is in progress. Don't muck about with the block
472 lists because they're not ready yet, and don't actually 475 lists because they're not ready yet, and don't actually
473 obliterate nodes that look obsolete. If they weren't 476 obliterate nodes that look obsolete. If they weren't
474 marked obsolete on the flash at the time they _became_ 477 marked obsolete on the flash at the time they _became_
@@ -527,7 +530,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
527 530
528 spin_unlock(&c->erase_completion_lock); 531 spin_unlock(&c->erase_completion_lock);
529 532
530 if (!jffs2_can_mark_obsolete(c) || jffs2_is_readonly(c)) { 533 if (!jffs2_can_mark_obsolete(c) || jffs2_is_readonly(c) ||
534 (c->flags & JFFS2_SB_FLAG_BUILDING)) {
531 /* We didn't lock the erase_free_sem */ 535 /* We didn't lock the erase_free_sem */
532 return; 536 return;
533 } 537 }
@@ -590,11 +594,8 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
590 *p = ref->next_in_ino; 594 *p = ref->next_in_ino;
591 ref->next_in_ino = NULL; 595 ref->next_in_ino = NULL;
592 596
593 if (ic->nodes == (void *)ic) { 597 if (ic->nodes == (void *)ic && ic->nlink == 0)
594 D1(printk(KERN_DEBUG "inocache for ino #%u is all gone now. Freeing\n", ic->ino));
595 jffs2_del_ino_cache(c, ic); 598 jffs2_del_ino_cache(c, ic);
596 jffs2_free_inode_cache(ic);
597 }
598 599
599 spin_unlock(&c->erase_completion_lock); 600 spin_unlock(&c->erase_completion_lock);
600 } 601 }
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 03b0acc37b73..d900c8929b09 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -7,41 +7,24 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: os-linux.h,v 1.51 2004/11/16 20:36:11 dwmw2 Exp $ 10 * $Id: os-linux.h,v 1.58 2005/07/12 02:34:35 tpoynor Exp $
11 * 11 *
12 */ 12 */
13 13
14#ifndef __JFFS2_OS_LINUX_H__ 14#ifndef __JFFS2_OS_LINUX_H__
15#define __JFFS2_OS_LINUX_H__ 15#define __JFFS2_OS_LINUX_H__
16#include <linux/version.h>
17 16
18/* JFFS2 uses Linux mode bits natively -- no need for conversion */ 17/* JFFS2 uses Linux mode bits natively -- no need for conversion */
19#define os_to_jffs2_mode(x) (x) 18#define os_to_jffs2_mode(x) (x)
20#define jffs2_to_os_mode(x) (x) 19#define jffs2_to_os_mode(x) (x)
21 20
22#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,73)
23#define kstatfs statfs
24#endif
25
26struct kstatfs; 21struct kstatfs;
27struct kvec; 22struct kvec;
28 23
29#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
30#define JFFS2_INODE_INFO(i) (list_entry(i, struct jffs2_inode_info, vfs_inode)) 24#define JFFS2_INODE_INFO(i) (list_entry(i, struct jffs2_inode_info, vfs_inode))
31#define OFNI_EDONI_2SFFJ(f) (&(f)->vfs_inode) 25#define OFNI_EDONI_2SFFJ(f) (&(f)->vfs_inode)
32#define JFFS2_SB_INFO(sb) (sb->s_fs_info) 26#define JFFS2_SB_INFO(sb) (sb->s_fs_info)
33#define OFNI_BS_2SFFJ(c) ((struct super_block *)c->os_priv) 27#define OFNI_BS_2SFFJ(c) ((struct super_block *)c->os_priv)
34#elif defined(JFFS2_OUT_OF_KERNEL)
35#define JFFS2_INODE_INFO(i) ((struct jffs2_inode_info *) &(i)->u)
36#define OFNI_EDONI_2SFFJ(f) ((struct inode *) ( ((char *)f) - ((char *)(&((struct inode *)NULL)->u)) ) )
37#define JFFS2_SB_INFO(sb) ((struct jffs2_sb_info *) &(sb)->u)
38#define OFNI_BS_2SFFJ(c) ((struct super_block *) ( ((char *)c) - ((char *)(&((struct super_block *)NULL)->u)) ) )
39#else
40#define JFFS2_INODE_INFO(i) (&i->u.jffs2_i)
41#define OFNI_EDONI_2SFFJ(f) ((struct inode *) ( ((char *)f) - ((char *)(&((struct inode *)NULL)->u)) ) )
42#define JFFS2_SB_INFO(sb) (&sb->u.jffs2_sb)
43#define OFNI_BS_2SFFJ(c) ((struct super_block *) ( ((char *)c) - ((char *)(&((struct super_block *)NULL)->u)) ) )
44#endif
45 28
46 29
47#define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size) 30#define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size)
@@ -49,28 +32,14 @@ struct kvec;
49#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid) 32#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid)
50#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid) 33#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid)
51 34
52#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,1)
53#define JFFS2_F_I_RDEV_MIN(f) (iminor(OFNI_EDONI_2SFFJ(f))) 35#define JFFS2_F_I_RDEV_MIN(f) (iminor(OFNI_EDONI_2SFFJ(f)))
54#define JFFS2_F_I_RDEV_MAJ(f) (imajor(OFNI_EDONI_2SFFJ(f))) 36#define JFFS2_F_I_RDEV_MAJ(f) (imajor(OFNI_EDONI_2SFFJ(f)))
55#else
56#define JFFS2_F_I_RDEV_MIN(f) (MINOR(to_kdev_t(OFNI_EDONI_2SFFJ(f)->i_rdev)))
57#define JFFS2_F_I_RDEV_MAJ(f) (MAJOR(to_kdev_t(OFNI_EDONI_2SFFJ(f)->i_rdev)))
58#endif
59 37
60/* Urgh. The things we do to keep the 2.4 build working */
61#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,47)
62#define ITIME(sec) ((struct timespec){sec, 0}) 38#define ITIME(sec) ((struct timespec){sec, 0})
63#define I_SEC(tv) ((tv).tv_sec) 39#define I_SEC(tv) ((tv).tv_sec)
64#define JFFS2_F_I_CTIME(f) (OFNI_EDONI_2SFFJ(f)->i_ctime.tv_sec) 40#define JFFS2_F_I_CTIME(f) (OFNI_EDONI_2SFFJ(f)->i_ctime.tv_sec)
65#define JFFS2_F_I_MTIME(f) (OFNI_EDONI_2SFFJ(f)->i_mtime.tv_sec) 41#define JFFS2_F_I_MTIME(f) (OFNI_EDONI_2SFFJ(f)->i_mtime.tv_sec)
66#define JFFS2_F_I_ATIME(f) (OFNI_EDONI_2SFFJ(f)->i_atime.tv_sec) 42#define JFFS2_F_I_ATIME(f) (OFNI_EDONI_2SFFJ(f)->i_atime.tv_sec)
67#else
68#define ITIME(x) (x)
69#define I_SEC(x) (x)
70#define JFFS2_F_I_CTIME(f) (OFNI_EDONI_2SFFJ(f)->i_ctime)
71#define JFFS2_F_I_MTIME(f) (OFNI_EDONI_2SFFJ(f)->i_mtime)
72#define JFFS2_F_I_ATIME(f) (OFNI_EDONI_2SFFJ(f)->i_atime)
73#endif
74 43
75#define sleep_on_spinunlock(wq, s) \ 44#define sleep_on_spinunlock(wq, s) \
76 do { \ 45 do { \
@@ -84,23 +53,21 @@ struct kvec;
84 53
85static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) 54static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
86{ 55{
87#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
88 f->highest_version = 0; 56 f->highest_version = 0;
89 f->fragtree = RB_ROOT; 57 f->fragtree = RB_ROOT;
90 f->metadata = NULL; 58 f->metadata = NULL;
91 f->dents = NULL; 59 f->dents = NULL;
92 f->flags = 0; 60 f->flags = 0;
93 f->usercompr = 0; 61 f->usercompr = 0;
94#else
95 memset(f, 0, sizeof(*f));
96 init_MUTEX_LOCKED(&f->sem);
97#endif
98} 62}
99 63
64
100#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY) 65#define jffs2_is_readonly(c) (OFNI_BS_2SFFJ(c)->s_flags & MS_RDONLY)
101 66
102#if (!defined CONFIG_JFFS2_FS_NAND && !defined CONFIG_JFFS2_FS_NOR_ECC) 67#ifndef CONFIG_JFFS2_FS_WRITEBUFFER
68#define SECTOR_ADDR(x) ( ((unsigned long)(x) & ~(c->sector_size-1)) )
103#define jffs2_can_mark_obsolete(c) (1) 69#define jffs2_can_mark_obsolete(c) (1)
70#define jffs2_is_writebuffered(c) (0)
104#define jffs2_cleanmarker_oob(c) (0) 71#define jffs2_cleanmarker_oob(c) (0)
105#define jffs2_write_nand_cleanmarker(c,jeb) (-EIO) 72#define jffs2_write_nand_cleanmarker(c,jeb) (-EIO)
106 73
@@ -116,11 +83,16 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
116#define jffs2_wbuf_timeout NULL 83#define jffs2_wbuf_timeout NULL
117#define jffs2_wbuf_process NULL 84#define jffs2_wbuf_process NULL
118#define jffs2_nor_ecc(c) (0) 85#define jffs2_nor_ecc(c) (0)
86#define jffs2_dataflash(c) (0)
119#define jffs2_nor_ecc_flash_setup(c) (0) 87#define jffs2_nor_ecc_flash_setup(c) (0)
120#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) 88#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0)
89#define jffs2_dataflash_setup(c) (0)
90#define jffs2_dataflash_cleanup(c) do {} while (0)
121 91
122#else /* NAND and/or ECC'd NOR support present */ 92#else /* NAND and/or ECC'd NOR support present */
123 93
94#define jffs2_is_writebuffered(c) (c->wbuf != NULL)
95#define SECTOR_ADDR(x) ( ((unsigned long)(x) / (unsigned long)(c->sector_size)) * c->sector_size )
124#define jffs2_can_mark_obsolete(c) ((c->mtd->type == MTD_NORFLASH && !(c->mtd->flags & MTD_ECC)) || c->mtd->type == MTD_RAM) 96#define jffs2_can_mark_obsolete(c) ((c->mtd->type == MTD_NORFLASH && !(c->mtd->flags & MTD_ECC)) || c->mtd->type == MTD_RAM)
125#define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH) 97#define jffs2_cleanmarker_oob(c) (c->mtd->type == MTD_NANDFLASH)
126 98
@@ -142,16 +114,16 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino);
142int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c); 114int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c);
143int jffs2_nand_flash_setup(struct jffs2_sb_info *c); 115int jffs2_nand_flash_setup(struct jffs2_sb_info *c);
144void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c); 116void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c);
145#ifdef CONFIG_JFFS2_FS_NOR_ECC 117
146#define jffs2_nor_ecc(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_ECC)) 118#define jffs2_nor_ecc(c) (c->mtd->type == MTD_NORFLASH && (c->mtd->flags & MTD_ECC))
147int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c); 119int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c);
148void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c); 120void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c);
149#else 121
150#define jffs2_nor_ecc(c) (0) 122#define jffs2_dataflash(c) (c->mtd->type == MTD_DATAFLASH)
151#define jffs2_nor_ecc_flash_setup(c) (0) 123int jffs2_dataflash_setup(struct jffs2_sb_info *c);
152#define jffs2_nor_ecc_flash_cleanup(c) do {} while (0) 124void jffs2_dataflash_cleanup(struct jffs2_sb_info *c);
153#endif /* NOR ECC */ 125
154#endif /* NAND */ 126#endif /* WRITEBUFFER */
155 127
156/* erase.c */ 128/* erase.c */
157static inline void jffs2_erase_pending_trigger(struct jffs2_sb_info *c) 129static inline void jffs2_erase_pending_trigger(struct jffs2_sb_info *c)
diff --git a/fs/jffs2/read.c b/fs/jffs2/read.c
index eb493dc06db7..c7f9068907cf 100644
--- a/fs/jffs2/read.c
+++ b/fs/jffs2/read.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: read.c,v 1.38 2004/11/16 20:36:12 dwmw2 Exp $ 10 * $Id: read.c,v 1.39 2005/03/01 10:34:03 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -214,33 +214,3 @@ int jffs2_read_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
214 return 0; 214 return 0;
215} 215}
216 216
217/* Core function to read symlink target. */
218char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
219{
220 char *buf;
221 int ret;
222
223 down(&f->sem);
224
225 if (!f->metadata) {
226 printk(KERN_NOTICE "No metadata for symlink inode #%u\n", f->inocache->ino);
227 up(&f->sem);
228 return ERR_PTR(-EINVAL);
229 }
230 buf = kmalloc(f->metadata->size+1, GFP_USER);
231 if (!buf) {
232 up(&f->sem);
233 return ERR_PTR(-ENOMEM);
234 }
235 buf[f->metadata->size]=0;
236
237 ret = jffs2_read_dnode(c, f, f->metadata, buf, 0, f->metadata->size);
238
239 up(&f->sem);
240
241 if (ret) {
242 kfree(buf);
243 return ERR_PTR(ret);
244 }
245 return buf;
246}
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index aca4a0b17bcd..5b2a83599d73 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: readinode.c,v 1.117 2004/11/20 18:06:54 dwmw2 Exp $ 10 * $Id: readinode.c,v 1.125 2005/07/10 13:13:55 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -151,6 +151,9 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
151 151
152 D1(printk(KERN_DEBUG "jffs2_add_full_dnode_to_inode(ino #%u, f %p, fn %p)\n", f->inocache->ino, f, fn)); 152 D1(printk(KERN_DEBUG "jffs2_add_full_dnode_to_inode(ino #%u, f %p, fn %p)\n", f->inocache->ino, f, fn));
153 153
154 if (unlikely(!fn->size))
155 return 0;
156
154 newfrag = jffs2_alloc_node_frag(); 157 newfrag = jffs2_alloc_node_frag();
155 if (unlikely(!newfrag)) 158 if (unlikely(!newfrag))
156 return -ENOMEM; 159 return -ENOMEM;
@@ -158,11 +161,6 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
158 D2(printk(KERN_DEBUG "adding node %04x-%04x @0x%08x on flash, newfrag *%p\n", 161 D2(printk(KERN_DEBUG "adding node %04x-%04x @0x%08x on flash, newfrag *%p\n",
159 fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag)); 162 fn->ofs, fn->ofs+fn->size, ref_offset(fn->raw), newfrag));
160 163
161 if (unlikely(!fn->size)) {
162 jffs2_free_node_frag(newfrag);
163 return 0;
164 }
165
166 newfrag->ofs = fn->ofs; 164 newfrag->ofs = fn->ofs;
167 newfrag->size = fn->size; 165 newfrag->size = fn->size;
168 newfrag->node = fn; 166 newfrag->node = fn;
@@ -500,7 +498,9 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
500 struct jffs2_inode_info *f, 498 struct jffs2_inode_info *f,
501 struct jffs2_raw_inode *latest_node) 499 struct jffs2_raw_inode *latest_node)
502{ 500{
503 struct jffs2_tmp_dnode_info *tn_list, *tn; 501 struct jffs2_tmp_dnode_info *tn = NULL;
502 struct rb_root tn_list;
503 struct rb_node *rb, *repl_rb;
504 struct jffs2_full_dirent *fd_list; 504 struct jffs2_full_dirent *fd_list;
505 struct jffs2_full_dnode *fn = NULL; 505 struct jffs2_full_dnode *fn = NULL;
506 uint32_t crc; 506 uint32_t crc;
@@ -522,9 +522,10 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
522 } 522 }
523 f->dents = fd_list; 523 f->dents = fd_list;
524 524
525 while (tn_list) { 525 rb = rb_first(&tn_list);
526 tn = tn_list;
527 526
527 while (rb) {
528 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
528 fn = tn->fn; 529 fn = tn->fn;
529 530
530 if (f->metadata) { 531 if (f->metadata) {
@@ -556,7 +557,29 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
556 mdata_ver = tn->version; 557 mdata_ver = tn->version;
557 } 558 }
558 next_tn: 559 next_tn:
559 tn_list = tn->next; 560 BUG_ON(rb->rb_left);
561 if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
562 /* We were then left-hand child of our parent. We need
563 to move our own right-hand child into our place. */
564 repl_rb = rb->rb_right;
565 if (repl_rb)
566 repl_rb->rb_parent = rb->rb_parent;
567 } else
568 repl_rb = NULL;
569
570 rb = rb_next(rb);
571
572 /* Remove the spent tn from the tree; don't bother rebalancing
573 but put our right-hand child in our own place. */
574 if (tn->rb.rb_parent) {
575 if (tn->rb.rb_parent->rb_left == &tn->rb)
576 tn->rb.rb_parent->rb_left = repl_rb;
577 else if (tn->rb.rb_parent->rb_right == &tn->rb)
578 tn->rb.rb_parent->rb_right = repl_rb;
579 else BUG();
580 } else if (tn->rb.rb_right)
581 tn->rb.rb_right->rb_parent = NULL;
582
560 jffs2_free_tmp_dnode_info(tn); 583 jffs2_free_tmp_dnode_info(tn);
561 } 584 }
562 D1(jffs2_sanitycheck_fragtree(f)); 585 D1(jffs2_sanitycheck_fragtree(f));
@@ -623,6 +646,40 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
623 case. */ 646 case. */
624 if (!je32_to_cpu(latest_node->isize)) 647 if (!je32_to_cpu(latest_node->isize))
625 latest_node->isize = latest_node->dsize; 648 latest_node->isize = latest_node->dsize;
649
650 if (f->inocache->state != INO_STATE_CHECKING) {
651 /* Symlink's inode data is the target path. Read it and
652 * keep in RAM to facilitate quick follow symlink operation.
653 * We use f->dents field to store the target path, which
654 * is somewhat ugly. */
655 f->dents = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
656 if (!f->dents) {
657 printk(KERN_WARNING "Can't allocate %d bytes of memory "
658 "for the symlink target path cache\n",
659 je32_to_cpu(latest_node->csize));
660 up(&f->sem);
661 jffs2_do_clear_inode(c, f);
662 return -ENOMEM;
663 }
664
665 ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
666 je32_to_cpu(latest_node->csize), &retlen, (char *)f->dents);
667
668 if (ret || retlen != je32_to_cpu(latest_node->csize)) {
669 if (retlen != je32_to_cpu(latest_node->csize))
670 ret = -EIO;
671 kfree(f->dents);
672 f->dents = NULL;
673 up(&f->sem);
674 jffs2_do_clear_inode(c, f);
675 return -ret;
676 }
677
678 ((char *)f->dents)[je32_to_cpu(latest_node->csize)] = '\0';
679 D1(printk(KERN_DEBUG "jffs2_do_read_inode(): symlink's target '%s' cached\n",
680 (char *)f->dents));
681 }
682
626 /* fall through... */ 683 /* fall through... */
627 684
628 case S_IFBLK: 685 case S_IFBLK:
@@ -672,6 +729,9 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
672 down(&f->sem); 729 down(&f->sem);
673 deleted = f->inocache && !f->inocache->nlink; 730 deleted = f->inocache && !f->inocache->nlink;
674 731
732 if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
733 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
734
675 if (f->metadata) { 735 if (f->metadata) {
676 if (deleted) 736 if (deleted)
677 jffs2_mark_node_obsolete(c, f->metadata->raw); 737 jffs2_mark_node_obsolete(c, f->metadata->raw);
@@ -680,16 +740,27 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
680 740
681 jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL); 741 jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
682 742
683 fds = f->dents; 743 /* For symlink inodes we us f->dents to store the target path name */
744 if (S_ISLNK(OFNI_EDONI_2SFFJ(f)->i_mode)) {
745 if (f->dents) {
746 kfree(f->dents);
747 f->dents = NULL;
748 }
749 } else {
750 fds = f->dents;
684 751
685 while(fds) { 752 while(fds) {
686 fd = fds; 753 fd = fds;
687 fds = fd->next; 754 fds = fd->next;
688 jffs2_free_full_dirent(fd); 755 jffs2_free_full_dirent(fd);
756 }
689 } 757 }
690 758
691 if (f->inocache && f->inocache->state != INO_STATE_CHECKING) 759 if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
692 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT); 760 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
761 if (f->inocache->nodes == (void *)f->inocache)
762 jffs2_del_ino_cache(c, f->inocache);
763 }
693 764
694 up(&f->sem); 765 up(&f->sem);
695} 766}
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index ded53584a897..b63160f83bab 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: scan.c,v 1.115 2004/11/17 12:59:08 dedekind Exp $ 10 * $Id: scan.c,v 1.119 2005/02/17 17:51:13 dedekind Exp $
11 * 11 *
12 */ 12 */
13#include <linux/kernel.h> 13#include <linux/kernel.h>
@@ -19,7 +19,7 @@
19#include <linux/compiler.h> 19#include <linux/compiler.h>
20#include "nodelist.h" 20#include "nodelist.h"
21 21
22#define EMPTY_SCAN_SIZE 1024 22#define DEFAULT_EMPTY_SCAN_SIZE 1024
23 23
24#define DIRTY_SPACE(x) do { typeof(x) _x = (x); \ 24#define DIRTY_SPACE(x) do { typeof(x) _x = (x); \
25 c->free_size -= _x; c->dirty_size += _x; \ 25 c->free_size -= _x; c->dirty_size += _x; \
@@ -68,13 +68,21 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
68static inline int min_free(struct jffs2_sb_info *c) 68static inline int min_free(struct jffs2_sb_info *c)
69{ 69{
70 uint32_t min = 2 * sizeof(struct jffs2_raw_inode); 70 uint32_t min = 2 * sizeof(struct jffs2_raw_inode);
71#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC 71#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
72 if (!jffs2_can_mark_obsolete(c) && min < c->wbuf_pagesize) 72 if (!jffs2_can_mark_obsolete(c) && min < c->wbuf_pagesize)
73 return c->wbuf_pagesize; 73 return c->wbuf_pagesize;
74#endif 74#endif
75 return min; 75 return min;
76 76
77} 77}
78
79static inline uint32_t EMPTY_SCAN_SIZE(uint32_t sector_size) {
80 if (sector_size < DEFAULT_EMPTY_SCAN_SIZE)
81 return sector_size;
82 else
83 return DEFAULT_EMPTY_SCAN_SIZE;
84}
85
78int jffs2_scan_medium(struct jffs2_sb_info *c) 86int jffs2_scan_medium(struct jffs2_sb_info *c)
79{ 87{
80 int i, ret; 88 int i, ret;
@@ -220,7 +228,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
220 c->dirty_size -= c->nextblock->dirty_size; 228 c->dirty_size -= c->nextblock->dirty_size;
221 c->nextblock->dirty_size = 0; 229 c->nextblock->dirty_size = 0;
222 } 230 }
223#if defined CONFIG_JFFS2_FS_NAND || defined CONFIG_JFFS2_FS_NOR_ECC 231#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
224 if (!jffs2_can_mark_obsolete(c) && c->nextblock && (c->nextblock->free_size & (c->wbuf_pagesize-1))) { 232 if (!jffs2_can_mark_obsolete(c) && c->nextblock && (c->nextblock->free_size & (c->wbuf_pagesize-1))) {
225 /* If we're going to start writing into a block which already 233 /* If we're going to start writing into a block which already
226 contains data, and the end of the data isn't page-aligned, 234 contains data, and the end of the data isn't page-aligned,
@@ -286,7 +294,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
286 uint32_t hdr_crc, buf_ofs, buf_len; 294 uint32_t hdr_crc, buf_ofs, buf_len;
287 int err; 295 int err;
288 int noise = 0; 296 int noise = 0;
289#ifdef CONFIG_JFFS2_FS_NAND 297#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
290 int cleanmarkerfound = 0; 298 int cleanmarkerfound = 0;
291#endif 299#endif
292 300
@@ -295,7 +303,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
295 303
296 D1(printk(KERN_DEBUG "jffs2_scan_eraseblock(): Scanning block at 0x%x\n", ofs)); 304 D1(printk(KERN_DEBUG "jffs2_scan_eraseblock(): Scanning block at 0x%x\n", ofs));
297 305
298#ifdef CONFIG_JFFS2_FS_NAND 306#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
299 if (jffs2_cleanmarker_oob(c)) { 307 if (jffs2_cleanmarker_oob(c)) {
300 int ret = jffs2_check_nand_cleanmarker(c, jeb); 308 int ret = jffs2_check_nand_cleanmarker(c, jeb);
301 D2(printk(KERN_NOTICE "jffs_check_nand_cleanmarker returned %d\n",ret)); 309 D2(printk(KERN_NOTICE "jffs_check_nand_cleanmarker returned %d\n",ret));
@@ -316,7 +324,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
316 if (!buf_size) { 324 if (!buf_size) {
317 buf_len = c->sector_size; 325 buf_len = c->sector_size;
318 } else { 326 } else {
319 buf_len = EMPTY_SCAN_SIZE; 327 buf_len = EMPTY_SCAN_SIZE(c->sector_size);
320 err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len); 328 err = jffs2_fill_scan_buf(c, buf, buf_ofs, buf_len);
321 if (err) 329 if (err)
322 return err; 330 return err;
@@ -326,11 +334,11 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
326 ofs = 0; 334 ofs = 0;
327 335
328 /* Scan only 4KiB of 0xFF before declaring it's empty */ 336 /* Scan only 4KiB of 0xFF before declaring it's empty */
329 while(ofs < EMPTY_SCAN_SIZE && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF) 337 while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
330 ofs += 4; 338 ofs += 4;
331 339
332 if (ofs == EMPTY_SCAN_SIZE) { 340 if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) {
333#ifdef CONFIG_JFFS2_FS_NAND 341#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
334 if (jffs2_cleanmarker_oob(c)) { 342 if (jffs2_cleanmarker_oob(c)) {
335 /* scan oob, take care of cleanmarker */ 343 /* scan oob, take care of cleanmarker */
336 int ret = jffs2_check_oob_empty(c, jeb, cleanmarkerfound); 344 int ret = jffs2_check_oob_empty(c, jeb, cleanmarkerfound);
@@ -343,7 +351,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
343 } 351 }
344#endif 352#endif
345 D1(printk(KERN_DEBUG "Block at 0x%08x is empty (erased)\n", jeb->offset)); 353 D1(printk(KERN_DEBUG "Block at 0x%08x is empty (erased)\n", jeb->offset));
346 return BLK_STATE_ALLFF; /* OK to erase if all blocks are like this */ 354 if (c->cleanmarker_size == 0)
355 return BLK_STATE_CLEANMARKER; /* don't bother with re-erase */
356 else
357 return BLK_STATE_ALLFF; /* OK to erase if all blocks are like this */
347 } 358 }
348 if (ofs) { 359 if (ofs) {
349 D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset, 360 D1(printk(KERN_DEBUG "Free space at %08x ends at %08x\n", jeb->offset,
@@ -422,8 +433,8 @@ scan_more:
422 /* If we're only checking the beginning of a block with a cleanmarker, 433 /* If we're only checking the beginning of a block with a cleanmarker,
423 bail now */ 434 bail now */
424 if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) && 435 if (buf_ofs == jeb->offset && jeb->used_size == PAD(c->cleanmarker_size) &&
425 c->cleanmarker_size && !jeb->dirty_size && !jeb->first_node->next_in_ino) { 436 c->cleanmarker_size && !jeb->dirty_size && !jeb->first_node->next_phys) {
426 D1(printk(KERN_DEBUG "%d bytes at start of block seems clean... assuming all clean\n", EMPTY_SCAN_SIZE)); 437 D1(printk(KERN_DEBUG "%d bytes at start of block seems clean... assuming all clean\n", EMPTY_SCAN_SIZE(c->sector_size)));
427 return BLK_STATE_CLEANMARKER; 438 return BLK_STATE_CLEANMARKER;
428 } 439 }
429 440
@@ -618,7 +629,7 @@ scan_more:
618 } 629 }
619 630
620 if ((jeb->used_size + jeb->unchecked_size) == PAD(c->cleanmarker_size) && !jeb->dirty_size 631 if ((jeb->used_size + jeb->unchecked_size) == PAD(c->cleanmarker_size) && !jeb->dirty_size
621 && (!jeb->first_node || !jeb->first_node->next_in_ino) ) 632 && (!jeb->first_node || !jeb->first_node->next_phys) )
622 return BLK_STATE_CLEANMARKER; 633 return BLK_STATE_CLEANMARKER;
623 634
624 /* move blocks with max 4 byte dirty space to cleanlist */ 635 /* move blocks with max 4 byte dirty space to cleanlist */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 6b2a441d2766..aaf9475cfb6a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: super.c,v 1.104 2004/11/23 15:37:31 gleixner Exp $ 10 * $Id: super.c,v 1.107 2005/07/12 16:37:08 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -140,6 +140,15 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type,
140 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n", 140 D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): New superblock for device %d (\"%s\")\n",
141 mtd->index, mtd->name)); 141 mtd->index, mtd->name));
142 142
143 /* Initialize JFFS2 superblock locks, the further initialization will be
144 * done later */
145 init_MUTEX(&c->alloc_sem);
146 init_MUTEX(&c->erase_free_sem);
147 init_waitqueue_head(&c->erase_wait);
148 init_waitqueue_head(&c->inocache_wq);
149 spin_lock_init(&c->erase_completion_lock);
150 spin_lock_init(&c->inocache_lock);
151
143 sb->s_op = &jffs2_super_operations; 152 sb->s_op = &jffs2_super_operations;
144 sb->s_flags = flags | MS_NOATIME; 153 sb->s_flags = flags | MS_NOATIME;
145 154
@@ -270,8 +279,6 @@ static void jffs2_put_super (struct super_block *sb)
270 279
271 D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); 280 D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
272 281
273 if (!(sb->s_flags & MS_RDONLY))
274 jffs2_stop_garbage_collect_thread(c);
275 down(&c->alloc_sem); 282 down(&c->alloc_sem);
276 jffs2_flush_wbuf_pad(c); 283 jffs2_flush_wbuf_pad(c);
277 up(&c->alloc_sem); 284 up(&c->alloc_sem);
@@ -292,6 +299,8 @@ static void jffs2_put_super (struct super_block *sb)
292static void jffs2_kill_sb(struct super_block *sb) 299static void jffs2_kill_sb(struct super_block *sb)
293{ 300{
294 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); 301 struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
302 if (!(sb->s_flags & MS_RDONLY))
303 jffs2_stop_garbage_collect_thread(c);
295 generic_shutdown_super(sb); 304 generic_shutdown_super(sb);
296 put_mtd_device(c->mtd); 305 put_mtd_device(c->mtd);
297 kfree(c); 306 kfree(c);
@@ -309,7 +318,7 @@ static int __init init_jffs2_fs(void)
309 int ret; 318 int ret;
310 319
311 printk(KERN_INFO "JFFS2 version 2.2." 320 printk(KERN_INFO "JFFS2 version 2.2."
312#ifdef CONFIG_JFFS2_FS_NAND 321#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
313 " (NAND)" 322 " (NAND)"
314#endif 323#endif
315 " (C) 2001-2003 Red Hat, Inc.\n"); 324 " (C) 2001-2003 Red Hat, Inc.\n");
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 7b1820d13712..82ef484f5e12 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: symlink.c,v 1.14 2004/11/16 20:36:12 dwmw2 Exp $ 10 * $Id: symlink.c,v 1.16 2005/03/01 10:50:48 dedekind Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -18,28 +18,48 @@
18#include <linux/namei.h> 18#include <linux/namei.h>
19#include "nodelist.h" 19#include "nodelist.h"
20 20
21static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd); 21static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
22static void jffs2_put_link(struct dentry *dentry, struct nameidata *nd);
23 22
24struct inode_operations jffs2_symlink_inode_operations = 23struct inode_operations jffs2_symlink_inode_operations =
25{ 24{
26 .readlink = generic_readlink, 25 .readlink = generic_readlink,
27 .follow_link = jffs2_follow_link, 26 .follow_link = jffs2_follow_link,
28 .put_link = jffs2_put_link,
29 .setattr = jffs2_setattr 27 .setattr = jffs2_setattr
30}; 28};
31 29
32static int jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) 30static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
33{ 31{
34 unsigned char *buf; 32 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
35 buf = jffs2_getlink(JFFS2_SB_INFO(dentry->d_inode->i_sb), JFFS2_INODE_INFO(dentry->d_inode)); 33 char *p = (char *)f->dents;
36 nd_set_link(nd, buf); 34
37 return 0; 35 /*
38} 36 * We don't acquire the f->sem mutex here since the only data we
37 * use is f->dents which in case of the symlink inode points to the
38 * symlink's target path.
39 *
40 * 1. If we are here the inode has already built and f->dents has
41 * to point to the target path.
42 * 2. Nobody uses f->dents (if the inode is symlink's inode). The
43 * exception is inode freeing function which frees f->dents. But
44 * it can't be called while we are here and before VFS has
45 * stopped using our f->dents string which we provide by means of
46 * nd_set_link() call.
47 */
48
49 if (!p) {
50 printk(KERN_ERR "jffs2_follow_link(): can't find symlink taerget\n");
51 p = ERR_PTR(-EIO);
52 } else {
53 D1(printk(KERN_DEBUG "jffs2_follow_link(): target path is '%s'\n", (char *) f->dents));
54 }
39 55
40static void jffs2_put_link(struct dentry *dentry, struct nameidata *nd) 56 nd_set_link(nd, p);
41{ 57
42 char *s = nd_get_link(nd); 58 /*
43 if (!IS_ERR(s)) 59 * We unlock the f->sem mutex but VFS will use the f->dents string. This is safe
44 kfree(s); 60 * since the only way that may cause f->dents to be changed is iput() operation.
61 * But VFS will not use f->dents after iput() has been called.
62 */
63 return NULL;
45} 64}
65
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index c8128069ecf0..996d922e503e 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * For licensing information, see the file 'LICENCE' in this directory. 10 * For licensing information, see the file 'LICENCE' in this directory.
11 * 11 *
12 * $Id: wbuf.c,v 1.82 2004/11/20 22:08:31 dwmw2 Exp $ 12 * $Id: wbuf.c,v 1.92 2005/04/05 12:51:54 dedekind Exp $
13 * 13 *
14 */ 14 */
15 15
@@ -83,7 +83,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino)
83 struct jffs2_inodirty *new; 83 struct jffs2_inodirty *new;
84 84
85 /* Mark the superblock dirty so that kupdated will flush... */ 85 /* Mark the superblock dirty so that kupdated will flush... */
86 OFNI_BS_2SFFJ(c)->s_dirt = 1; 86 jffs2_erase_pending_trigger(c);
87 87
88 if (jffs2_wbuf_pending_for_ino(c, ino)) 88 if (jffs2_wbuf_pending_for_ino(c, ino))
89 return; 89 return;
@@ -130,7 +130,10 @@ static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
130 } 130 }
131} 131}
132 132
133static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 133#define REFILE_NOTEMPTY 0
134#define REFILE_ANYWAY 1
135
136static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int allow_empty)
134{ 137{
135 D1(printk("About to refile bad block at %08x\n", jeb->offset)); 138 D1(printk("About to refile bad block at %08x\n", jeb->offset));
136 139
@@ -144,7 +147,7 @@ static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock
144 D1(printk("Refiling block at %08x to bad_used_list\n", jeb->offset)); 147 D1(printk("Refiling block at %08x to bad_used_list\n", jeb->offset));
145 list_add(&jeb->list, &c->bad_used_list); 148 list_add(&jeb->list, &c->bad_used_list);
146 } else { 149 } else {
147 BUG(); 150 BUG_ON(allow_empty == REFILE_NOTEMPTY);
148 /* It has to have had some nodes or we couldn't be here */ 151 /* It has to have had some nodes or we couldn't be here */
149 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset)); 152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset));
150 list_add(&jeb->list, &c->erase_pending_list); 153 list_add(&jeb->list, &c->erase_pending_list);
@@ -179,7 +182,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
179 182
180 jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; 183 jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
181 184
182 jffs2_block_refile(c, jeb); 185 jffs2_block_refile(c, jeb, REFILE_NOTEMPTY);
183 186
184 /* Find the first node to be recovered, by skipping over every 187 /* Find the first node to be recovered, by skipping over every
185 node which ends before the wbuf starts, or which is obsolete. */ 188 node which ends before the wbuf starts, or which is obsolete. */
@@ -264,17 +267,16 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
264 ret = jffs2_reserve_space_gc(c, end-start, &ofs, &len); 267 ret = jffs2_reserve_space_gc(c, end-start, &ofs, &len);
265 if (ret) { 268 if (ret) {
266 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n"); 269 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n");
267 if (buf) 270 kfree(buf);
268 kfree(buf);
269 return; 271 return;
270 } 272 }
271 if (end-start >= c->wbuf_pagesize) { 273 if (end-start >= c->wbuf_pagesize) {
272 /* Need to do another write immediately. This, btw, 274 /* Need to do another write immediately, but it's possible
273 means that we'll be writing from 'buf' and not from 275 that this is just because the wbuf itself is completely
274 the wbuf. Since if we're writing from the wbuf there 276 full, and there's nothing earlier read back from the
275 won't be more than a wbuf full of data, now will 277 flash. Hence 'buf' isn't necessarily what we're writing
276 there? :) */ 278 from. */
277 279 unsigned char *rewrite_buf = buf?:c->wbuf;
278 uint32_t towrite = (end-start) - ((end-start)%c->wbuf_pagesize); 280 uint32_t towrite = (end-start) - ((end-start)%c->wbuf_pagesize);
279 281
280 D1(printk(KERN_DEBUG "Write 0x%x bytes at 0x%08x in wbuf recover\n", 282 D1(printk(KERN_DEBUG "Write 0x%x bytes at 0x%08x in wbuf recover\n",
@@ -292,9 +294,9 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
292#endif 294#endif
293 if (jffs2_cleanmarker_oob(c)) 295 if (jffs2_cleanmarker_oob(c))
294 ret = c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen, 296 ret = c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen,
295 buf, NULL, c->oobinfo); 297 rewrite_buf, NULL, c->oobinfo);
296 else 298 else
297 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, buf); 299 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, rewrite_buf);
298 300
299 if (ret || retlen != towrite) { 301 if (ret || retlen != towrite) {
300 /* Argh. We tried. Really we did. */ 302 /* Argh. We tried. Really we did. */
@@ -321,10 +323,10 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
321 323
322 c->wbuf_len = (end - start) - towrite; 324 c->wbuf_len = (end - start) - towrite;
323 c->wbuf_ofs = ofs + towrite; 325 c->wbuf_ofs = ofs + towrite;
324 memcpy(c->wbuf, buf + towrite, c->wbuf_len); 326 memmove(c->wbuf, rewrite_buf + towrite, c->wbuf_len);
325 /* Don't muck about with c->wbuf_inodes. False positives are harmless. */ 327 /* Don't muck about with c->wbuf_inodes. False positives are harmless. */
326 328 if (buf)
327 kfree(buf); 329 kfree(buf);
328 } else { 330 } else {
329 /* OK, now we're left with the dregs in whichever buffer we're using */ 331 /* OK, now we're left with the dregs in whichever buffer we're using */
330 if (buf) { 332 if (buf) {
@@ -413,9 +415,9 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
413 int ret; 415 int ret;
414 size_t retlen; 416 size_t retlen;
415 417
416 /* Nothing to do if not NAND flash. In particular, we shouldn't 418 /* Nothing to do if not write-buffering the flash. In particular, we shouldn't
417 del_timer() the timer we never initialised. */ 419 del_timer() the timer we never initialised. */
418 if (jffs2_can_mark_obsolete(c)) 420 if (!jffs2_is_writebuffered(c))
419 return 0; 421 return 0;
420 422
421 if (!down_trylock(&c->alloc_sem)) { 423 if (!down_trylock(&c->alloc_sem)) {
@@ -424,7 +426,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
424 BUG(); 426 BUG();
425 } 427 }
426 428
427 if(!c->wbuf || !c->wbuf_len) 429 if (!c->wbuf_len) /* already checked c->wbuf above */
428 return 0; 430 return 0;
429 431
430 /* claim remaining space on the page 432 /* claim remaining space on the page
@@ -433,7 +435,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
433 if we have a switch to next page, we will not have 435 if we have a switch to next page, we will not have
434 enough remaining space for this. 436 enough remaining space for this.
435 */ 437 */
436 if (pad) { 438 if (pad && !jffs2_dataflash(c)) {
437 c->wbuf_len = PAD(c->wbuf_len); 439 c->wbuf_len = PAD(c->wbuf_len);
438 440
439 /* Pad with JFFS2_DIRTY_BITMASK initially. this helps out ECC'd NOR 441 /* Pad with JFFS2_DIRTY_BITMASK initially. this helps out ECC'd NOR
@@ -484,7 +486,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
484 spin_lock(&c->erase_completion_lock); 486 spin_lock(&c->erase_completion_lock);
485 487
486 /* Adjust free size of the block if we padded. */ 488 /* Adjust free size of the block if we padded. */
487 if (pad) { 489 if (pad && !jffs2_dataflash(c)) {
488 struct jffs2_eraseblock *jeb; 490 struct jffs2_eraseblock *jeb;
489 491
490 jeb = &c->blocks[c->wbuf_ofs / c->sector_size]; 492 jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
@@ -532,6 +534,9 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
532 534
533 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() called for ino #%u...\n", ino)); 535 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() called for ino #%u...\n", ino));
534 536
537 if (!c->wbuf)
538 return 0;
539
535 down(&c->alloc_sem); 540 down(&c->alloc_sem);
536 if (!jffs2_wbuf_pending_for_ino(c, ino)) { 541 if (!jffs2_wbuf_pending_for_ino(c, ino)) {
537 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino)); 542 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino));
@@ -547,6 +552,10 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
547 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() padding. Not finished checking\n")); 552 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() padding. Not finished checking\n"));
548 down_write(&c->wbuf_sem); 553 down_write(&c->wbuf_sem);
549 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING); 554 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
555 /* retry flushing wbuf in case jffs2_wbuf_recover
556 left some data in the wbuf */
557 if (ret)
558 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
550 up_write(&c->wbuf_sem); 559 up_write(&c->wbuf_sem);
551 } else while (old_wbuf_len && 560 } else while (old_wbuf_len &&
552 old_wbuf_ofs == c->wbuf_ofs) { 561 old_wbuf_ofs == c->wbuf_ofs) {
@@ -561,6 +570,10 @@ int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
561 down(&c->alloc_sem); 570 down(&c->alloc_sem);
562 down_write(&c->wbuf_sem); 571 down_write(&c->wbuf_sem);
563 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING); 572 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
573 /* retry flushing wbuf in case jffs2_wbuf_recover
574 left some data in the wbuf */
575 if (ret)
576 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
564 up_write(&c->wbuf_sem); 577 up_write(&c->wbuf_sem);
565 break; 578 break;
566 } 579 }
@@ -578,15 +591,27 @@ int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c)
578{ 591{
579 int ret; 592 int ret;
580 593
594 if (!c->wbuf)
595 return 0;
596
581 down_write(&c->wbuf_sem); 597 down_write(&c->wbuf_sem);
582 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT); 598 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
599 /* retry - maybe wbuf recover left some data in wbuf. */
600 if (ret)
601 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
583 up_write(&c->wbuf_sem); 602 up_write(&c->wbuf_sem);
584 603
585 return ret; 604 return ret;
586} 605}
587 606
607#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
608#define PAGE_DIV(x) ( ((unsigned long)(x) / (unsigned long)(c->wbuf_pagesize)) * (unsigned long)(c->wbuf_pagesize) )
609#define PAGE_MOD(x) ( (unsigned long)(x) % (unsigned long)(c->wbuf_pagesize) )
610#else
588#define PAGE_DIV(x) ( (x) & (~(c->wbuf_pagesize - 1)) ) 611#define PAGE_DIV(x) ( (x) & (~(c->wbuf_pagesize - 1)) )
589#define PAGE_MOD(x) ( (x) & (c->wbuf_pagesize - 1) ) 612#define PAGE_MOD(x) ( (x) & (c->wbuf_pagesize - 1) )
613#endif
614
590int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsigned long count, loff_t to, size_t *retlen, uint32_t ino) 615int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsigned long count, loff_t to, size_t *retlen, uint32_t ino)
591{ 616{
592 struct kvec outvecs[3]; 617 struct kvec outvecs[3];
@@ -601,7 +626,7 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig
601 uint32_t outvec_to = to; 626 uint32_t outvec_to = to;
602 627
603 /* If not NAND flash, don't bother */ 628 /* If not NAND flash, don't bother */
604 if (!c->wbuf) 629 if (!jffs2_is_writebuffered(c))
605 return jffs2_flash_direct_writev(c, invecs, count, to, retlen); 630 return jffs2_flash_direct_writev(c, invecs, count, to, retlen);
606 631
607 down_write(&c->wbuf_sem); 632 down_write(&c->wbuf_sem);
@@ -630,7 +655,7 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig
630 erase block. Anything else, and you die. 655 erase block. Anything else, and you die.
631 New block starts at xxx000c (0-b = block header) 656 New block starts at xxx000c (0-b = block header)
632 */ 657 */
633 if ( (to & ~(c->sector_size-1)) != (c->wbuf_ofs & ~(c->sector_size-1)) ) { 658 if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) {
634 /* It's a write to a new block */ 659 /* It's a write to a new block */
635 if (c->wbuf_len) { 660 if (c->wbuf_len) {
636 D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx causes flush of wbuf at 0x%08x\n", (unsigned long)to, c->wbuf_ofs)); 661 D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx causes flush of wbuf at 0x%08x\n", (unsigned long)to, c->wbuf_ofs));
@@ -762,9 +787,18 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsig
762 787
763 if (ret < 0 || wbuf_retlen != PAGE_DIV(totlen)) { 788 if (ret < 0 || wbuf_retlen != PAGE_DIV(totlen)) {
764 /* At this point we have no problem, 789 /* At this point we have no problem,
765 c->wbuf is empty. 790 c->wbuf is empty. However refile nextblock to avoid
791 writing again to same address.
766 */ 792 */
767 *retlen = donelen; 793 struct jffs2_eraseblock *jeb;
794
795 spin_lock(&c->erase_completion_lock);
796
797 jeb = &c->blocks[outvec_to / c->sector_size];
798 jffs2_block_refile(c, jeb, REFILE_ANYWAY);
799
800 *retlen = 0;
801 spin_unlock(&c->erase_completion_lock);
768 goto exit; 802 goto exit;
769 } 803 }
770 804
@@ -819,7 +853,7 @@ int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *r
819{ 853{
820 struct kvec vecs[1]; 854 struct kvec vecs[1];
821 855
822 if (jffs2_can_mark_obsolete(c)) 856 if (!jffs2_is_writebuffered(c))
823 return c->mtd->write(c->mtd, ofs, len, retlen, buf); 857 return c->mtd->write(c->mtd, ofs, len, retlen, buf);
824 858
825 vecs[0].iov_base = (unsigned char *) buf; 859 vecs[0].iov_base = (unsigned char *) buf;
@@ -835,39 +869,38 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
835 loff_t orbf = 0, owbf = 0, lwbf = 0; 869 loff_t orbf = 0, owbf = 0, lwbf = 0;
836 int ret; 870 int ret;
837 871
838 /* Read flash */ 872 if (!jffs2_is_writebuffered(c))
839 if (!jffs2_can_mark_obsolete(c)) {
840 down_read(&c->wbuf_sem);
841
842 if (jffs2_cleanmarker_oob(c))
843 ret = c->mtd->read_ecc(c->mtd, ofs, len, retlen, buf, NULL, c->oobinfo);
844 else
845 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf);
846
847 if ( (ret == -EBADMSG) && (*retlen == len) ) {
848 printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n",
849 len, ofs);
850 /*
851 * We have the raw data without ECC correction in the buffer, maybe
852 * we are lucky and all data or parts are correct. We check the node.
853 * If data are corrupted node check will sort it out.
854 * We keep this block, it will fail on write or erase and the we
855 * mark it bad. Or should we do that now? But we should give him a chance.
856 * Maybe we had a system crash or power loss before the ecc write or
857 * a erase was completed.
858 * So we return success. :)
859 */
860 ret = 0;
861 }
862 } else
863 return c->mtd->read(c->mtd, ofs, len, retlen, buf); 873 return c->mtd->read(c->mtd, ofs, len, retlen, buf);
864 874
875 /* Read flash */
876 down_read(&c->wbuf_sem);
877 if (jffs2_cleanmarker_oob(c))
878 ret = c->mtd->read_ecc(c->mtd, ofs, len, retlen, buf, NULL, c->oobinfo);
879 else
880 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf);
881
882 if ( (ret == -EBADMSG) && (*retlen == len) ) {
883 printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n",
884 len, ofs);
885 /*
886 * We have the raw data without ECC correction in the buffer, maybe
887 * we are lucky and all data or parts are correct. We check the node.
888 * If data are corrupted node check will sort it out.
889 * We keep this block, it will fail on write or erase and the we
890 * mark it bad. Or should we do that now? But we should give him a chance.
891 * Maybe we had a system crash or power loss before the ecc write or
892 * a erase was completed.
893 * So we return success. :)
894 */
895 ret = 0;
896 }
897
865 /* if no writebuffer available or write buffer empty, return */ 898 /* if no writebuffer available or write buffer empty, return */
866 if (!c->wbuf_pagesize || !c->wbuf_len) 899 if (!c->wbuf_pagesize || !c->wbuf_len)
867 goto exit; 900 goto exit;
868 901
869 /* if we read in a different block, return */ 902 /* if we read in a different block, return */
870 if ( (ofs & ~(c->sector_size-1)) != (c->wbuf_ofs & ~(c->sector_size-1)) ) 903 if (SECTOR_ADDR(ofs) != SECTOR_ADDR(c->wbuf_ofs))
871 goto exit; 904 goto exit;
872 905
873 if (ofs >= c->wbuf_ofs) { 906 if (ofs >= c->wbuf_ofs) {
@@ -1161,7 +1194,27 @@ void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c)
1161 kfree(c->wbuf); 1194 kfree(c->wbuf);
1162} 1195}
1163 1196
1164#ifdef CONFIG_JFFS2_FS_NOR_ECC 1197int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1198 c->cleanmarker_size = 0; /* No cleanmarkers needed */
1199
1200 /* Initialize write buffer */
1201 init_rwsem(&c->wbuf_sem);
1202 c->wbuf_pagesize = c->sector_size;
1203 c->wbuf_ofs = 0xFFFFFFFF;
1204
1205 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1206 if (!c->wbuf)
1207 return -ENOMEM;
1208
1209 printk(KERN_INFO "JFFS2 write-buffering enabled (%i)\n", c->wbuf_pagesize);
1210
1211 return 0;
1212}
1213
1214void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) {
1215 kfree(c->wbuf);
1216}
1217
1165int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) { 1218int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) {
1166 /* Cleanmarker is actually larger on the flashes */ 1219 /* Cleanmarker is actually larger on the flashes */
1167 c->cleanmarker_size = 16; 1220 c->cleanmarker_size = 16;
@@ -1181,4 +1234,3 @@ int jffs2_nor_ecc_flash_setup(struct jffs2_sb_info *c) {
1181void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c) { 1234void jffs2_nor_ecc_flash_cleanup(struct jffs2_sb_info *c) {
1182 kfree(c->wbuf); 1235 kfree(c->wbuf);
1183} 1236}
1184#endif
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 80a5db542629..69100615d9ae 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -7,7 +7,7 @@
7 * 7 *
8 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
9 * 9 *
10 * $Id: write.c,v 1.87 2004/11/16 20:36:12 dwmw2 Exp $ 10 * $Id: write.c,v 1.92 2005/04/13 13:22:35 dwmw2 Exp $
11 * 11 *
12 */ 12 */
13 13
@@ -35,13 +35,12 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
35 f->inocache = ic; 35 f->inocache = ic;
36 f->inocache->nlink = 1; 36 f->inocache->nlink = 1;
37 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 37 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
38 f->inocache->ino = ++c->highest_ino;
39 f->inocache->state = INO_STATE_PRESENT; 38 f->inocache->state = INO_STATE_PRESENT;
40 39
41 ri->ino = cpu_to_je32(f->inocache->ino);
42 40
43 D1(printk(KERN_DEBUG "jffs2_do_new_inode(): Assigned ino# %d\n", f->inocache->ino));
44 jffs2_add_ino_cache(c, f->inocache); 41 jffs2_add_ino_cache(c, f->inocache);
42 D1(printk(KERN_DEBUG "jffs2_do_new_inode(): Assigned ino# %d\n", f->inocache->ino));
43 ri->ino = cpu_to_je32(f->inocache->ino);
45 44
46 ri->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 45 ri->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
47 ri->nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE); 46 ri->nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
@@ -136,6 +135,15 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
136 raw->__totlen = PAD(sizeof(*ri)+datalen); 135 raw->__totlen = PAD(sizeof(*ri)+datalen);
137 raw->next_phys = NULL; 136 raw->next_phys = NULL;
138 137
138 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(ri->version) < f->highest_version)) {
139 BUG_ON(!retried);
140 D1(printk(KERN_DEBUG "jffs2_write_dnode : dnode_version %d, "
141 "highest version %d -> updating dnode\n",
142 je32_to_cpu(ri->version), f->highest_version));
143 ri->version = cpu_to_je32(++f->highest_version);
144 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
145 }
146
139 ret = jffs2_flash_writev(c, vecs, cnt, flash_ofs, &retlen, 147 ret = jffs2_flash_writev(c, vecs, cnt, flash_ofs, &retlen,
140 (alloc_mode==ALLOC_GC)?0:f->inocache->ino); 148 (alloc_mode==ALLOC_GC)?0:f->inocache->ino);
141 149
@@ -280,6 +288,16 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
280 raw->__totlen = PAD(sizeof(*rd)+namelen); 288 raw->__totlen = PAD(sizeof(*rd)+namelen);
281 raw->next_phys = NULL; 289 raw->next_phys = NULL;
282 290
291 if ((alloc_mode!=ALLOC_GC) && (je32_to_cpu(rd->version) < f->highest_version)) {
292 BUG_ON(!retried);
293 D1(printk(KERN_DEBUG "jffs2_write_dirent : dirent_version %d, "
294 "highest version %d -> updating dirent\n",
295 je32_to_cpu(rd->version), f->highest_version));
296 rd->version = cpu_to_je32(++f->highest_version);
297 fd->version = je32_to_cpu(rd->version);
298 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
299 }
300
283 ret = jffs2_flash_writev(c, vecs, 2, flash_ofs, &retlen, 301 ret = jffs2_flash_writev(c, vecs, 2, flash_ofs, &retlen,
284 (alloc_mode==ALLOC_GC)?0:je32_to_cpu(rd->pino)); 302 (alloc_mode==ALLOC_GC)?0:je32_to_cpu(rd->pino));
285 if (ret || (retlen != sizeof(*rd) + namelen)) { 303 if (ret || (retlen != sizeof(*rd) + namelen)) {
@@ -625,20 +643,23 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
625 643
626 down(&dead_f->sem); 644 down(&dead_f->sem);
627 645
628 while (dead_f->dents) { 646 if (S_ISDIR(OFNI_EDONI_2SFFJ(dead_f)->i_mode)) {
629 /* There can be only deleted ones */ 647 while (dead_f->dents) {
630 fd = dead_f->dents; 648 /* There can be only deleted ones */
631 649 fd = dead_f->dents;
632 dead_f->dents = fd->next; 650
633 651 dead_f->dents = fd->next;
634 if (fd->ino) { 652
635 printk(KERN_WARNING "Deleting inode #%u with active dentry \"%s\"->ino #%u\n", 653 if (fd->ino) {
636 dead_f->inocache->ino, fd->name, fd->ino); 654 printk(KERN_WARNING "Deleting inode #%u with active dentry \"%s\"->ino #%u\n",
637 } else { 655 dead_f->inocache->ino, fd->name, fd->ino);
638 D1(printk(KERN_DEBUG "Removing deletion dirent for \"%s\" from dir ino #%u\n", fd->name, dead_f->inocache->ino)); 656 } else {
657 D1(printk(KERN_DEBUG "Removing deletion dirent for \"%s\" from dir ino #%u\n",
658 fd->name, dead_f->inocache->ino));
659 }
660 jffs2_mark_node_obsolete(c, fd->raw);
661 jffs2_free_full_dirent(fd);
639 } 662 }
640 jffs2_mark_node_obsolete(c, fd->raw);
641 jffs2_free_full_dirent(fd);
642 } 663 }
643 664
644 dead_f->inocache->nlink--; 665 dead_f->inocache->nlink--;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 30a2bf9eeda5..e892dab40c26 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -21,6 +21,7 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/quotaops.h> 23#include <linux/quotaops.h>
24#include <linux/posix_acl_xattr.h>
24#include "jfs_incore.h" 25#include "jfs_incore.h"
25#include "jfs_xattr.h" 26#include "jfs_xattr.h"
26#include "jfs_acl.h" 27#include "jfs_acl.h"
@@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
36 37
37 switch(type) { 38 switch(type) {
38 case ACL_TYPE_ACCESS: 39 case ACL_TYPE_ACCESS:
39 ea_name = XATTR_NAME_ACL_ACCESS; 40 ea_name = POSIX_ACL_XATTR_ACCESS;
40 p_acl = &ji->i_acl; 41 p_acl = &ji->i_acl;
41 break; 42 break;
42 case ACL_TYPE_DEFAULT: 43 case ACL_TYPE_DEFAULT:
43 ea_name = XATTR_NAME_ACL_DEFAULT; 44 ea_name = POSIX_ACL_XATTR_DEFAULT;
44 p_acl = &ji->i_default_acl; 45 p_acl = &ji->i_default_acl;
45 break; 46 break;
46 default: 47 default:
@@ -88,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
88 89
89 switch(type) { 90 switch(type) {
90 case ACL_TYPE_ACCESS: 91 case ACL_TYPE_ACCESS:
91 ea_name = XATTR_NAME_ACL_ACCESS; 92 ea_name = POSIX_ACL_XATTR_ACCESS;
92 p_acl = &ji->i_acl; 93 p_acl = &ji->i_acl;
93 break; 94 break;
94 case ACL_TYPE_DEFAULT: 95 case ACL_TYPE_DEFAULT:
95 ea_name = XATTR_NAME_ACL_DEFAULT; 96 ea_name = POSIX_ACL_XATTR_DEFAULT;
96 p_acl = &ji->i_default_acl; 97 p_acl = &ji->i_default_acl;
97 if (!S_ISDIR(inode->i_mode)) 98 if (!S_ISDIR(inode->i_mode))
98 return acl ? -EACCES : 0; 99 return acl ? -EACCES : 0;
@@ -101,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
101 return -EINVAL; 102 return -EINVAL;
102 } 103 }
103 if (acl) { 104 if (acl) {
104 size = xattr_acl_size(acl->a_count); 105 size = posix_acl_xattr_size(acl->a_count);
105 value = kmalloc(size, GFP_KERNEL); 106 value = kmalloc(size, GFP_KERNEL);
106 if (!value) 107 if (!value)
107 return -ENOMEM; 108 return -ENOMEM;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 2137138c59b0..767c7ecb429e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -128,6 +128,10 @@ void jfs_delete_inode(struct inode *inode)
128{ 128{
129 jfs_info("In jfs_delete_inode, inode = 0x%p", inode); 129 jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
130 130
131 if (is_bad_inode(inode) ||
132 (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
133 return;
134
131 if (test_cflag(COMMIT_Freewmap, inode)) 135 if (test_cflag(COMMIT_Freewmap, inode))
132 jfs_free_zero_link(inode); 136 jfs_free_zero_link(inode);
133 137
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index d2ae430adecf..a3acd3eec059 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,8 +20,6 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23#include <linux/xattr_acl.h>
24
25int jfs_permission(struct inode *, int, struct nameidata *); 23int jfs_permission(struct inode *, int, struct nameidata *);
26int jfs_init_acl(struct inode *, struct inode *); 24int jfs_init_acl(struct inode *, struct inode *);
27int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index cced2fed9d0f..c739626f5bf1 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -26,36 +26,6 @@
26#include "jfs_debug.h" 26#include "jfs_debug.h"
27 27
28/* 28/*
29 * Debug code for double-checking block map
30 */
31/* #define _JFS_DEBUG_DMAP 1 */
32
33#ifdef _JFS_DEBUG_DMAP
34#define DBINITMAP(size,ipbmap,results) \
35 DBinitmap(size,ipbmap,results)
36#define DBALLOC(dbmap,mapsize,blkno,nblocks) \
37 DBAlloc(dbmap,mapsize,blkno,nblocks)
38#define DBFREE(dbmap,mapsize,blkno,nblocks) \
39 DBFree(dbmap,mapsize,blkno,nblocks)
40#define DBALLOCCK(dbmap,mapsize,blkno,nblocks) \
41 DBAllocCK(dbmap,mapsize,blkno,nblocks)
42#define DBFREECK(dbmap,mapsize,blkno,nblocks) \
43 DBFreeCK(dbmap,mapsize,blkno,nblocks)
44
45static void DBinitmap(s64, struct inode *, u32 **);
46static void DBAlloc(uint *, s64, s64, s64);
47static void DBFree(uint *, s64, s64, s64);
48static void DBAllocCK(uint *, s64, s64, s64);
49static void DBFreeCK(uint *, s64, s64, s64);
50#else
51#define DBINITMAP(size,ipbmap,results)
52#define DBALLOC(dbmap, mapsize, blkno, nblocks)
53#define DBFREE(dbmap, mapsize, blkno, nblocks)
54#define DBALLOCCK(dbmap, mapsize, blkno, nblocks)
55#define DBFREECK(dbmap, mapsize, blkno, nblocks)
56#endif /* _JFS_DEBUG_DMAP */
57
58/*
59 * SERIALIZATION of the Block Allocation Map. 29 * SERIALIZATION of the Block Allocation Map.
60 * 30 *
61 * the working state of the block allocation map is accessed in 31 * the working state of the block allocation map is accessed in
@@ -105,7 +75,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
105 int nblocks); 75 int nblocks);
106static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval); 76static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
107static void dbBackSplit(dmtree_t * tp, int leafno); 77static void dbBackSplit(dmtree_t * tp, int leafno);
108static void dbJoin(dmtree_t * tp, int leafno, int newval); 78static int dbJoin(dmtree_t * tp, int leafno, int newval);
109static void dbAdjTree(dmtree_t * tp, int leafno, int newval); 79static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
110static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, 80static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
111 int level); 81 int level);
@@ -128,8 +98,8 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
128static int dbFindBits(u32 word, int l2nb); 98static int dbFindBits(u32 word, int l2nb);
129static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); 99static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
130static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); 100static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
131static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 101static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
132 int nblocks); 102 int nblocks);
133static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 103static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
134 int nblocks); 104 int nblocks);
135static int dbMaxBud(u8 * cp); 105static int dbMaxBud(u8 * cp);
@@ -242,7 +212,6 @@ int dbMount(struct inode *ipbmap)
242 JFS_SBI(ipbmap->i_sb)->bmap = bmp; 212 JFS_SBI(ipbmap->i_sb)->bmap = bmp;
243 213
244 memset(bmp->db_active, 0, sizeof(bmp->db_active)); 214 memset(bmp->db_active, 0, sizeof(bmp->db_active));
245 DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap);
246 215
247 /* 216 /*
248 * allocate/initialize the bmap lock 217 * allocate/initialize the bmap lock
@@ -407,16 +376,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
407 */ 376 */
408 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 377 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
409 378
410 DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
411
412 /* free the blocks. */ 379 /* free the blocks. */
413 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { 380 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
381 jfs_error(ip->i_sb, "dbFree: error in block map\n");
414 release_metapage(mp); 382 release_metapage(mp);
415 IREAD_UNLOCK(ipbmap); 383 IREAD_UNLOCK(ipbmap);
416 return (rc); 384 return (rc);
417 } 385 }
418
419 DBFREE(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
420 } 386 }
421 387
422 /* write the last buffer. */ 388 /* write the last buffer. */
@@ -775,10 +741,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
775 IWRITE_LOCK(ipbmap); 741 IWRITE_LOCK(ipbmap);
776 742
777 rc = dbAllocAny(bmp, nblocks, l2nb, results); 743 rc = dbAllocAny(bmp, nblocks, l2nb, results);
778 if (rc == 0) {
779 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results,
780 nblocks);
781 }
782 744
783 goto write_unlock; 745 goto write_unlock;
784 } 746 }
@@ -836,8 +798,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
836 != -ENOSPC) { 798 != -ENOSPC) {
837 if (rc == 0) { 799 if (rc == 0) {
838 *results = blkno; 800 *results = blkno;
839 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
840 *results, nblocks);
841 mark_metapage_dirty(mp); 801 mark_metapage_dirty(mp);
842 } 802 }
843 803
@@ -863,11 +823,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
863 if ((rc = 823 if ((rc =
864 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results)) 824 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
865 != -ENOSPC) { 825 != -ENOSPC) {
866 if (rc == 0) { 826 if (rc == 0)
867 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
868 *results, nblocks);
869 mark_metapage_dirty(mp); 827 mark_metapage_dirty(mp);
870 }
871 828
872 release_metapage(mp); 829 release_metapage(mp);
873 goto read_unlock; 830 goto read_unlock;
@@ -878,11 +835,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
878 */ 835 */
879 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) 836 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
880 != -ENOSPC) { 837 != -ENOSPC) {
881 if (rc == 0) { 838 if (rc == 0)
882 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
883 *results, nblocks);
884 mark_metapage_dirty(mp); 839 mark_metapage_dirty(mp);
885 }
886 840
887 release_metapage(mp); 841 release_metapage(mp);
888 goto read_unlock; 842 goto read_unlock;
@@ -896,13 +850,9 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
896 * the same allocation group as the hint. 850 * the same allocation group as the hint.
897 */ 851 */
898 IWRITE_LOCK(ipbmap); 852 IWRITE_LOCK(ipbmap);
899 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) 853 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
900 != -ENOSPC) {
901 if (rc == 0)
902 DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
903 *results, nblocks);
904 goto write_unlock; 854 goto write_unlock;
905 } 855
906 IWRITE_UNLOCK(ipbmap); 856 IWRITE_UNLOCK(ipbmap);
907 857
908 858
@@ -918,9 +868,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
918 */ 868 */
919 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC) 869 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
920 rc = dbAllocAny(bmp, nblocks, l2nb, results); 870 rc = dbAllocAny(bmp, nblocks, l2nb, results);
921 if (rc == 0) {
922 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, nblocks);
923 }
924 871
925 write_unlock: 872 write_unlock:
926 IWRITE_UNLOCK(ipbmap); 873 IWRITE_UNLOCK(ipbmap);
@@ -992,10 +939,9 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
992 939
993 IREAD_UNLOCK(ipbmap); 940 IREAD_UNLOCK(ipbmap);
994 941
995 if (rc == 0) { 942 if (rc == 0)
996 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
997 mark_metapage_dirty(mp); 943 mark_metapage_dirty(mp);
998 } 944
999 release_metapage(mp); 945 release_metapage(mp);
1000 946
1001 return (rc); 947 return (rc);
@@ -1144,7 +1090,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1144 return -EIO; 1090 return -EIO;
1145 } 1091 }
1146 1092
1147 DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
1148 dp = (struct dmap *) mp->data; 1093 dp = (struct dmap *) mp->data;
1149 1094
1150 /* try to allocate the blocks immediately following the 1095 /* try to allocate the blocks immediately following the
@@ -1155,11 +1100,9 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1155 IREAD_UNLOCK(ipbmap); 1100 IREAD_UNLOCK(ipbmap);
1156 1101
1157 /* were we successful ? */ 1102 /* were we successful ? */
1158 if (rc == 0) { 1103 if (rc == 0)
1159 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, extblkno,
1160 addnblocks);
1161 write_metapage(mp); 1104 write_metapage(mp);
1162 } else 1105 else
1163 /* we were not successful */ 1106 /* we were not successful */
1164 release_metapage(mp); 1107 release_metapage(mp);
1165 1108
@@ -2078,7 +2021,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2078 int nblocks) 2021 int nblocks)
2079{ 2022{
2080 s8 oldroot; 2023 s8 oldroot;
2081 int rc, word; 2024 int rc = 0, word;
2082 2025
2083 /* save the current value of the root (i.e. maximum free string) 2026 /* save the current value of the root (i.e. maximum free string)
2084 * of the dmap tree. 2027 * of the dmap tree.
@@ -2086,11 +2029,11 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2086 oldroot = dp->tree.stree[ROOT]; 2029 oldroot = dp->tree.stree[ROOT];
2087 2030
2088 /* free the specified (blocks) bits */ 2031 /* free the specified (blocks) bits */
2089 dbFreeBits(bmp, dp, blkno, nblocks); 2032 rc = dbFreeBits(bmp, dp, blkno, nblocks);
2090 2033
2091 /* if the root has not changed, done. */ 2034 /* if error or the root has not changed, done. */
2092 if (dp->tree.stree[ROOT] == oldroot) 2035 if (rc || (dp->tree.stree[ROOT] == oldroot))
2093 return (0); 2036 return (rc);
2094 2037
2095 /* root changed. bubble the change up to the dmap control pages. 2038 /* root changed. bubble the change up to the dmap control pages.
2096 * if the adjustment of the upper level control pages fails, 2039 * if the adjustment of the upper level control pages fails,
@@ -2279,15 +2222,16 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2279 * blkno - starting block number of the bits to be freed. 2222 * blkno - starting block number of the bits to be freed.
2280 * nblocks - number of bits to be freed. 2223 * nblocks - number of bits to be freed.
2281 * 2224 *
2282 * RETURN VALUES: none 2225 * RETURN VALUES: 0 for success
2283 * 2226 *
2284 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 2227 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2285 */ 2228 */
2286static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 2229static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2287 int nblocks) 2230 int nblocks)
2288{ 2231{
2289 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; 2232 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2290 dmtree_t *tp = (dmtree_t *) & dp->tree; 2233 dmtree_t *tp = (dmtree_t *) & dp->tree;
2234 int rc = 0;
2291 int size; 2235 int size;
2292 2236
2293 /* determine the bit number and word within the dmap of the 2237 /* determine the bit number and word within the dmap of the
@@ -2336,8 +2280,10 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2336 2280
2337 /* update the leaf for this dmap word. 2281 /* update the leaf for this dmap word.
2338 */ 2282 */
2339 dbJoin(tp, word, 2283 rc = dbJoin(tp, word,
2340 dbMaxBud((u8 *) & dp->wmap[word])); 2284 dbMaxBud((u8 *) & dp->wmap[word]));
2285 if (rc)
2286 return rc;
2341 2287
2342 word += 1; 2288 word += 1;
2343 } else { 2289 } else {
@@ -2368,7 +2314,9 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2368 2314
2369 /* update the leaf. 2315 /* update the leaf.
2370 */ 2316 */
2371 dbJoin(tp, word, size); 2317 rc = dbJoin(tp, word, size);
2318 if (rc)
2319 return rc;
2372 2320
2373 /* get the number of dmap words handled. 2321 /* get the number of dmap words handled.
2374 */ 2322 */
@@ -2415,6 +2363,8 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2415 } 2363 }
2416 2364
2417 BMAP_UNLOCK(bmp); 2365 BMAP_UNLOCK(bmp);
2366
2367 return 0;
2418} 2368}
2419 2369
2420 2370
@@ -2522,7 +2472,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2522 } 2472 }
2523 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); 2473 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
2524 } else { 2474 } else {
2525 dbJoin((dmtree_t *) dcp, leafno, newval); 2475 rc = dbJoin((dmtree_t *) dcp, leafno, newval);
2476 if (rc)
2477 return rc;
2526 } 2478 }
2527 2479
2528 /* check if the root of the current dmap control page changed due 2480 /* check if the root of the current dmap control page changed due
@@ -2747,7 +2699,7 @@ static void dbBackSplit(dmtree_t * tp, int leafno)
2747 * 2699 *
2748 * RETURN VALUES: none 2700 * RETURN VALUES: none
2749 */ 2701 */
2750static void dbJoin(dmtree_t * tp, int leafno, int newval) 2702static int dbJoin(dmtree_t * tp, int leafno, int newval)
2751{ 2703{
2752 int budsz, buddy; 2704 int budsz, buddy;
2753 s8 *leaf; 2705 s8 *leaf;
@@ -2787,7 +2739,9 @@ static void dbJoin(dmtree_t * tp, int leafno, int newval)
2787 if (newval > leaf[buddy]) 2739 if (newval > leaf[buddy])
2788 break; 2740 break;
2789 2741
2790 assert(newval == leaf[buddy]); 2742 /* It shouldn't be less */
2743 if (newval < leaf[buddy])
2744 return -EIO;
2791 2745
2792 /* check which (leafno or buddy) is the left buddy. 2746 /* check which (leafno or buddy) is the left buddy.
2793 * the left buddy gets to claim the blocks resulting 2747 * the left buddy gets to claim the blocks resulting
@@ -2819,6 +2773,8 @@ static void dbJoin(dmtree_t * tp, int leafno, int newval)
2819 /* update the leaf value. 2773 /* update the leaf value.
2820 */ 2774 */
2821 dbAdjTree(tp, leafno, newval); 2775 dbAdjTree(tp, leafno, newval);
2776
2777 return 0;
2822} 2778}
2823 2779
2824 2780
@@ -3185,16 +3141,12 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3185 */ 3141 */
3186 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 3142 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
3187 3143
3188 DBFREECK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
3189
3190 /* allocate the blocks. */ 3144 /* allocate the blocks. */
3191 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) { 3145 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
3192 release_metapage(mp); 3146 release_metapage(mp);
3193 IREAD_UNLOCK(ipbmap); 3147 IREAD_UNLOCK(ipbmap);
3194 return (rc); 3148 return (rc);
3195 } 3149 }
3196
3197 DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
3198 } 3150 }
3199 3151
3200 /* write the last buffer. */ 3152 /* write the last buffer. */
@@ -4041,223 +3993,3 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
4041 3993
4042 return (nblocks); 3994 return (nblocks);
4043} 3995}
4044
4045
4046#ifdef _JFS_DEBUG_DMAP
4047/*
4048 * DBinitmap()
4049 */
4050static void DBinitmap(s64 size, struct inode *ipbmap, u32 ** results)
4051{
4052 int npages;
4053 u32 *dbmap, *d;
4054 int n;
4055 s64 lblkno, cur_block;
4056 struct dmap *dp;
4057 struct metapage *mp;
4058
4059 npages = size / 32768;
4060 npages += (size % 32768) ? 1 : 0;
4061
4062 dbmap = (u32 *) xmalloc(npages * 4096, L2PSIZE, kernel_heap);
4063 if (dbmap == NULL)
4064 BUG(); /* Not robust since this is only unused debug code */
4065
4066 for (n = 0, d = dbmap; n < npages; n++, d += 1024)
4067 bzero(d, 4096);
4068
4069 /* Need to initialize from disk map pages
4070 */
4071 for (d = dbmap, cur_block = 0; cur_block < size;
4072 cur_block += BPERDMAP, d += LPERDMAP) {
4073 lblkno = BLKTODMAP(cur_block,
4074 JFS_SBI(ipbmap->i_sb)->bmap->
4075 db_l2nbperpage);
4076 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
4077 if (mp == NULL) {
4078 jfs_error(ipbmap->i_sb,
4079 "DBinitmap: could not read disk map page");
4080 continue;
4081 }
4082 dp = (struct dmap *) mp->data;
4083
4084 for (n = 0; n < LPERDMAP; n++)
4085 d[n] = le32_to_cpu(dp->wmap[n]);
4086
4087 release_metapage(mp);
4088 }
4089
4090 *results = dbmap;
4091}
4092
4093
4094/*
4095 * DBAlloc()
4096 */
4097void DBAlloc(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4098{
4099 int word, nb, bitno;
4100 u32 mask;
4101
4102 assert(blkno > 0 && blkno < mapsize);
4103 assert(nblocks > 0 && nblocks <= mapsize);
4104
4105 assert(blkno + nblocks <= mapsize);
4106
4107 dbmap += (blkno / 32);
4108 while (nblocks > 0) {
4109 bitno = blkno & (32 - 1);
4110 nb = min(nblocks, 32 - bitno);
4111
4112 mask = (0xffffffff << (32 - nb) >> bitno);
4113 assert((mask & *dbmap) == 0);
4114 *dbmap |= mask;
4115
4116 dbmap++;
4117 blkno += nb;
4118 nblocks -= nb;
4119 }
4120}
4121
4122
4123/*
4124 * DBFree()
4125 */
4126static void DBFree(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4127{
4128 int word, nb, bitno;
4129 u32 mask;
4130
4131 assert(blkno > 0 && blkno < mapsize);
4132 assert(nblocks > 0 && nblocks <= mapsize);
4133
4134 assert(blkno + nblocks <= mapsize);
4135
4136 dbmap += (blkno / 32);
4137 while (nblocks > 0) {
4138 bitno = blkno & (32 - 1);
4139 nb = min(nblocks, 32 - bitno);
4140
4141 mask = (0xffffffff << (32 - nb) >> bitno);
4142 assert((mask & *dbmap) == mask);
4143 *dbmap &= ~mask;
4144
4145 dbmap++;
4146 blkno += nb;
4147 nblocks -= nb;
4148 }
4149}
4150
4151
4152/*
4153 * DBAllocCK()
4154 */
4155static void DBAllocCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4156{
4157 int word, nb, bitno;
4158 u32 mask;
4159
4160 assert(blkno > 0 && blkno < mapsize);
4161 assert(nblocks > 0 && nblocks <= mapsize);
4162
4163 assert(blkno + nblocks <= mapsize);
4164
4165 dbmap += (blkno / 32);
4166 while (nblocks > 0) {
4167 bitno = blkno & (32 - 1);
4168 nb = min(nblocks, 32 - bitno);
4169
4170 mask = (0xffffffff << (32 - nb) >> bitno);
4171 assert((mask & *dbmap) == mask);
4172
4173 dbmap++;
4174 blkno += nb;
4175 nblocks -= nb;
4176 }
4177}
4178
4179
4180/*
4181 * DBFreeCK()
4182 */
4183static void DBFreeCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks)
4184{
4185 int word, nb, bitno;
4186 u32 mask;
4187
4188 assert(blkno > 0 && blkno < mapsize);
4189 assert(nblocks > 0 && nblocks <= mapsize);
4190
4191 assert(blkno + nblocks <= mapsize);
4192
4193 dbmap += (blkno / 32);
4194 while (nblocks > 0) {
4195 bitno = blkno & (32 - 1);
4196 nb = min(nblocks, 32 - bitno);
4197
4198 mask = (0xffffffff << (32 - nb) >> bitno);
4199 assert((mask & *dbmap) == 0);
4200
4201 dbmap++;
4202 blkno += nb;
4203 nblocks -= nb;
4204 }
4205}
4206
4207
4208/*
4209 * dbPrtMap()
4210 */
4211static void dbPrtMap(struct bmap * bmp)
4212{
4213 printk(" mapsize: %d%d\n", bmp->db_mapsize);
4214 printk(" nfree: %d%d\n", bmp->db_nfree);
4215 printk(" numag: %d\n", bmp->db_numag);
4216 printk(" agsize: %d%d\n", bmp->db_agsize);
4217 printk(" agl2size: %d\n", bmp->db_agl2size);
4218 printk(" agwidth: %d\n", bmp->db_agwidth);
4219 printk(" agstart: %d\n", bmp->db_agstart);
4220 printk(" agheigth: %d\n", bmp->db_agheigth);
4221 printk(" aglevel: %d\n", bmp->db_aglevel);
4222 printk(" maxlevel: %d\n", bmp->db_maxlevel);
4223 printk(" maxag: %d\n", bmp->db_maxag);
4224 printk(" agpref: %d\n", bmp->db_agpref);
4225 printk(" l2nbppg: %d\n", bmp->db_l2nbperpage);
4226}
4227
4228
4229/*
4230 * dbPrtCtl()
4231 */
4232static void dbPrtCtl(struct dmapctl * dcp)
4233{
4234 int i, j, n;
4235
4236 printk(" height: %08x\n", le32_to_cpu(dcp->height));
4237 printk(" leafidx: %08x\n", le32_to_cpu(dcp->leafidx));
4238 printk(" budmin: %08x\n", dcp->budmin);
4239 printk(" nleafs: %08x\n", le32_to_cpu(dcp->nleafs));
4240 printk(" l2nleafs: %08x\n", le32_to_cpu(dcp->l2nleafs));
4241
4242 printk("\n Tree:\n");
4243 for (i = 0; i < CTLLEAFIND; i += 8) {
4244 n = min(8, CTLLEAFIND - i);
4245
4246 for (j = 0; j < n; j++)
4247 printf(" [%03x]: %02x", i + j,
4248 (char) dcp->stree[i + j]);
4249 printf("\n");
4250 }
4251
4252 printk("\n Tree Leaves:\n");
4253 for (i = 0; i < LPERCTL; i += 8) {
4254 n = min(8, LPERCTL - i);
4255
4256 for (j = 0; j < n; j++)
4257 printf(" [%03x]: %02x",
4258 i + j,
4259 (char) dcp->stree[i + j + CTLLEAFIND]);
4260 printf("\n");
4261 }
4262}
4263#endif /* _JFS_DEBUG_DMAP */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 8676aee3ae48..404f33eae507 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -381,9 +381,12 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
381 * It's time to move the inline table to an external 381 * It's time to move the inline table to an external
382 * page and begin to build the xtree 382 * page and begin to build the xtree
383 */ 383 */
384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage) || 384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
385 dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) 385 goto clean_up;
386 goto clean_up; /* No space */ 386 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
387 DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
388 goto clean_up;
389 }
387 390
388 /* 391 /*
389 * Save the table, we're going to overwrite it with the 392 * Save the table, we're going to overwrite it with the
@@ -397,13 +400,15 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
397 xtInitRoot(tid, ip); 400 xtInitRoot(tid, ip);
398 401
399 /* 402 /*
400 * Allocate the first block & add it to the xtree 403 * Add the first block to the xtree
401 */ 404 */
402 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 405 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
403 /* This really shouldn't fail */ 406 /* This really shouldn't fail */
404 jfs_warn("add_index: xtInsert failed!"); 407 jfs_warn("add_index: xtInsert failed!");
405 memcpy(&jfs_ip->i_dirtable, temp_table, 408 memcpy(&jfs_ip->i_dirtable, temp_table,
406 sizeof (temp_table)); 409 sizeof (temp_table));
410 dbFree(ip, xaddr, sbi->nbperpage);
411 DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
407 goto clean_up; 412 goto clean_up;
408 } 413 }
409 ip->i_size = PSIZE; 414 ip->i_size = PSIZE;
@@ -4554,202 +4559,3 @@ int dtModify(tid_t tid, struct inode *ip,
4554 4559
4555 return 0; 4560 return 0;
4556} 4561}
4557
4558#ifdef _JFS_DEBUG_DTREE
4559/*
4560 * dtDisplayTree()
4561 *
4562 * function: traverse forward
4563 */
4564int dtDisplayTree(struct inode *ip)
4565{
4566 int rc;
4567 struct metapage *mp;
4568 dtpage_t *p;
4569 s64 bn, pbn;
4570 int index, lastindex, v, h;
4571 pxd_t *xd;
4572 struct btstack btstack;
4573 struct btframe *btsp;
4574 struct btframe *parent;
4575 u8 *stbl;
4576 int psize = 256;
4577
4578 printk("display B+-tree.\n");
4579
4580 /* clear stack */
4581 btsp = btstack.stack;
4582
4583 /*
4584 * start with root
4585 *
4586 * root resides in the inode
4587 */
4588 bn = 0;
4589 v = h = 0;
4590
4591 /*
4592 * first access of each page:
4593 */
4594 newPage:
4595 DT_GETPAGE(ip, bn, mp, psize, p, rc);
4596 if (rc)
4597 return rc;
4598
4599 /* process entries forward from first index */
4600 index = 0;
4601 lastindex = p->header.nextindex - 1;
4602
4603 if (p->header.flag & BT_INTERNAL) {
4604 /*
4605 * first access of each internal page
4606 */
4607 printf("internal page ");
4608 dtDisplayPage(ip, bn, p);
4609
4610 goto getChild;
4611 } else { /* (p->header.flag & BT_LEAF) */
4612
4613 /*
4614 * first access of each leaf page
4615 */
4616 printf("leaf page ");
4617 dtDisplayPage(ip, bn, p);
4618
4619 /*
4620 * process leaf page entries
4621 *
4622 for ( ; index <= lastindex; index++)
4623 {
4624 }
4625 */
4626
4627 /* unpin the leaf page */
4628 DT_PUTPAGE(mp);
4629 }
4630
4631 /*
4632 * go back up to the parent page
4633 */
4634 getParent:
4635 /* pop/restore parent entry for the current child page */
4636 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4637 /* current page must have been root */
4638 return;
4639
4640 /*
4641 * parent page scan completed
4642 */
4643 if ((index = parent->index) == (lastindex = parent->lastindex)) {
4644 /* go back up to the parent page */
4645 goto getParent;
4646 }
4647
4648 /*
4649 * parent page has entries remaining
4650 */
4651 /* get back the parent page */
4652 bn = parent->bn;
4653 /* v = parent->level; */
4654 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4655 if (rc)
4656 return rc;
4657
4658 /* get next parent entry */
4659 index++;
4660
4661 /*
4662 * internal page: go down to child page of current entry
4663 */
4664 getChild:
4665 /* push/save current parent entry for the child page */
4666 btsp->bn = pbn = bn;
4667 btsp->index = index;
4668 btsp->lastindex = lastindex;
4669 /* btsp->level = v; */
4670 /* btsp->node = h; */
4671 ++btsp;
4672
4673 /* get current entry for the child page */
4674 stbl = DT_GETSTBL(p);
4675 xd = (pxd_t *) & p->slot[stbl[index]];
4676
4677 /*
4678 * first access of each internal entry:
4679 */
4680
4681 /* get child page */
4682 bn = addressPXD(xd);
4683 psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize;
4684
4685 printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn);
4686 v++;
4687 h = index;
4688
4689 /* release parent page */
4690 DT_PUTPAGE(mp);
4691
4692 /* process the child page */
4693 goto newPage;
4694}
4695
4696
4697/*
4698 * dtDisplayPage()
4699 *
4700 * function: display page
4701 */
4702int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p)
4703{
4704 int rc;
4705 struct metapage *mp;
4706 struct ldtentry *lh;
4707 struct idtentry *ih;
4708 pxd_t *xd;
4709 int i, j;
4710 u8 *stbl;
4711 wchar_t name[JFS_NAME_MAX + 1];
4712 struct component_name key = { 0, name };
4713 int freepage = 0;
4714
4715 if (p == NULL) {
4716 freepage = 1;
4717 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4718 if (rc)
4719 return rc;
4720 }
4721
4722 /* display page control */
4723 printk("bn:0x%Lx flag:0x%08x nextindex:%d\n",
4724 bn, p->header.flag, p->header.nextindex);
4725
4726 /* display entries */
4727 stbl = DT_GETSTBL(p);
4728 for (i = 0, j = 1; i < p->header.nextindex; i++, j++) {
4729 dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag);
4730 key.name[key.namlen] = '\0';
4731 if (p->header.flag & BT_LEAF) {
4732 lh = (struct ldtentry *) & p->slot[stbl[i]];
4733 printf("\t[%d] %s:%d", i, key.name,
4734 le32_to_cpu(lh->inumber));
4735 } else {
4736 ih = (struct idtentry *) & p->slot[stbl[i]];
4737 xd = (pxd_t *) ih;
4738 bn = addressPXD(xd);
4739 printf("\t[%d] %s:0x%Lx", i, key.name, bn);
4740 }
4741
4742 if (j == 4) {
4743 printf("\n");
4744 j = 0;
4745 }
4746 }
4747
4748 printf("\n");
4749
4750 if (freepage)
4751 DT_PUTPAGE(mp);
4752
4753 return 0;
4754}
4755#endif /* _JFS_DEBUG_DTREE */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index 273a80130c9d..13e4fdf07724 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -269,11 +269,4 @@ extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key,
269 ino_t * orig_ino, ino_t new_ino, int flag); 269 ino_t * orig_ino, ino_t new_ino, int flag);
270 270
271extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); 271extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
272
273#ifdef _JFS_DEBUG_DTREE
274extern int dtDisplayTree(struct inode *ip);
275
276extern int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p);
277#endif /* _JFS_DEBUG_DTREE */
278
279#endif /* !_H_JFS_DTREE */ 272#endif /* !_H_JFS_DTREE */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 971af2977eff..4021d46da7e3 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -87,25 +87,6 @@ static int copy_from_dinode(struct dinode *, struct inode *);
87static void copy_to_dinode(struct dinode *, struct inode *); 87static void copy_to_dinode(struct dinode *, struct inode *);
88 88
89/* 89/*
90 * debug code for double-checking inode map
91 */
92/* #define _JFS_DEBUG_IMAP 1 */
93
94#ifdef _JFS_DEBUG_IMAP
95#define DBG_DIINIT(imap) DBGdiInit(imap)
96#define DBG_DIALLOC(imap, ino) DBGdiAlloc(imap, ino)
97#define DBG_DIFREE(imap, ino) DBGdiFree(imap, ino)
98
99static void *DBGdiInit(struct inomap * imap);
100static void DBGdiAlloc(struct inomap * imap, ino_t ino);
101static void DBGdiFree(struct inomap * imap, ino_t ino);
102#else
103#define DBG_DIINIT(imap)
104#define DBG_DIALLOC(imap, ino)
105#define DBG_DIFREE(imap, ino)
106#endif /* _JFS_DEBUG_IMAP */
107
108/*
109 * NAME: diMount() 90 * NAME: diMount()
110 * 91 *
111 * FUNCTION: initialize the incore inode map control structures for 92 * FUNCTION: initialize the incore inode map control structures for
@@ -188,8 +169,6 @@ int diMount(struct inode *ipimap)
188 imap->im_ipimap = ipimap; 169 imap->im_ipimap = ipimap;
189 JFS_IP(ipimap)->i_imap = imap; 170 JFS_IP(ipimap)->i_imap = imap;
190 171
191// DBG_DIINIT(imap);
192
193 return (0); 172 return (0);
194} 173}
195 174
@@ -1043,7 +1022,6 @@ int diFree(struct inode *ip)
1043 /* update the bitmap. 1022 /* update the bitmap.
1044 */ 1023 */
1045 iagp->wmap[extno] = cpu_to_le32(bitmap); 1024 iagp->wmap[extno] = cpu_to_le32(bitmap);
1046 DBG_DIFREE(imap, inum);
1047 1025
1048 /* update the free inode counts at the iag, ag and 1026 /* update the free inode counts at the iag, ag and
1049 * map level. 1027 * map level.
@@ -1231,7 +1209,6 @@ int diFree(struct inode *ip)
1231 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); 1209 jfs_error(ip->i_sb, "diFree: the pmap does not show inode free");
1232 } 1210 }
1233 iagp->wmap[extno] = 0; 1211 iagp->wmap[extno] = 0;
1234 DBG_DIFREE(imap, inum);
1235 PXDlength(&iagp->inoext[extno], 0); 1212 PXDlength(&iagp->inoext[extno], 0);
1236 PXDaddress(&iagp->inoext[extno], 0); 1213 PXDaddress(&iagp->inoext[extno], 0);
1237 1214
@@ -1350,7 +1327,6 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1350 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1327 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1351 1328
1352 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1329 ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1353 DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
1354 jfs_ip->ixpxd = iagp->inoext[extno]; 1330 jfs_ip->ixpxd = iagp->inoext[extno];
1355 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 1331 jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
1356 jfs_ip->active_ag = -1; 1332 jfs_ip->active_ag = -1;
@@ -3185,84 +3161,3 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3185 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3161 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3186 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3162 dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3187} 3163}
3188
3189#ifdef _JFS_DEBUG_IMAP
3190/*
3191 * DBGdiInit()
3192 */
3193static void *DBGdiInit(struct inomap * imap)
3194{
3195 u32 *dimap;
3196 int size;
3197 size = 64 * 1024;
3198 if ((dimap = (u32 *) xmalloc(size, L2PSIZE, kernel_heap)) == NULL)
3199 assert(0);
3200 bzero((void *) dimap, size);
3201 imap->im_DBGdimap = dimap;
3202}
3203
3204/*
3205 * DBGdiAlloc()
3206 */
3207static void DBGdiAlloc(struct inomap * imap, ino_t ino)
3208{
3209 u32 *dimap = imap->im_DBGdimap;
3210 int w, b;
3211 u32 m;
3212 w = ino >> 5;
3213 b = ino & 31;
3214 m = 0x80000000 >> b;
3215 assert(w < 64 * 256);
3216 if (dimap[w] & m) {
3217 printk("DEBUG diAlloc: duplicate alloc ino:0x%x\n", ino);
3218 }
3219 dimap[w] |= m;
3220}
3221
3222/*
3223 * DBGdiFree()
3224 */
3225static void DBGdiFree(struct inomap * imap, ino_t ino)
3226{
3227 u32 *dimap = imap->im_DBGdimap;
3228 int w, b;
3229 u32 m;
3230 w = ino >> 5;
3231 b = ino & 31;
3232 m = 0x80000000 >> b;
3233 assert(w < 64 * 256);
3234 if ((dimap[w] & m) == 0) {
3235 printk("DEBUG diFree: duplicate free ino:0x%x\n", ino);
3236 }
3237 dimap[w] &= ~m;
3238}
3239
3240static void dump_cp(struct inomap * ipimap, char *function, int line)
3241{
3242 printk("\n* ********* *\nControl Page %s %d\n", function, line);
3243 printk("FreeIAG %d\tNextIAG %d\n", ipimap->im_freeiag,
3244 ipimap->im_nextiag);
3245 printk("NumInos %d\tNumFree %d\n",
3246 atomic_read(&ipimap->im_numinos),
3247 atomic_read(&ipimap->im_numfree));
3248 printk("AG InoFree %d\tAG ExtFree %d\n",
3249 ipimap->im_agctl[0].inofree, ipimap->im_agctl[0].extfree);
3250 printk("AG NumInos %d\tAG NumFree %d\n",
3251 ipimap->im_agctl[0].numinos, ipimap->im_agctl[0].numfree);
3252}
3253
3254static void dump_iag(struct iag * iag, char *function, int line)
3255{
3256 printk("\n* ********* *\nIAG %s %d\n", function, line);
3257 printk("IagNum %d\tIAG Free %d\n", le32_to_cpu(iag->iagnum),
3258 le32_to_cpu(iag->iagfree));
3259 printk("InoFreeFwd %d\tInoFreeBack %d\n",
3260 le32_to_cpu(iag->inofreefwd),
3261 le32_to_cpu(iag->inofreeback));
3262 printk("ExtFreeFwd %d\tExtFreeBack %d\n",
3263 le32_to_cpu(iag->extfreefwd),
3264 le32_to_cpu(iag->extfreeback));
3265 printk("NFreeInos %d\tNFreeExts %d\n", le32_to_cpu(iag->nfreeinos),
3266 le32_to_cpu(iag->nfreeexts));
3267}
3268#endif /* _JFS_DEBUG_IMAP */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 7c8387ed4192..d27bac6acaa3 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -191,7 +191,7 @@ static int lbmIOWait(struct lbuf * bp, int flag);
191static bio_end_io_t lbmIODone; 191static bio_end_io_t lbmIODone;
192static void lbmStartIO(struct lbuf * bp); 192static void lbmStartIO(struct lbuf * bp);
193static void lmGCwrite(struct jfs_log * log, int cant_block); 193static void lmGCwrite(struct jfs_log * log, int cant_block);
194static int lmLogSync(struct jfs_log * log, int nosyncwait); 194static int lmLogSync(struct jfs_log * log, int hard_sync);
195 195
196 196
197 197
@@ -915,19 +915,17 @@ static void lmPostGC(struct lbuf * bp)
915 * if new sync address is available 915 * if new sync address is available
916 * (normally the case if sync() is executed by back-ground 916 * (normally the case if sync() is executed by back-ground
917 * process). 917 * process).
918 * if not, explicitly run jfs_blogsync() to initiate
919 * getting of new sync address.
920 * calculate new value of i_nextsync which determines when 918 * calculate new value of i_nextsync which determines when
921 * this code is called again. 919 * this code is called again.
922 * 920 *
923 * PARAMETERS: log - log structure 921 * PARAMETERS: log - log structure
924 * nosyncwait - 1 if called asynchronously 922 * hard_sync - 1 to force all metadata to be written
925 * 923 *
926 * RETURN: 0 924 * RETURN: 0
927 * 925 *
928 * serialization: LOG_LOCK() held on entry/exit 926 * serialization: LOG_LOCK() held on entry/exit
929 */ 927 */
930static int lmLogSync(struct jfs_log * log, int nosyncwait) 928static int lmLogSync(struct jfs_log * log, int hard_sync)
931{ 929{
932 int logsize; 930 int logsize;
933 int written; /* written since last syncpt */ 931 int written; /* written since last syncpt */
@@ -941,11 +939,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
941 unsigned long flags; 939 unsigned long flags;
942 940
943 /* push dirty metapages out to disk */ 941 /* push dirty metapages out to disk */
944 list_for_each_entry(sbi, &log->sb_list, log_list) { 942 if (hard_sync)
945 filemap_flush(sbi->ipbmap->i_mapping); 943 list_for_each_entry(sbi, &log->sb_list, log_list) {
946 filemap_flush(sbi->ipimap->i_mapping); 944 filemap_fdatawrite(sbi->ipbmap->i_mapping);
947 filemap_flush(sbi->direct_inode->i_mapping); 945 filemap_fdatawrite(sbi->ipimap->i_mapping);
948 } 946 filemap_fdatawrite(sbi->direct_inode->i_mapping);
947 }
948 else
949 list_for_each_entry(sbi, &log->sb_list, log_list) {
950 filemap_flush(sbi->ipbmap->i_mapping);
951 filemap_flush(sbi->ipimap->i_mapping);
952 filemap_flush(sbi->direct_inode->i_mapping);
953 }
949 954
950 /* 955 /*
951 * forward syncpt 956 * forward syncpt
@@ -1021,16 +1026,13 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
1021 /* next syncpt trigger = written + more */ 1026 /* next syncpt trigger = written + more */
1022 log->nextsync = written + more; 1027 log->nextsync = written + more;
1023 1028
1024 /* return if lmLogSync() from outside of transaction, e.g., sync() */
1025 if (nosyncwait)
1026 return lsn;
1027
1028 /* if number of bytes written from last sync point is more 1029 /* if number of bytes written from last sync point is more
1029 * than 1/4 of the log size, stop new transactions from 1030 * than 1/4 of the log size, stop new transactions from
1030 * starting until all current transactions are completed 1031 * starting until all current transactions are completed
1031 * by setting syncbarrier flag. 1032 * by setting syncbarrier flag.
1032 */ 1033 */
1033 if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) { 1034 if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1035 (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1034 set_bit(log_SYNCBARRIER, &log->flag); 1036 set_bit(log_SYNCBARRIER, &log->flag);
1035 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, 1037 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1036 log->syncpt); 1038 log->syncpt);
@@ -1048,11 +1050,12 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
1048 * 1050 *
1049 * FUNCTION: write log SYNCPT record for specified log 1051 * FUNCTION: write log SYNCPT record for specified log
1050 * 1052 *
1051 * PARAMETERS: log - log structure 1053 * PARAMETERS: log - log structure
1054 * hard_sync - set to 1 to force metadata to be written
1052 */ 1055 */
1053void jfs_syncpt(struct jfs_log *log) 1056void jfs_syncpt(struct jfs_log *log, int hard_sync)
1054{ LOG_LOCK(log); 1057{ LOG_LOCK(log);
1055 lmLogSync(log, 1); 1058 lmLogSync(log, hard_sync);
1056 LOG_UNLOCK(log); 1059 LOG_UNLOCK(log);
1057} 1060}
1058 1061
@@ -2359,9 +2362,9 @@ int jfsIOWait(void *arg)
2359 lbmStartIO(bp); 2362 lbmStartIO(bp);
2360 spin_lock_irq(&log_redrive_lock); 2363 spin_lock_irq(&log_redrive_lock);
2361 } 2364 }
2362 if (current->flags & PF_FREEZE) { 2365 if (freezing(current)) {
2363 spin_unlock_irq(&log_redrive_lock); 2366 spin_unlock_irq(&log_redrive_lock);
2364 refrigerator(PF_FREEZE); 2367 refrigerator();
2365 } else { 2368 } else {
2366 add_wait_queue(&jfs_IO_thread_wait, &wq); 2369 add_wait_queue(&jfs_IO_thread_wait, &wq);
2367 set_current_state(TASK_INTERRUPTIBLE); 2370 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 747114cd38b8..e4978b5b65ee 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -510,6 +510,6 @@ extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
510extern int lmGroupCommit(struct jfs_log *, struct tblock *); 510extern int lmGroupCommit(struct jfs_log *, struct tblock *);
511extern int jfsIOWait(void *); 511extern int jfsIOWait(void *);
512extern void jfs_flush_journal(struct jfs_log * log, int wait); 512extern void jfs_flush_journal(struct jfs_log * log, int wait);
513extern void jfs_syncpt(struct jfs_log *log); 513extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
514 514
515#endif /* _H_JFS_LOGMGR */ 515#endif /* _H_JFS_LOGMGR */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 6c5485d16c39..13d7e3f1feb4 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -561,7 +561,6 @@ static int metapage_releasepage(struct page *page, int gfp_mask)
561 dump_mem("page", page, sizeof(struct page)); 561 dump_mem("page", page, sizeof(struct page));
562 dump_stack(); 562 dump_stack();
563 } 563 }
564 WARN_ON(mp->lsn);
565 if (mp->lsn) 564 if (mp->lsn)
566 remove_from_logsync(mp); 565 remove_from_logsync(mp);
567 remove_metapage(page, mp); 566 remove_metapage(page, mp);
@@ -641,7 +640,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
641 } else { 640 } else {
642 page = read_cache_page(mapping, page_index, 641 page = read_cache_page(mapping, page_index,
643 (filler_t *)mapping->a_ops->readpage, NULL); 642 (filler_t *)mapping->a_ops->readpage, NULL);
644 if (IS_ERR(page)) { 643 if (IS_ERR(page) || !PageUptodate(page)) {
645 jfs_err("read_cache_page failed!"); 644 jfs_err("read_cache_page failed!");
646 return NULL; 645 return NULL;
647 } 646 }
@@ -783,14 +782,6 @@ void release_metapage(struct metapage * mp)
783 if (test_bit(META_discard, &mp->flag) && !mp->count) { 782 if (test_bit(META_discard, &mp->flag) && !mp->count) {
784 clear_page_dirty(page); 783 clear_page_dirty(page);
785 ClearPageUptodate(page); 784 ClearPageUptodate(page);
786#ifdef _NOT_YET
787 if (page->mapping) {
788 /* Remove from page cache and page cache reference */
789 remove_from_page_cache(page);
790 page_cache_release(page);
791 metapage_releasepage(page, 0);
792 }
793#endif
794 } 785 }
795#else 786#else
796 /* Try to keep metapages from using up too much memory */ 787 /* Try to keep metapages from using up too much memory */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 8cbaaff1d5fa..c7a92f9deb2b 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -552,6 +552,11 @@ void txEnd(tid_t tid)
552 * synchronize with logsync barrier 552 * synchronize with logsync barrier
553 */ 553 */
554 if (test_bit(log_SYNCBARRIER, &log->flag)) { 554 if (test_bit(log_SYNCBARRIER, &log->flag)) {
555 TXN_UNLOCK();
556
557 /* write dirty metadata & forward log syncpt */
558 jfs_syncpt(log, 1);
559
555 jfs_info("log barrier off: 0x%x", log->lsn); 560 jfs_info("log barrier off: 0x%x", log->lsn);
556 561
557 /* enable new transactions start */ 562 /* enable new transactions start */
@@ -560,11 +565,6 @@ void txEnd(tid_t tid)
560 /* wakeup all waitors for logsync barrier */ 565 /* wakeup all waitors for logsync barrier */
561 TXN_WAKEUP(&log->syncwait); 566 TXN_WAKEUP(&log->syncwait);
562 567
563 TXN_UNLOCK();
564
565 /* forward log syncpt */
566 jfs_syncpt(log);
567
568 goto wakeup; 568 goto wakeup;
569 } 569 }
570 } 570 }
@@ -657,7 +657,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
657 /* only anonymous txn. 657 /* only anonymous txn.
658 * Remove from anon_list 658 * Remove from anon_list
659 */ 659 */
660 TXN_LOCK();
660 list_del_init(&jfs_ip->anon_inode_list); 661 list_del_init(&jfs_ip->anon_inode_list);
662 TXN_UNLOCK();
661 } 663 }
662 jfs_ip->atlhead = tlck->next; 664 jfs_ip->atlhead = tlck->next;
663 } else { 665 } else {
@@ -2788,9 +2790,9 @@ int jfs_lazycommit(void *arg)
2788 /* In case a wakeup came while all threads were active */ 2790 /* In case a wakeup came while all threads were active */
2789 jfs_commit_thread_waking = 0; 2791 jfs_commit_thread_waking = 0;
2790 2792
2791 if (current->flags & PF_FREEZE) { 2793 if (freezing(current)) {
2792 LAZY_UNLOCK(flags); 2794 LAZY_UNLOCK(flags);
2793 refrigerator(PF_FREEZE); 2795 refrigerator();
2794 } else { 2796 } else {
2795 DECLARE_WAITQUEUE(wq, current); 2797 DECLARE_WAITQUEUE(wq, current);
2796 2798
@@ -2987,9 +2989,9 @@ int jfs_sync(void *arg)
2987 /* Add anon_list2 back to anon_list */ 2989 /* Add anon_list2 back to anon_list */
2988 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2990 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
2989 2991
2990 if (current->flags & PF_FREEZE) { 2992 if (freezing(current)) {
2991 TXN_UNLOCK(); 2993 TXN_UNLOCK();
2992 refrigerator(PF_FREEZE); 2994 refrigerator();
2993 } else { 2995 } else {
2994 DECLARE_WAITQUEUE(wq, current); 2996 DECLARE_WAITQUEUE(wq, current);
2995 2997
diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c
index b32208aad550..f327decfb155 100644
--- a/fs/jfs/jfs_unicode.c
+++ b/fs/jfs/jfs_unicode.c
@@ -51,8 +51,9 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
51 } 51 }
52 } else { 52 } else {
53 for (i = 0; (i < len) && from[i]; i++) { 53 for (i = 0; (i < len) && from[i]; i++) {
54 if (le16_to_cpu(from[i]) & 0xff00) { 54 if (unlikely(le16_to_cpu(from[i]) & 0xff00)) {
55 if (warn) { 55 to[i] = '?';
56 if (unlikely(warn)) {
56 warn--; 57 warn--;
57 warn_again--; 58 warn_again--;
58 printk(KERN_ERR 59 printk(KERN_ERR
@@ -61,7 +62,7 @@ int jfs_strfromUCS_le(char *to, const __le16 * from,
61 printk(KERN_ERR 62 printk(KERN_ERR
62 "mount with iocharset=utf8 to access\n"); 63 "mount with iocharset=utf8 to access\n");
63 } 64 }
64 to[i] = '?'; 65
65 } 66 }
66 else 67 else
67 to[i] = (char) (le16_to_cpu(from[i])); 68 to[i] = (char) (le16_to_cpu(from[i]));
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 31b34db4519e..a7fe2f2b969f 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -135,14 +135,6 @@ static int xtSearchNode(struct inode *ip,
135static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 135static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
136#endif /* _STILL_TO_PORT */ 136#endif /* _STILL_TO_PORT */
137 137
138/* External references */
139
140/*
141 * debug control
142 */
143/* #define _JFS_DEBUG_XTREE 1 */
144
145
146/* 138/*
147 * xtLookup() 139 * xtLookup()
148 * 140 *
@@ -4140,338 +4132,6 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
4140 return 0; 4132 return 0;
4141} 4133}
4142 4134
4143
4144#ifdef _JFS_DEBUG_XTREE
4145/*
4146 * xtDisplayTree()
4147 *
4148 * function: traverse forward
4149 */
4150int xtDisplayTree(struct inode *ip)
4151{
4152 int rc = 0;
4153 struct metapage *mp;
4154 xtpage_t *p;
4155 s64 bn, pbn;
4156 int index, lastindex, v, h;
4157 xad_t *xad;
4158 struct btstack btstack;
4159 struct btframe *btsp;
4160 struct btframe *parent;
4161
4162 printk("display B+-tree.\n");
4163
4164 /* clear stack */
4165 btsp = btstack.stack;
4166
4167 /*
4168 * start with root
4169 *
4170 * root resides in the inode
4171 */
4172 bn = 0;
4173 v = h = 0;
4174
4175 /*
4176 * first access of each page:
4177 */
4178 getPage:
4179 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4180 if (rc)
4181 return rc;
4182
4183 /* process entries forward from first index */
4184 index = XTENTRYSTART;
4185 lastindex = le16_to_cpu(p->header.nextindex) - 1;
4186
4187 if (p->header.flag & BT_INTERNAL) {
4188 /*
4189 * first access of each internal page
4190 */
4191 goto getChild;
4192 } else { /* (p->header.flag & BT_LEAF) */
4193
4194 /*
4195 * first access of each leaf page
4196 */
4197 printf("leaf page ");
4198 xtDisplayPage(ip, bn, p);
4199
4200 /* unpin the leaf page */
4201 XT_PUTPAGE(mp);
4202 }
4203
4204 /*
4205 * go back up to the parent page
4206 */
4207 getParent:
4208 /* pop/restore parent entry for the current child page */
4209 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
4210 /* current page must have been root */
4211 return;
4212
4213 /*
4214 * parent page scan completed
4215 */
4216 if ((index = parent->index) == (lastindex = parent->lastindex)) {
4217 /* go back up to the parent page */
4218 goto getParent;
4219 }
4220
4221 /*
4222 * parent page has entries remaining
4223 */
4224 /* get back the parent page */
4225 bn = parent->bn;
4226 /* v = parent->level; */
4227 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4228 if (rc)
4229 return rc;
4230
4231 /* get next parent entry */
4232 index++;
4233
4234 /*
4235 * internal page: go down to child page of current entry
4236 */
4237 getChild:
4238 /* push/save current parent entry for the child page */
4239 btsp->bn = pbn = bn;
4240 btsp->index = index;
4241 btsp->lastindex = lastindex;
4242 /* btsp->level = v; */
4243 /* btsp->node = h; */
4244 ++btsp;
4245
4246 /* get child page */
4247 xad = &p->xad[index];
4248 bn = addressXAD(xad);
4249
4250 /*
4251 * first access of each internal entry:
4252 */
4253 /* release parent page */
4254 XT_PUTPAGE(mp);
4255
4256 printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index,
4257 (ulong) bn);
4258 v++;
4259 h = index;
4260
4261 /* process the child page */
4262 goto getPage;
4263}
4264
4265
4266/*
4267 * xtDisplayPage()
4268 *
4269 * function: display page
4270 */
4271int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p)
4272{
4273 int rc = 0;
4274 xad_t *xad;
4275 s64 xaddr, xoff;
4276 int xlen, i, j;
4277
4278 /* display page control */
4279 printf("bn:0x%lx flag:0x%x nextindex:%d\n",
4280 (ulong) bn, p->header.flag,
4281 le16_to_cpu(p->header.nextindex));
4282
4283 /* display entries */
4284 xad = &p->xad[XTENTRYSTART];
4285 for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex);
4286 i++, xad++, j++) {
4287 xoff = offsetXAD(xad);
4288 xaddr = addressXAD(xad);
4289 xlen = lengthXAD(xad);
4290 printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff,
4291 (ulong) xaddr, xlen);
4292
4293 if (j == 4) {
4294 printf("\n");
4295 j = 0;
4296 }
4297 }
4298
4299 printf("\n");
4300}
4301#endif /* _JFS_DEBUG_XTREE */
4302
4303
4304#ifdef _JFS_WIP
4305/*
4306 * xtGather()
4307 *
4308 * function:
4309 * traverse for allocation acquiring tlock at commit time
4310 * (vs at the time of update) logging backward top down
4311 *
4312 * note:
4313 * problem - establishing that all new allocation have been
4314 * processed both for append and random write in sparse file
4315 * at the current entry at the current subtree root page
4316 *
4317 */
4318int xtGather(btree_t *t)
4319{
4320 int rc = 0;
4321 xtpage_t *p;
4322 u64 bn;
4323 int index;
4324 btentry_t *e;
4325 struct btstack btstack;
4326 struct btsf *parent;
4327
4328 /* clear stack */
4329 BT_CLR(&btstack);
4330
4331 /*
4332 * start with root
4333 *
4334 * root resides in the inode
4335 */
4336 bn = 0;
4337 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4338 if (rc)
4339 return rc;
4340
4341 /* new root is NOT pointed by a new entry
4342 if (p->header.flag & NEW)
4343 allocate new page lock;
4344 write a NEWPAGE log;
4345 */
4346
4347 dopage:
4348 /*
4349 * first access of each page:
4350 */
4351 /* process entries backward from last index */
4352 index = le16_to_cpu(p->header.nextindex) - 1;
4353
4354 if (p->header.flag & BT_LEAF) {
4355 /*
4356 * first access of each leaf page
4357 */
4358 /* process leaf page entries backward */
4359 for (; index >= XTENTRYSTART; index--) {
4360 e = &p->xad[index];
4361 /*
4362 * if newpage, log NEWPAGE.
4363 *
4364 if (e->flag & XAD_NEW) {
4365 nfound =+ entry->length;
4366 update current page lock for the entry;
4367 newpage(entry);
4368 *
4369 * if moved, log move.
4370 *
4371 } else if (e->flag & XAD_MOVED) {
4372 reset flag;
4373 update current page lock for the entry;
4374 }
4375 */
4376 }
4377
4378 /* unpin the leaf page */
4379 XT_PUTPAGE(mp);
4380
4381 /*
4382 * go back up to the parent page
4383 */
4384 getParent:
4385 /* restore parent entry for the current child page */
4386 if ((parent = BT_POP(&btstack)) == NULL)
4387 /* current page must have been root */
4388 return 0;
4389
4390 if ((index = parent->index) == XTENTRYSTART) {
4391 /*
4392 * parent page scan completed
4393 */
4394 /* go back up to the parent page */
4395 goto getParent;
4396 } else {
4397 /*
4398 * parent page has entries remaining
4399 */
4400 /* get back the parent page */
4401 bn = parent->bn;
4402 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4403 if (rc)
4404 return -EIO;
4405
4406 /* first subroot page which
4407 * covers all new allocated blocks
4408 * itself not new/modified.
4409 * (if modified from split of descendent,
4410 * go down path of split page)
4411
4412 if (nfound == nnew &&
4413 !(p->header.flag & (NEW | MOD)))
4414 exit scan;
4415 */
4416
4417 /* process parent page entries backward */
4418 index--;
4419 }
4420 } else {
4421 /*
4422 * first access of each internal page
4423 */
4424 }
4425
4426 /*
4427 * internal page: go down to child page of current entry
4428 */
4429
4430 /* save current parent entry for the child page */
4431 BT_PUSH(&btstack, bn, index);
4432
4433 /* get current entry for the child page */
4434 e = &p->xad[index];
4435
4436 /*
4437 * first access of each internal entry:
4438 */
4439 /*
4440 * if new entry, log btree_tnewentry.
4441 *
4442 if (e->flag & XAD_NEW)
4443 update parent page lock for the entry;
4444 */
4445
4446 /* release parent page */
4447 XT_PUTPAGE(mp);
4448
4449 /* get child page */
4450 bn = e->bn;
4451 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
4452 if (rc)
4453 return rc;
4454
4455 /*
4456 * first access of each non-root page:
4457 */
4458 /*
4459 * if new, log btree_newpage.
4460 *
4461 if (p->header.flag & NEW)
4462 allocate new page lock;
4463 write a NEWPAGE log (next, prev);
4464 */
4465
4466 /* process the child page */
4467 goto dopage;
4468
4469 out:
4470 return 0;
4471}
4472#endif /* _JFS_WIP */
4473
4474
4475#ifdef CONFIG_JFS_STATISTICS 4135#ifdef CONFIG_JFS_STATISTICS
4476int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4136int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
4477 int *eof, void *data) 4137 int *eof, void *data)
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index a69784254fe7..af668a80b40f 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -131,10 +131,4 @@ extern int xtRelocate(tid_t tid, struct inode *ip,
131extern int xtAppend(tid_t tid, 131extern int xtAppend(tid_t tid,
132 struct inode *ip, int xflag, s64 xoff, int maxblocks, 132 struct inode *ip, int xflag, s64 xoff, int maxblocks,
133 int *xlenp, s64 * xaddrp, int flag); 133 int *xlenp, s64 * xaddrp, int flag);
134
135#ifdef _JFS_DEBUG_XTREE
136extern int xtDisplayTree(struct inode *ip);
137extern int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p);
138#endif /* _JFS_DEBUG_XTREE */
139
140#endif /* !_H_JFS_XTREE */ 134#endif /* !_H_JFS_XTREE */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 810a3653d8b3..9ff89720f93b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -24,6 +24,7 @@
24#include <linux/completion.h> 24#include <linux/completion.h>
25#include <linux/vfs.h> 25#include <linux/vfs.h>
26#include <linux/moduleparam.h> 26#include <linux/moduleparam.h>
27#include <linux/posix_acl.h>
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28 29
29#include "jfs_incore.h" 30#include "jfs_incore.h"
@@ -113,6 +114,8 @@ static void jfs_destroy_inode(struct inode *inode)
113{ 114{
114 struct jfs_inode_info *ji = JFS_IP(inode); 115 struct jfs_inode_info *ji = JFS_IP(inode);
115 116
117 BUG_ON(!list_empty(&ji->anon_inode_list));
118
116 spin_lock_irq(&ji->ag_lock); 119 spin_lock_irq(&ji->ag_lock);
117 if (ji->active_ag != -1) { 120 if (ji->active_ag != -1) {
118 struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; 121 struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
@@ -530,7 +533,7 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
530 /* log == NULL indicates read-only mount */ 533 /* log == NULL indicates read-only mount */
531 if (log) { 534 if (log) {
532 jfs_flush_journal(log, wait); 535 jfs_flush_journal(log, wait);
533 jfs_syncpt(log); 536 jfs_syncpt(log, 0);
534 } 537 }
535 538
536 return 0; 539 return 0;
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 287d8d6c3cfd..16477b3835e1 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -22,11 +22,11 @@
22#include "jfs_inode.h" 22#include "jfs_inode.h"
23#include "jfs_xattr.h" 23#include "jfs_xattr.h"
24 24
25static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd) 25static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
26{ 26{
27 char *s = JFS_IP(dentry->d_inode)->i_inline; 27 char *s = JFS_IP(dentry->d_inode)->i_inline;
28 nd_set_link(nd, s); 28 nd_set_link(nd, s);
29 return 0; 29 return NULL;
30} 30}
31 31
32struct inode_operations jfs_symlink_inode_operations = { 32struct inode_operations jfs_symlink_inode_operations = {
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 6016373701a3..554ec739e49b 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -19,6 +19,7 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/xattr.h> 21#include <linux/xattr.h>
22#include <linux/posix_acl_xattr.h>
22#include <linux/quotaops.h> 23#include <linux/quotaops.h>
23#include "jfs_incore.h" 24#include "jfs_incore.h"
24#include "jfs_superblock.h" 25#include "jfs_superblock.h"
@@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
718 return -EPERM; 719 return -EPERM;
719 720
720 /* 721 /*
721 * XATTR_NAME_ACL_ACCESS is tied to i_mode 722 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
722 */ 723 */
723 if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) { 724 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
724 acl = posix_acl_from_xattr(value, value_len); 725 acl = posix_acl_from_xattr(value, value_len);
725 if (IS_ERR(acl)) { 726 if (IS_ERR(acl)) {
726 rc = PTR_ERR(acl); 727 rc = PTR_ERR(acl);
@@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
750 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; 751 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
751 752
752 return 0; 753 return 0;
753 } else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) { 754 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
754 acl = posix_acl_from_xattr(value, value_len); 755 acl = posix_acl_from_xattr(value, value_len);
755 if (IS_ERR(acl)) { 756 if (IS_ERR(acl)) {
756 rc = PTR_ERR(acl); 757 rc = PTR_ERR(acl);
@@ -780,7 +781,7 @@ static int can_set_xattr(struct inode *inode, const char *name,
780 if (IS_RDONLY(inode)) 781 if (IS_RDONLY(inode))
781 return -EROFS; 782 return -EROFS;
782 783
783 if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) 784 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
784 return -EPERM; 785 return -EPERM;
785 786
786 if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) 787 if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0)
@@ -789,12 +790,12 @@ static int can_set_xattr(struct inode *inode, const char *name,
789 */ 790 */
790 return can_set_system_xattr(inode, name, value, value_len); 791 return can_set_system_xattr(inode, name, value, value_len);
791 792
792 if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 793 if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0)
793 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); 794 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
794 795
795#ifdef CONFIG_JFS_SECURITY 796#ifdef CONFIG_JFS_SECURITY
796 if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) 797 if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)
797 != 0) 798 == 0)
798 return 0; /* Leave it to the security module */ 799 return 0; /* Leave it to the security module */
799#endif 800#endif
800 801
diff --git a/fs/libfs.c b/fs/libfs.c
index 5025563e7379..58101dff2c66 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -183,6 +183,7 @@ struct file_operations simple_dir_operations = {
183 .llseek = dcache_dir_lseek, 183 .llseek = dcache_dir_lseek,
184 .read = generic_read_dir, 184 .read = generic_read_dir,
185 .readdir = dcache_readdir, 185 .readdir = dcache_readdir,
186 .fsync = simple_sync_file,
186}; 187};
187 188
188struct inode_operations simple_dir_inode_operations = { 189struct inode_operations simple_dir_inode_operations = {
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fd77ed1d710d..14b3ce87fa29 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -313,7 +313,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue)
313 prepare_to_wait(queue, &wait, TASK_INTERRUPTIBLE); 313 prepare_to_wait(queue, &wait, TASK_INTERRUPTIBLE);
314 if (!signalled ()) { 314 if (!signalled ()) {
315 schedule_timeout(NLMCLNT_GRACE_WAIT); 315 schedule_timeout(NLMCLNT_GRACE_WAIT);
316 try_to_freeze(PF_FREEZE); 316 try_to_freeze();
317 if (!signalled ()) 317 if (!signalled ())
318 status = 0; 318 status = 0;
319 } 319 }
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b82e470912e8..12a857c29e25 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp)
191 printk(KERN_DEBUG 191 printk(KERN_DEBUG
192 "lockd: new process, skipping host shutdown\n"); 192 "lockd: new process, skipping host shutdown\n");
193 wake_up(&lockd_exit); 193 wake_up(&lockd_exit);
194 194
195 flush_signals(current);
196
195 /* Exit the RPC thread */ 197 /* Exit the RPC thread */
196 svc_exit_thread(rqstp); 198 svc_exit_thread(rqstp);
197 199
@@ -329,7 +331,7 @@ static ctl_table nlm_sysctls[] = {
329 .ctl_name = CTL_UNNUMBERED, 331 .ctl_name = CTL_UNNUMBERED,
330 .procname = "nlm_grace_period", 332 .procname = "nlm_grace_period",
331 .data = &nlm_grace_period, 333 .data = &nlm_grace_period,
332 .maxlen = sizeof(int), 334 .maxlen = sizeof(unsigned long),
333 .mode = 0644, 335 .mode = 0644,
334 .proc_handler = &proc_doulongvec_minmax, 336 .proc_handler = &proc_doulongvec_minmax,
335 .extra1 = (unsigned long *) &nlm_grace_period_min, 337 .extra1 = (unsigned long *) &nlm_grace_period_min,
@@ -339,7 +341,7 @@ static ctl_table nlm_sysctls[] = {
339 .ctl_name = CTL_UNNUMBERED, 341 .ctl_name = CTL_UNNUMBERED,
340 .procname = "nlm_timeout", 342 .procname = "nlm_timeout",
341 .data = &nlm_timeout, 343 .data = &nlm_timeout,
342 .maxlen = sizeof(int), 344 .maxlen = sizeof(unsigned long),
343 .mode = 0644, 345 .mode = 0644,
344 .proc_handler = &proc_doulongvec_minmax, 346 .proc_handler = &proc_doulongvec_minmax,
345 .extra1 = (unsigned long *) &nlm_timeout_min, 347 .extra1 = (unsigned long *) &nlm_timeout_min,
diff --git a/fs/locks.c b/fs/locks.c
index a0bc03495bd4..11956b6179ff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1276,7 +1276,7 @@ int fcntl_getlease(struct file *filp)
1276 */ 1276 */
1277static int __setlease(struct file *filp, long arg, struct file_lock **flp) 1277static int __setlease(struct file *filp, long arg, struct file_lock **flp)
1278{ 1278{
1279 struct file_lock *fl, **before, **my_before = NULL, *lease = *flp; 1279 struct file_lock *fl, **before, **my_before = NULL, *lease;
1280 struct dentry *dentry = filp->f_dentry; 1280 struct dentry *dentry = filp->f_dentry;
1281 struct inode *inode = dentry->d_inode; 1281 struct inode *inode = dentry->d_inode;
1282 int error, rdlease_count = 0, wrlease_count = 0; 1282 int error, rdlease_count = 0, wrlease_count = 0;
@@ -1287,6 +1287,8 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
1287 if (!flp || !(*flp) || !(*flp)->fl_lmops || !(*flp)->fl_lmops->fl_break) 1287 if (!flp || !(*flp) || !(*flp)->fl_lmops || !(*flp)->fl_lmops->fl_break)
1288 goto out; 1288 goto out;
1289 1289
1290 lease = *flp;
1291
1290 error = -EAGAIN; 1292 error = -EAGAIN;
1291 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1293 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1292 goto out; 1294 goto out;
@@ -1589,7 +1591,8 @@ out:
1589/* Apply the lock described by l to an open file descriptor. 1591/* Apply the lock described by l to an open file descriptor.
1590 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1592 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1591 */ 1593 */
1592int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l) 1594int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1595 struct flock __user *l)
1593{ 1596{
1594 struct file_lock *file_lock = locks_alloc_lock(); 1597 struct file_lock *file_lock = locks_alloc_lock();
1595 struct flock flock; 1598 struct flock flock;
@@ -1618,6 +1621,7 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1618 goto out; 1621 goto out;
1619 } 1622 }
1620 1623
1624again:
1621 error = flock_to_posix_lock(filp, file_lock, &flock); 1625 error = flock_to_posix_lock(filp, file_lock, &flock);
1622 if (error) 1626 if (error)
1623 goto out; 1627 goto out;
@@ -1646,25 +1650,33 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1646 if (error) 1650 if (error)
1647 goto out; 1651 goto out;
1648 1652
1649 if (filp->f_op && filp->f_op->lock != NULL) { 1653 if (filp->f_op && filp->f_op->lock != NULL)
1650 error = filp->f_op->lock(filp, cmd, file_lock); 1654 error = filp->f_op->lock(filp, cmd, file_lock);
1651 goto out; 1655 else {
1652 } 1656 for (;;) {
1657 error = __posix_lock_file(inode, file_lock);
1658 if ((error != -EAGAIN) || (cmd == F_SETLK))
1659 break;
1660 error = wait_event_interruptible(file_lock->fl_wait,
1661 !file_lock->fl_next);
1662 if (!error)
1663 continue;
1653 1664
1654 for (;;) { 1665 locks_delete_block(file_lock);
1655 error = __posix_lock_file(inode, file_lock);
1656 if ((error != -EAGAIN) || (cmd == F_SETLK))
1657 break; 1666 break;
1658 error = wait_event_interruptible(file_lock->fl_wait, 1667 }
1659 !file_lock->fl_next); 1668 }
1660 if (!error)
1661 continue;
1662 1669
1663 locks_delete_block(file_lock); 1670 /*
1664 break; 1671 * Attempt to detect a close/fcntl race and recover by
1672 * releasing the lock that was just acquired.
1673 */
1674 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
1675 flock.l_type = F_UNLCK;
1676 goto again;
1665 } 1677 }
1666 1678
1667 out: 1679out:
1668 locks_free_lock(file_lock); 1680 locks_free_lock(file_lock);
1669 return error; 1681 return error;
1670} 1682}
@@ -1722,7 +1734,8 @@ out:
1722/* Apply the lock described by l to an open file descriptor. 1734/* Apply the lock described by l to an open file descriptor.
1723 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1735 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1724 */ 1736 */
1725int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) 1737int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1738 struct flock64 __user *l)
1726{ 1739{
1727 struct file_lock *file_lock = locks_alloc_lock(); 1740 struct file_lock *file_lock = locks_alloc_lock();
1728 struct flock64 flock; 1741 struct flock64 flock;
@@ -1751,6 +1764,7 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
1751 goto out; 1764 goto out;
1752 } 1765 }
1753 1766
1767again:
1754 error = flock64_to_posix_lock(filp, file_lock, &flock); 1768 error = flock64_to_posix_lock(filp, file_lock, &flock);
1755 if (error) 1769 if (error)
1756 goto out; 1770 goto out;
@@ -1779,22 +1793,30 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
1779 if (error) 1793 if (error)
1780 goto out; 1794 goto out;
1781 1795
1782 if (filp->f_op && filp->f_op->lock != NULL) { 1796 if (filp->f_op && filp->f_op->lock != NULL)
1783 error = filp->f_op->lock(filp, cmd, file_lock); 1797 error = filp->f_op->lock(filp, cmd, file_lock);
1784 goto out; 1798 else {
1785 } 1799 for (;;) {
1800 error = __posix_lock_file(inode, file_lock);
1801 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1802 break;
1803 error = wait_event_interruptible(file_lock->fl_wait,
1804 !file_lock->fl_next);
1805 if (!error)
1806 continue;
1786 1807
1787 for (;;) { 1808 locks_delete_block(file_lock);
1788 error = __posix_lock_file(inode, file_lock);
1789 if ((error != -EAGAIN) || (cmd == F_SETLK64))
1790 break; 1809 break;
1791 error = wait_event_interruptible(file_lock->fl_wait, 1810 }
1792 !file_lock->fl_next); 1811 }
1793 if (!error)
1794 continue;
1795 1812
1796 locks_delete_block(file_lock); 1813 /*
1797 break; 1814 * Attempt to detect a close/fcntl race and recover by
1815 * releasing the lock that was just acquired.
1816 */
1817 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
1818 flock.l_type = F_UNLCK;
1819 goto again;
1798 } 1820 }
1799 1821
1800out: 1822out:
@@ -1886,12 +1908,7 @@ void locks_remove_flock(struct file *filp)
1886 1908
1887 while ((fl = *before) != NULL) { 1909 while ((fl = *before) != NULL) {
1888 if (fl->fl_file == filp) { 1910 if (fl->fl_file == filp) {
1889 /* 1911 if (IS_FLOCK(fl)) {
1890 * We might have a POSIX lock that was created at the same time
1891 * the filp was closed for the last time. Just remove that too,
1892 * regardless of ownership, since nobody can own it.
1893 */
1894 if (IS_FLOCK(fl) || IS_POSIX(fl)) {
1895 locks_delete_lock(before); 1912 locks_delete_lock(before);
1896 continue; 1913 continue;
1897 } 1914 }
diff --git a/fs/mbcache.c b/fs/mbcache.c
index c7170b9221a3..b002a088857d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -316,11 +316,10 @@ fail:
316 * currently in use cannot be freed, and thus remain in the cache. All others 316 * currently in use cannot be freed, and thus remain in the cache. All others
317 * are freed. 317 * are freed.
318 * 318 *
319 * @cache: which cache to shrink
320 * @bdev: which device's cache entries to shrink 319 * @bdev: which device's cache entries to shrink
321 */ 320 */
322void 321void
323mb_cache_shrink(struct mb_cache *cache, struct block_device *bdev) 322mb_cache_shrink(struct block_device *bdev)
324{ 323{
325 LIST_HEAD(free_list); 324 LIST_HEAD(free_list);
326 struct list_head *l, *ltmp; 325 struct list_head *l, *ltmp;
diff --git a/fs/namei.c b/fs/namei.c
index a7f7f44119b3..6ec1f0fefc5b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -21,7 +21,7 @@
21#include <linux/namei.h> 21#include <linux/namei.h>
22#include <linux/quotaops.h> 22#include <linux/quotaops.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/dnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/smp_lock.h> 25#include <linux/smp_lock.h>
26#include <linux/personality.h> 26#include <linux/personality.h>
27#include <linux/security.h> 27#include <linux/security.h>
@@ -314,7 +314,7 @@ void path_release(struct nameidata *nd)
314void path_release_on_umount(struct nameidata *nd) 314void path_release_on_umount(struct nameidata *nd)
315{ 315{
316 dput(nd->dentry); 316 dput(nd->dentry);
317 _mntput(nd->mnt); 317 mntput_no_expire(nd->mnt);
318} 318}
319 319
320/* 320/*
@@ -501,6 +501,7 @@ struct path {
501static inline int __do_follow_link(struct path *path, struct nameidata *nd) 501static inline int __do_follow_link(struct path *path, struct nameidata *nd)
502{ 502{
503 int error; 503 int error;
504 void *cookie;
504 struct dentry *dentry = path->dentry; 505 struct dentry *dentry = path->dentry;
505 506
506 touch_atime(path->mnt, dentry); 507 touch_atime(path->mnt, dentry);
@@ -508,13 +509,15 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd)
508 509
509 if (path->mnt == nd->mnt) 510 if (path->mnt == nd->mnt)
510 mntget(path->mnt); 511 mntget(path->mnt);
511 error = dentry->d_inode->i_op->follow_link(dentry, nd); 512 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
512 if (!error) { 513 error = PTR_ERR(cookie);
514 if (!IS_ERR(cookie)) {
513 char *s = nd_get_link(nd); 515 char *s = nd_get_link(nd);
516 error = 0;
514 if (s) 517 if (s)
515 error = __vfs_follow_link(nd, s); 518 error = __vfs_follow_link(nd, s);
516 if (dentry->d_inode->i_op->put_link) 519 if (dentry->d_inode->i_op->put_link)
517 dentry->d_inode->i_op->put_link(dentry, nd); 520 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
518 } 521 }
519 dput(dentry); 522 dput(dentry);
520 mntput(path->mnt); 523 mntput(path->mnt);
@@ -1312,7 +1315,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1312 DQUOT_INIT(dir); 1315 DQUOT_INIT(dir);
1313 error = dir->i_op->create(dir, dentry, mode, nd); 1316 error = dir->i_op->create(dir, dentry, mode, nd);
1314 if (!error) { 1317 if (!error) {
1315 inode_dir_notify(dir, DN_CREATE); 1318 fsnotify_create(dir, dentry->d_name.name);
1316 security_inode_post_create(dir, dentry, mode); 1319 security_inode_post_create(dir, dentry, mode);
1317 } 1320 }
1318 return error; 1321 return error;
@@ -1577,19 +1580,35 @@ do_link:
1577 * 1580 *
1578 * Simple function to lookup and return a dentry and create it 1581 * Simple function to lookup and return a dentry and create it
1579 * if it doesn't exist. Is SMP-safe. 1582 * if it doesn't exist. Is SMP-safe.
1583 *
1584 * Returns with nd->dentry->d_inode->i_sem locked.
1580 */ 1585 */
1581struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1586struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1582{ 1587{
1583 struct dentry *dentry; 1588 struct dentry *dentry = ERR_PTR(-EEXIST);
1584 1589
1585 down(&nd->dentry->d_inode->i_sem); 1590 down(&nd->dentry->d_inode->i_sem);
1586 dentry = ERR_PTR(-EEXIST); 1591 /*
1592 * Yucky last component or no last component at all?
1593 * (foo/., foo/.., /////)
1594 */
1587 if (nd->last_type != LAST_NORM) 1595 if (nd->last_type != LAST_NORM)
1588 goto fail; 1596 goto fail;
1589 nd->flags &= ~LOOKUP_PARENT; 1597 nd->flags &= ~LOOKUP_PARENT;
1598
1599 /*
1600 * Do the final lookup.
1601 */
1590 dentry = lookup_hash(&nd->last, nd->dentry); 1602 dentry = lookup_hash(&nd->last, nd->dentry);
1591 if (IS_ERR(dentry)) 1603 if (IS_ERR(dentry))
1592 goto fail; 1604 goto fail;
1605
1606 /*
1607 * Special case - lookup gave negative, but... we had foo/bar/
1608 * From the vfs_mknod() POV we just have a negative dentry -
1609 * all is fine. Let's be bastards - you had / on the end, you've
1610 * been asking for (non-existent) directory. -ENOENT for you.
1611 */
1593 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) 1612 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1594 goto enoent; 1613 goto enoent;
1595 return dentry; 1614 return dentry;
@@ -1621,7 +1640,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1621 DQUOT_INIT(dir); 1640 DQUOT_INIT(dir);
1622 error = dir->i_op->mknod(dir, dentry, mode, dev); 1641 error = dir->i_op->mknod(dir, dentry, mode, dev);
1623 if (!error) { 1642 if (!error) {
1624 inode_dir_notify(dir, DN_CREATE); 1643 fsnotify_create(dir, dentry->d_name.name);
1625 security_inode_post_mknod(dir, dentry, mode, dev); 1644 security_inode_post_mknod(dir, dentry, mode, dev);
1626 } 1645 }
1627 return error; 1646 return error;
@@ -1694,7 +1713,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1694 DQUOT_INIT(dir); 1713 DQUOT_INIT(dir);
1695 error = dir->i_op->mkdir(dir, dentry, mode); 1714 error = dir->i_op->mkdir(dir, dentry, mode);
1696 if (!error) { 1715 if (!error) {
1697 inode_dir_notify(dir, DN_CREATE); 1716 fsnotify_mkdir(dir, dentry->d_name.name);
1698 security_inode_post_mkdir(dir,dentry, mode); 1717 security_inode_post_mkdir(dir,dentry, mode);
1699 } 1718 }
1700 return error; 1719 return error;
@@ -1785,7 +1804,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1785 } 1804 }
1786 up(&dentry->d_inode->i_sem); 1805 up(&dentry->d_inode->i_sem);
1787 if (!error) { 1806 if (!error) {
1788 inode_dir_notify(dir, DN_DELETE);
1789 d_delete(dentry); 1807 d_delete(dentry);
1790 } 1808 }
1791 dput(dentry); 1809 dput(dentry);
@@ -1859,8 +1877,8 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
1859 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 1877 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
1860 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 1878 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
1861 d_delete(dentry); 1879 d_delete(dentry);
1862 inode_dir_notify(dir, DN_DELETE);
1863 } 1880 }
1881
1864 return error; 1882 return error;
1865} 1883}
1866 1884
@@ -1934,7 +1952,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
1934 DQUOT_INIT(dir); 1952 DQUOT_INIT(dir);
1935 error = dir->i_op->symlink(dir, dentry, oldname); 1953 error = dir->i_op->symlink(dir, dentry, oldname);
1936 if (!error) { 1954 if (!error) {
1937 inode_dir_notify(dir, DN_CREATE); 1955 fsnotify_create(dir, dentry->d_name.name);
1938 security_inode_post_symlink(dir, dentry, oldname); 1956 security_inode_post_symlink(dir, dentry, oldname);
1939 } 1957 }
1940 return error; 1958 return error;
@@ -2007,7 +2025,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2007 error = dir->i_op->link(old_dentry, dir, new_dentry); 2025 error = dir->i_op->link(old_dentry, dir, new_dentry);
2008 up(&old_dentry->d_inode->i_sem); 2026 up(&old_dentry->d_inode->i_sem);
2009 if (!error) { 2027 if (!error) {
2010 inode_dir_notify(dir, DN_CREATE); 2028 fsnotify_create(dir, new_dentry->d_name.name);
2011 security_inode_post_link(old_dentry, dir, new_dentry); 2029 security_inode_post_link(old_dentry, dir, new_dentry);
2012 } 2030 }
2013 return error; 2031 return error;
@@ -2171,6 +2189,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2171{ 2189{
2172 int error; 2190 int error;
2173 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2191 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2192 const char *old_name;
2174 2193
2175 if (old_dentry->d_inode == new_dentry->d_inode) 2194 if (old_dentry->d_inode == new_dentry->d_inode)
2176 return 0; 2195 return 0;
@@ -2192,18 +2211,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2192 DQUOT_INIT(old_dir); 2211 DQUOT_INIT(old_dir);
2193 DQUOT_INIT(new_dir); 2212 DQUOT_INIT(new_dir);
2194 2213
2214 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
2215
2195 if (is_dir) 2216 if (is_dir)
2196 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2217 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2197 else 2218 else
2198 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2219 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2199 if (!error) { 2220 if (!error) {
2200 if (old_dir == new_dir) 2221 const char *new_name = old_dentry->d_name.name;
2201 inode_dir_notify(old_dir, DN_RENAME); 2222 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
2202 else { 2223 new_dentry->d_inode, old_dentry->d_inode);
2203 inode_dir_notify(old_dir, DN_DELETE);
2204 inode_dir_notify(new_dir, DN_CREATE);
2205 }
2206 } 2224 }
2225 fsnotify_oldname_free(old_name);
2226
2207 return error; 2227 return error;
2208} 2228}
2209 2229
@@ -2327,15 +2347,17 @@ out:
2327int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2347int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2328{ 2348{
2329 struct nameidata nd; 2349 struct nameidata nd;
2330 int res; 2350 void *cookie;
2351
2331 nd.depth = 0; 2352 nd.depth = 0;
2332 res = dentry->d_inode->i_op->follow_link(dentry, &nd); 2353 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
2333 if (!res) { 2354 if (!IS_ERR(cookie)) {
2334 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2355 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
2335 if (dentry->d_inode->i_op->put_link) 2356 if (dentry->d_inode->i_op->put_link)
2336 dentry->d_inode->i_op->put_link(dentry, &nd); 2357 dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
2358 cookie = ERR_PTR(res);
2337 } 2359 }
2338 return res; 2360 return PTR_ERR(cookie);
2339} 2361}
2340 2362
2341int vfs_follow_link(struct nameidata *nd, const char *link) 2363int vfs_follow_link(struct nameidata *nd, const char *link)
@@ -2378,23 +2400,20 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2378 return res; 2400 return res;
2379} 2401}
2380 2402
2381int page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2403void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
2382{ 2404{
2383 struct page *page; 2405 struct page *page = NULL;
2384 nd_set_link(nd, page_getlink(dentry, &page)); 2406 nd_set_link(nd, page_getlink(dentry, &page));
2385 return 0; 2407 return page;
2386} 2408}
2387 2409
2388void page_put_link(struct dentry *dentry, struct nameidata *nd) 2410void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
2389{ 2411{
2390 if (!IS_ERR(nd_get_link(nd))) { 2412 struct page *page = cookie;
2391 struct page *page; 2413
2392 page = find_get_page(dentry->d_inode->i_mapping, 0); 2414 if (page) {
2393 if (!page)
2394 BUG();
2395 kunmap(page); 2415 kunmap(page);
2396 page_cache_release(page); 2416 page_cache_release(page);
2397 page_cache_release(page);
2398 } 2417 }
2399} 2418}
2400 2419
diff --git a/fs/namespace.c b/fs/namespace.c
index 3b93e5d750eb..79bd8a46e1e7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
61 INIT_LIST_HEAD(&mnt->mnt_child); 61 INIT_LIST_HEAD(&mnt->mnt_child);
62 INIT_LIST_HEAD(&mnt->mnt_mounts); 62 INIT_LIST_HEAD(&mnt->mnt_mounts);
63 INIT_LIST_HEAD(&mnt->mnt_list); 63 INIT_LIST_HEAD(&mnt->mnt_list);
64 INIT_LIST_HEAD(&mnt->mnt_fslink); 64 INIT_LIST_HEAD(&mnt->mnt_expire);
65 if (name) { 65 if (name) {
66 int size = strlen(name)+1; 66 int size = strlen(name)+1;
67 char *newname = kmalloc(size, GFP_KERNEL); 67 char *newname = kmalloc(size, GFP_KERNEL);
@@ -160,13 +160,13 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
160 mnt->mnt_root = dget(root); 160 mnt->mnt_root = dget(root);
161 mnt->mnt_mountpoint = mnt->mnt_root; 161 mnt->mnt_mountpoint = mnt->mnt_root;
162 mnt->mnt_parent = mnt; 162 mnt->mnt_parent = mnt;
163 mnt->mnt_namespace = old->mnt_namespace; 163 mnt->mnt_namespace = current->namespace;
164 164
165 /* stick the duplicate mount on the same expiry list 165 /* stick the duplicate mount on the same expiry list
166 * as the original if that was on one */ 166 * as the original if that was on one */
167 spin_lock(&vfsmount_lock); 167 spin_lock(&vfsmount_lock);
168 if (!list_empty(&old->mnt_fslink)) 168 if (!list_empty(&old->mnt_expire))
169 list_add(&mnt->mnt_fslink, &old->mnt_fslink); 169 list_add(&mnt->mnt_expire, &old->mnt_expire);
170 spin_unlock(&vfsmount_lock); 170 spin_unlock(&vfsmount_lock);
171 } 171 }
172 return mnt; 172 return mnt;
@@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt)
337 337
338EXPORT_SYMBOL(may_umount); 338EXPORT_SYMBOL(may_umount);
339 339
340void umount_tree(struct vfsmount *mnt) 340static void umount_tree(struct vfsmount *mnt)
341{ 341{
342 struct vfsmount *p; 342 struct vfsmount *p;
343 LIST_HEAD(kill); 343 LIST_HEAD(kill);
@@ -345,12 +345,13 @@ void umount_tree(struct vfsmount *mnt)
345 for (p = mnt; p; p = next_mnt(p, mnt)) { 345 for (p = mnt; p; p = next_mnt(p, mnt)) {
346 list_del(&p->mnt_list); 346 list_del(&p->mnt_list);
347 list_add(&p->mnt_list, &kill); 347 list_add(&p->mnt_list, &kill);
348 p->mnt_namespace = NULL;
348 } 349 }
349 350
350 while (!list_empty(&kill)) { 351 while (!list_empty(&kill)) {
351 mnt = list_entry(kill.next, struct vfsmount, mnt_list); 352 mnt = list_entry(kill.next, struct vfsmount, mnt_list);
352 list_del_init(&mnt->mnt_list); 353 list_del_init(&mnt->mnt_list);
353 list_del_init(&mnt->mnt_fslink); 354 list_del_init(&mnt->mnt_expire);
354 if (mnt->mnt_parent == mnt) { 355 if (mnt->mnt_parent == mnt) {
355 spin_unlock(&vfsmount_lock); 356 spin_unlock(&vfsmount_lock);
356 } else { 357 } else {
@@ -644,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
644 if (mnt) { 645 if (mnt) {
645 /* stop bind mounts from expiring */ 646 /* stop bind mounts from expiring */
646 spin_lock(&vfsmount_lock); 647 spin_lock(&vfsmount_lock);
647 list_del_init(&mnt->mnt_fslink); 648 list_del_init(&mnt->mnt_expire);
648 spin_unlock(&vfsmount_lock); 649 spin_unlock(&vfsmount_lock);
649 650
650 err = graft_tree(mnt, nd); 651 err = graft_tree(mnt, nd);
@@ -743,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
743 744
744 /* if the mount is moved, it should no longer be expire 745 /* if the mount is moved, it should no longer be expire
745 * automatically */ 746 * automatically */
746 list_del_init(&old_nd.mnt->mnt_fslink); 747 list_del_init(&old_nd.mnt->mnt_expire);
747out2: 748out2:
748 spin_unlock(&vfsmount_lock); 749 spin_unlock(&vfsmount_lock);
749out1: 750out1:
@@ -807,12 +808,13 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
807 goto unlock; 808 goto unlock;
808 809
809 newmnt->mnt_flags = mnt_flags; 810 newmnt->mnt_flags = mnt_flags;
811 newmnt->mnt_namespace = current->namespace;
810 err = graft_tree(newmnt, nd); 812 err = graft_tree(newmnt, nd);
811 813
812 if (err == 0 && fslist) { 814 if (err == 0 && fslist) {
813 /* add to the specified expiration list */ 815 /* add to the specified expiration list */
814 spin_lock(&vfsmount_lock); 816 spin_lock(&vfsmount_lock);
815 list_add_tail(&newmnt->mnt_fslink, fslist); 817 list_add_tail(&newmnt->mnt_expire, fslist);
816 spin_unlock(&vfsmount_lock); 818 spin_unlock(&vfsmount_lock);
817 } 819 }
818 820
@@ -824,6 +826,54 @@ unlock:
824 826
825EXPORT_SYMBOL_GPL(do_add_mount); 827EXPORT_SYMBOL_GPL(do_add_mount);
826 828
829static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
830{
831 spin_lock(&vfsmount_lock);
832
833 /*
834 * Check if mount is still attached, if not, let whoever holds it deal
835 * with the sucker
836 */
837 if (mnt->mnt_parent == mnt) {
838 spin_unlock(&vfsmount_lock);
839 return;
840 }
841
842 /*
843 * Check that it is still dead: the count should now be 2 - as
844 * contributed by the vfsmount parent and the mntget above
845 */
846 if (atomic_read(&mnt->mnt_count) == 2) {
847 struct nameidata old_nd;
848
849 /* delete from the namespace */
850 list_del_init(&mnt->mnt_list);
851 mnt->mnt_namespace = NULL;
852 detach_mnt(mnt, &old_nd);
853 spin_unlock(&vfsmount_lock);
854 path_release(&old_nd);
855
856 /*
857 * Now lay it to rest if this was the last ref on the superblock
858 */
859 if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
860 /* last instance - try to be smart */
861 lock_kernel();
862 DQUOT_OFF(mnt->mnt_sb);
863 acct_auto_close(mnt->mnt_sb);
864 unlock_kernel();
865 }
866 mntput(mnt);
867 } else {
868 /*
869 * Someone brought it back to life whilst we didn't have any
870 * locks held so return it to the expiration list
871 */
872 list_add_tail(&mnt->mnt_expire, mounts);
873 spin_unlock(&vfsmount_lock);
874 }
875}
876
827/* 877/*
828 * process a list of expirable mountpoints with the intent of discarding any 878 * process a list of expirable mountpoints with the intent of discarding any
829 * mountpoints that aren't in use and haven't been touched since last we came 879 * mountpoints that aren't in use and haven't been touched since last we came
@@ -846,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts)
846 * - still marked for expiry (marked on the last call here; marks are 896 * - still marked for expiry (marked on the last call here; marks are
847 * cleared by mntput()) 897 * cleared by mntput())
848 */ 898 */
849 list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { 899 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
850 if (!xchg(&mnt->mnt_expiry_mark, 1) || 900 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
851 atomic_read(&mnt->mnt_count) != 1) 901 atomic_read(&mnt->mnt_count) != 1)
852 continue; 902 continue;
853 903
854 mntget(mnt); 904 mntget(mnt);
855 list_move(&mnt->mnt_fslink, &graveyard); 905 list_move(&mnt->mnt_expire, &graveyard);
856 } 906 }
857 907
858 /* 908 /*
@@ -862,61 +912,19 @@ void mark_mounts_for_expiry(struct list_head *mounts)
862 * - dispose of the corpse 912 * - dispose of the corpse
863 */ 913 */
864 while (!list_empty(&graveyard)) { 914 while (!list_empty(&graveyard)) {
865 mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); 915 mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
866 list_del_init(&mnt->mnt_fslink); 916 list_del_init(&mnt->mnt_expire);
867 917
868 /* don't do anything if the namespace is dead - all the 918 /* don't do anything if the namespace is dead - all the
869 * vfsmounts from it are going away anyway */ 919 * vfsmounts from it are going away anyway */
870 namespace = mnt->mnt_namespace; 920 namespace = mnt->mnt_namespace;
871 if (!namespace || atomic_read(&namespace->count) <= 0) 921 if (!namespace || !namespace->root)
872 continue; 922 continue;
873 get_namespace(namespace); 923 get_namespace(namespace);
874 924
875 spin_unlock(&vfsmount_lock); 925 spin_unlock(&vfsmount_lock);
876 down_write(&namespace->sem); 926 down_write(&namespace->sem);
877 spin_lock(&vfsmount_lock); 927 expire_mount(mnt, mounts);
878
879 /* check that it is still dead: the count should now be 2 - as
880 * contributed by the vfsmount parent and the mntget above */
881 if (atomic_read(&mnt->mnt_count) == 2) {
882 struct vfsmount *xdmnt;
883 struct dentry *xdentry;
884
885 /* delete from the namespace */
886 list_del_init(&mnt->mnt_list);
887 list_del_init(&mnt->mnt_child);
888 list_del_init(&mnt->mnt_hash);
889 mnt->mnt_mountpoint->d_mounted--;
890
891 xdentry = mnt->mnt_mountpoint;
892 mnt->mnt_mountpoint = mnt->mnt_root;
893 xdmnt = mnt->mnt_parent;
894 mnt->mnt_parent = mnt;
895
896 spin_unlock(&vfsmount_lock);
897
898 mntput(xdmnt);
899 dput(xdentry);
900
901 /* now lay it to rest if this was the last ref on the
902 * superblock */
903 if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
904 /* last instance - try to be smart */
905 lock_kernel();
906 DQUOT_OFF(mnt->mnt_sb);
907 acct_auto_close(mnt->mnt_sb);
908 unlock_kernel();
909 }
910
911 mntput(mnt);
912 } else {
913 /* someone brought it back to life whilst we didn't
914 * have any locks held so return it to the expiration
915 * list */
916 list_add_tail(&mnt->mnt_fslink, mounts);
917 spin_unlock(&vfsmount_lock);
918 }
919
920 up_write(&namespace->sem); 928 up_write(&namespace->sem);
921 929
922 mntput(mnt); 930 mntput(mnt);
@@ -1449,16 +1457,12 @@ void __init mnt_init(unsigned long mempages)
1449 1457
1450void __put_namespace(struct namespace *namespace) 1458void __put_namespace(struct namespace *namespace)
1451{ 1459{
1452 struct vfsmount *mnt; 1460 struct vfsmount *root = namespace->root;
1453 1461 namespace->root = NULL;
1462 spin_unlock(&vfsmount_lock);
1454 down_write(&namespace->sem); 1463 down_write(&namespace->sem);
1455 spin_lock(&vfsmount_lock); 1464 spin_lock(&vfsmount_lock);
1456 1465 umount_tree(root);
1457 list_for_each_entry(mnt, &namespace->list, mnt_list) {
1458 mnt->mnt_namespace = NULL;
1459 }
1460
1461 umount_tree(namespace->root);
1462 spin_unlock(&vfsmount_lock); 1466 spin_unlock(&vfsmount_lock);
1463 up_write(&namespace->sem); 1467 up_write(&namespace->sem);
1464 kfree(namespace); 1468 kfree(namespace);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 2dc2d8693968..a9f7a8ab1d59 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -705,18 +705,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
705 DPRINTK("ncp_do_readdir: init failed, err=%d\n", err); 705 DPRINTK("ncp_do_readdir: init failed, err=%d\n", err);
706 return; 706 return;
707 } 707 }
708#ifdef USE_OLD_SLOW_DIRECTORY_LISTING
709 for (;;) {
710 err = ncp_search_for_file_or_subdir(server, &seq, &entry.i);
711 if (err) {
712 DPRINTK("ncp_do_readdir: search failed, err=%d\n", err);
713 break;
714 }
715 entry.volume = entry.i.volNumber;
716 if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry))
717 break;
718 }
719#else
720 /* We MUST NOT use server->buffer_size handshaked with server if we are 708 /* We MUST NOT use server->buffer_size handshaked with server if we are
721 using UDP, as for UDP server uses max. buffer size determined by 709 using UDP, as for UDP server uses max. buffer size determined by
722 MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes). 710 MTU, and for TCP server uses hardwired value 65KB (== 66560 bytes).
@@ -754,7 +742,6 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir,
754 } 742 }
755 } while (more); 743 } while (more);
756 vfree(buf); 744 vfree(buf);
757#endif
758 return; 745 return;
759} 746}
760 747
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index e4eb5ed4bee4..c755e1848a42 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -845,46 +845,6 @@ out:
845 return result; 845 return result;
846} 846}
847 847
848/* Search for everything */
849int ncp_search_for_file_or_subdir(struct ncp_server *server,
850 struct nw_search_sequence *seq,
851 struct nw_info_struct *target)
852{
853 int result;
854
855 ncp_init_request(server);
856 ncp_add_byte(server, 3); /* subfunction */
857 ncp_add_byte(server, server->name_space[seq->volNumber]);
858 ncp_add_byte(server, 0); /* data stream (???) */
859 ncp_add_word(server, cpu_to_le16(0x8006)); /* Search attribs */
860 ncp_add_dword(server, RIM_ALL); /* return info mask */
861 ncp_add_mem(server, seq, 9);
862#ifdef CONFIG_NCPFS_NFS_NS
863 if (server->name_space[seq->volNumber] == NW_NS_NFS) {
864 ncp_add_byte(server, 0); /* 0 byte pattern */
865 } else
866#endif
867 {
868 ncp_add_byte(server, 2); /* 2 byte pattern */
869 ncp_add_byte(server, 0xff); /* following is a wildcard */
870 ncp_add_byte(server, '*');
871 }
872
873 if ((result = ncp_request(server, 87)) != 0)
874 goto out;
875 memcpy(seq, ncp_reply_data(server, 0), sizeof(*seq));
876 ncp_extract_file_info(ncp_reply_data(server, 10), target);
877
878 ncp_unlock_server(server);
879
880 result = ncp_obtain_nfs_info(server, target);
881 return result;
882
883out:
884 ncp_unlock_server(server);
885 return result;
886}
887
888int ncp_search_for_fileset(struct ncp_server *server, 848int ncp_search_for_fileset(struct ncp_server *server,
889 struct nw_search_sequence *seq, 849 struct nw_search_sequence *seq,
890 int* more, 850 int* more,
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 05ec2e9d90c6..9e4dc30c2435 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -87,9 +87,6 @@ int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, char *,
87 87
88int ncp_initialize_search(struct ncp_server *, struct inode *, 88int ncp_initialize_search(struct ncp_server *, struct inode *,
89 struct nw_search_sequence *target); 89 struct nw_search_sequence *target);
90int ncp_search_for_file_or_subdir(struct ncp_server *server,
91 struct nw_search_sequence *seq,
92 struct nw_info_struct *target);
93int ncp_search_for_fileset(struct ncp_server *server, 90int ncp_search_for_fileset(struct ncp_server *server,
94 struct nw_search_sequence *seq, 91 struct nw_search_sequence *seq,
95 int* more, int* cnt, 92 int* more, int* cnt,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b38a57e78a63..2df639f143e8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -182,14 +182,16 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
182 /* We requested READDIRPLUS, but the server doesn't grok it */ 182 /* We requested READDIRPLUS, but the server doesn't grok it */
183 if (error == -ENOTSUPP && desc->plus) { 183 if (error == -ENOTSUPP && desc->plus) {
184 NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; 184 NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
185 NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; 185 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
186 desc->plus = 0; 186 desc->plus = 0;
187 goto again; 187 goto again;
188 } 188 }
189 goto error; 189 goto error;
190 } 190 }
191 SetPageUptodate(page); 191 SetPageUptodate(page);
192 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; 192 spin_lock(&inode->i_lock);
193 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
194 spin_unlock(&inode->i_lock);
193 /* Ensure consistent page alignment of the data. 195 /* Ensure consistent page alignment of the data.
194 * Note: assumes we have exclusive access to this mapping either 196 * Note: assumes we have exclusive access to this mapping either
195 * through inode->i_sem or some other mechanism. 197 * through inode->i_sem or some other mechanism.
@@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
462 page, 464 page,
463 NFS_SERVER(inode)->dtsize, 465 NFS_SERVER(inode)->dtsize,
464 desc->plus); 466 desc->plus);
465 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; 467 spin_lock(&inode->i_lock);
468 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
469 spin_unlock(&inode->i_lock);
466 desc->page = page; 470 desc->page = page;
467 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 471 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
468 if (desc->error >= 0) { 472 if (desc->error >= 0) {
@@ -545,7 +549,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
545 break; 549 break;
546 } 550 }
547 if (res == -ETOOSMALL && desc->plus) { 551 if (res == -ETOOSMALL && desc->plus) {
548 NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; 552 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
549 nfs_zap_caches(inode); 553 nfs_zap_caches(inode);
550 desc->plus = 0; 554 desc->plus = 0;
551 desc->entry->eof = 0; 555 desc->entry->eof = 0;
@@ -608,7 +612,7 @@ static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
608{ 612{
609 if (IS_ROOT(dentry)) 613 if (IS_ROOT(dentry))
610 return 1; 614 return 1;
611 if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 615 if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0
612 || nfs_attribute_timeout(dir)) 616 || nfs_attribute_timeout(dir))
613 return 0; 617 return 0;
614 return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); 618 return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata);
@@ -935,6 +939,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
935 error = nfs_revalidate_inode(NFS_SERVER(dir), dir); 939 error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
936 if (error < 0) { 940 if (error < 0) {
937 res = ERR_PTR(error); 941 res = ERR_PTR(error);
942 unlock_kernel();
938 goto out; 943 goto out;
939 } 944 }
940 945
@@ -1575,11 +1580,12 @@ out:
1575 1580
1576int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) 1581int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
1577{ 1582{
1578 struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; 1583 struct nfs_inode *nfsi = NFS_I(inode);
1584 struct nfs_access_entry *cache = &nfsi->cache_access;
1579 1585
1580 if (cache->cred != cred 1586 if (cache->cred != cred
1581 || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) 1587 || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
1582 || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)) 1588 || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
1583 return -ENOENT; 1589 return -ENOENT;
1584 memcpy(res, cache, sizeof(*res)); 1590 memcpy(res, cache, sizeof(*res));
1585 return 0; 1591 return 0;
@@ -1587,14 +1593,18 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
1587 1593
1588void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) 1594void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1589{ 1595{
1590 struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; 1596 struct nfs_inode *nfsi = NFS_I(inode);
1597 struct nfs_access_entry *cache = &nfsi->cache_access;
1591 1598
1592 if (cache->cred != set->cred) { 1599 if (cache->cred != set->cred) {
1593 if (cache->cred) 1600 if (cache->cred)
1594 put_rpccred(cache->cred); 1601 put_rpccred(cache->cred);
1595 cache->cred = get_rpccred(set->cred); 1602 cache->cred = get_rpccred(set->cred);
1596 } 1603 }
1597 NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS; 1604 /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
1605 spin_lock(&inode->i_lock);
1606 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1607 spin_unlock(&inode->i_lock);
1598 cache->jiffies = set->jiffies; 1608 cache->jiffies = set->jiffies;
1599 cache->mask = set->mask; 1609 cache->mask = set->mask;
1600} 1610}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6a30c844de3..6537f2c4ae44 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
751 retval = -EFAULT; 751 retval = -EFAULT;
752 if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) 752 if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
753 goto out; 753 goto out;
754 if (file->f_error) {
755 retval = file->f_error;
756 file->f_error = 0;
757 goto out;
758 }
759 retval = -EFBIG; 754 retval = -EFBIG;
760 if (limit != RLIM_INFINITY) { 755 if (limit != RLIM_INFINITY) {
761 if (pos >= limit) { 756 if (pos >= limit) {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5621ba9885f4..f6b9eda925c5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -134,9 +134,10 @@ nfs_file_release(struct inode *inode, struct file *filp)
134 */ 134 */
135static int nfs_revalidate_file(struct inode *inode, struct file *filp) 135static int nfs_revalidate_file(struct inode *inode, struct file *filp)
136{ 136{
137 struct nfs_inode *nfsi = NFS_I(inode);
137 int retval = 0; 138 int retval = 0;
138 139
139 if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) 140 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
140 retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 141 retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
141 nfs_revalidate_mapping(inode, filp->f_mapping); 142 nfs_revalidate_mapping(inode, filp->f_mapping);
142 return 0; 143 return 0;
@@ -164,7 +165,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
164 goto force_reval; 165 goto force_reval;
165 if (nfsi->npages != 0) 166 if (nfsi->npages != 0)
166 return 0; 167 return 0;
167 if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) 168 if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
168 return 0; 169 return 0;
169force_reval: 170force_reval:
170 return __nfs_revalidate_inode(server, inode); 171 return __nfs_revalidate_inode(server, inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4845911f1c63..541b418327c8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -615,14 +615,18 @@ nfs_zap_caches(struct inode *inode)
615 struct nfs_inode *nfsi = NFS_I(inode); 615 struct nfs_inode *nfsi = NFS_I(inode);
616 int mode = inode->i_mode; 616 int mode = inode->i_mode;
617 617
618 spin_lock(&inode->i_lock);
619
618 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); 620 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
619 NFS_ATTRTIMEO_UPDATE(inode) = jiffies; 621 NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
620 622
621 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); 623 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
622 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) 624 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
623 nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 625 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
624 else 626 else
625 nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 627 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
628
629 spin_unlock(&inode->i_lock);
626} 630}
627 631
628static void nfs_zap_acl_cache(struct inode *inode) 632static void nfs_zap_acl_cache(struct inode *inode)
@@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode)
632 clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; 636 clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache;
633 if (clear_acl_cache != NULL) 637 if (clear_acl_cache != NULL)
634 clear_acl_cache(inode); 638 clear_acl_cache(inode);
635 NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; 639 spin_lock(&inode->i_lock);
640 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL;
641 spin_unlock(&inode->i_lock);
636} 642}
637 643
638/* 644/*
@@ -739,7 +745,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
739 inode->i_fop = &nfs_dir_operations; 745 inode->i_fop = &nfs_dir_operations;
740 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 746 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
741 && fattr->size <= NFS_LIMIT_READDIRPLUS) 747 && fattr->size <= NFS_LIMIT_READDIRPLUS)
742 NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; 748 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
743 } else if (S_ISLNK(inode->i_mode)) 749 } else if (S_ISLNK(inode->i_mode))
744 inode->i_op = &nfs_symlink_inode_operations; 750 inode->i_op = &nfs_symlink_inode_operations;
745 else 751 else
@@ -814,55 +820,84 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
814 nfs_wb_all(inode); 820 nfs_wb_all(inode);
815 } 821 }
816 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); 822 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
817 if (error == 0) { 823 if (error == 0)
818 nfs_refresh_inode(inode, &fattr); 824 nfs_refresh_inode(inode, &fattr);
825 nfs_end_data_update(inode);
826 unlock_kernel();
827 return error;
828}
829
830/**
831 * nfs_setattr_update_inode - Update inode metadata after a setattr call.
832 * @inode: pointer to struct inode
833 * @attr: pointer to struct iattr
834 *
835 * Note: we do this in the *proc.c in order to ensure that
836 * it works for things like exclusive creates too.
837 */
838void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
839{
840 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
819 if ((attr->ia_valid & ATTR_MODE) != 0) { 841 if ((attr->ia_valid & ATTR_MODE) != 0) {
820 int mode; 842 int mode = attr->ia_mode & S_IALLUGO;
821 mode = inode->i_mode & ~S_IALLUGO; 843 mode |= inode->i_mode & ~S_IALLUGO;
822 mode |= attr->ia_mode & S_IALLUGO;
823 inode->i_mode = mode; 844 inode->i_mode = mode;
824 } 845 }
825 if ((attr->ia_valid & ATTR_UID) != 0) 846 if ((attr->ia_valid & ATTR_UID) != 0)
826 inode->i_uid = attr->ia_uid; 847 inode->i_uid = attr->ia_uid;
827 if ((attr->ia_valid & ATTR_GID) != 0) 848 if ((attr->ia_valid & ATTR_GID) != 0)
828 inode->i_gid = attr->ia_gid; 849 inode->i_gid = attr->ia_gid;
829 if ((attr->ia_valid & ATTR_SIZE) != 0) { 850 spin_lock(&inode->i_lock);
830 inode->i_size = attr->ia_size; 851 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
831 vmtruncate(inode, attr->ia_size); 852 spin_unlock(&inode->i_lock);
832 }
833 } 853 }
834 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) 854 if ((attr->ia_valid & ATTR_SIZE) != 0) {
835 NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 855 inode->i_size = attr->ia_size;
836 nfs_end_data_update(inode); 856 vmtruncate(inode, attr->ia_size);
837 unlock_kernel(); 857 }
838 return error; 858}
859
860static int nfs_wait_schedule(void *word)
861{
862 if (signal_pending(current))
863 return -ERESTARTSYS;
864 schedule();
865 return 0;
839} 866}
840 867
841/* 868/*
842 * Wait for the inode to get unlocked. 869 * Wait for the inode to get unlocked.
843 * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
844 */ 870 */
845static int 871static int nfs_wait_on_inode(struct inode *inode)
846nfs_wait_on_inode(struct inode *inode, int flag)
847{ 872{
848 struct rpc_clnt *clnt = NFS_CLIENT(inode); 873 struct rpc_clnt *clnt = NFS_CLIENT(inode);
849 struct nfs_inode *nfsi = NFS_I(inode); 874 struct nfs_inode *nfsi = NFS_I(inode);
850 875 sigset_t oldmask;
851 int error; 876 int error;
852 if (!(NFS_FLAGS(inode) & flag)) 877
853 return 0;
854 atomic_inc(&inode->i_count); 878 atomic_inc(&inode->i_count);
855 error = nfs_wait_event(clnt, nfsi->nfs_i_wait, 879 rpc_clnt_sigmask(clnt, &oldmask);
856 !(NFS_FLAGS(inode) & flag)); 880 error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
881 nfs_wait_schedule, TASK_INTERRUPTIBLE);
882 rpc_clnt_sigunmask(clnt, &oldmask);
857 iput(inode); 883 iput(inode);
884
858 return error; 885 return error;
859} 886}
860 887
888static void nfs_wake_up_inode(struct inode *inode)
889{
890 struct nfs_inode *nfsi = NFS_I(inode);
891
892 clear_bit(NFS_INO_REVALIDATING, &nfsi->flags);
893 smp_mb__after_clear_bit();
894 wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING);
895}
896
861int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 897int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
862{ 898{
863 struct inode *inode = dentry->d_inode; 899 struct inode *inode = dentry->d_inode;
864 struct nfs_inode *nfsi = NFS_I(inode); 900 int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
865 int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
866 int err; 901 int err;
867 902
868 if (__IS_FLG(inode, MS_NOATIME)) 903 if (__IS_FLG(inode, MS_NOATIME))
@@ -1008,7 +1043,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1008 struct nfs_fattr fattr; 1043 struct nfs_fattr fattr;
1009 struct nfs_inode *nfsi = NFS_I(inode); 1044 struct nfs_inode *nfsi = NFS_I(inode);
1010 unsigned long verifier; 1045 unsigned long verifier;
1011 unsigned int flags; 1046 unsigned long cache_validity;
1012 1047
1013 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", 1048 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
1014 inode->i_sb->s_id, (long long)NFS_FILEID(inode)); 1049 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
@@ -1019,18 +1054,19 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1019 if (NFS_STALE(inode)) 1054 if (NFS_STALE(inode))
1020 goto out_nowait; 1055 goto out_nowait;
1021 1056
1022 while (NFS_REVALIDATING(inode)) { 1057 status = nfs_wait_on_inode(inode);
1023 status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); 1058 if (status < 0)
1024 if (status < 0) 1059 goto out;
1025 goto out_nowait; 1060 if (NFS_STALE(inode)) {
1026 if (NFS_ATTRTIMEO(inode) == 0) 1061 status = -ESTALE;
1027 continue; 1062 /* Do we trust the cached ESTALE? */
1028 if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) 1063 if (NFS_ATTRTIMEO(inode) != 0) {
1029 continue; 1064 if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) {
1030 status = NFS_STALE(inode) ? -ESTALE : 0; 1065 /* no */
1031 goto out_nowait; 1066 } else
1067 goto out;
1068 }
1032 } 1069 }
1033 NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
1034 1070
1035 /* Protect against RPC races by saving the change attribute */ 1071 /* Protect against RPC races by saving the change attribute */
1036 verifier = nfs_save_change_attribute(inode); 1072 verifier = nfs_save_change_attribute(inode);
@@ -1042,7 +1078,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1042 if (status == -ESTALE) { 1078 if (status == -ESTALE) {
1043 nfs_zap_caches(inode); 1079 nfs_zap_caches(inode);
1044 if (!S_ISDIR(inode->i_mode)) 1080 if (!S_ISDIR(inode->i_mode))
1045 NFS_FLAGS(inode) |= NFS_INO_STALE; 1081 set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
1046 } 1082 }
1047 goto out; 1083 goto out;
1048 } 1084 }
@@ -1054,25 +1090,30 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1054 (long long)NFS_FILEID(inode), status); 1090 (long long)NFS_FILEID(inode), status);
1055 goto out; 1091 goto out;
1056 } 1092 }
1057 flags = nfsi->flags; 1093 spin_lock(&inode->i_lock);
1058 nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; 1094 cache_validity = nfsi->cache_validity;
1095 nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
1096
1059 /* 1097 /*
1060 * We may need to keep the attributes marked as invalid if 1098 * We may need to keep the attributes marked as invalid if
1061 * we raced with nfs_end_attr_update(). 1099 * we raced with nfs_end_attr_update().
1062 */ 1100 */
1063 if (verifier == nfsi->cache_change_attribute) 1101 if (verifier == nfsi->cache_change_attribute)
1064 nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); 1102 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
1065 /* Do the page cache invalidation */ 1103 spin_unlock(&inode->i_lock);
1104
1066 nfs_revalidate_mapping(inode, inode->i_mapping); 1105 nfs_revalidate_mapping(inode, inode->i_mapping);
1067 if (flags & NFS_INO_INVALID_ACL) 1106
1107 if (cache_validity & NFS_INO_INVALID_ACL)
1068 nfs_zap_acl_cache(inode); 1108 nfs_zap_acl_cache(inode);
1109
1069 dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", 1110 dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
1070 inode->i_sb->s_id, 1111 inode->i_sb->s_id,
1071 (long long)NFS_FILEID(inode)); 1112 (long long)NFS_FILEID(inode));
1072 1113
1073out: 1114 out:
1074 NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; 1115 nfs_wake_up_inode(inode);
1075 wake_up(&nfsi->nfs_i_wait); 1116
1076 out_nowait: 1117 out_nowait:
1077 unlock_kernel(); 1118 unlock_kernel();
1078 return status; 1119 return status;
@@ -1096,7 +1137,7 @@ int nfs_attribute_timeout(struct inode *inode)
1096 */ 1137 */
1097int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 1138int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1098{ 1139{
1099 if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) 1140 if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
1100 && !nfs_attribute_timeout(inode)) 1141 && !nfs_attribute_timeout(inode))
1101 return NFS_STALE(inode) ? -ESTALE : 0; 1142 return NFS_STALE(inode) ? -ESTALE : 0;
1102 return __nfs_revalidate_inode(server, inode); 1143 return __nfs_revalidate_inode(server, inode);
@@ -1111,19 +1152,23 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1111{ 1152{
1112 struct nfs_inode *nfsi = NFS_I(inode); 1153 struct nfs_inode *nfsi = NFS_I(inode);
1113 1154
1114 if (nfsi->flags & NFS_INO_INVALID_DATA) { 1155 if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
1115 if (S_ISREG(inode->i_mode)) { 1156 if (S_ISREG(inode->i_mode)) {
1116 if (filemap_fdatawrite(mapping) == 0) 1157 if (filemap_fdatawrite(mapping) == 0)
1117 filemap_fdatawait(mapping); 1158 filemap_fdatawait(mapping);
1118 nfs_wb_all(inode); 1159 nfs_wb_all(inode);
1119 } 1160 }
1120 invalidate_inode_pages2(mapping); 1161 invalidate_inode_pages2(mapping);
1121 nfsi->flags &= ~NFS_INO_INVALID_DATA; 1162
1163 spin_lock(&inode->i_lock);
1164 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
1122 if (S_ISDIR(inode->i_mode)) { 1165 if (S_ISDIR(inode->i_mode)) {
1123 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 1166 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
1124 /* This ensures we revalidate child dentries */ 1167 /* This ensures we revalidate child dentries */
1125 nfsi->cache_change_attribute++; 1168 nfsi->cache_change_attribute++;
1126 } 1169 }
1170 spin_unlock(&inode->i_lock);
1171
1127 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", 1172 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
1128 inode->i_sb->s_id, 1173 inode->i_sb->s_id,
1129 (long long)NFS_FILEID(inode)); 1174 (long long)NFS_FILEID(inode));
@@ -1153,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode)
1153 1198
1154 if (!nfs_have_delegation(inode, FMODE_READ)) { 1199 if (!nfs_have_delegation(inode, FMODE_READ)) {
1155 /* Mark the attribute cache for revalidation */ 1200 /* Mark the attribute cache for revalidation */
1156 nfsi->flags |= NFS_INO_INVALID_ATTR; 1201 spin_lock(&inode->i_lock);
1202 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1157 /* Directories and symlinks: invalidate page cache too */ 1203 /* Directories and symlinks: invalidate page cache too */
1158 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 1204 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1159 nfsi->flags |= NFS_INO_INVALID_DATA; 1205 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
1206 spin_unlock(&inode->i_lock);
1160 } 1207 }
1161 nfsi->cache_change_attribute ++; 1208 nfsi->cache_change_attribute ++;
1162 atomic_dec(&nfsi->data_updates); 1209 atomic_dec(&nfsi->data_updates);
@@ -1181,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1181 if (nfs_have_delegation(inode, FMODE_READ)) 1228 if (nfs_have_delegation(inode, FMODE_READ))
1182 return 0; 1229 return 0;
1183 1230
1231 spin_lock(&inode->i_lock);
1232
1184 /* Are we in the process of updating data on the server? */ 1233 /* Are we in the process of updating data on the server? */
1185 data_unstable = nfs_caches_unstable(inode); 1234 data_unstable = nfs_caches_unstable(inode);
1186 1235
@@ -1189,19 +1238,23 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1189 && nfsi->change_attr == fattr->pre_change_attr) 1238 && nfsi->change_attr == fattr->pre_change_attr)
1190 nfsi->change_attr = fattr->change_attr; 1239 nfsi->change_attr = fattr->change_attr;
1191 if (nfsi->change_attr != fattr->change_attr) { 1240 if (nfsi->change_attr != fattr->change_attr) {
1192 nfsi->flags |= NFS_INO_INVALID_ATTR; 1241 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1193 if (!data_unstable) 1242 if (!data_unstable)
1194 nfsi->flags |= NFS_INO_REVAL_PAGECACHE; 1243 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1195 } 1244 }
1196 } 1245 }
1197 1246
1198 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 1247 if ((fattr->valid & NFS_ATTR_FATTR) == 0) {
1248 spin_unlock(&inode->i_lock);
1199 return 0; 1249 return 0;
1250 }
1200 1251
1201 /* Has the inode gone and changed behind our back? */ 1252 /* Has the inode gone and changed behind our back? */
1202 if (nfsi->fileid != fattr->fileid 1253 if (nfsi->fileid != fattr->fileid
1203 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1254 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1255 spin_unlock(&inode->i_lock);
1204 return -EIO; 1256 return -EIO;
1257 }
1205 1258
1206 cur_size = i_size_read(inode); 1259 cur_size = i_size_read(inode);
1207 new_isize = nfs_size_to_loff_t(fattr->size); 1260 new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1216,30 +1269,31 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1216 1269
1217 /* Verify a few of the more important attributes */ 1270 /* Verify a few of the more important attributes */
1218 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { 1271 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1219 nfsi->flags |= NFS_INO_INVALID_ATTR; 1272 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1220 if (!data_unstable) 1273 if (!data_unstable)
1221 nfsi->flags |= NFS_INO_REVAL_PAGECACHE; 1274 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1222 } 1275 }
1223 if (cur_size != new_isize) { 1276 if (cur_size != new_isize) {
1224 nfsi->flags |= NFS_INO_INVALID_ATTR; 1277 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1225 if (nfsi->npages == 0) 1278 if (nfsi->npages == 0)
1226 nfsi->flags |= NFS_INO_REVAL_PAGECACHE; 1279 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1227 } 1280 }
1228 1281
1229 /* Have any file permissions changed? */ 1282 /* Have any file permissions changed? */
1230 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) 1283 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
1231 || inode->i_uid != fattr->uid 1284 || inode->i_uid != fattr->uid
1232 || inode->i_gid != fattr->gid) 1285 || inode->i_gid != fattr->gid)
1233 nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1286 nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1234 1287
1235 /* Has the link count changed? */ 1288 /* Has the link count changed? */
1236 if (inode->i_nlink != fattr->nlink) 1289 if (inode->i_nlink != fattr->nlink)
1237 nfsi->flags |= NFS_INO_INVALID_ATTR; 1290 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1238 1291
1239 if (!timespec_equal(&inode->i_atime, &fattr->atime)) 1292 if (!timespec_equal(&inode->i_atime, &fattr->atime))
1240 nfsi->flags |= NFS_INO_INVALID_ATIME; 1293 nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
1241 1294
1242 nfsi->read_cache_jiffies = fattr->timestamp; 1295 nfsi->read_cache_jiffies = fattr->timestamp;
1296 spin_unlock(&inode->i_lock);
1243 return 0; 1297 return 0;
1244} 1298}
1245 1299
@@ -1278,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
1278 goto out_err; 1332 goto out_err;
1279 } 1333 }
1280 1334
1335 spin_lock(&inode->i_lock);
1336
1281 /* 1337 /*
1282 * Make sure the inode's type hasn't changed. 1338 * Make sure the inode's type hasn't changed.
1283 */ 1339 */
1284 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1340 if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1341 spin_unlock(&inode->i_lock);
1285 goto out_changed; 1342 goto out_changed;
1343 }
1286 1344
1287 /* 1345 /*
1288 * Update the read time so we don't revalidate too often. 1346 * Update the read time so we don't revalidate too often.
@@ -1373,8 +1431,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
1373 || S_ISLNK(inode->i_mode))) 1431 || S_ISLNK(inode->i_mode)))
1374 invalid &= ~NFS_INO_INVALID_DATA; 1432 invalid &= ~NFS_INO_INVALID_DATA;
1375 if (!nfs_have_delegation(inode, FMODE_READ)) 1433 if (!nfs_have_delegation(inode, FMODE_READ))
1376 nfsi->flags |= invalid; 1434 nfsi->cache_validity |= invalid;
1377 1435
1436 spin_unlock(&inode->i_lock);
1378 return 0; 1437 return 0;
1379 out_changed: 1438 out_changed:
1380 /* 1439 /*
@@ -1391,7 +1450,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
1391 */ 1450 */
1392 nfs_invalidate_inode(inode); 1451 nfs_invalidate_inode(inode);
1393 out_err: 1452 out_err:
1394 NFS_FLAGS(inode) |= NFS_INO_STALE; 1453 set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
1395 return -ESTALE; 1454 return -ESTALE;
1396} 1455}
1397 1456
@@ -1950,7 +2009,8 @@ static struct inode *nfs_alloc_inode(struct super_block *sb)
1950 nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); 2009 nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
1951 if (!nfsi) 2010 if (!nfsi)
1952 return NULL; 2011 return NULL;
1953 nfsi->flags = 0; 2012 nfsi->flags = 0UL;
2013 nfsi->cache_validity = 0UL;
1954#ifdef CONFIG_NFS_V3_ACL 2014#ifdef CONFIG_NFS_V3_ACL
1955 nfsi->acl_access = ERR_PTR(-EAGAIN); 2015 nfsi->acl_access = ERR_PTR(-EAGAIN);
1956 nfsi->acl_default = ERR_PTR(-EAGAIN); 2016 nfsi->acl_default = ERR_PTR(-EAGAIN);
@@ -1982,7 +2042,6 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
1982 nfsi->ndirty = 0; 2042 nfsi->ndirty = 0;
1983 nfsi->ncommit = 0; 2043 nfsi->ncommit = 0;
1984 nfsi->npages = 0; 2044 nfsi->npages = 0;
1985 init_waitqueue_head(&nfsi->nfs_i_wait);
1986 nfs4_init_once(nfsi); 2045 nfs4_init_once(nfsi);
1987 } 2046 }
1988} 2047}
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index ee3536fc84a3..6a5bbc0ae941 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -2,7 +2,7 @@
2#include <linux/nfs.h> 2#include <linux/nfs.h>
3#include <linux/nfs3.h> 3#include <linux/nfs3.h>
4#include <linux/nfs_fs.h> 4#include <linux/nfs_fs.h>
5#include <linux/xattr_acl.h> 5#include <linux/posix_acl_xattr.h>
6#include <linux/nfsacl.h> 6#include <linux/nfsacl.h>
7 7
8#define NFSDBG_FACILITY NFSDBG_PROC 8#define NFSDBG_FACILITY NFSDBG_PROC
@@ -53,9 +53,9 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
53 struct posix_acl *acl; 53 struct posix_acl *acl;
54 int type, error = 0; 54 int type, error = 0;
55 55
56 if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) 56 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
57 type = ACL_TYPE_ACCESS; 57 type = ACL_TYPE_ACCESS;
58 else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) 58 else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
59 type = ACL_TYPE_DEFAULT; 59 type = ACL_TYPE_DEFAULT;
60 else 60 else
61 return -EOPNOTSUPP; 61 return -EOPNOTSUPP;
@@ -82,9 +82,9 @@ int nfs3_setxattr(struct dentry *dentry, const char *name,
82 struct posix_acl *acl; 82 struct posix_acl *acl;
83 int type, error; 83 int type, error;
84 84
85 if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) 85 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
86 type = ACL_TYPE_ACCESS; 86 type = ACL_TYPE_ACCESS;
87 else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) 87 else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
88 type = ACL_TYPE_DEFAULT; 88 type = ACL_TYPE_DEFAULT;
89 else 89 else
90 return -EOPNOTSUPP; 90 return -EOPNOTSUPP;
@@ -103,9 +103,9 @@ int nfs3_removexattr(struct dentry *dentry, const char *name)
103 struct inode *inode = dentry->d_inode; 103 struct inode *inode = dentry->d_inode;
104 int type; 104 int type;
105 105
106 if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) 106 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0)
107 type = ACL_TYPE_ACCESS; 107 type = ACL_TYPE_ACCESS;
108 else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) 108 else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0)
109 type = ACL_TYPE_DEFAULT; 109 type = ACL_TYPE_DEFAULT;
110 else 110 else
111 return -EOPNOTSUPP; 111 return -EOPNOTSUPP;
@@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
308 nfs_begin_data_update(inode); 308 nfs_begin_data_update(inode);
309 status = rpc_call(server->client_acl, ACLPROC3_SETACL, 309 status = rpc_call(server->client_acl, ACLPROC3_SETACL,
310 &args, &fattr, 0); 310 &args, &fattr, 0);
311 NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; 311 spin_lock(&inode->i_lock);
312 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
313 spin_unlock(&inode->i_lock);
312 nfs_end_data_update(inode); 314 nfs_end_data_update(inode);
313 dprintk("NFS reply setacl: %d\n", status); 315 dprintk("NFS reply setacl: %d\n", status);
314 316
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7851569b31c6..2681485cf2d0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -120,6 +120,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
120 dprintk("NFS call setattr\n"); 120 dprintk("NFS call setattr\n");
121 fattr->valid = 0; 121 fattr->valid = 0;
122 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); 122 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
123 if (status == 0)
124 nfs_setattr_update_inode(inode, sattr);
123 dprintk("NFS reply setattr: %d\n", status); 125 dprintk("NFS reply setattr: %d\n", status);
124 return status; 126 return status;
125} 127}
@@ -370,6 +372,8 @@ again:
370 * not sure this buys us anything (and I'd have 372 * not sure this buys us anything (and I'd have
371 * to revamp the NFSv3 XDR code) */ 373 * to revamp the NFSv3 XDR code) */
372 status = nfs3_proc_setattr(dentry, &fattr, sattr); 374 status = nfs3_proc_setattr(dentry, &fattr, sattr);
375 if (status == 0)
376 nfs_setattr_update_inode(dentry->d_inode, sattr);
373 nfs_refresh_inode(dentry->d_inode, &fattr); 377 nfs_refresh_inode(dentry->d_inode, &fattr);
374 dprintk("NFS reply setattr (post-create): %d\n", status); 378 dprintk("NFS reply setattr (post-create): %d\n", status);
375 } 379 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1b76f80aedb9..0c5a308e4963 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -753,6 +753,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
753 .rpc_argp = &arg, 753 .rpc_argp = &arg,
754 .rpc_resp = &res, 754 .rpc_resp = &res,
755 }; 755 };
756 int status;
756 757
757 fattr->valid = 0; 758 fattr->valid = 0;
758 759
@@ -762,7 +763,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
762 } else 763 } else
763 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 764 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
764 765
765 return rpc_call_sync(server->client, &msg, 0); 766 status = rpc_call_sync(server->client, &msg, 0);
767 return status;
766} 768}
767 769
768static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, 770static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
@@ -1145,6 +1147,8 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1145 1147
1146 status = nfs4_do_setattr(NFS_SERVER(inode), fattr, 1148 status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
1147 NFS_FH(inode), sattr, state); 1149 NFS_FH(inode), sattr, state);
1150 if (status == 0)
1151 nfs_setattr_update_inode(inode, sattr);
1148 if (state != NULL) 1152 if (state != NULL)
1149 nfs4_close_state(state, FMODE_WRITE); 1153 nfs4_close_state(state, FMODE_WRITE);
1150 put_rpccred(cred); 1154 put_rpccred(cred);
@@ -1449,8 +1453,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1449 struct nfs_fattr fattr; 1453 struct nfs_fattr fattr;
1450 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, 1454 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
1451 NFS_FH(state->inode), sattr, state); 1455 NFS_FH(state->inode), sattr, state);
1452 if (status == 0) 1456 if (status == 0) {
1457 nfs_setattr_update_inode(state->inode, sattr);
1453 goto out; 1458 goto out;
1459 }
1454 } else if (flags != 0) 1460 } else if (flags != 0)
1455 goto out; 1461 goto out;
1456 nfs4_close_state(state, flags); 1462 nfs4_close_state(state, flags);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index cedf636bcf3c..be23c3fb9260 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -114,6 +114,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
114 dprintk("NFS call setattr\n"); 114 dprintk("NFS call setattr\n");
115 fattr->valid = 0; 115 fattr->valid = 0;
116 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); 116 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
117 if (status == 0)
118 nfs_setattr_update_inode(inode, sattr);
117 dprintk("NFS reply setattr: %d\n", status); 119 dprintk("NFS reply setattr: %d\n", status);
118 return status; 120 return status;
119} 121}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6f866b8aa2d5..6ceb1d471f20 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
140 if (rdata->res.eof != 0 || result == 0) 140 if (rdata->res.eof != 0 || result == 0)
141 break; 141 break;
142 } while (count); 142 } while (count);
143 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; 143 spin_lock(&inode->i_lock);
144 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
145 spin_unlock(&inode->i_lock);
144 146
145 if (count) 147 if (count)
146 memclear_highpage_flush(page, rdata->args.pgbase, count); 148 memclear_highpage_flush(page, rdata->args.pgbase, count);
@@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task)
473 } 475 }
474 task->tk_status = -EIO; 476 task->tk_status = -EIO;
475 } 477 }
476 NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; 478 spin_lock(&data->inode->i_lock);
479 NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
480 spin_unlock(&data->inode->i_lock);
477 data->complete(data, status); 481 data->complete(data, status);
478} 482}
479 483
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 35f106599144..18dc95b0b646 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -27,26 +27,14 @@
27 27
28/* Symlink caching in the page cache is even more simplistic 28/* Symlink caching in the page cache is even more simplistic
29 * and straight-forward than readdir caching. 29 * and straight-forward than readdir caching.
30 *
31 * At the beginning of the page we store pointer to struct page in question,
32 * simplifying nfs_put_link() (if inode got invalidated we can't find the page
33 * to be freed via pagecache lookup).
34 * The NUL-terminated string follows immediately thereafter.
35 */ 30 */
36 31
37struct nfs_symlink {
38 struct page *page;
39 char body[0];
40};
41
42static int nfs_symlink_filler(struct inode *inode, struct page *page) 32static int nfs_symlink_filler(struct inode *inode, struct page *page)
43{ 33{
44 const unsigned int pgbase = offsetof(struct nfs_symlink, body);
45 const unsigned int pglen = PAGE_SIZE - pgbase;
46 int error; 34 int error;
47 35
48 lock_kernel(); 36 lock_kernel();
49 error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen); 37 error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
50 unlock_kernel(); 38 unlock_kernel();
51 if (error < 0) 39 if (error < 0)
52 goto error; 40 goto error;
@@ -60,11 +48,10 @@ error:
60 return -EIO; 48 return -EIO;
61} 49}
62 50
63static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) 51static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
64{ 52{
65 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
66 struct page *page; 54 struct page *page;
67 struct nfs_symlink *p;
68 void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); 55 void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode));
69 if (err) 56 if (err)
70 goto read_failed; 57 goto read_failed;
@@ -78,28 +65,20 @@ static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
78 err = ERR_PTR(-EIO); 65 err = ERR_PTR(-EIO);
79 goto getlink_read_error; 66 goto getlink_read_error;
80 } 67 }
81 p = kmap(page); 68 nd_set_link(nd, kmap(page));
82 p->page = page; 69 return page;
83 nd_set_link(nd, p->body);
84 return 0;
85 70
86getlink_read_error: 71getlink_read_error:
87 page_cache_release(page); 72 page_cache_release(page);
88read_failed: 73read_failed:
89 nd_set_link(nd, err); 74 nd_set_link(nd, err);
90 return 0; 75 return NULL;
91} 76}
92 77
93static void nfs_put_link(struct dentry *dentry, struct nameidata *nd) 78static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
94{ 79{
95 char *s = nd_get_link(nd); 80 if (cookie) {
96 if (!IS_ERR(s)) { 81 struct page *page = cookie;
97 struct nfs_symlink *p;
98 struct page *page;
99
100 p = container_of(s, struct nfs_symlink, body[0]);
101 page = p->page;
102
103 kunmap(page); 82 kunmap(page);
104 page_cache_release(page); 83 page_cache_release(page);
105 } 84 }
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 18c58c32e326..251e5a1bb1c4 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -239,6 +239,7 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
239 if (xdr_decode_word(buf, base, &entries) || 239 if (xdr_decode_word(buf, base, &entries) ||
240 entries > NFS_ACL_MAX_ENTRIES) 240 entries > NFS_ACL_MAX_ENTRIES)
241 return -EINVAL; 241 return -EINVAL;
242 nfsacl_desc.desc.array_maxlen = entries;
242 err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); 243 err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc);
243 if (err) 244 if (err)
244 return err; 245 return err;
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9f043f44c92f..ce341dc76d5e 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,5 +10,5 @@ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
10nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o 10nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
12nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 12nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
13 nfs4acl.o nfs4callback.o 13 nfs4acl.o nfs4callback.o nfs4recover.o
14nfsd-objs := $(nfsd-y) 14nfsd-objs := $(nfsd-y)
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 11ebf6c4aa54..4a2105552ac4 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *);
125static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); 125static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
126static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); 126static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
127int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); 127int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
128int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); 128static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
129 129
130struct nfs4_acl * 130struct nfs4_acl *
131nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, 131nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -775,7 +775,7 @@ out_err:
775 return pacl; 775 return pacl;
776} 776}
777 777
778int 778static int
779nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) 779nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
780{ 780{
781 struct list_head *h, *n; 781 struct list_head *h, *n;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 634465e9cfc6..583c0710e45e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -54,7 +54,6 @@
54 54
55/* declarations */ 55/* declarations */
56static void nfs4_cb_null(struct rpc_task *task); 56static void nfs4_cb_null(struct rpc_task *task);
57extern spinlock_t recall_lock;
58 57
59/* Index of predefined Linux callback client operations */ 58/* Index of predefined Linux callback client operations */
60 59
@@ -329,12 +328,12 @@ out:
329 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 328 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
330} 329}
331 330
332struct rpc_procinfo nfs4_cb_procedures[] = { 331static struct rpc_procinfo nfs4_cb_procedures[] = {
333 PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), 332 PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null),
334 PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), 333 PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall),
335}; 334};
336 335
337struct rpc_version nfs_cb_version4 = { 336static struct rpc_version nfs_cb_version4 = {
338 .number = 1, 337 .number = 1,
339 .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), 338 .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
340 .procs = nfs4_cb_procedures 339 .procs = nfs4_cb_procedures
@@ -348,7 +347,7 @@ static struct rpc_version * nfs_cb_version[] = {
348/* 347/*
349 * Use the SETCLIENTID credential 348 * Use the SETCLIENTID credential
350 */ 349 */
351struct rpc_cred * 350static struct rpc_cred *
352nfsd4_lookupcred(struct nfs4_client *clp, int taskflags) 351nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
353{ 352{
354 struct auth_cred acred; 353 struct auth_cred acred;
@@ -387,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
387 char hostname[32]; 386 char hostname[32];
388 int status; 387 int status;
389 388
390 dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n", 389 if (atomic_read(&cb->cb_set))
391 cb->cb_parsed, atomic_read(&cb->cb_set));
392 if (!cb->cb_parsed || atomic_read(&cb->cb_set))
393 return; 390 return;
394 391
395 /* Initialize address */ 392 /* Initialize address */
@@ -427,7 +424,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
427 * XXX AUTH_UNIX only - need AUTH_GSS.... 424 * XXX AUTH_UNIX only - need AUTH_GSS....
428 */ 425 */
429 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); 426 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
430 clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); 427 clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
431 if (IS_ERR(clnt)) { 428 if (IS_ERR(clnt)) {
432 dprintk("NFSD: couldn't create callback client\n"); 429 dprintk("NFSD: couldn't create callback client\n");
433 goto out_err; 430 goto out_err;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4ba540841cf6..5605a26efc57 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm)
104 ent_init(new, itm); 104 ent_init(new, itm);
105} 105}
106 106
107void 107static void
108ent_put(struct cache_head *ch, struct cache_detail *cd) 108ent_put(struct cache_head *ch, struct cache_detail *cd)
109{ 109{
110 if (cache_put(ch, cd)) { 110 if (cache_put(ch, cd)) {
@@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail)
186static int idtoname_parse(struct cache_detail *, char *, int); 186static int idtoname_parse(struct cache_detail *, char *, int);
187static struct ent *idtoname_lookup(struct ent *, int); 187static struct ent *idtoname_lookup(struct ent *, int);
188 188
189struct cache_detail idtoname_cache = { 189static struct cache_detail idtoname_cache = {
190 .hash_size = ENT_HASHMAX, 190 .hash_size = ENT_HASHMAX,
191 .hash_table = idtoname_table, 191 .hash_table = idtoname_table,
192 .name = "nfs4.idtoname", 192 .name = "nfs4.idtoname",
@@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent)
277 return hash_str(ent->name, ENT_HASHBITS); 277 return hash_str(ent->name, ENT_HASHBITS);
278} 278}
279 279
280void 280static void
281nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, 281nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
282 int *blen) 282 int *blen)
283{ 283{
@@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
317} 317}
318 318
319static struct ent *nametoid_lookup(struct ent *, int); 319static struct ent *nametoid_lookup(struct ent *, int);
320int nametoid_parse(struct cache_detail *, char *, int); 320static int nametoid_parse(struct cache_detail *, char *, int);
321 321
322struct cache_detail nametoid_cache = { 322static struct cache_detail nametoid_cache = {
323 .hash_size = ENT_HASHMAX, 323 .hash_size = ENT_HASHMAX,
324 .hash_table = nametoid_table, 324 .hash_table = nametoid_table,
325 .name = "nfs4.nametoid", 325 .name = "nfs4.nametoid",
@@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = {
330 .warn_no_listener = warn_no_idmapd, 330 .warn_no_listener = warn_no_idmapd,
331}; 331};
332 332
333int 333static int
334nametoid_parse(struct cache_detail *cd, char *buf, int buflen) 334nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
335{ 335{
336 struct ent ent, *res; 336 struct ent ent, *res;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8158741e8b5..e08edc17c6a0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -45,6 +45,7 @@
45#include <linux/param.h> 45#include <linux/param.h>
46#include <linux/major.h> 46#include <linux/major.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/file.h>
48 49
49#include <linux/sunrpc/svc.h> 50#include <linux/sunrpc/svc.h>
50#include <linux/nfsd/nfsd.h> 51#include <linux/nfsd/nfsd.h>
@@ -168,12 +169,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
168 (int)open->op_fname.len, open->op_fname.data, 169 (int)open->op_fname.len, open->op_fname.data,
169 open->op_stateowner); 170 open->op_stateowner);
170 171
171 if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
172 return nfserr_grace;
173
174 if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
175 return nfserr_no_grace;
176
177 /* This check required by spec. */ 172 /* This check required by spec. */
178 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) 173 if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
179 return nfserr_inval; 174 return nfserr_inval;
@@ -198,6 +193,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
198 if (status) 193 if (status)
199 goto out; 194 goto out;
200 switch (open->op_claim_type) { 195 switch (open->op_claim_type) {
196 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
197 status = nfserr_inval;
198 if (open->op_create)
199 goto out;
200 /* fall through */
201 case NFS4_OPEN_CLAIM_NULL: 201 case NFS4_OPEN_CLAIM_NULL:
202 /* 202 /*
203 * (1) set CURRENT_FH to the file being opened, 203 * (1) set CURRENT_FH to the file being opened,
@@ -220,7 +220,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
220 if (status) 220 if (status)
221 goto out; 221 goto out;
222 break; 222 break;
223 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
224 case NFS4_OPEN_CLAIM_DELEGATE_PREV: 223 case NFS4_OPEN_CLAIM_DELEGATE_PREV:
225 printk("NFSD: unsupported OPEN claim type %d\n", 224 printk("NFSD: unsupported OPEN claim type %d\n",
226 open->op_claim_type); 225 open->op_claim_type);
@@ -473,26 +472,27 @@ static inline int
473nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) 472nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
474{ 473{
475 int status; 474 int status;
476 struct file *filp = NULL;
477 475
478 /* no need to check permission - this will be done in nfsd_read() */ 476 /* no need to check permission - this will be done in nfsd_read() */
479 477
478 read->rd_filp = NULL;
480 if (read->rd_offset >= OFFSET_MAX) 479 if (read->rd_offset >= OFFSET_MAX)
481 return nfserr_inval; 480 return nfserr_inval;
482 481
483 nfs4_lock_state(); 482 nfs4_lock_state();
484 /* check stateid */ 483 /* check stateid */
485 if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, 484 if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
486 CHECK_FH | RD_STATE, &filp))) { 485 CHECK_FH | RD_STATE, &read->rd_filp))) {
487 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 486 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
488 goto out; 487 goto out;
489 } 488 }
489 if (read->rd_filp)
490 get_file(read->rd_filp);
490 status = nfs_ok; 491 status = nfs_ok;
491out: 492out:
492 nfs4_unlock_state(); 493 nfs4_unlock_state();
493 read->rd_rqstp = rqstp; 494 read->rd_rqstp = rqstp;
494 read->rd_fhp = current_fh; 495 read->rd_fhp = current_fh;
495 read->rd_filp = filp;
496 return status; 496 return status;
497} 497}
498 498
@@ -532,6 +532,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
532{ 532{
533 int status; 533 int status;
534 534
535 if (nfs4_in_grace())
536 return nfserr_grace;
535 status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); 537 status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
536 if (status == nfserr_symlink) 538 if (status == nfserr_symlink)
537 return nfserr_notdir; 539 return nfserr_notdir;
@@ -550,6 +552,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
550 552
551 if (!save_fh->fh_dentry) 553 if (!save_fh->fh_dentry)
552 return status; 554 return status;
555 if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
556 & NFSEXP_NOSUBTREECHECK))
557 return nfserr_grace;
553 status = nfsd_rename(rqstp, save_fh, rename->rn_sname, 558 status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
554 rename->rn_snamelen, current_fh, 559 rename->rn_snamelen, current_fh,
555 rename->rn_tname, rename->rn_tnamelen); 560 rename->rn_tname, rename->rn_tnamelen);
@@ -624,6 +629,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
624 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); 629 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
625 goto out; 630 goto out;
626 } 631 }
632 if (filp)
633 get_file(filp);
627 nfs4_unlock_state(); 634 nfs4_unlock_state();
628 635
629 write->wr_bytes_written = write->wr_buflen; 636 write->wr_bytes_written = write->wr_buflen;
@@ -635,6 +642,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
635 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, 642 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
636 write->wr_vec, write->wr_vlen, write->wr_buflen, 643 write->wr_vec, write->wr_vlen, write->wr_buflen,
637 &write->wr_how_written); 644 &write->wr_how_written);
645 if (filp)
646 fput(filp);
638 647
639 if (status == nfserr_symlink) 648 if (status == nfserr_symlink)
640 status = nfserr_inval; 649 status = nfserr_inval;
@@ -923,6 +932,9 @@ encode_op:
923 nfs4_put_stateowner(replay_owner); 932 nfs4_put_stateowner(replay_owner);
924 replay_owner = NULL; 933 replay_owner = NULL;
925 } 934 }
935 /* XXX Ugh, we need to get rid of this kind of special case: */
936 if (op->opnum == OP_READ && op->u.read.rd_filp)
937 fput(op->u.read.rd_filp);
926 } 938 }
927 939
928out: 940out:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 000000000000..57ed50fe7f85
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,421 @@
1/*
2* linux/fs/nfsd/nfs4recover.c
3*
4* Copyright (c) 2004 The Regents of the University of Michigan.
5* All rights reserved.
6*
7* Andy Adamson <andros@citi.umich.edu>
8*
9* Redistribution and use in source and binary forms, with or without
10* modification, are permitted provided that the following conditions
11* are met:
12*
13* 1. Redistributions of source code must retain the above copyright
14* notice, this list of conditions and the following disclaimer.
15* 2. Redistributions in binary form must reproduce the above copyright
16* notice, this list of conditions and the following disclaimer in the
17* documentation and/or other materials provided with the distribution.
18* 3. Neither the name of the University nor the names of its
19* contributors may be used to endorse or promote products derived
20* from this software without specific prior written permission.
21*
22* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*
34*/
35
36
37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h>
40#include <linux/nfsd/state.h>
41#include <linux/nfsd/xdr4.h>
42#include <linux/param.h>
43#include <linux/file.h>
44#include <linux/namei.h>
45#include <asm/uaccess.h>
46#include <asm/scatterlist.h>
47#include <linux/crypto.h>
48
49
50#define NFSDDBG_FACILITY NFSDDBG_PROC
51
52/* Globals */
53static struct nameidata rec_dir;
54static int rec_dir_init = 0;
55
56static void
57nfs4_save_user(uid_t *saveuid, gid_t *savegid)
58{
59 *saveuid = current->fsuid;
60 *savegid = current->fsgid;
61 current->fsuid = 0;
62 current->fsgid = 0;
63}
64
65static void
66nfs4_reset_user(uid_t saveuid, gid_t savegid)
67{
68 current->fsuid = saveuid;
69 current->fsgid = savegid;
70}
71
72static void
73md5_to_hex(char *out, char *md5)
74{
75 int i;
76
77 for (i=0; i<16; i++) {
78 unsigned char c = md5[i];
79
80 *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
81 *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
82 }
83 *out = '\0';
84}
85
86int
87nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
88{
89 struct xdr_netobj cksum;
90 struct crypto_tfm *tfm;
91 struct scatterlist sg[1];
92 int status = nfserr_resource;
93
94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
95 clname->len, clname->data);
96 tfm = crypto_alloc_tfm("md5", 0);
97 if (tfm == NULL)
98 goto out;
99 cksum.len = crypto_tfm_alg_digestsize(tfm);
100 cksum.data = kmalloc(cksum.len, GFP_KERNEL);
101 if (cksum.data == NULL)
102 goto out;
103 crypto_digest_init(tfm);
104
105 sg[0].page = virt_to_page(clname->data);
106 sg[0].offset = offset_in_page(clname->data);
107 sg[0].length = clname->len;
108
109 crypto_digest_update(tfm, sg, 1);
110 crypto_digest_final(tfm, cksum.data);
111
112 md5_to_hex(dname, cksum.data);
113
114 kfree(cksum.data);
115 status = nfs_ok;
116out:
117 if (tfm)
118 crypto_free_tfm(tfm);
119 return status;
120}
121
122static void
123nfsd4_sync_rec_dir(void)
124{
125 down(&rec_dir.dentry->d_inode->i_sem);
126 nfsd_sync_dir(rec_dir.dentry);
127 up(&rec_dir.dentry->d_inode->i_sem);
128}
129
130int
131nfsd4_create_clid_dir(struct nfs4_client *clp)
132{
133 char *dname = clp->cl_recdir;
134 struct dentry *dentry;
135 uid_t uid;
136 gid_t gid;
137 int status;
138
139 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
140
141 if (!rec_dir_init || clp->cl_firststate)
142 return 0;
143
144 nfs4_save_user(&uid, &gid);
145
146 /* lock the parent */
147 down(&rec_dir.dentry->d_inode->i_sem);
148
149 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
150 if (IS_ERR(dentry)) {
151 status = PTR_ERR(dentry);
152 goto out_unlock;
153 }
154 status = -EEXIST;
155 if (dentry->d_inode) {
156 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
157 goto out_put;
158 }
159 status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
160out_put:
161 dput(dentry);
162out_unlock:
163 up(&rec_dir.dentry->d_inode->i_sem);
164 if (status == 0) {
165 clp->cl_firststate = 1;
166 nfsd4_sync_rec_dir();
167 }
168 nfs4_reset_user(uid, gid);
169 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
170 return status;
171}
172
173typedef int (recdir_func)(struct dentry *, struct dentry *);
174
175struct dentry_list {
176 struct dentry *dentry;
177 struct list_head list;
178};
179
180struct dentry_list_arg {
181 struct list_head dentries;
182 struct dentry *parent;
183};
184
185static int
186nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
187 loff_t offset, ino_t ino, unsigned int d_type)
188{
189 struct dentry_list_arg *dla = arg;
190 struct list_head *dentries = &dla->dentries;
191 struct dentry *parent = dla->parent;
192 struct dentry *dentry;
193 struct dentry_list *child;
194
195 if (name && isdotent(name, namlen))
196 return nfs_ok;
197 dentry = lookup_one_len(name, parent, namlen);
198 if (IS_ERR(dentry))
199 return PTR_ERR(dentry);
200 child = kmalloc(sizeof(*child), GFP_KERNEL);
201 if (child == NULL)
202 return -ENOMEM;
203 child->dentry = dentry;
204 list_add(&child->list, dentries);
205 return 0;
206}
207
208static int
209nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
210{
211 struct file *filp;
212 struct dentry_list_arg dla = {
213 .parent = dir,
214 };
215 struct list_head *dentries = &dla.dentries;
216 struct dentry_list *child;
217 uid_t uid;
218 gid_t gid;
219 int status;
220
221 if (!rec_dir_init)
222 return 0;
223
224 nfs4_save_user(&uid, &gid);
225
226 filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
227 O_RDWR);
228 status = PTR_ERR(filp);
229 if (IS_ERR(filp))
230 goto out;
231 INIT_LIST_HEAD(dentries);
232 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
233 fput(filp);
234 while (!list_empty(dentries)) {
235 child = list_entry(dentries->next, struct dentry_list, list);
236 status = f(dir, child->dentry);
237 if (status)
238 goto out;
239 list_del(&child->list);
240 dput(child->dentry);
241 kfree(child);
242 }
243out:
244 while (!list_empty(dentries)) {
245 child = list_entry(dentries->next, struct dentry_list, list);
246 list_del(&child->list);
247 dput(child->dentry);
248 kfree(child);
249 }
250 nfs4_reset_user(uid, gid);
251 return status;
252}
253
254static int
255nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
256{
257 int status;
258
259 if (!S_ISREG(dir->d_inode->i_mode)) {
260 printk("nfsd4: non-file found in client recovery directory\n");
261 return -EINVAL;
262 }
263 down(&dir->d_inode->i_sem);
264 status = vfs_unlink(dir->d_inode, dentry);
265 up(&dir->d_inode->i_sem);
266 return status;
267}
268
269static int
270nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
271{
272 int status;
273
274 /* For now this directory should already be empty, but we empty it of
275 * any regular files anyway, just in case the directory was created by
276 * a kernel from the future.... */
277 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
278 down(&dir->d_inode->i_sem);
279 status = vfs_rmdir(dir->d_inode, dentry);
280 up(&dir->d_inode->i_sem);
281 return status;
282}
283
284static int
285nfsd4_unlink_clid_dir(char *name, int namlen)
286{
287 struct dentry *dentry;
288 int status;
289
290 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
291
292 down(&rec_dir.dentry->d_inode->i_sem);
293 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
294 up(&rec_dir.dentry->d_inode->i_sem);
295 if (IS_ERR(dentry)) {
296 status = PTR_ERR(dentry);
297 return status;
298 }
299 status = -ENOENT;
300 if (!dentry->d_inode)
301 goto out;
302
303 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
304out:
305 dput(dentry);
306 return status;
307}
308
309void
310nfsd4_remove_clid_dir(struct nfs4_client *clp)
311{
312 uid_t uid;
313 gid_t gid;
314 int status;
315
316 if (!rec_dir_init || !clp->cl_firststate)
317 return;
318
319 clp->cl_firststate = 0;
320 nfs4_save_user(&uid, &gid);
321 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
322 nfs4_reset_user(uid, gid);
323 if (status == 0)
324 nfsd4_sync_rec_dir();
325 if (status)
326 printk("NFSD: Failed to remove expired client state directory"
327 " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
328 return;
329}
330
331static int
332purge_old(struct dentry *parent, struct dentry *child)
333{
334 int status;
335
336 if (nfs4_has_reclaimed_state(child->d_name.name))
337 return nfs_ok;
338
339 status = nfsd4_clear_clid_dir(parent, child);
340 if (status)
341 printk("failed to remove client recovery directory %s\n",
342 child->d_name.name);
343 /* Keep trying, success or failure: */
344 return nfs_ok;
345}
346
347void
348nfsd4_recdir_purge_old(void) {
349 int status;
350
351 if (!rec_dir_init)
352 return;
353 status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
354 if (status == 0)
355 nfsd4_sync_rec_dir();
356 if (status)
357 printk("nfsd4: failed to purge old clients from recovery"
358 " directory %s\n", rec_dir.dentry->d_name.name);
359 return;
360}
361
362static int
363load_recdir(struct dentry *parent, struct dentry *child)
364{
365 if (child->d_name.len != HEXDIR_LEN - 1) {
366 printk("nfsd4: illegal name %s in recovery directory\n",
367 child->d_name.name);
368 /* Keep trying; maybe the others are OK: */
369 return nfs_ok;
370 }
371 nfs4_client_to_reclaim(child->d_name.name);
372 return nfs_ok;
373}
374
375int
376nfsd4_recdir_load(void) {
377 int status;
378
379 status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
380 if (status)
381 printk("nfsd4: failed loading clients from recovery"
382 " directory %s\n", rec_dir.dentry->d_name.name);
383 return status;
384}
385
386/*
387 * Hold reference to the recovery directory.
388 */
389
390void
391nfsd4_init_recdir(char *rec_dirname)
392{
393 uid_t uid = 0;
394 gid_t gid = 0;
395 int status;
396
397 printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
398 rec_dirname);
399
400 BUG_ON(rec_dir_init);
401
402 nfs4_save_user(&uid, &gid);
403
404 status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
405 if (status == -ENOENT)
406 printk("NFSD: recovery directory %s doesn't exist\n",
407 rec_dirname);
408
409 if (!status)
410 rec_dir_init = 1;
411 nfs4_reset_user(uid, gid);
412}
413
414void
415nfsd4_shutdown_recdir(void)
416{
417 if (!rec_dir_init)
418 return;
419 rec_dir_init = 0;
420 path_release(&rec_dir);
421}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 75e8b137580c..b83f8fb441e1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,39 +48,32 @@
48#include <linux/nfs4.h> 48#include <linux/nfs4.h>
49#include <linux/nfsd/state.h> 49#include <linux/nfsd/state.h>
50#include <linux/nfsd/xdr4.h> 50#include <linux/nfsd/xdr4.h>
51#include <linux/namei.h>
51 52
52#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
53 54
54/* Globals */ 55/* Globals */
55static time_t lease_time = 90; /* default lease time */ 56static time_t lease_time = 90; /* default lease time */
56static time_t old_lease_time = 90; /* past incarnation lease time */ 57static time_t user_lease_time = 90;
57static u32 nfs4_reclaim_init = 0; 58static time_t boot_time;
58time_t boot_time; 59static int in_grace = 1;
59static time_t grace_end = 0;
60static u32 current_clientid = 1; 60static u32 current_clientid = 1;
61static u32 current_ownerid = 1; 61static u32 current_ownerid = 1;
62static u32 current_fileid = 1; 62static u32 current_fileid = 1;
63static u32 current_delegid = 1; 63static u32 current_delegid = 1;
64static u32 nfs4_init; 64static u32 nfs4_init;
65stateid_t zerostateid; /* bits all 0 */ 65static stateid_t zerostateid; /* bits all 0 */
66stateid_t onestateid; /* bits all 1 */ 66static stateid_t onestateid; /* bits all 1 */
67 67
68/* debug counters */ 68#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
69u32 list_add_perfile = 0; 69#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
70u32 list_del_perfile = 0;
71u32 add_perclient = 0;
72u32 del_perclient = 0;
73u32 alloc_file = 0;
74u32 free_file = 0;
75u32 vfsopen = 0;
76u32 vfsclose = 0;
77u32 alloc_delegation= 0;
78u32 free_delegation= 0;
79 70
80/* forward declarations */ 71/* forward declarations */
81struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); 72static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
82static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); 73static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
83static void release_stateid_lockowners(struct nfs4_stateid *open_stp); 74static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
75static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
76static void nfs4_set_recdir(char *recdir);
84 77
85/* Locking: 78/* Locking:
86 * 79 *
@@ -90,6 +83,11 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
90 */ 83 */
91static DECLARE_MUTEX(client_sema); 84static DECLARE_MUTEX(client_sema);
92 85
86static kmem_cache_t *stateowner_slab = NULL;
87static kmem_cache_t *file_slab = NULL;
88static kmem_cache_t *stateid_slab = NULL;
89static kmem_cache_t *deleg_slab = NULL;
90
93void 91void
94nfs4_lock_state(void) 92nfs4_lock_state(void)
95{ 93{
@@ -118,16 +116,36 @@ opaque_hashval(const void *ptr, int nbytes)
118/* forward declarations */ 116/* forward declarations */
119static void release_stateowner(struct nfs4_stateowner *sop); 117static void release_stateowner(struct nfs4_stateowner *sop);
120static void release_stateid(struct nfs4_stateid *stp, int flags); 118static void release_stateid(struct nfs4_stateid *stp, int flags);
121static void release_file(struct nfs4_file *fp);
122 119
123/* 120/*
124 * Delegation state 121 * Delegation state
125 */ 122 */
126 123
127/* recall_lock protects the del_recall_lru */ 124/* recall_lock protects the del_recall_lru */
128spinlock_t recall_lock; 125static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
129static struct list_head del_recall_lru; 126static struct list_head del_recall_lru;
130 127
128static void
129free_nfs4_file(struct kref *kref)
130{
131 struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref);
132 list_del(&fp->fi_hash);
133 iput(fp->fi_inode);
134 kmem_cache_free(file_slab, fp);
135}
136
137static inline void
138put_nfs4_file(struct nfs4_file *fi)
139{
140 kref_put(&fi->fi_ref, free_nfs4_file);
141}
142
143static inline void
144get_nfs4_file(struct nfs4_file *fi)
145{
146 kref_get(&fi->fi_ref);
147}
148
131static struct nfs4_delegation * 149static struct nfs4_delegation *
132alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 150alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
133{ 151{
@@ -136,13 +154,14 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
136 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 154 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
137 155
138 dprintk("NFSD alloc_init_deleg\n"); 156 dprintk("NFSD alloc_init_deleg\n");
139 if ((dp = kmalloc(sizeof(struct nfs4_delegation), 157 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
140 GFP_KERNEL)) == NULL) 158 if (dp == NULL)
141 return dp; 159 return dp;
142 INIT_LIST_HEAD(&dp->dl_del_perfile); 160 INIT_LIST_HEAD(&dp->dl_perfile);
143 INIT_LIST_HEAD(&dp->dl_del_perclnt); 161 INIT_LIST_HEAD(&dp->dl_perclnt);
144 INIT_LIST_HEAD(&dp->dl_recall_lru); 162 INIT_LIST_HEAD(&dp->dl_recall_lru);
145 dp->dl_client = clp; 163 dp->dl_client = clp;
164 get_nfs4_file(fp);
146 dp->dl_file = fp; 165 dp->dl_file = fp;
147 dp->dl_flock = NULL; 166 dp->dl_flock = NULL;
148 get_file(stp->st_vfs_file); 167 get_file(stp->st_vfs_file);
@@ -160,9 +179,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
160 current_fh->fh_handle.fh_size); 179 current_fh->fh_handle.fh_size);
161 dp->dl_time = 0; 180 dp->dl_time = 0;
162 atomic_set(&dp->dl_count, 1); 181 atomic_set(&dp->dl_count, 1);
163 list_add(&dp->dl_del_perfile, &fp->fi_del_perfile); 182 list_add(&dp->dl_perfile, &fp->fi_delegations);
164 list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt); 183 list_add(&dp->dl_perclnt, &clp->cl_delegations);
165 alloc_delegation++;
166 return dp; 184 return dp;
167} 185}
168 186
@@ -171,8 +189,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
171{ 189{
172 if (atomic_dec_and_test(&dp->dl_count)) { 190 if (atomic_dec_and_test(&dp->dl_count)) {
173 dprintk("NFSD: freeing dp %p\n",dp); 191 dprintk("NFSD: freeing dp %p\n",dp);
174 kfree(dp); 192 put_nfs4_file(dp->dl_file);
175 free_delegation++; 193 kmem_cache_free(deleg_slab, dp);
176 } 194 }
177} 195}
178 196
@@ -193,15 +211,14 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
193 if (dp->dl_flock) 211 if (dp->dl_flock)
194 setlease(filp, F_UNLCK, &dp->dl_flock); 212 setlease(filp, F_UNLCK, &dp->dl_flock);
195 nfsd_close(filp); 213 nfsd_close(filp);
196 vfsclose++;
197} 214}
198 215
199/* Called under the state lock. */ 216/* Called under the state lock. */
200static void 217static void
201unhash_delegation(struct nfs4_delegation *dp) 218unhash_delegation(struct nfs4_delegation *dp)
202{ 219{
203 list_del_init(&dp->dl_del_perfile); 220 list_del_init(&dp->dl_perfile);
204 list_del_init(&dp->dl_del_perclnt); 221 list_del_init(&dp->dl_perclnt);
205 spin_lock(&recall_lock); 222 spin_lock(&recall_lock);
206 list_del_init(&dp->dl_recall_lru); 223 list_del_init(&dp->dl_recall_lru);
207 spin_unlock(&recall_lock); 224 spin_unlock(&recall_lock);
@@ -220,8 +237,8 @@ unhash_delegation(struct nfs4_delegation *dp)
220 237
221#define clientid_hashval(id) \ 238#define clientid_hashval(id) \
222 ((id) & CLIENT_HASH_MASK) 239 ((id) & CLIENT_HASH_MASK)
223#define clientstr_hashval(name, namelen) \ 240#define clientstr_hashval(name) \
224 (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK) 241 (opaque_hashval((name), 8) & CLIENT_HASH_MASK)
225/* 242/*
226 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot 243 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
227 * used in reboot/reset lease grace period processing 244 * used in reboot/reset lease grace period processing
@@ -331,11 +348,11 @@ expire_client(struct nfs4_client *clp)
331 348
332 INIT_LIST_HEAD(&reaplist); 349 INIT_LIST_HEAD(&reaplist);
333 spin_lock(&recall_lock); 350 spin_lock(&recall_lock);
334 while (!list_empty(&clp->cl_del_perclnt)) { 351 while (!list_empty(&clp->cl_delegations)) {
335 dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt); 352 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
336 dprintk("NFSD: expire client. dp %p, fp %p\n", dp, 353 dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
337 dp->dl_flock); 354 dp->dl_flock);
338 list_del_init(&dp->dl_del_perclnt); 355 list_del_init(&dp->dl_perclnt);
339 list_move(&dp->dl_recall_lru, &reaplist); 356 list_move(&dp->dl_recall_lru, &reaplist);
340 } 357 }
341 spin_unlock(&recall_lock); 358 spin_unlock(&recall_lock);
@@ -347,26 +364,26 @@ expire_client(struct nfs4_client *clp)
347 list_del(&clp->cl_idhash); 364 list_del(&clp->cl_idhash);
348 list_del(&clp->cl_strhash); 365 list_del(&clp->cl_strhash);
349 list_del(&clp->cl_lru); 366 list_del(&clp->cl_lru);
350 while (!list_empty(&clp->cl_perclient)) { 367 while (!list_empty(&clp->cl_openowners)) {
351 sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient); 368 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
352 release_stateowner(sop); 369 release_stateowner(sop);
353 } 370 }
354 put_nfs4_client(clp); 371 put_nfs4_client(clp);
355} 372}
356 373
357static struct nfs4_client * 374static struct nfs4_client *
358create_client(struct xdr_netobj name) { 375create_client(struct xdr_netobj name, char *recdir) {
359 struct nfs4_client *clp; 376 struct nfs4_client *clp;
360 377
361 if (!(clp = alloc_client(name))) 378 if (!(clp = alloc_client(name)))
362 goto out; 379 goto out;
380 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
363 atomic_set(&clp->cl_count, 1); 381 atomic_set(&clp->cl_count, 1);
364 atomic_set(&clp->cl_callback.cb_set, 0); 382 atomic_set(&clp->cl_callback.cb_set, 0);
365 clp->cl_callback.cb_parsed = 0;
366 INIT_LIST_HEAD(&clp->cl_idhash); 383 INIT_LIST_HEAD(&clp->cl_idhash);
367 INIT_LIST_HEAD(&clp->cl_strhash); 384 INIT_LIST_HEAD(&clp->cl_strhash);
368 INIT_LIST_HEAD(&clp->cl_perclient); 385 INIT_LIST_HEAD(&clp->cl_openowners);
369 INIT_LIST_HEAD(&clp->cl_del_perclnt); 386 INIT_LIST_HEAD(&clp->cl_delegations);
370 INIT_LIST_HEAD(&clp->cl_lru); 387 INIT_LIST_HEAD(&clp->cl_lru);
371out: 388out:
372 return clp; 389 return clp;
@@ -392,11 +409,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
392 get_group_info(target->cr_group_info); 409 get_group_info(target->cr_group_info);
393} 410}
394 411
395static int 412static inline int
396cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) { 413same_name(const char *n1, const char *n2) {
397 if (!n1 || !n2) 414 return 0 == memcmp(n1, n2, HEXDIR_LEN);
398 return 0;
399 return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
400} 415}
401 416
402static int 417static int
@@ -446,7 +461,7 @@ check_name(struct xdr_netobj name) {
446 return 1; 461 return 1;
447} 462}
448 463
449void 464static void
450add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) 465add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
451{ 466{
452 unsigned int idhashval; 467 unsigned int idhashval;
@@ -458,7 +473,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
458 clp->cl_time = get_seconds(); 473 clp->cl_time = get_seconds();
459} 474}
460 475
461void 476static void
462move_to_confirmed(struct nfs4_client *clp) 477move_to_confirmed(struct nfs4_client *clp)
463{ 478{
464 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); 479 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
@@ -468,8 +483,7 @@ move_to_confirmed(struct nfs4_client *clp)
468 list_del_init(&clp->cl_strhash); 483 list_del_init(&clp->cl_strhash);
469 list_del_init(&clp->cl_idhash); 484 list_del_init(&clp->cl_idhash);
470 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); 485 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
471 strhashval = clientstr_hashval(clp->cl_name.data, 486 strhashval = clientstr_hashval(clp->cl_recdir);
472 clp->cl_name.len);
473 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 487 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
474 renew_client(clp); 488 renew_client(clp);
475} 489}
@@ -500,6 +514,30 @@ find_unconfirmed_client(clientid_t *clid)
500 return NULL; 514 return NULL;
501} 515}
502 516
517static struct nfs4_client *
518find_confirmed_client_by_str(const char *dname, unsigned int hashval)
519{
520 struct nfs4_client *clp;
521
522 list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
523 if (same_name(clp->cl_recdir, dname))
524 return clp;
525 }
526 return NULL;
527}
528
529static struct nfs4_client *
530find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
531{
532 struct nfs4_client *clp;
533
534 list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
535 if (same_name(clp->cl_recdir, dname))
536 return clp;
537 }
538 return NULL;
539}
540
503/* a helper function for parse_callback */ 541/* a helper function for parse_callback */
504static int 542static int
505parse_octet(unsigned int *lenp, char **addrp) 543parse_octet(unsigned int *lenp, char **addrp)
@@ -534,7 +572,7 @@ parse_octet(unsigned int *lenp, char **addrp)
534} 572}
535 573
536/* parse and set the setclientid ipv4 callback address */ 574/* parse and set the setclientid ipv4 callback address */
537int 575static int
538parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) 576parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
539{ 577{
540 int temp = 0; 578 int temp = 0;
@@ -570,7 +608,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
570 return 1; 608 return 1;
571} 609}
572 610
573void 611static void
574gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 612gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
575{ 613{
576 struct nfs4_callback *cb = &clp->cl_callback; 614 struct nfs4_callback *cb = &clp->cl_callback;
@@ -584,14 +622,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
584 goto out_err; 622 goto out_err;
585 cb->cb_prog = se->se_callback_prog; 623 cb->cb_prog = se->se_callback_prog;
586 cb->cb_ident = se->se_callback_ident; 624 cb->cb_ident = se->se_callback_ident;
587 cb->cb_parsed = 1;
588 return; 625 return;
589out_err: 626out_err:
590 printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 627 printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
591 "will not receive delegations\n", 628 "will not receive delegations\n",
592 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 629 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
593 630
594 cb->cb_parsed = 0;
595 return; 631 return;
596} 632}
597 633
@@ -638,59 +674,43 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
638 }; 674 };
639 nfs4_verifier clverifier = setclid->se_verf; 675 nfs4_verifier clverifier = setclid->se_verf;
640 unsigned int strhashval; 676 unsigned int strhashval;
641 struct nfs4_client * conf, * unconf, * new, * clp; 677 struct nfs4_client *conf, *unconf, *new;
642 int status; 678 int status;
679 char dname[HEXDIR_LEN];
643 680
644 status = nfserr_inval; 681 status = nfserr_inval;
645 if (!check_name(clname)) 682 if (!check_name(clname))
646 goto out; 683 goto out;
647 684
685 status = nfs4_make_rec_clidname(dname, &clname);
686 if (status)
687 goto out;
688
648 /* 689 /*
649 * XXX The Duplicate Request Cache (DRC) has been checked (??) 690 * XXX The Duplicate Request Cache (DRC) has been checked (??)
650 * We get here on a DRC miss. 691 * We get here on a DRC miss.
651 */ 692 */
652 693
653 strhashval = clientstr_hashval(clname.data, clname.len); 694 strhashval = clientstr_hashval(dname);
654 695
655 conf = NULL;
656 nfs4_lock_state(); 696 nfs4_lock_state();
657 list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) { 697 conf = find_confirmed_client_by_str(dname, strhashval);
658 if (!cmp_name(&clp->cl_name, &clname)) 698 if (conf) {
659 continue;
660 /* 699 /*
661 * CASE 0: 700 * CASE 0:
662 * clname match, confirmed, different principal 701 * clname match, confirmed, different principal
663 * or different ip_address 702 * or different ip_address
664 */ 703 */
665 status = nfserr_clid_inuse; 704 status = nfserr_clid_inuse;
666 if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) { 705 if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
706 || conf->cl_addr != ip_addr) {
667 printk("NFSD: setclientid: string in use by client" 707 printk("NFSD: setclientid: string in use by client"
668 "(clientid %08x/%08x)\n", 708 "(clientid %08x/%08x)\n",
669 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 709 conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
670 goto out;
671 }
672 if (clp->cl_addr != ip_addr) {
673 printk("NFSD: setclientid: string in use by client"
674 "(clientid %08x/%08x)\n",
675 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
676 goto out; 710 goto out;
677 } 711 }
678
679 /*
680 * cl_name match from a previous SETCLIENTID operation
681 * XXX check for additional matches?
682 */
683 conf = clp;
684 break;
685 }
686 unconf = NULL;
687 list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
688 if (!cmp_name(&clp->cl_name, &clname))
689 continue;
690 /* cl_name match from a previous SETCLIENTID operation */
691 unconf = clp;
692 break;
693 } 712 }
713 unconf = find_unconfirmed_client_by_str(dname, strhashval);
694 status = nfserr_resource; 714 status = nfserr_resource;
695 if (!conf) { 715 if (!conf) {
696 /* 716 /*
@@ -699,7 +719,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
699 */ 719 */
700 if (unconf) 720 if (unconf)
701 expire_client(unconf); 721 expire_client(unconf);
702 if (!(new = create_client(clname))) 722 new = create_client(clname, dname);
723 if (new == NULL)
703 goto out; 724 goto out;
704 copy_verf(new, &clverifier); 725 copy_verf(new, &clverifier);
705 new->cl_addr = ip_addr; 726 new->cl_addr = ip_addr;
@@ -722,12 +743,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
722 * nfs4_client, but with the new callback info and a 743 * nfs4_client, but with the new callback info and a
723 * new cl_confirm 744 * new cl_confirm
724 */ 745 */
725 if ((unconf) && 746 if (unconf) {
726 cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) && 747 /* Note this is removing unconfirmed {*x***},
727 cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) { 748 * which is stronger than RFC recommended {vxc**}.
728 expire_client(unconf); 749 * This has the advantage that there is at most
750 * one {*x***} in either list at any time.
751 */
752 expire_client(unconf);
729 } 753 }
730 if (!(new = create_client(clname))) 754 new = create_client(clname, dname);
755 if (new == NULL)
731 goto out; 756 goto out;
732 copy_verf(new,&conf->cl_verifier); 757 copy_verf(new,&conf->cl_verifier);
733 new->cl_addr = ip_addr; 758 new->cl_addr = ip_addr;
@@ -745,7 +770,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
745 * using input clverifier, clname, and callback info 770 * using input clverifier, clname, and callback info
746 * and generate a new cl_clientid and cl_confirm. 771 * and generate a new cl_clientid and cl_confirm.
747 */ 772 */
748 if (!(new = create_client(clname))) 773 new = create_client(clname, dname);
774 if (new == NULL)
749 goto out; 775 goto out;
750 copy_verf(new,&clverifier); 776 copy_verf(new,&clverifier);
751 new->cl_addr = ip_addr; 777 new->cl_addr = ip_addr;
@@ -771,7 +797,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
771 * new cl_verifier and a new cl_confirm 797 * new cl_verifier and a new cl_confirm
772 */ 798 */
773 expire_client(unconf); 799 expire_client(unconf);
774 if (!(new = create_client(clname))) 800 new = create_client(clname, dname);
801 if (new == NULL)
775 goto out; 802 goto out;
776 copy_verf(new,&clverifier); 803 copy_verf(new,&clverifier);
777 new->cl_addr = ip_addr; 804 new->cl_addr = ip_addr;
@@ -807,7 +834,7 @@ int
807nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) 834nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
808{ 835{
809 u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; 836 u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
810 struct nfs4_client *clp, *conf = NULL, *unconf = NULL; 837 struct nfs4_client *conf, *unconf;
811 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 838 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
812 clientid_t * clid = &setclientid_confirm->sc_clientid; 839 clientid_t * clid = &setclientid_confirm->sc_clientid;
813 int status; 840 int status;
@@ -820,102 +847,91 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
820 */ 847 */
821 848
822 nfs4_lock_state(); 849 nfs4_lock_state();
823 clp = find_confirmed_client(clid); 850
824 if (clp) { 851 conf = find_confirmed_client(clid);
825 status = nfserr_inval; 852 unconf = find_unconfirmed_client(clid);
826 /* 853
827 * Found a record for this clientid. If the IP addresses 854 status = nfserr_clid_inuse;
828 * don't match, return ERR_INVAL just as if the record had 855 if (conf && conf->cl_addr != ip_addr)
829 * not been found. 856 goto out;
830 */ 857 if (unconf && unconf->cl_addr != ip_addr)
831 if (clp->cl_addr != ip_addr) { 858 goto out;
832 printk("NFSD: setclientid: string in use by client" 859
833 "(clientid %08x/%08x)\n",
834 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
835 goto out;
836 }
837 conf = clp;
838 }
839 clp = find_unconfirmed_client(clid);
840 if (clp) {
841 status = nfserr_inval;
842 if (clp->cl_addr != ip_addr) {
843 printk("NFSD: setclientid: string in use by client"
844 "(clientid %08x/%08x)\n",
845 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
846 goto out;
847 }
848 unconf = clp;
849 }
850 /* CASE 1:
851 * unconf record that matches input clientid and input confirm.
852 * conf record that matches input clientid.
853 * conf and unconf records match names, verifiers
854 */
855 if ((conf && unconf) && 860 if ((conf && unconf) &&
856 (cmp_verf(&unconf->cl_confirm, &confirm)) && 861 (cmp_verf(&unconf->cl_confirm, &confirm)) &&
857 (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) && 862 (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
858 (cmp_name(&conf->cl_name,&unconf->cl_name)) && 863 (same_name(conf->cl_recdir,unconf->cl_recdir)) &&
859 (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) { 864 (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
865 /* CASE 1:
866 * unconf record that matches input clientid and input confirm.
867 * conf record that matches input clientid.
868 * conf and unconf records match names, verifiers
869 */
860 if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 870 if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred))
861 status = nfserr_clid_inuse; 871 status = nfserr_clid_inuse;
862 else { 872 else {
863 expire_client(conf); 873 /* XXX: We just turn off callbacks until we can handle
864 clp = unconf; 874 * change request correctly. */
865 move_to_confirmed(unconf); 875 atomic_set(&conf->cl_callback.cb_set, 0);
876 gen_confirm(conf);
877 nfsd4_remove_clid_dir(unconf);
878 expire_client(unconf);
866 status = nfs_ok; 879 status = nfs_ok;
880
867 } 881 }
868 goto out; 882 } else if ((conf && !unconf) ||
869 }
870 /* CASE 2:
871 * conf record that matches input clientid.
872 * if unconf record that matches input clientid, then unconf->cl_name
873 * or unconf->cl_verifier don't match the conf record.
874 */
875 if ((conf && !unconf) ||
876 ((conf && unconf) && 883 ((conf && unconf) &&
877 (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) || 884 (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
878 !cmp_name(&conf->cl_name, &unconf->cl_name)))) { 885 !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
879 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { 886 /* CASE 2:
887 * conf record that matches input clientid.
888 * if unconf record matches input clientid, then
889 * unconf->cl_name or unconf->cl_verifier don't match the
890 * conf record.
891 */
892 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
880 status = nfserr_clid_inuse; 893 status = nfserr_clid_inuse;
881 } else { 894 else
882 clp = conf;
883 status = nfs_ok; 895 status = nfs_ok;
884 } 896 } else if (!conf && unconf
885 goto out; 897 && cmp_verf(&unconf->cl_confirm, &confirm)) {
886 } 898 /* CASE 3:
887 /* CASE 3: 899 * conf record not found.
888 * conf record not found. 900 * unconf record found.
889 * unconf record found. 901 * unconf->cl_confirm matches input confirm
890 * unconf->cl_confirm matches input confirm 902 */
891 */
892 if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
893 if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) { 903 if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
894 status = nfserr_clid_inuse; 904 status = nfserr_clid_inuse;
895 } else { 905 } else {
896 status = nfs_ok; 906 unsigned int hash =
897 clp = unconf; 907 clientstr_hashval(unconf->cl_recdir);
908 conf = find_confirmed_client_by_str(unconf->cl_recdir,
909 hash);
910 if (conf) {
911 nfsd4_remove_clid_dir(conf);
912 expire_client(conf);
913 }
898 move_to_confirmed(unconf); 914 move_to_confirmed(unconf);
915 conf = unconf;
916 status = nfs_ok;
899 } 917 }
900 goto out; 918 } else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
901 } 919 && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
902 /* CASE 4: 920 &confirm)))) {
903 * conf record not found, or if conf, then conf->cl_confirm does not 921 /* CASE 4:
904 * match input confirm. 922 * conf record not found, or if conf, conf->cl_confirm does not
905 * unconf record not found, or if unconf, then unconf->cl_confirm 923 * match input confirm.
906 * does not match input confirm. 924 * unconf record not found, or if unconf, unconf->cl_confirm
907 */ 925 * does not match input confirm.
908 if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) && 926 */
909 (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
910 status = nfserr_stale_clientid; 927 status = nfserr_stale_clientid;
911 goto out; 928 } else {
929 /* check that we have hit one of the cases...*/
930 status = nfserr_clid_inuse;
912 } 931 }
913 /* check that we have hit one of the cases...*/
914 status = nfserr_inval;
915 goto out;
916out: 932out:
917 if (!status) 933 if (!status)
918 nfsd4_probe_callback(clp); 934 nfsd4_probe_callback(conf);
919 nfs4_unlock_state(); 935 nfs4_unlock_state();
920 return status; 936 return status;
921} 937}
@@ -961,60 +977,65 @@ alloc_init_file(struct inode *ino)
961 struct nfs4_file *fp; 977 struct nfs4_file *fp;
962 unsigned int hashval = file_hashval(ino); 978 unsigned int hashval = file_hashval(ino);
963 979
964 if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) { 980 fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
981 if (fp) {
982 kref_init(&fp->fi_ref);
965 INIT_LIST_HEAD(&fp->fi_hash); 983 INIT_LIST_HEAD(&fp->fi_hash);
966 INIT_LIST_HEAD(&fp->fi_perfile); 984 INIT_LIST_HEAD(&fp->fi_stateids);
967 INIT_LIST_HEAD(&fp->fi_del_perfile); 985 INIT_LIST_HEAD(&fp->fi_delegations);
968 list_add(&fp->fi_hash, &file_hashtbl[hashval]); 986 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
969 fp->fi_inode = igrab(ino); 987 fp->fi_inode = igrab(ino);
970 fp->fi_id = current_fileid++; 988 fp->fi_id = current_fileid++;
971 alloc_file++;
972 return fp; 989 return fp;
973 } 990 }
974 return NULL; 991 return NULL;
975} 992}
976 993
977static void 994static void
978release_all_files(void) 995nfsd4_free_slab(kmem_cache_t **slab)
979{ 996{
980 int i; 997 int status;
981 struct nfs4_file *fp;
982 998
983 for (i=0;i<FILE_HASH_SIZE;i++) { 999 if (*slab == NULL)
984 while (!list_empty(&file_hashtbl[i])) { 1000 return;
985 fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash); 1001 status = kmem_cache_destroy(*slab);
986 /* this should never be more than once... */ 1002 *slab = NULL;
987 if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) { 1003 WARN_ON(status);
988 printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
989 }
990 release_file(fp);
991 }
992 }
993} 1004}
994 1005
995kmem_cache_t *stateowner_slab = NULL; 1006static void
1007nfsd4_free_slabs(void)
1008{
1009 nfsd4_free_slab(&stateowner_slab);
1010 nfsd4_free_slab(&file_slab);
1011 nfsd4_free_slab(&stateid_slab);
1012 nfsd4_free_slab(&deleg_slab);
1013}
996 1014
997static int 1015static int
998nfsd4_init_slabs(void) 1016nfsd4_init_slabs(void)
999{ 1017{
1000 stateowner_slab = kmem_cache_create("nfsd4_stateowners", 1018 stateowner_slab = kmem_cache_create("nfsd4_stateowners",
1001 sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL); 1019 sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
1002 if (stateowner_slab == NULL) { 1020 if (stateowner_slab == NULL)
1003 dprintk("nfsd4: out of memory while initializing nfsv4\n"); 1021 goto out_nomem;
1004 return -ENOMEM; 1022 file_slab = kmem_cache_create("nfsd4_files",
1005 } 1023 sizeof(struct nfs4_file), 0, 0, NULL, NULL);
1024 if (file_slab == NULL)
1025 goto out_nomem;
1026 stateid_slab = kmem_cache_create("nfsd4_stateids",
1027 sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
1028 if (stateid_slab == NULL)
1029 goto out_nomem;
1030 deleg_slab = kmem_cache_create("nfsd4_delegations",
1031 sizeof(struct nfs4_delegation), 0, 0, NULL, NULL);
1032 if (deleg_slab == NULL)
1033 goto out_nomem;
1006 return 0; 1034 return 0;
1007} 1035out_nomem:
1008 1036 nfsd4_free_slabs();
1009static void 1037 dprintk("nfsd4: out of memory while initializing nfsv4\n");
1010nfsd4_free_slabs(void) 1038 return -ENOMEM;
1011{
1012 int status = 0;
1013
1014 if (stateowner_slab)
1015 status = kmem_cache_destroy(stateowner_slab);
1016 stateowner_slab = NULL;
1017 BUG_ON(status);
1018} 1039}
1019 1040
1020void 1041void
@@ -1055,14 +1076,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
1055 INIT_LIST_HEAD(&sop->so_idhash); 1076 INIT_LIST_HEAD(&sop->so_idhash);
1056 INIT_LIST_HEAD(&sop->so_strhash); 1077 INIT_LIST_HEAD(&sop->so_strhash);
1057 INIT_LIST_HEAD(&sop->so_perclient); 1078 INIT_LIST_HEAD(&sop->so_perclient);
1058 INIT_LIST_HEAD(&sop->so_perfilestate); 1079 INIT_LIST_HEAD(&sop->so_stateids);
1059 INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ 1080 INIT_LIST_HEAD(&sop->so_perstateid); /* not used */
1060 INIT_LIST_HEAD(&sop->so_close_lru); 1081 INIT_LIST_HEAD(&sop->so_close_lru);
1061 sop->so_time = 0; 1082 sop->so_time = 0;
1062 list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); 1083 list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
1063 list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); 1084 list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
1064 list_add(&sop->so_perclient, &clp->cl_perclient); 1085 list_add(&sop->so_perclient, &clp->cl_openowners);
1065 add_perclient++;
1066 sop->so_is_open_owner = 1; 1086 sop->so_is_open_owner = 1;
1067 sop->so_id = current_ownerid++; 1087 sop->so_id = current_ownerid++;
1068 sop->so_client = clp; 1088 sop->so_client = clp;
@@ -1080,10 +1100,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
1080{ 1100{
1081 struct nfs4_stateowner *lock_sop; 1101 struct nfs4_stateowner *lock_sop;
1082 1102
1083 while (!list_empty(&open_stp->st_perlockowner)) { 1103 while (!list_empty(&open_stp->st_lockowners)) {
1084 lock_sop = list_entry(open_stp->st_perlockowner.next, 1104 lock_sop = list_entry(open_stp->st_lockowners.next,
1085 struct nfs4_stateowner, so_perlockowner); 1105 struct nfs4_stateowner, so_perstateid);
1086 /* list_del(&open_stp->st_perlockowner); */ 1106 /* list_del(&open_stp->st_lockowners); */
1087 BUG_ON(lock_sop->so_is_open_owner); 1107 BUG_ON(lock_sop->so_is_open_owner);
1088 release_stateowner(lock_sop); 1108 release_stateowner(lock_sop);
1089 } 1109 }
@@ -1096,14 +1116,12 @@ unhash_stateowner(struct nfs4_stateowner *sop)
1096 1116
1097 list_del(&sop->so_idhash); 1117 list_del(&sop->so_idhash);
1098 list_del(&sop->so_strhash); 1118 list_del(&sop->so_strhash);
1099 if (sop->so_is_open_owner) { 1119 if (sop->so_is_open_owner)
1100 list_del(&sop->so_perclient); 1120 list_del(&sop->so_perclient);
1101 del_perclient++; 1121 list_del(&sop->so_perstateid);
1102 } 1122 while (!list_empty(&sop->so_stateids)) {
1103 list_del(&sop->so_perlockowner); 1123 stp = list_entry(sop->so_stateids.next,
1104 while (!list_empty(&sop->so_perfilestate)) { 1124 struct nfs4_stateid, st_perstateowner);
1105 stp = list_entry(sop->so_perfilestate.next,
1106 struct nfs4_stateid, st_perfilestate);
1107 if (sop->so_is_open_owner) 1125 if (sop->so_is_open_owner)
1108 release_stateid(stp, OPEN_STATE); 1126 release_stateid(stp, OPEN_STATE);
1109 else 1127 else
@@ -1125,14 +1143,14 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1125 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); 1143 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
1126 1144
1127 INIT_LIST_HEAD(&stp->st_hash); 1145 INIT_LIST_HEAD(&stp->st_hash);
1128 INIT_LIST_HEAD(&stp->st_perfilestate); 1146 INIT_LIST_HEAD(&stp->st_perstateowner);
1129 INIT_LIST_HEAD(&stp->st_perlockowner); 1147 INIT_LIST_HEAD(&stp->st_lockowners);
1130 INIT_LIST_HEAD(&stp->st_perfile); 1148 INIT_LIST_HEAD(&stp->st_perfile);
1131 list_add(&stp->st_hash, &stateid_hashtbl[hashval]); 1149 list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
1132 list_add(&stp->st_perfilestate, &sop->so_perfilestate); 1150 list_add(&stp->st_perstateowner, &sop->so_stateids);
1133 list_add_perfile++; 1151 list_add(&stp->st_perfile, &fp->fi_stateids);
1134 list_add(&stp->st_perfile, &fp->fi_perfile);
1135 stp->st_stateowner = sop; 1152 stp->st_stateowner = sop;
1153 get_nfs4_file(fp);
1136 stp->st_file = fp; 1154 stp->st_file = fp;
1137 stp->st_stateid.si_boot = boot_time; 1155 stp->st_stateid.si_boot = boot_time;
1138 stp->st_stateid.si_stateownerid = sop->so_id; 1156 stp->st_stateid.si_stateownerid = sop->so_id;
@@ -1142,6 +1160,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1142 stp->st_deny_bmap = 0; 1160 stp->st_deny_bmap = 0;
1143 __set_bit(open->op_share_access, &stp->st_access_bmap); 1161 __set_bit(open->op_share_access, &stp->st_access_bmap);
1144 __set_bit(open->op_share_deny, &stp->st_deny_bmap); 1162 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
1163 stp->st_openstp = NULL;
1145} 1164}
1146 1165
1147static void 1166static void
@@ -1150,30 +1169,20 @@ release_stateid(struct nfs4_stateid *stp, int flags)
1150 struct file *filp = stp->st_vfs_file; 1169 struct file *filp = stp->st_vfs_file;
1151 1170
1152 list_del(&stp->st_hash); 1171 list_del(&stp->st_hash);
1153 list_del_perfile++;
1154 list_del(&stp->st_perfile); 1172 list_del(&stp->st_perfile);
1155 list_del(&stp->st_perfilestate); 1173 list_del(&stp->st_perstateowner);
1156 if (flags & OPEN_STATE) { 1174 if (flags & OPEN_STATE) {
1157 release_stateid_lockowners(stp); 1175 release_stateid_lockowners(stp);
1158 stp->st_vfs_file = NULL; 1176 stp->st_vfs_file = NULL;
1159 nfsd_close(filp); 1177 nfsd_close(filp);
1160 vfsclose++;
1161 } else if (flags & LOCK_STATE) 1178 } else if (flags & LOCK_STATE)
1162 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); 1179 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
1163 kfree(stp); 1180 put_nfs4_file(stp->st_file);
1181 kmem_cache_free(stateid_slab, stp);
1164 stp = NULL; 1182 stp = NULL;
1165} 1183}
1166 1184
1167static void 1185static void
1168release_file(struct nfs4_file *fp)
1169{
1170 free_file++;
1171 list_del(&fp->fi_hash);
1172 iput(fp->fi_inode);
1173 kfree(fp);
1174}
1175
1176void
1177move_to_close_lru(struct nfs4_stateowner *sop) 1186move_to_close_lru(struct nfs4_stateowner *sop)
1178{ 1187{
1179 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1188 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1183,11 +1192,10 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1183 sop->so_time = get_seconds(); 1192 sop->so_time = get_seconds();
1184} 1193}
1185 1194
1186void 1195static void
1187release_state_owner(struct nfs4_stateid *stp, int flag) 1196release_state_owner(struct nfs4_stateid *stp, int flag)
1188{ 1197{
1189 struct nfs4_stateowner *sop = stp->st_stateowner; 1198 struct nfs4_stateowner *sop = stp->st_stateowner;
1190 struct nfs4_file *fp = stp->st_file;
1191 1199
1192 dprintk("NFSD: release_state_owner\n"); 1200 dprintk("NFSD: release_state_owner\n");
1193 release_stateid(stp, flag); 1201 release_stateid(stp, flag);
@@ -1196,12 +1204,8 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
1196 * released by the laundromat service after the lease period 1204 * released by the laundromat service after the lease period
1197 * to enable us to handle CLOSE replay 1205 * to enable us to handle CLOSE replay
1198 */ 1206 */
1199 if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) 1207 if (sop->so_confirmed && list_empty(&sop->so_stateids))
1200 move_to_close_lru(sop); 1208 move_to_close_lru(sop);
1201 /* unused nfs4_file's are releseed. XXX slab cache? */
1202 if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
1203 release_file(fp);
1204 }
1205} 1209}
1206 1210
1207static int 1211static int
@@ -1231,8 +1235,10 @@ find_file(struct inode *ino)
1231 struct nfs4_file *fp; 1235 struct nfs4_file *fp;
1232 1236
1233 list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { 1237 list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
1234 if (fp->fi_inode == ino) 1238 if (fp->fi_inode == ino) {
1239 get_nfs4_file(fp);
1235 return fp; 1240 return fp;
1241 }
1236 } 1242 }
1237 return NULL; 1243 return NULL;
1238} 1244}
@@ -1240,7 +1246,7 @@ find_file(struct inode *ino)
1240#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) 1246#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
1241#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) 1247#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
1242 1248
1243void 1249static void
1244set_access(unsigned int *access, unsigned long bmap) { 1250set_access(unsigned int *access, unsigned long bmap) {
1245 int i; 1251 int i;
1246 1252
@@ -1251,7 +1257,7 @@ set_access(unsigned int *access, unsigned long bmap) {
1251 } 1257 }
1252} 1258}
1253 1259
1254void 1260static void
1255set_deny(unsigned int *deny, unsigned long bmap) { 1261set_deny(unsigned int *deny, unsigned long bmap) {
1256 int i; 1262 int i;
1257 1263
@@ -1277,25 +1283,30 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
1277 * Called to check deny when READ with all zero stateid or 1283 * Called to check deny when READ with all zero stateid or
1278 * WRITE with all zero or all one stateid 1284 * WRITE with all zero or all one stateid
1279 */ 1285 */
1280int 1286static int
1281nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) 1287nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
1282{ 1288{
1283 struct inode *ino = current_fh->fh_dentry->d_inode; 1289 struct inode *ino = current_fh->fh_dentry->d_inode;
1284 struct nfs4_file *fp; 1290 struct nfs4_file *fp;
1285 struct nfs4_stateid *stp; 1291 struct nfs4_stateid *stp;
1292 int ret;
1286 1293
1287 dprintk("NFSD: nfs4_share_conflict\n"); 1294 dprintk("NFSD: nfs4_share_conflict\n");
1288 1295
1289 fp = find_file(ino); 1296 fp = find_file(ino);
1290 if (fp) { 1297 if (!fp)
1298 return nfs_ok;
1299 ret = nfserr_locked;
1291 /* Search for conflicting share reservations */ 1300 /* Search for conflicting share reservations */
1292 list_for_each_entry(stp, &fp->fi_perfile, st_perfile) { 1301 list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
1293 if (test_bit(deny_type, &stp->st_deny_bmap) || 1302 if (test_bit(deny_type, &stp->st_deny_bmap) ||
1294 test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) 1303 test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
1295 return nfserr_share_denied; 1304 goto out;
1296 }
1297 } 1305 }
1298 return nfs_ok; 1306 ret = nfs_ok;
1307out:
1308 put_nfs4_file(fp);
1309 return ret;
1299} 1310}
1300 1311
1301static inline void 1312static inline void
@@ -1427,7 +1438,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
1427 return -EAGAIN; 1438 return -EAGAIN;
1428} 1439}
1429 1440
1430struct lock_manager_operations nfsd_lease_mng_ops = { 1441static struct lock_manager_operations nfsd_lease_mng_ops = {
1431 .fl_break = nfsd_break_deleg_cb, 1442 .fl_break = nfsd_break_deleg_cb,
1432 .fl_release_private = nfsd_release_deleg_cb, 1443 .fl_release_private = nfsd_release_deleg_cb,
1433 .fl_copy_lock = nfsd_copy_lock_deleg_cb, 1444 .fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -1473,7 +1484,7 @@ nfsd4_process_open1(struct nfsd4_open *open)
1473 if (sop) { 1484 if (sop) {
1474 open->op_stateowner = sop; 1485 open->op_stateowner = sop;
1475 /* check for replay */ 1486 /* check for replay */
1476 if (open->op_seqid == sop->so_seqid){ 1487 if (open->op_seqid == sop->so_seqid - 1){
1477 if (sop->so_replay.rp_buflen) 1488 if (sop->so_replay.rp_buflen)
1478 return NFSERR_REPLAY_ME; 1489 return NFSERR_REPLAY_ME;
1479 else { 1490 else {
@@ -1488,7 +1499,7 @@ nfsd4_process_open1(struct nfsd4_open *open)
1488 goto renew; 1499 goto renew;
1489 } 1500 }
1490 } else if (sop->so_confirmed) { 1501 } else if (sop->so_confirmed) {
1491 if (open->op_seqid == sop->so_seqid + 1) 1502 if (open->op_seqid == sop->so_seqid)
1492 goto renew; 1503 goto renew;
1493 status = nfserr_bad_seqid; 1504 status = nfserr_bad_seqid;
1494 goto out; 1505 goto out;
@@ -1521,11 +1532,54 @@ renew:
1521 status = nfs_ok; 1532 status = nfs_ok;
1522 renew_client(sop->so_client); 1533 renew_client(sop->so_client);
1523out: 1534out:
1524 if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
1525 status = nfserr_reclaim_bad;
1526 return status; 1535 return status;
1527} 1536}
1528 1537
1538static inline int
1539nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
1540{
1541 if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
1542 return nfserr_openmode;
1543 else
1544 return nfs_ok;
1545}
1546
1547static struct nfs4_delegation *
1548find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
1549{
1550 struct nfs4_delegation *dp;
1551
1552 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
1553 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
1554 return dp;
1555 }
1556 return NULL;
1557}
1558
1559static int
1560nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
1561 struct nfs4_delegation **dp)
1562{
1563 int flags;
1564 int status = nfserr_bad_stateid;
1565
1566 *dp = find_delegation_file(fp, &open->op_delegate_stateid);
1567 if (*dp == NULL)
1568 goto out;
1569 flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
1570 RD_STATE : WR_STATE;
1571 status = nfs4_check_delegmode(*dp, flags);
1572 if (status)
1573 *dp = NULL;
1574out:
1575 if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
1576 return nfs_ok;
1577 if (status)
1578 return status;
1579 open->op_stateowner->so_confirmed = 1;
1580 return nfs_ok;
1581}
1582
1529static int 1583static int
1530nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) 1584nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
1531{ 1585{
@@ -1533,7 +1587,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state
1533 int status = nfserr_share_denied; 1587 int status = nfserr_share_denied;
1534 struct nfs4_stateowner *sop = open->op_stateowner; 1588 struct nfs4_stateowner *sop = open->op_stateowner;
1535 1589
1536 list_for_each_entry(local, &fp->fi_perfile, st_perfile) { 1590 list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
1537 /* ignore lock owners */ 1591 /* ignore lock owners */
1538 if (local->st_stateowner->so_is_open_owner == 0) 1592 if (local->st_stateowner->so_is_open_owner == 0)
1539 continue; 1593 continue;
@@ -1549,25 +1603,37 @@ out:
1549 return status; 1603 return status;
1550} 1604}
1551 1605
1606static inline struct nfs4_stateid *
1607nfs4_alloc_stateid(void)
1608{
1609 return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
1610}
1611
1552static int 1612static int
1553nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, 1613nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
1614 struct nfs4_delegation *dp,
1554 struct svc_fh *cur_fh, int flags) 1615 struct svc_fh *cur_fh, int flags)
1555{ 1616{
1556 struct nfs4_stateid *stp; 1617 struct nfs4_stateid *stp;
1557 int status;
1558 1618
1559 stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL); 1619 stp = nfs4_alloc_stateid();
1560 if (stp == NULL) 1620 if (stp == NULL)
1561 return nfserr_resource; 1621 return nfserr_resource;
1562 1622
1563 status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file); 1623 if (dp) {
1564 if (status) { 1624 get_file(dp->dl_vfs_file);
1565 if (status == nfserr_dropit) 1625 stp->st_vfs_file = dp->dl_vfs_file;
1566 status = nfserr_jukebox; 1626 } else {
1567 kfree(stp); 1627 int status;
1568 return status; 1628 status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
1629 &stp->st_vfs_file);
1630 if (status) {
1631 if (status == nfserr_dropit)
1632 status = nfserr_jukebox;
1633 kmem_cache_free(stateid_slab, stp);
1634 return status;
1635 }
1569 } 1636 }
1570 vfsopen++;
1571 *stpp = stp; 1637 *stpp = stp;
1572 return 0; 1638 return 0;
1573} 1639}
@@ -1619,18 +1685,11 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
1619} 1685}
1620 1686
1621 1687
1622/* decrement seqid on successful reclaim, it will be bumped in encode_open */
1623static void 1688static void
1624nfs4_set_claim_prev(struct nfsd4_open *open, int *status) 1689nfs4_set_claim_prev(struct nfsd4_open *open)
1625{ 1690{
1626 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) { 1691 open->op_stateowner->so_confirmed = 1;
1627 if (*status) 1692 open->op_stateowner->so_client->cl_firststate = 1;
1628 *status = nfserr_reclaim_bad;
1629 else {
1630 open->op_stateowner->so_confirmed = 1;
1631 open->op_stateowner->so_seqid--;
1632 }
1633 }
1634} 1693}
1635 1694
1636/* 1695/*
@@ -1646,14 +1705,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1646 int status, flag = 0; 1705 int status, flag = 0;
1647 1706
1648 flag = NFS4_OPEN_DELEGATE_NONE; 1707 flag = NFS4_OPEN_DELEGATE_NONE;
1649 if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL 1708 open->op_recall = 0;
1650 || !atomic_read(&cb->cb_set) || !sop->so_confirmed) 1709 switch (open->op_claim_type) {
1651 goto out; 1710 case NFS4_OPEN_CLAIM_PREVIOUS:
1652 1711 if (!atomic_read(&cb->cb_set))
1653 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 1712 open->op_recall = 1;
1654 flag = NFS4_OPEN_DELEGATE_WRITE; 1713 flag = open->op_delegate_type;
1655 else 1714 if (flag == NFS4_OPEN_DELEGATE_NONE)
1656 flag = NFS4_OPEN_DELEGATE_READ; 1715 goto out;
1716 break;
1717 case NFS4_OPEN_CLAIM_NULL:
1718 /* Let's not give out any delegations till everyone's
1719 * had the chance to reclaim theirs.... */
1720 if (nfs4_in_grace())
1721 goto out;
1722 if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
1723 goto out;
1724 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
1725 flag = NFS4_OPEN_DELEGATE_WRITE;
1726 else
1727 flag = NFS4_OPEN_DELEGATE_READ;
1728 break;
1729 default:
1730 goto out;
1731 }
1657 1732
1658 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 1733 dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
1659 if (dp == NULL) { 1734 if (dp == NULL) {
@@ -1687,6 +1762,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1687 dp->dl_stateid.si_fileid, 1762 dp->dl_stateid.si_fileid,
1688 dp->dl_stateid.si_generation); 1763 dp->dl_stateid.si_generation);
1689out: 1764out:
1765 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
1766 && flag == NFS4_OPEN_DELEGATE_NONE
1767 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
1768 printk("NFSD: WARNING: refusing delegation reclaim\n");
1690 open->op_delegate_type = flag; 1769 open->op_delegate_type = flag;
1691} 1770}
1692 1771
@@ -1699,8 +1778,15 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1699 struct nfs4_file *fp = NULL; 1778 struct nfs4_file *fp = NULL;
1700 struct inode *ino = current_fh->fh_dentry->d_inode; 1779 struct inode *ino = current_fh->fh_dentry->d_inode;
1701 struct nfs4_stateid *stp = NULL; 1780 struct nfs4_stateid *stp = NULL;
1781 struct nfs4_delegation *dp = NULL;
1702 int status; 1782 int status;
1703 1783
1784 if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
1785 return nfserr_grace;
1786
1787 if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
1788 return nfserr_no_grace;
1789
1704 status = nfserr_inval; 1790 status = nfserr_inval;
1705 if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) 1791 if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
1706 goto out; 1792 goto out;
@@ -1713,7 +1799,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1713 if (fp) { 1799 if (fp) {
1714 if ((status = nfs4_check_open(fp, open, &stp))) 1800 if ((status = nfs4_check_open(fp, open, &stp)))
1715 goto out; 1801 goto out;
1802 status = nfs4_check_deleg(fp, open, &dp);
1803 if (status)
1804 goto out;
1716 } else { 1805 } else {
1806 status = nfserr_bad_stateid;
1807 if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
1808 goto out;
1717 status = nfserr_resource; 1809 status = nfserr_resource;
1718 fp = alloc_init_file(ino); 1810 fp = alloc_init_file(ino);
1719 if (fp == NULL) 1811 if (fp == NULL)
@@ -1729,6 +1821,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1729 status = nfs4_upgrade_open(rqstp, current_fh, stp, open); 1821 status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
1730 if (status) 1822 if (status)
1731 goto out; 1823 goto out;
1824 update_stateid(&stp->st_stateid);
1732 } else { 1825 } else {
1733 /* Stateid was not found, this is a new OPEN */ 1826 /* Stateid was not found, this is a new OPEN */
1734 int flags = 0; 1827 int flags = 0;
@@ -1736,7 +1829,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1736 flags = MAY_WRITE; 1829 flags = MAY_WRITE;
1737 else 1830 else
1738 flags = MAY_READ; 1831 flags = MAY_READ;
1739 if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags))) 1832 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
1833 if (status)
1740 goto out; 1834 goto out;
1741 init_stateid(stp, fp, open); 1835 init_stateid(stp, fp, open);
1742 status = nfsd4_truncate(rqstp, current_fh, open); 1836 status = nfsd4_truncate(rqstp, current_fh, open);
@@ -1759,12 +1853,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1759 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 1853 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
1760 stp->st_stateid.si_fileid, stp->st_stateid.si_generation); 1854 stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
1761out: 1855out:
1762 /* take the opportunity to clean up unused state */ 1856 if (fp)
1763 if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) 1857 put_nfs4_file(fp);
1764 release_file(fp); 1858 if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
1765 1859 nfs4_set_claim_prev(open);
1766 /* CLAIM_PREVIOUS has different error returns */
1767 nfs4_set_claim_prev(open, &status);
1768 /* 1860 /*
1769 * To finish the open response, we just need to set the rflags. 1861 * To finish the open response, we just need to set the rflags.
1770 */ 1862 */
@@ -1775,6 +1867,7 @@ out:
1775 return status; 1867 return status;
1776} 1868}
1777 1869
1870static struct workqueue_struct *laundry_wq;
1778static struct work_struct laundromat_work; 1871static struct work_struct laundromat_work;
1779static void laundromat_main(void *); 1872static void laundromat_main(void *);
1780static DECLARE_WORK(laundromat_work, laundromat_main, NULL); 1873static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1800,7 +1893,7 @@ nfsd4_renew(clientid_t *clid)
1800 } 1893 }
1801 renew_client(clp); 1894 renew_client(clp);
1802 status = nfserr_cb_path_down; 1895 status = nfserr_cb_path_down;
1803 if (!list_empty(&clp->cl_del_perclnt) 1896 if (!list_empty(&clp->cl_delegations)
1804 && !atomic_read(&clp->cl_callback.cb_set)) 1897 && !atomic_read(&clp->cl_callback.cb_set))
1805 goto out; 1898 goto out;
1806 status = nfs_ok; 1899 status = nfs_ok;
@@ -1809,7 +1902,15 @@ out:
1809 return status; 1902 return status;
1810} 1903}
1811 1904
1812time_t 1905static void
1906end_grace(void)
1907{
1908 dprintk("NFSD: end of grace period\n");
1909 nfsd4_recdir_purge_old();
1910 in_grace = 0;
1911}
1912
1913static time_t
1813nfs4_laundromat(void) 1914nfs4_laundromat(void)
1814{ 1915{
1815 struct nfs4_client *clp; 1916 struct nfs4_client *clp;
@@ -1823,6 +1924,8 @@ nfs4_laundromat(void)
1823 nfs4_lock_state(); 1924 nfs4_lock_state();
1824 1925
1825 dprintk("NFSD: laundromat service - starting\n"); 1926 dprintk("NFSD: laundromat service - starting\n");
1927 if (in_grace)
1928 end_grace();
1826 list_for_each_safe(pos, next, &client_lru) { 1929 list_for_each_safe(pos, next, &client_lru) {
1827 clp = list_entry(pos, struct nfs4_client, cl_lru); 1930 clp = list_entry(pos, struct nfs4_client, cl_lru);
1828 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { 1931 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1833,6 +1936,7 @@ nfs4_laundromat(void)
1833 } 1936 }
1834 dprintk("NFSD: purging unused client (clientid %08x)\n", 1937 dprintk("NFSD: purging unused client (clientid %08x)\n",
1835 clp->cl_clientid.cl_id); 1938 clp->cl_clientid.cl_id);
1939 nfsd4_remove_clid_dir(clp);
1836 expire_client(clp); 1940 expire_client(clp);
1837 } 1941 }
1838 INIT_LIST_HEAD(&reaplist); 1942 INIT_LIST_HEAD(&reaplist);
@@ -1882,17 +1986,14 @@ laundromat_main(void *not_used)
1882 1986
1883 t = nfs4_laundromat(); 1987 t = nfs4_laundromat();
1884 dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); 1988 dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
1885 schedule_delayed_work(&laundromat_work, t*HZ); 1989 queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
1886} 1990}
1887 1991
1888/* search ownerid_hashtbl[] and close_lru for stateid owner 1992static struct nfs4_stateowner *
1889 * (stateid->si_stateownerid) 1993search_close_lru(u32 st_id, int flags)
1890 */ 1994{
1891struct nfs4_stateowner *
1892find_openstateowner_id(u32 st_id, int flags) {
1893 struct nfs4_stateowner *local = NULL; 1995 struct nfs4_stateowner *local = NULL;
1894 1996
1895 dprintk("NFSD: find_openstateowner_id %d\n", st_id);
1896 if (flags & CLOSE_STATE) { 1997 if (flags & CLOSE_STATE) {
1897 list_for_each_entry(local, &close_lru, so_close_lru) { 1998 list_for_each_entry(local, &close_lru, so_close_lru) {
1898 if (local->so_id == st_id) 1999 if (local->so_id == st_id)
@@ -1949,15 +2050,6 @@ out:
1949} 2050}
1950 2051
1951static inline int 2052static inline int
1952nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
1953{
1954 if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
1955 return nfserr_openmode;
1956 else
1957 return nfs_ok;
1958}
1959
1960static inline int
1961check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) 2053check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
1962{ 2054{
1963 /* Trying to call delegreturn with a special stateid? Yuch: */ 2055 /* Trying to call delegreturn with a special stateid? Yuch: */
@@ -2067,14 +2159,19 @@ out:
2067 return status; 2159 return status;
2068} 2160}
2069 2161
2162static inline int
2163setlkflg (int type)
2164{
2165 return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
2166 RD_STATE : WR_STATE;
2167}
2070 2168
2071/* 2169/*
2072 * Checks for sequence id mutating operations. 2170 * Checks for sequence id mutating operations.
2073 */ 2171 */
2074int 2172static int
2075nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) 2173nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
2076{ 2174{
2077 int status;
2078 struct nfs4_stateid *stp; 2175 struct nfs4_stateid *stp;
2079 struct nfs4_stateowner *sop; 2176 struct nfs4_stateowner *sop;
2080 2177
@@ -2082,53 +2179,65 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2082 "stateid = (%08x/%08x/%08x/%08x)\n", seqid, 2179 "stateid = (%08x/%08x/%08x/%08x)\n", seqid,
2083 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, 2180 stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
2084 stateid->si_generation); 2181 stateid->si_generation);
2085 2182
2086 *stpp = NULL; 2183 *stpp = NULL;
2087 *sopp = NULL; 2184 *sopp = NULL;
2088 2185
2089 status = nfserr_bad_stateid;
2090 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { 2186 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
2091 printk("NFSD: preprocess_seqid_op: magic stateid!\n"); 2187 printk("NFSD: preprocess_seqid_op: magic stateid!\n");
2092 goto out; 2188 return nfserr_bad_stateid;
2093 } 2189 }
2094 2190
2095 status = nfserr_stale_stateid;
2096 if (STALE_STATEID(stateid)) 2191 if (STALE_STATEID(stateid))
2097 goto out; 2192 return nfserr_stale_stateid;
2098 /* 2193 /*
2099 * We return BAD_STATEID if filehandle doesn't match stateid, 2194 * We return BAD_STATEID if filehandle doesn't match stateid,
2100 * the confirmed flag is incorrecly set, or the generation 2195 * the confirmed flag is incorrecly set, or the generation
2101 * number is incorrect. 2196 * number is incorrect.
2102 * If there is no entry in the openfile table for this id,
2103 * we can't always return BAD_STATEID;
2104 * this might be a retransmitted CLOSE which has arrived after
2105 * the openfile has been released.
2106 */ 2197 */
2107 if (!(stp = find_stateid(stateid, flags))) 2198 stp = find_stateid(stateid, flags);
2108 goto no_nfs4_stateid; 2199 if (stp == NULL) {
2109 2200 /*
2110 status = nfserr_bad_stateid; 2201 * Also, we should make sure this isn't just the result of
2202 * a replayed close:
2203 */
2204 sop = search_close_lru(stateid->si_stateownerid, flags);
2205 if (sop == NULL)
2206 return nfserr_bad_stateid;
2207 *sopp = sop;
2208 goto check_replay;
2209 }
2111 2210
2112 /* for new lock stateowners: 2211 if (lock) {
2113 * check that the lock->v.new.open_stateid
2114 * refers to an open stateowner
2115 *
2116 * check that the lockclid (nfs4_lock->v.new.clientid) is the same
2117 * as the open_stateid->st_stateowner->so_client->clientid
2118 */
2119 if (lockclid) {
2120 struct nfs4_stateowner *sop = stp->st_stateowner; 2212 struct nfs4_stateowner *sop = stp->st_stateowner;
2213 clientid_t *lockclid = &lock->v.new.clientid;
2121 struct nfs4_client *clp = sop->so_client; 2214 struct nfs4_client *clp = sop->so_client;
2215 int lkflg = 0;
2216 int status;
2217
2218 lkflg = setlkflg(lock->lk_type);
2219
2220 if (lock->lk_is_new) {
2221 if (!sop->so_is_open_owner)
2222 return nfserr_bad_stateid;
2223 if (!cmp_clid(&clp->cl_clientid, lockclid))
2224 return nfserr_bad_stateid;
2225 /* stp is the open stateid */
2226 status = nfs4_check_openmode(stp, lkflg);
2227 if (status)
2228 return status;
2229 } else {
2230 /* stp is the lock stateid */
2231 status = nfs4_check_openmode(stp->st_openstp, lkflg);
2232 if (status)
2233 return status;
2234 }
2122 2235
2123 if (!sop->so_is_open_owner)
2124 goto out;
2125 if (!cmp_clid(&clp->cl_clientid, lockclid))
2126 goto out;
2127 } 2236 }
2128 2237
2129 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { 2238 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
2130 printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); 2239 printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
2131 goto out; 2240 return nfserr_bad_stateid;
2132 } 2241 }
2133 2242
2134 *stpp = stp; 2243 *stpp = stp;
@@ -2139,63 +2248,41 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2139 * For the moment, we ignore the possibility of 2248 * For the moment, we ignore the possibility of
2140 * generation number wraparound. 2249 * generation number wraparound.
2141 */ 2250 */
2142 if (seqid != sop->so_seqid + 1) 2251 if (seqid != sop->so_seqid)
2143 goto check_replay; 2252 goto check_replay;
2144 2253
2145 if (sop->so_confirmed) { 2254 if (sop->so_confirmed && flags & CONFIRM) {
2146 if (flags & CONFIRM) { 2255 printk("NFSD: preprocess_seqid_op: expected"
2147 printk("NFSD: preprocess_seqid_op: expected unconfirmed stateowner!\n"); 2256 " unconfirmed stateowner!\n");
2148 goto out; 2257 return nfserr_bad_stateid;
2149 }
2150 } 2258 }
2151 else { 2259 if (!sop->so_confirmed && !(flags & CONFIRM)) {
2152 if (!(flags & CONFIRM)) { 2260 printk("NFSD: preprocess_seqid_op: stateowner not"
2153 printk("NFSD: preprocess_seqid_op: stateowner not confirmed yet!\n"); 2261 " confirmed yet!\n");
2154 goto out; 2262 return nfserr_bad_stateid;
2155 }
2156 } 2263 }
2157 if (stateid->si_generation > stp->st_stateid.si_generation) { 2264 if (stateid->si_generation > stp->st_stateid.si_generation) {
2158 printk("NFSD: preprocess_seqid_op: future stateid?!\n"); 2265 printk("NFSD: preprocess_seqid_op: future stateid?!\n");
2159 goto out; 2266 return nfserr_bad_stateid;
2160 } 2267 }
2161 2268
2162 status = nfserr_old_stateid;
2163 if (stateid->si_generation < stp->st_stateid.si_generation) { 2269 if (stateid->si_generation < stp->st_stateid.si_generation) {
2164 printk("NFSD: preprocess_seqid_op: old stateid!\n"); 2270 printk("NFSD: preprocess_seqid_op: old stateid!\n");
2165 goto out; 2271 return nfserr_old_stateid;
2166 }
2167 /* XXX renew the client lease here */
2168 status = nfs_ok;
2169
2170out:
2171 return status;
2172
2173no_nfs4_stateid:
2174
2175 /*
2176 * We determine whether this is a bad stateid or a replay,
2177 * starting by trying to look up the stateowner.
2178 * If stateowner is not found - stateid is bad.
2179 */
2180 if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) {
2181 printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n");
2182 status = nfserr_bad_stateid;
2183 goto out;
2184 } 2272 }
2185 *sopp = sop; 2273 renew_client(sop->so_client);
2274 return nfs_ok;
2186 2275
2187check_replay: 2276check_replay:
2188 if (seqid == sop->so_seqid) { 2277 if (seqid == sop->so_seqid - 1) {
2189 printk("NFSD: preprocess_seqid_op: retransmission?\n"); 2278 printk("NFSD: preprocess_seqid_op: retransmission?\n");
2190 /* indicate replay to calling function */ 2279 /* indicate replay to calling function */
2191 status = NFSERR_REPLAY_ME; 2280 return NFSERR_REPLAY_ME;
2192 } else {
2193 printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid);
2194
2195 *sopp = NULL;
2196 status = nfserr_bad_seqid;
2197 } 2281 }
2198 goto out; 2282 printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
2283 sop->so_seqid, seqid);
2284 *sopp = NULL;
2285 return nfserr_bad_seqid;
2199} 2286}
2200 2287
2201int 2288int
@@ -2230,6 +2317,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
2230 stp->st_stateid.si_stateownerid, 2317 stp->st_stateid.si_stateownerid,
2231 stp->st_stateid.si_fileid, 2318 stp->st_stateid.si_fileid,
2232 stp->st_stateid.si_generation); 2319 stp->st_stateid.si_generation);
2320
2321 nfsd4_create_clid_dir(sop->so_client);
2233out: 2322out:
2234 if (oc->oc_stateowner) 2323 if (oc->oc_stateowner)
2235 nfs4_get_stateowner(oc->oc_stateowner); 2324 nfs4_get_stateowner(oc->oc_stateowner);
@@ -2387,7 +2476,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
2387static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; 2476static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
2388static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; 2477static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
2389 2478
2390struct nfs4_stateid * 2479static struct nfs4_stateid *
2391find_stateid(stateid_t *stid, int flags) 2480find_stateid(stateid_t *stid, int flags)
2392{ 2481{
2393 struct nfs4_stateid *local = NULL; 2482 struct nfs4_stateid *local = NULL;
@@ -2419,25 +2508,19 @@ find_stateid(stateid_t *stid, int flags)
2419static struct nfs4_delegation * 2508static struct nfs4_delegation *
2420find_delegation_stateid(struct inode *ino, stateid_t *stid) 2509find_delegation_stateid(struct inode *ino, stateid_t *stid)
2421{ 2510{
2422 struct nfs4_delegation *dp = NULL; 2511 struct nfs4_file *fp;
2423 struct nfs4_file *fp = NULL; 2512 struct nfs4_delegation *dl;
2424 u32 st_id;
2425 2513
2426 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", 2514 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
2427 stid->si_boot, stid->si_stateownerid, 2515 stid->si_boot, stid->si_stateownerid,
2428 stid->si_fileid, stid->si_generation); 2516 stid->si_fileid, stid->si_generation);
2429 2517
2430 st_id = stid->si_stateownerid;
2431 fp = find_file(ino); 2518 fp = find_file(ino);
2432 if (fp) { 2519 if (!fp)
2433 list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { 2520 return NULL;
2434 if(dp->dl_stateid.si_stateownerid == st_id) { 2521 dl = find_delegation_file(fp, stid);
2435 dprintk("NFSD: find_delegation dp %p\n",dp); 2522 put_nfs4_file(fp);
2436 return dp; 2523 return dl;
2437 }
2438 }
2439 }
2440 return NULL;
2441} 2524}
2442 2525
2443/* 2526/*
@@ -2457,7 +2540,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
2457 lock->fl_end = OFFSET_MAX; 2540 lock->fl_end = OFFSET_MAX;
2458} 2541}
2459 2542
2460int 2543static int
2461nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) 2544nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
2462{ 2545{
2463 struct nfs4_stateowner *local = NULL; 2546 struct nfs4_stateowner *local = NULL;
@@ -2498,22 +2581,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
2498} 2581}
2499 2582
2500static struct nfs4_stateowner * 2583static struct nfs4_stateowner *
2501find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
2502{
2503 struct nfs4_stateowner *local = NULL;
2504 int i;
2505
2506 for (i = 0; i < LOCK_HASH_SIZE; i++) {
2507 list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
2508 if (!cmp_owner_str(local, owner, clid))
2509 continue;
2510 return local;
2511 }
2512 }
2513 return NULL;
2514}
2515
2516static struct nfs4_stateowner *
2517find_lockstateowner_str(struct inode *inode, clientid_t *clid, 2584find_lockstateowner_str(struct inode *inode, clientid_t *clid,
2518 struct xdr_netobj *owner) 2585 struct xdr_netobj *owner)
2519{ 2586{
@@ -2533,7 +2600,6 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
2533 * occured. 2600 * occured.
2534 * 2601 *
2535 * strhashval = lock_ownerstr_hashval 2602 * strhashval = lock_ownerstr_hashval
2536 * so_seqid = lock->lk_new_lock_seqid - 1: it gets bumped in encode
2537 */ 2603 */
2538 2604
2539static struct nfs4_stateowner * 2605static struct nfs4_stateowner *
@@ -2548,17 +2614,17 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
2548 INIT_LIST_HEAD(&sop->so_idhash); 2614 INIT_LIST_HEAD(&sop->so_idhash);
2549 INIT_LIST_HEAD(&sop->so_strhash); 2615 INIT_LIST_HEAD(&sop->so_strhash);
2550 INIT_LIST_HEAD(&sop->so_perclient); 2616 INIT_LIST_HEAD(&sop->so_perclient);
2551 INIT_LIST_HEAD(&sop->so_perfilestate); 2617 INIT_LIST_HEAD(&sop->so_stateids);
2552 INIT_LIST_HEAD(&sop->so_perlockowner); 2618 INIT_LIST_HEAD(&sop->so_perstateid);
2553 INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ 2619 INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
2554 sop->so_time = 0; 2620 sop->so_time = 0;
2555 list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); 2621 list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
2556 list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); 2622 list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
2557 list_add(&sop->so_perlockowner, &open_stp->st_perlockowner); 2623 list_add(&sop->so_perstateid, &open_stp->st_lockowners);
2558 sop->so_is_open_owner = 0; 2624 sop->so_is_open_owner = 0;
2559 sop->so_id = current_ownerid++; 2625 sop->so_id = current_ownerid++;
2560 sop->so_client = clp; 2626 sop->so_client = clp;
2561 sop->so_seqid = lock->lk_new_lock_seqid - 1; 2627 sop->so_seqid = lock->lk_new_lock_seqid;
2562 sop->so_confirmed = 1; 2628 sop->so_confirmed = 1;
2563 rp = &sop->so_replay; 2629 rp = &sop->so_replay;
2564 rp->rp_status = NFSERR_SERVERFAULT; 2630 rp->rp_status = NFSERR_SERVERFAULT;
@@ -2567,24 +2633,24 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
2567 return sop; 2633 return sop;
2568} 2634}
2569 2635
2570struct nfs4_stateid * 2636static struct nfs4_stateid *
2571alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) 2637alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
2572{ 2638{
2573 struct nfs4_stateid *stp; 2639 struct nfs4_stateid *stp;
2574 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); 2640 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
2575 2641
2576 if ((stp = kmalloc(sizeof(struct nfs4_stateid), 2642 stp = nfs4_alloc_stateid();
2577 GFP_KERNEL)) == NULL) 2643 if (stp == NULL)
2578 goto out; 2644 goto out;
2579 INIT_LIST_HEAD(&stp->st_hash); 2645 INIT_LIST_HEAD(&stp->st_hash);
2580 INIT_LIST_HEAD(&stp->st_perfile); 2646 INIT_LIST_HEAD(&stp->st_perfile);
2581 INIT_LIST_HEAD(&stp->st_perfilestate); 2647 INIT_LIST_HEAD(&stp->st_perstateowner);
2582 INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */ 2648 INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
2583 list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); 2649 list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
2584 list_add(&stp->st_perfile, &fp->fi_perfile); 2650 list_add(&stp->st_perfile, &fp->fi_stateids);
2585 list_add_perfile++; 2651 list_add(&stp->st_perstateowner, &sop->so_stateids);
2586 list_add(&stp->st_perfilestate, &sop->so_perfilestate);
2587 stp->st_stateowner = sop; 2652 stp->st_stateowner = sop;
2653 get_nfs4_file(fp);
2588 stp->st_file = fp; 2654 stp->st_file = fp;
2589 stp->st_stateid.si_boot = boot_time; 2655 stp->st_stateid.si_boot = boot_time;
2590 stp->st_stateid.si_stateownerid = sop->so_id; 2656 stp->st_stateid.si_stateownerid = sop->so_id;
@@ -2593,12 +2659,13 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
2593 stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ 2659 stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
2594 stp->st_access_bmap = open_stp->st_access_bmap; 2660 stp->st_access_bmap = open_stp->st_access_bmap;
2595 stp->st_deny_bmap = open_stp->st_deny_bmap; 2661 stp->st_deny_bmap = open_stp->st_deny_bmap;
2662 stp->st_openstp = open_stp;
2596 2663
2597out: 2664out:
2598 return stp; 2665 return stp;
2599} 2666}
2600 2667
2601int 2668static int
2602check_lock_length(u64 offset, u64 length) 2669check_lock_length(u64 offset, u64 length)
2603{ 2670{
2604 return ((length == 0) || ((length != ~(u64)0) && 2671 return ((length == 0) || ((length != ~(u64)0) &&
@@ -2611,7 +2678,7 @@ check_lock_length(u64 offset, u64 length)
2611int 2678int
2612nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) 2679nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
2613{ 2680{
2614 struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL; 2681 struct nfs4_stateowner *open_sop = NULL;
2615 struct nfs4_stateid *lock_stp; 2682 struct nfs4_stateid *lock_stp;
2616 struct file *filp; 2683 struct file *filp;
2617 struct file_lock file_lock; 2684 struct file_lock file_lock;
@@ -2623,22 +2690,17 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2623 (long long) lock->lk_offset, 2690 (long long) lock->lk_offset,
2624 (long long) lock->lk_length); 2691 (long long) lock->lk_length);
2625 2692
2626 if (nfs4_in_grace() && !lock->lk_reclaim)
2627 return nfserr_grace;
2628 if (!nfs4_in_grace() && lock->lk_reclaim)
2629 return nfserr_no_grace;
2630
2631 if (check_lock_length(lock->lk_offset, lock->lk_length)) 2693 if (check_lock_length(lock->lk_offset, lock->lk_length))
2632 return nfserr_inval; 2694 return nfserr_inval;
2633 2695
2634 nfs4_lock_state(); 2696 nfs4_lock_state();
2635 2697
2636 if (lock->lk_is_new) { 2698 if (lock->lk_is_new) {
2637 /* 2699 /*
2638 * Client indicates that this is a new lockowner. 2700 * Client indicates that this is a new lockowner.
2639 * Use open owner and open stateid to create lock owner and lock 2701 * Use open owner and open stateid to create lock owner and
2640 * stateid. 2702 * lock stateid.
2641 */ 2703 */
2642 struct nfs4_stateid *open_stp = NULL; 2704 struct nfs4_stateid *open_stp = NULL;
2643 struct nfs4_file *fp; 2705 struct nfs4_file *fp;
2644 2706
@@ -2648,38 +2710,22 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2648 goto out; 2710 goto out;
2649 } 2711 }
2650 2712
2651 /* is the new lock seqid presented by the client zero? */
2652 status = nfserr_bad_seqid;
2653 if (lock->v.new.lock_seqid != 0)
2654 goto out;
2655
2656 /* validate and update open stateid and open seqid */ 2713 /* validate and update open stateid and open seqid */
2657 status = nfs4_preprocess_seqid_op(current_fh, 2714 status = nfs4_preprocess_seqid_op(current_fh,
2658 lock->lk_new_open_seqid, 2715 lock->lk_new_open_seqid,
2659 &lock->lk_new_open_stateid, 2716 &lock->lk_new_open_stateid,
2660 CHECK_FH | OPEN_STATE, 2717 CHECK_FH | OPEN_STATE,
2661 &open_sop, &open_stp, 2718 &open_sop, &open_stp, lock);
2662 &lock->v.new.clientid); 2719 if (status)
2663 if (status) {
2664 if (lock->lk_reclaim)
2665 status = nfserr_reclaim_bad;
2666 goto out; 2720 goto out;
2667 }
2668 /* create lockowner and lock stateid */ 2721 /* create lockowner and lock stateid */
2669 fp = open_stp->st_file; 2722 fp = open_stp->st_file;
2670 strhashval = lock_ownerstr_hashval(fp->fi_inode, 2723 strhashval = lock_ownerstr_hashval(fp->fi_inode,
2671 open_sop->so_client->cl_clientid.cl_id, 2724 open_sop->so_client->cl_clientid.cl_id,
2672 &lock->v.new.owner); 2725 &lock->v.new.owner);
2673 /* 2726 /* XXX: Do we need to check for duplicate stateowners on
2674 * If we already have this lock owner, the client is in 2727 * the same file, or should they just be allowed (and
2675 * error (or our bookeeping is wrong!) 2728 * create new stateids)? */
2676 * for asking for a 'new lock'.
2677 */
2678 status = nfserr_bad_stateid;
2679 lock_sop = find_lockstateowner(&lock->v.new.owner,
2680 &lock->v.new.clientid);
2681 if (lock_sop)
2682 goto out;
2683 status = nfserr_resource; 2729 status = nfserr_resource;
2684 if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) 2730 if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
2685 goto out; 2731 goto out;
@@ -2697,7 +2743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2697 lock->lk_old_lock_seqid, 2743 lock->lk_old_lock_seqid,
2698 &lock->lk_old_lock_stateid, 2744 &lock->lk_old_lock_stateid,
2699 CHECK_FH | LOCK_STATE, 2745 CHECK_FH | LOCK_STATE,
2700 &lock->lk_stateowner, &lock_stp, NULL); 2746 &lock->lk_stateowner, &lock_stp, lock);
2701 if (status) 2747 if (status)
2702 goto out; 2748 goto out;
2703 } 2749 }
@@ -2709,6 +2755,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2709 goto out; 2755 goto out;
2710 } 2756 }
2711 2757
2758 status = nfserr_grace;
2759 if (nfs4_in_grace() && !lock->lk_reclaim)
2760 goto out;
2761 status = nfserr_no_grace;
2762 if (!nfs4_in_grace() && lock->lk_reclaim)
2763 goto out;
2764
2712 locks_init_lock(&file_lock); 2765 locks_init_lock(&file_lock);
2713 switch (lock->lk_type) { 2766 switch (lock->lk_type) {
2714 case NFS4_READ_LT: 2767 case NFS4_READ_LT:
@@ -2775,10 +2828,10 @@ conflicting_lock:
2775out_destroy_new_stateid: 2828out_destroy_new_stateid:
2776 if (lock->lk_is_new) { 2829 if (lock->lk_is_new) {
2777 dprintk("NFSD: nfsd4_lock: destroy new stateid!\n"); 2830 dprintk("NFSD: nfsd4_lock: destroy new stateid!\n");
2778 /* 2831 /*
2779 * An error encountered after instantiation of the new 2832 * An error encountered after instantiation of the new
2780 * stateid has forced us to destroy it. 2833 * stateid has forced us to destroy it.
2781 */ 2834 */
2782 if (!seqid_mutating_err(status)) 2835 if (!seqid_mutating_err(status))
2783 open_sop->so_seqid--; 2836 open_sop->so_seqid--;
2784 2837
@@ -2970,8 +3023,11 @@ int
2970nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) 3023nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
2971{ 3024{
2972 clientid_t *clid = &rlockowner->rl_clientid; 3025 clientid_t *clid = &rlockowner->rl_clientid;
2973 struct nfs4_stateowner *local = NULL; 3026 struct nfs4_stateowner *sop;
3027 struct nfs4_stateid *stp;
2974 struct xdr_netobj *owner = &rlockowner->rl_owner; 3028 struct xdr_netobj *owner = &rlockowner->rl_owner;
3029 struct list_head matches;
3030 int i;
2975 int status; 3031 int status;
2976 3032
2977 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", 3033 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -2987,22 +3043,37 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
2987 3043
2988 nfs4_lock_state(); 3044 nfs4_lock_state();
2989 3045
2990 status = nfs_ok; 3046 status = nfserr_locks_held;
2991 local = find_lockstateowner(owner, clid); 3047 /* XXX: we're doing a linear search through all the lockowners.
2992 if (local) { 3048 * Yipes! For now we'll just hope clients aren't really using
2993 struct nfs4_stateid *stp; 3049 * release_lockowner much, but eventually we have to fix these
2994 3050 * data structures. */
2995 /* check for any locks held by any stateid 3051 INIT_LIST_HEAD(&matches);
2996 * associated with the (lock) stateowner */ 3052 for (i = 0; i < LOCK_HASH_SIZE; i++) {
2997 status = nfserr_locks_held; 3053 list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
2998 list_for_each_entry(stp, &local->so_perfilestate, 3054 if (!cmp_owner_str(sop, owner, clid))
2999 st_perfilestate) { 3055 continue;
3000 if (check_for_locks(stp->st_vfs_file, local)) 3056 list_for_each_entry(stp, &sop->so_stateids,
3001 goto out; 3057 st_perstateowner) {
3058 if (check_for_locks(stp->st_vfs_file, sop))
3059 goto out;
3060 /* Note: so_perclient unused for lockowners,
3061 * so it's OK to fool with here. */
3062 list_add(&sop->so_perclient, &matches);
3063 }
3002 } 3064 }
3003 /* no locks held by (lock) stateowner */ 3065 }
3004 status = nfs_ok; 3066 /* Clients probably won't expect us to return with some (but not all)
3005 release_stateowner(local); 3067 * of the lockowner state released; so don't release any until all
3068 * have been checked. */
3069 status = nfs_ok;
3070 while (!list_empty(&matches)) {
3071 sop = list_entry(matches.next, struct nfs4_stateowner,
3072 so_perclient);
3073 /* unhash_stateowner deletes so_perclient only
3074 * for openowners. */
3075 list_del(&sop->so_perclient);
3076 release_stateowner(sop);
3006 } 3077 }
3007out: 3078out:
3008 nfs4_unlock_state(); 3079 nfs4_unlock_state();
@@ -3010,39 +3081,38 @@ out:
3010} 3081}
3011 3082
3012static inline struct nfs4_client_reclaim * 3083static inline struct nfs4_client_reclaim *
3013alloc_reclaim(int namelen) 3084alloc_reclaim(void)
3014{ 3085{
3015 struct nfs4_client_reclaim *crp = NULL; 3086 return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
3087}
3016 3088
3017 crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); 3089int
3018 if (!crp) 3090nfs4_has_reclaimed_state(const char *name)
3019 return NULL; 3091{
3020 crp->cr_name.data = kmalloc(namelen, GFP_KERNEL); 3092 unsigned int strhashval = clientstr_hashval(name);
3021 if (!crp->cr_name.data) { 3093 struct nfs4_client *clp;
3022 kfree(crp); 3094
3023 return NULL; 3095 clp = find_confirmed_client_by_str(name, strhashval);
3024 } 3096 return clp ? 1 : 0;
3025 return crp;
3026} 3097}
3027 3098
3028/* 3099/*
3029 * failure => all reset bets are off, nfserr_no_grace... 3100 * failure => all reset bets are off, nfserr_no_grace...
3030 */ 3101 */
3031static int 3102int
3032nfs4_client_to_reclaim(char *name, int namlen) 3103nfs4_client_to_reclaim(const char *name)
3033{ 3104{
3034 unsigned int strhashval; 3105 unsigned int strhashval;
3035 struct nfs4_client_reclaim *crp = NULL; 3106 struct nfs4_client_reclaim *crp = NULL;
3036 3107
3037 dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name); 3108 dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
3038 crp = alloc_reclaim(namlen); 3109 crp = alloc_reclaim();
3039 if (!crp) 3110 if (!crp)
3040 return 0; 3111 return 0;
3041 strhashval = clientstr_hashval(name, namlen); 3112 strhashval = clientstr_hashval(name);
3042 INIT_LIST_HEAD(&crp->cr_strhash); 3113 INIT_LIST_HEAD(&crp->cr_strhash);
3043 list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); 3114 list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
3044 memcpy(crp->cr_name.data, name, namlen); 3115 memcpy(crp->cr_recdir, name, HEXDIR_LEN);
3045 crp->cr_name.len = namlen;
3046 reclaim_str_hashtbl_size++; 3116 reclaim_str_hashtbl_size++;
3047 return 1; 3117 return 1;
3048} 3118}
@@ -3053,13 +3123,11 @@ nfs4_release_reclaim(void)
3053 struct nfs4_client_reclaim *crp = NULL; 3123 struct nfs4_client_reclaim *crp = NULL;
3054 int i; 3124 int i;
3055 3125
3056 BUG_ON(!nfs4_reclaim_init);
3057 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3126 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3058 while (!list_empty(&reclaim_str_hashtbl[i])) { 3127 while (!list_empty(&reclaim_str_hashtbl[i])) {
3059 crp = list_entry(reclaim_str_hashtbl[i].next, 3128 crp = list_entry(reclaim_str_hashtbl[i].next,
3060 struct nfs4_client_reclaim, cr_strhash); 3129 struct nfs4_client_reclaim, cr_strhash);
3061 list_del(&crp->cr_strhash); 3130 list_del(&crp->cr_strhash);
3062 kfree(crp->cr_name.data);
3063 kfree(crp); 3131 kfree(crp);
3064 reclaim_str_hashtbl_size--; 3132 reclaim_str_hashtbl_size--;
3065 } 3133 }
@@ -3069,7 +3137,7 @@ nfs4_release_reclaim(void)
3069 3137
3070/* 3138/*
3071 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ 3139 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
3072struct nfs4_client_reclaim * 3140static struct nfs4_client_reclaim *
3073nfs4_find_reclaim_client(clientid_t *clid) 3141nfs4_find_reclaim_client(clientid_t *clid)
3074{ 3142{
3075 unsigned int strhashval; 3143 unsigned int strhashval;
@@ -3082,13 +3150,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
3082 if (clp == NULL) 3150 if (clp == NULL)
3083 return NULL; 3151 return NULL;
3084 3152
3085 dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n", 3153 dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
3086 clp->cl_name.len, clp->cl_name.data); 3154 clp->cl_name.len, clp->cl_name.data,
3155 clp->cl_recdir);
3087 3156
3088 /* find clp->cl_name in reclaim_str_hashtbl */ 3157 /* find clp->cl_name in reclaim_str_hashtbl */
3089 strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len); 3158 strhashval = clientstr_hashval(clp->cl_recdir);
3090 list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { 3159 list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
3091 if (cmp_name(&crp->cr_name, &clp->cl_name)) { 3160 if (same_name(crp->cr_recdir, clp->cl_recdir)) {
3092 return crp; 3161 return crp;
3093 } 3162 }
3094 } 3163 }
@@ -3101,30 +3170,16 @@ nfs4_find_reclaim_client(clientid_t *clid)
3101int 3170int
3102nfs4_check_open_reclaim(clientid_t *clid) 3171nfs4_check_open_reclaim(clientid_t *clid)
3103{ 3172{
3104 struct nfs4_client_reclaim *crp; 3173 return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
3105
3106 if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
3107 return nfserr_reclaim_bad;
3108 return nfs_ok;
3109} 3174}
3110 3175
3176/* initialization to perform at module load time: */
3111 3177
3112/* 3178void
3113 * Start and stop routines 3179nfs4_state_init(void)
3114 */
3115
3116static void
3117__nfs4_state_init(void)
3118{ 3180{
3119 int i; 3181 int i;
3120 time_t grace_time;
3121 3182
3122 if (!nfs4_reclaim_init) {
3123 for (i = 0; i < CLIENT_HASH_SIZE; i++)
3124 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3125 reclaim_str_hashtbl_size = 0;
3126 nfs4_reclaim_init = 1;
3127 }
3128 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3183 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3129 INIT_LIST_HEAD(&conf_id_hashtbl[i]); 3184 INIT_LIST_HEAD(&conf_id_hashtbl[i]);
3130 INIT_LIST_HEAD(&conf_str_hashtbl[i]); 3185 INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3146,26 +3201,46 @@ __nfs4_state_init(void)
3146 INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); 3201 INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
3147 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); 3202 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
3148 } 3203 }
3149 memset(&zerostateid, 0, sizeof(stateid_t));
3150 memset(&onestateid, ~0, sizeof(stateid_t)); 3204 memset(&onestateid, ~0, sizeof(stateid_t));
3151
3152 INIT_LIST_HEAD(&close_lru); 3205 INIT_LIST_HEAD(&close_lru);
3153 INIT_LIST_HEAD(&client_lru); 3206 INIT_LIST_HEAD(&client_lru);
3154 INIT_LIST_HEAD(&del_recall_lru); 3207 INIT_LIST_HEAD(&del_recall_lru);
3155 spin_lock_init(&recall_lock); 3208 for (i = 0; i < CLIENT_HASH_SIZE; i++)
3209 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3210 reclaim_str_hashtbl_size = 0;
3211}
3212
3213static void
3214nfsd4_load_reboot_recovery_data(void)
3215{
3216 int status;
3217
3218 nfs4_lock_state();
3219 nfsd4_init_recdir(user_recovery_dirname);
3220 status = nfsd4_recdir_load();
3221 nfs4_unlock_state();
3222 if (status)
3223 printk("NFSD: Failure reading reboot recovery data\n");
3224}
3225
3226/* initialization to perform when the nfsd service is started: */
3227
3228static void
3229__nfs4_state_start(void)
3230{
3231 time_t grace_time;
3232
3156 boot_time = get_seconds(); 3233 boot_time = get_seconds();
3157 grace_time = max(old_lease_time, lease_time); 3234 grace_time = max(user_lease_time, lease_time);
3158 if (reclaim_str_hashtbl_size == 0) 3235 lease_time = user_lease_time;
3159 grace_time = 0; 3236 in_grace = 1;
3160 if (grace_time) 3237 printk("NFSD: starting %ld-second grace period\n", grace_time);
3161 printk("NFSD: starting %ld-second grace period\n", grace_time); 3238 laundry_wq = create_singlethread_workqueue("nfsd4");
3162 grace_end = boot_time + grace_time; 3239 queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
3163 INIT_WORK(&laundromat_work,laundromat_main, NULL);
3164 schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
3165} 3240}
3166 3241
3167int 3242int
3168nfs4_state_init(void) 3243nfs4_state_start(void)
3169{ 3244{
3170 int status; 3245 int status;
3171 3246
@@ -3174,7 +3249,8 @@ nfs4_state_init(void)
3174 status = nfsd4_init_slabs(); 3249 status = nfsd4_init_slabs();
3175 if (status) 3250 if (status)
3176 return status; 3251 return status;
3177 __nfs4_state_init(); 3252 nfsd4_load_reboot_recovery_data();
3253 __nfs4_state_start();
3178 nfs4_init = 1; 3254 nfs4_init = 1;
3179 return 0; 3255 return 0;
3180} 3256}
@@ -3182,14 +3258,7 @@ nfs4_state_init(void)
3182int 3258int
3183nfs4_in_grace(void) 3259nfs4_in_grace(void)
3184{ 3260{
3185 return get_seconds() < grace_end; 3261 return in_grace;
3186}
3187
3188void
3189set_no_grace(void)
3190{
3191 printk("NFSD: ERROR in reboot recovery. State reclaims will fail.\n");
3192 grace_end = get_seconds();
3193} 3262}
3194 3263
3195time_t 3264time_t
@@ -3236,21 +3305,11 @@ __nfs4_state_shutdown(void)
3236 unhash_delegation(dp); 3305 unhash_delegation(dp);
3237 } 3306 }
3238 3307
3239 release_all_files();
3240 cancel_delayed_work(&laundromat_work); 3308 cancel_delayed_work(&laundromat_work);
3241 flush_scheduled_work(); 3309 flush_workqueue(laundry_wq);
3310 destroy_workqueue(laundry_wq);
3311 nfsd4_shutdown_recdir();
3242 nfs4_init = 0; 3312 nfs4_init = 0;
3243 dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
3244 list_add_perfile, list_del_perfile);
3245 dprintk("NFSD: add_perclient %d del_perclient %d\n",
3246 add_perclient, del_perclient);
3247 dprintk("NFSD: alloc_file %d free_file %d\n",
3248 alloc_file, free_file);
3249 dprintk("NFSD: vfsopen %d vfsclose %d\n",
3250 vfsopen, vfsclose);
3251 dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
3252 alloc_delegation, free_delegation);
3253
3254} 3313}
3255 3314
3256void 3315void
@@ -3263,56 +3322,48 @@ nfs4_state_shutdown(void)
3263 nfs4_unlock_state(); 3322 nfs4_unlock_state();
3264} 3323}
3265 3324
3325static void
3326nfs4_set_recdir(char *recdir)
3327{
3328 nfs4_lock_state();
3329 strcpy(user_recovery_dirname, recdir);
3330 nfs4_unlock_state();
3331}
3332
3333/*
3334 * Change the NFSv4 recovery directory to recdir.
3335 */
3336int
3337nfs4_reset_recoverydir(char *recdir)
3338{
3339 int status;
3340 struct nameidata nd;
3341
3342 status = path_lookup(recdir, LOOKUP_FOLLOW, &nd);
3343 if (status)
3344 return status;
3345 status = -ENOTDIR;
3346 if (S_ISDIR(nd.dentry->d_inode->i_mode)) {
3347 nfs4_set_recdir(recdir);
3348 status = 0;
3349 }
3350 path_release(&nd);
3351 return status;
3352}
3353
3266/* 3354/*
3267 * Called when leasetime is changed. 3355 * Called when leasetime is changed.
3268 * 3356 *
3269 * if nfsd is not started, simply set the global lease. 3357 * The only way the protocol gives us to handle on-the-fly lease changes is to
3270 * 3358 * simulate a reboot. Instead of doing that, we just wait till the next time
3271 * if nfsd(s) are running, lease change requires nfsv4 state to be reset. 3359 * we start to register any changes in lease time. If the administrator
3272 * e.g: boot_time is reset, existing nfs4_client structs are 3360 * really wants to change the lease time *now*, they can go ahead and bring
3273 * used to fill reclaim_str_hashtbl, then all state (except for the 3361 * nfsd down and then back up again after changing the lease time.
3274 * reclaim_str_hashtbl) is re-initialized.
3275 *
3276 * if the old lease time is greater than the new lease time, the grace
3277 * period needs to be set to the old lease time to allow clients to reclaim
3278 * their state. XXX - we may want to set the grace period == lease time
3279 * after an initial grace period == old lease time
3280 *
3281 * if an error occurs in this process, the new lease is set, but the server
3282 * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
3283 * which means OPEN/LOCK/READ/WRITE will fail during grace period.
3284 *
3285 * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
3286 * OPEN and LOCK reclaims.
3287 */ 3362 */
3288void 3363void
3289nfs4_reset_lease(time_t leasetime) 3364nfs4_reset_lease(time_t leasetime)
3290{ 3365{
3291 struct nfs4_client *clp; 3366 lock_kernel();
3292 int i; 3367 user_lease_time = leasetime;
3293 3368 unlock_kernel();
3294 printk("NFSD: New leasetime %ld\n",leasetime);
3295 if (!nfs4_init)
3296 return;
3297 nfs4_lock_state();
3298 old_lease_time = lease_time;
3299 lease_time = leasetime;
3300
3301 nfs4_release_reclaim();
3302
3303 /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
3304 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3305 list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
3306 if (!nfs4_client_to_reclaim(clp->cl_name.data,
3307 clp->cl_name.len)) {
3308 nfs4_release_reclaim();
3309 goto init_state;
3310 }
3311 }
3312 }
3313init_state:
3314 __nfs4_state_shutdown();
3315 __nfs4_state_init();
3316 nfs4_unlock_state();
3317} 3369}
3318
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 36a058a112d5..4c4146350236 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -136,7 +136,7 @@ xdr_error: \
136 } \ 136 } \
137} while (0) 137} while (0)
138 138
139u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) 139static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
140{ 140{
141 /* We want more bytes than seem to be available. 141 /* We want more bytes than seem to be available.
142 * Maybe we need a new page, maybe we have just run out 142 * Maybe we need a new page, maybe we have just run out
@@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp,
190 return 0; 190 return 0;
191} 191}
192 192
193char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) 193static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
194{ 194{
195 void *new = NULL; 195 void *new = NULL;
196 if (p == argp->tmp) { 196 if (p == argp->tmp) {
@@ -1210,16 +1210,15 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1210 save = resp->p; 1210 save = resp->p;
1211 1211
1212/* 1212/*
1213 * Routine for encoding the result of a 1213 * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This
1214 * "seqid-mutating" NFSv4 operation. This is 1214 * is where sequence id's are incremented, and the replay cache is filled.
1215 * where seqids are incremented, and the 1215 * Note that we increment sequence id's here, at the last moment, so we're sure
1216 * replay cache is filled. 1216 * we know whether the error to be returned is a sequence id mutating error.
1217 */ 1217 */
1218 1218
1219#define ENCODE_SEQID_OP_TAIL(stateowner) do { \ 1219#define ENCODE_SEQID_OP_TAIL(stateowner) do { \
1220 if (seqid_mutating_err(nfserr) && stateowner) { \ 1220 if (seqid_mutating_err(nfserr) && stateowner) { \
1221 if (stateowner->so_confirmed) \ 1221 stateowner->so_seqid++; \
1222 stateowner->so_seqid++; \
1223 stateowner->so_replay.rp_status = nfserr; \ 1222 stateowner->so_replay.rp_status = nfserr; \
1224 stateowner->so_replay.rp_buflen = \ 1223 stateowner->so_replay.rp_buflen = \
1225 (((char *)(resp)->p - (char *)save)); \ 1224 (((char *)(resp)->p - (char *)save)); \
@@ -1366,7 +1365,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1366 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { 1365 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
1367 if ((buflen -= 4) < 0) 1366 if ((buflen -= 4) < 0)
1368 goto out_resource; 1367 goto out_resource;
1369 WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME ); 1368 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
1369 WRITE32(NFS4_FH_PERSISTENT);
1370 else
1371 WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
1370 } 1372 }
1371 if (bmval0 & FATTR4_WORD0_CHANGE) { 1373 if (bmval0 & FATTR4_WORD0_CHANGE) {
1372 /* 1374 /*
@@ -1969,7 +1971,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
1969 case NFS4_OPEN_DELEGATE_READ: 1971 case NFS4_OPEN_DELEGATE_READ:
1970 RESERVE_SPACE(20 + sizeof(stateid_t)); 1972 RESERVE_SPACE(20 + sizeof(stateid_t));
1971 WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); 1973 WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
1972 WRITE32(0); 1974 WRITE32(open->op_recall);
1973 1975
1974 /* 1976 /*
1975 * TODO: ACE's in delegations 1977 * TODO: ACE's in delegations
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 161afdcb8f7d..841c562991e8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -51,6 +51,7 @@ enum {
51 NFSD_Fh, 51 NFSD_Fh,
52 NFSD_Threads, 52 NFSD_Threads,
53 NFSD_Leasetime, 53 NFSD_Leasetime,
54 NFSD_RecoveryDir,
54}; 55};
55 56
56/* 57/*
@@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size);
66static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 67static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
67static ssize_t write_threads(struct file *file, char *buf, size_t size); 68static ssize_t write_threads(struct file *file, char *buf, size_t size);
68static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 69static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
70static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
69 71
70static ssize_t (*write_op[])(struct file *, char *, size_t) = { 72static ssize_t (*write_op[])(struct file *, char *, size_t) = {
71 [NFSD_Svc] = write_svc, 73 [NFSD_Svc] = write_svc,
@@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
78 [NFSD_Fh] = write_filehandle, 80 [NFSD_Fh] = write_filehandle,
79 [NFSD_Threads] = write_threads, 81 [NFSD_Threads] = write_threads,
80 [NFSD_Leasetime] = write_leasetime, 82 [NFSD_Leasetime] = write_leasetime,
83 [NFSD_RecoveryDir] = write_recoverydir,
81}; 84};
82 85
83static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) 86static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
349 return strlen(buf); 352 return strlen(buf);
350} 353}
351 354
355static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
356{
357 char *mesg = buf;
358 char *recdir;
359 int len, status;
360
361 if (size > PATH_MAX || buf[size-1] != '\n')
362 return -EINVAL;
363 buf[size-1] = 0;
364
365 recdir = mesg;
366 len = qword_get(&mesg, recdir, size);
367 if (len <= 0)
368 return -EINVAL;
369
370 status = nfs4_reset_recoverydir(recdir);
371 return strlen(buf);
372}
373
352/*----------------------------------------------------------------------------*/ 374/*----------------------------------------------------------------------------*/
353/* 375/*
354 * populating the filesystem. 376 * populating the filesystem.
@@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
369 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, 391 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
370#ifdef CONFIG_NFSD_V4 392#ifdef CONFIG_NFSD_V4
371 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 393 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
394 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
372#endif 395#endif
373 /* last one */ {""} 396 /* last one */ {""}
374 }; 397 };
@@ -397,9 +420,8 @@ static int __init init_nfsd(void)
397 nfsd_cache_init(); /* RPC reply cache */ 420 nfsd_cache_init(); /* RPC reply cache */
398 nfsd_export_init(); /* Exports table */ 421 nfsd_export_init(); /* Exports table */
399 nfsd_lockd_init(); /* lockd->nfsd callbacks */ 422 nfsd_lockd_init(); /* lockd->nfsd callbacks */
400#ifdef CONFIG_NFSD_V4 423 nfs4_state_init(); /* NFSv4 locking state */
401 nfsd_idmap_init(); /* Name to ID mapping */ 424 nfsd_idmap_init(); /* Name to ID mapping */
402#endif /* CONFIG_NFSD_V4 */
403 if (proc_mkdir("fs/nfs", NULL)) { 425 if (proc_mkdir("fs/nfs", NULL)) {
404 struct proc_dir_entry *entry; 426 struct proc_dir_entry *entry;
405 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 427 entry = create_proc_entry("fs/nfs/exports", 0, NULL);
@@ -426,9 +448,7 @@ static void __exit exit_nfsd(void)
426 remove_proc_entry("fs/nfs", NULL); 448 remove_proc_entry("fs/nfs", NULL);
427 nfsd_stat_shutdown(); 449 nfsd_stat_shutdown();
428 nfsd_lockd_shutdown(); 450 nfsd_lockd_shutdown();
429#ifdef CONFIG_NFSD_V4
430 nfsd_idmap_shutdown(); 451 nfsd_idmap_shutdown();
431#endif /* CONFIG_NFSD_V4 */
432 unregister_filesystem(&nfsd_fs_type); 452 unregister_filesystem(&nfsd_fs_type);
433} 453}
434 454
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 904df604e86b..1697539a7171 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -95,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs)
95 error = nfsd_racache_init(2*nrservs); 95 error = nfsd_racache_init(2*nrservs);
96 if (error<0) 96 if (error<0)
97 goto out; 97 goto out;
98 error = nfs4_state_init(); 98 error = nfs4_state_start();
99 if (error<0) 99 if (error<0)
100 goto out; 100 goto out;
101 if (!nfsd_serv) { 101 if (!nfsd_serv) {
@@ -287,6 +287,7 @@ out:
287 svc_exit_thread(rqstp); 287 svc_exit_thread(rqstp);
288 288
289 /* Release module */ 289 /* Release module */
290 unlock_kernel();
290 module_put_and_exit(0); 291 module_put_and_exit(0);
291} 292}
292 293
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ae3940dc85cc..4f2cd3d27566 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -45,12 +45,10 @@
45#endif /* CONFIG_NFSD_V3 */ 45#endif /* CONFIG_NFSD_V3 */
46#include <linux/nfsd/nfsfh.h> 46#include <linux/nfsd/nfsfh.h>
47#include <linux/quotaops.h> 47#include <linux/quotaops.h>
48#include <linux/dnotify.h> 48#include <linux/fsnotify.h>
49#include <linux/xattr_acl.h>
50#include <linux/posix_acl.h> 49#include <linux/posix_acl.h>
51#ifdef CONFIG_NFSD_V4
52#include <linux/posix_acl_xattr.h> 50#include <linux/posix_acl_xattr.h>
53#include <linux/xattr_acl.h> 51#ifdef CONFIG_NFSD_V4
54#include <linux/xattr.h> 52#include <linux/xattr.h>
55#include <linux/nfs4.h> 53#include <linux/nfs4.h>
56#include <linux/nfs4_acl.h> 54#include <linux/nfs4_acl.h>
@@ -425,13 +423,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
425 goto out_nfserr; 423 goto out_nfserr;
426 424
427 if (pacl) { 425 if (pacl) {
428 error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); 426 error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
429 if (error < 0) 427 if (error < 0)
430 goto out_nfserr; 428 goto out_nfserr;
431 } 429 }
432 430
433 if (dpacl) { 431 if (dpacl) {
434 error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); 432 error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
435 if (error < 0) 433 if (error < 0)
436 goto out_nfserr; 434 goto out_nfserr;
437 } 435 }
@@ -498,7 +496,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
498 struct posix_acl *pacl = NULL, *dpacl = NULL; 496 struct posix_acl *pacl = NULL, *dpacl = NULL;
499 unsigned int flags = 0; 497 unsigned int flags = 0;
500 498
501 pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); 499 pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
502 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) 500 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
503 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 501 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
504 if (IS_ERR(pacl)) { 502 if (IS_ERR(pacl)) {
@@ -508,7 +506,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
508 } 506 }
509 507
510 if (S_ISDIR(inode->i_mode)) { 508 if (S_ISDIR(inode->i_mode)) {
511 dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); 509 dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
512 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) 510 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
513 dpacl = NULL; 511 dpacl = NULL;
514 else if (IS_ERR(dpacl)) { 512 else if (IS_ERR(dpacl)) {
@@ -735,7 +733,7 @@ nfsd_sync(struct file *filp)
735 up(&inode->i_sem); 733 up(&inode->i_sem);
736} 734}
737 735
738static void 736void
739nfsd_sync_dir(struct dentry *dp) 737nfsd_sync_dir(struct dentry *dp)
740{ 738{
741 nfsd_dosync(NULL, dp, dp->d_inode->i_fop); 739 nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
@@ -862,7 +860,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
862 nfsdstats.io_read += err; 860 nfsdstats.io_read += err;
863 *count = err; 861 *count = err;
864 err = 0; 862 err = 0;
865 dnotify_parent(file->f_dentry, DN_ACCESS); 863 fsnotify_access(file->f_dentry);
866 } else 864 } else
867 err = nfserrno(err); 865 err = nfserrno(err);
868out: 866out:
@@ -918,7 +916,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
918 set_fs(oldfs); 916 set_fs(oldfs);
919 if (err >= 0) { 917 if (err >= 0) {
920 nfsdstats.io_write += cnt; 918 nfsdstats.io_write += cnt;
921 dnotify_parent(file->f_dentry, DN_MODIFY); 919 fsnotify_modify(file->f_dentry);
922 } 920 }
923 921
924 /* clear setuid/setgid flag after write */ 922 /* clear setuid/setgid flag after write */
@@ -1873,10 +1871,10 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
1873 return ERR_PTR(-EOPNOTSUPP); 1871 return ERR_PTR(-EOPNOTSUPP);
1874 switch(type) { 1872 switch(type) {
1875 case ACL_TYPE_ACCESS: 1873 case ACL_TYPE_ACCESS:
1876 name = XATTR_NAME_ACL_ACCESS; 1874 name = POSIX_ACL_XATTR_ACCESS;
1877 break; 1875 break;
1878 case ACL_TYPE_DEFAULT: 1876 case ACL_TYPE_DEFAULT:
1879 name = XATTR_NAME_ACL_DEFAULT; 1877 name = POSIX_ACL_XATTR_DEFAULT;
1880 break; 1878 break;
1881 default: 1879 default:
1882 return ERR_PTR(-EOPNOTSUPP); 1880 return ERR_PTR(-EOPNOTSUPP);
@@ -1920,17 +1918,17 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
1920 return -EOPNOTSUPP; 1918 return -EOPNOTSUPP;
1921 switch(type) { 1919 switch(type) {
1922 case ACL_TYPE_ACCESS: 1920 case ACL_TYPE_ACCESS:
1923 name = XATTR_NAME_ACL_ACCESS; 1921 name = POSIX_ACL_XATTR_ACCESS;
1924 break; 1922 break;
1925 case ACL_TYPE_DEFAULT: 1923 case ACL_TYPE_DEFAULT:
1926 name = XATTR_NAME_ACL_DEFAULT; 1924 name = POSIX_ACL_XATTR_DEFAULT;
1927 break; 1925 break;
1928 default: 1926 default:
1929 return -EOPNOTSUPP; 1927 return -EOPNOTSUPP;
1930 } 1928 }
1931 1929
1932 if (acl && acl->a_count) { 1930 if (acl && acl->a_count) {
1933 size = xattr_acl_size(acl->a_count); 1931 size = posix_acl_xattr_size(acl->a_count);
1934 value = kmalloc(size, GFP_KERNEL); 1932 value = kmalloc(size, GFP_KERNEL);
1935 if (!value) 1933 if (!value)
1936 return -ENOMEM; 1934 return -ENOMEM;
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 1d2ad15f1533..9eecc9939dfe 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -1,21 +1,18 @@
1ToDo/Notes: 1ToDo/Notes:
2 - Find and fix bugs. 2 - Find and fix bugs.
3 - Checkpoint or disable the user space journal ($UsnJrnl).
4 - In between ntfs_prepare/commit_write, need exclusion between 3 - In between ntfs_prepare/commit_write, need exclusion between
5 simultaneous file extensions. Need perhaps an NInoResizeUnderway() 4 simultaneous file extensions. This is given to us by holding i_sem
6 flag which we can set in ntfs_prepare_write() and clear again in 5 on the inode. The only places in the kernel when a file is resized
7 ntfs_commit_write(). Just have to be careful in readpage/writepage, 6 are prepare/commit write and truncate for both of which i_sem is
8 as well as in truncate, that we play nice... We might need to have 7 held. Just have to be careful in readpage/writepage and all other
9 a data_size field in the ntfs_inode to store the real attribute 8 helpers not running under i_sem that we play nice...
10 length. Also need to be careful with initialized_size extention in 9 Also need to be careful with initialized_size extention in
11 ntfs_prepare_write. Basically, just be _very_ careful in this code... 10 ntfs_prepare_write. Basically, just be _very_ careful in this code...
12 OTOH, perhaps i_sem, which is held accross generic_file_write is 11 UPDATE: The only things that need to be checked are read/writepage
13 sufficient for synchronisation here. We then just need to make sure 12 which do not hold i_sem. Note writepage cannot change i_size but it
14 ntfs_readpage/writepage/truncate interoperate properly with us. 13 needs to cope with a concurrent i_size change, just like readpage.
15 UPDATE: The above is all ok as it is due to i_sem held. The only 14 Also both need to cope with concurrent changes to the other sizes,
16 thing that needs to be checked is ntfs_writepage() which does not 15 i.e. initialized/allocated/compressed size, as well.
17 hold i_sem. It cannot change i_size but it needs to cope with a
18 concurrent i_size change.
19 - Implement mft.c::sync_mft_mirror_umount(). We currently will just 16 - Implement mft.c::sync_mft_mirror_umount(). We currently will just
20 leave the volume dirty on umount if the final iput(vol->mft_ino) 17 leave the volume dirty on umount if the final iput(vol->mft_ino)
21 causes a write of any mirrored mft records due to the mft mirror 18 causes a write of any mirrored mft records due to the mft mirror
@@ -25,12 +22,161 @@ ToDo/Notes:
25 - Enable the code for setting the NT4 compatibility flag when we start 22 - Enable the code for setting the NT4 compatibility flag when we start
26 making NTFS 1.2 specific modifications. 23 making NTFS 1.2 specific modifications.
27 24
282.1.23-WIP 252.1.23 - Implement extension of resident files and make writing safe as well as
26 many bug fixes, cleanups, and enhancements...
29 27
30 - Add printk rate limiting for ntfs_warning() and ntfs_error() when 28 - Add printk rate limiting for ntfs_warning() and ntfs_error() when
31 compiled without debug. This avoids a possible denial of service 29 compiled without debug. This avoids a possible denial of service
32 attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this 30 attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this
33 out. 31 out.
32 - Fix compilation warnings on ia64. (Randy Dunlap)
33 - Use i_size_{read,write}() instead of reading i_size by hand and cache
34 the value where apropriate.
35 - Add size_lock to the ntfs_inode structure. This is an rw spinlock
36 and it locks against access to the inode sizes. Note, ->size_lock
37 is also accessed from irq context so you must use the _irqsave and
38 _irqrestore lock and unlock functions, respectively. Protect all
39 accesses to allocated_size, initialized_size, and compressed_size.
40 - Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers.
41 - Implement extension of resident files in the regular file write code
42 paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()). At present
43 this only works until the data attribute becomes too big for the mft
44 record after which we abort the write returning -EOPNOTSUPP from
45 ntfs_prepare_write().
46 - Add disable_sparse mount option together with a per volume sparse
47 enable bit which is set appropriately and a per inode sparse disable
48 bit which is preset on some system file inodes as appropriate.
49 - Enforce that sparse support is disabled on NTFS volumes pre 3.0.
50 - Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
51 the creation of the unmapped runlist element for the base attribute
52 extent.
53 - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
54 helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
55 This allows us to map runlist fragments with the runlist lock already
56 held without having to drop and reacquire it around the call. Adapt
57 all callers.
58 - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
59 runlist. This allows us to find runlist elements with the runlist
60 lock already held without having to drop and reacquire it around the
61 call. Adapt all callers.
62 - Change time to u64 in time.h::ntfs2utc() as it otherwise generates a
63 warning in the do_div() call on sparc32. Thanks to Meelis Roos for
64 the report and analysis of the warning.
65 - Fix a nasty runlist merge bug when merging two holes.
66 - Set the ntfs_inode->allocated_size to the real allocated size in the
67 mft record for resident attributes (fs/ntfs/inode.c).
68 - Small readability cleanup to use "a" instead of "ctx->attr"
69 everywhere (fs/ntfs/inode.c).
70 - Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the
71 definition of ntfs_export_ops from fs/ntfs/super.c to namei.c. Also,
72 declare ntfs_export_ops in fs/ntfs/ntfs.h.
73 - Correct sparse file handling. The compressed values need to be
74 checked and set in the ntfs inode as done for compressed files and
75 the compressed size needs to be used for vfs inode->i_blocks instead
76 of the allocated size, again, as done for compressed files.
77 - Add AT_EA in addition to AT_DATA to whitelist for being allowed to be
78 non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident().
79 - Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new
80 write code.
81 - Fix bug in fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after
82 dropping the read lock and taking the write lock we were not checking
83 whether someone else did not already do the work we wanted to do.
84 - Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to
85 ntfs_attr_find_vcn_nolock() and update all callers.
86 - Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident().
87 - Fix sign of various error return values to be negative in
88 fs/ntfs/lcnalloc.c.
89 - Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and
90 handle the case where an attribute is converted from resident to
91 non-resident by a concurrent file write.
92 - Remove checks for NULL before calling kfree() since kfree() does the
93 checking itself. (Jesper Juhl)
94 - Some utilities modify the boot sector but do not update the checksum.
95 Thus, relax the checking in fs/ntfs/super.c::is_boot_sector_ntfs() to
96 only emit a warning when the checksum is incorrect rather than
97 refusing the mount. Thanks to Bernd Casimir for pointing this
98 problem out.
99 - Update attribute definition handling.
100 - Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants.
101 - Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000.
102 - Use MAX_BUF_PER_PAGE instead of variable sized array allocation for
103 better code generation and one less sparse warning in fs/ntfs/aops.c.
104 - Remove spurious void pointer casts from fs/ntfs/. (Pekka Enberg)
105 - Use C99 style structure initialization after memory allocation where
106 possible (fs/ntfs/{attrib.c,index.c,super.c}). Thanks to Al Viro and
107 Pekka Enberg.
108 - Stamp the transaction log ($UsnJrnl), aka user space journal, if it
109 is active on the volume and we are mounting read-write or remounting
110 from read-only to read-write.
111 - Fix a bug in address space operations error recovery code paths where
112 if the runlist was not mapped at all and a mapping error occured we
113 would leave the runlist locked on exit to the function so that the
114 next access to the same file would try to take the lock and deadlock.
115 - Detect the case when Windows has been suspended to disk on the volume
116 to be mounted and if this is the case do not allow (re)mounting
117 read-write. This is done by parsing hiberfil.sys if present.
118 - Fix several occurences of a bug where we would perform 'var & ~const'
119 with a 64-bit variable and a int, i.e. 32-bit, constant. This causes
120 the higher order 32-bits of the 64-bit variable to be zeroed. To fix
121 this cast the 'const' to the same 64-bit type as 'var'.
122 - Change the runlist terminator of the newly allocated cluster(s) to
123 LCN_ENOENT in ntfs_attr_make_non_resident(). Otherwise the runlist
124 code gets confused.
125 - Add an extra parameter @last_vcn to ntfs_get_size_for_mapping_pairs()
126 and ntfs_mapping_pairs_build() to allow the runlist encoding to be
127 partial which is desirable when filling holes in sparse attributes.
128 Update all callers.
129 - Change ntfs_map_runlist_nolock() to only decompress the mapping pairs
130 if the requested vcn is inside it. Otherwise we get into problems
131 when we try to map an out of bounds vcn because we then try to map
132 the already mapped runlist fragment which causes
133 ntfs_mapping_pairs_decompress() to fail and return error. Update
134 ntfs_attr_find_vcn_nolock() accordingly.
135 - Fix a nasty deadlock that appeared in recent kernels.
136 The situation: VFS inode X on a mounted ntfs volume is dirty. For
137 same inode X, the ntfs_inode is dirty and thus corresponding on-disk
138 inode, i.e. mft record, which is in a dirty PAGE_CACHE_PAGE belonging
139 to the table of inodes, i.e. $MFT, inode 0.
140 What happens:
141 Process 1: sys_sync()/umount()/whatever... calls
142 __sync_single_inode() for $MFT -> do_writepages() -> write_page for
143 the dirty page containing the on-disk inode X, the page is now locked
144 -> ntfs_write_mst_block() which clears PageUptodate() on the page to
145 prevent anyone else getting hold of it whilst it does the write out.
146 This is necessary as the on-disk inode needs "fixups" applied before
147 the write to disk which are removed again after the write and
148 PageUptodate is then set again. It then analyses the page looking
149 for dirty on-disk inodes and when it finds one it calls
150 ntfs_may_write_mft_record() to see if it is safe to write this
151 on-disk inode. This then calls ilookup5() to check if the
152 corresponding VFS inode is in icache(). This in turn calls ifind()
153 which waits on the inode lock via wait_on_inode whilst holding the
154 global inode_lock.
155 Process 2: pdflush results in a call to __sync_single_inode for the
156 same VFS inode X on the ntfs volume. This locks the inode (I_LOCK)
157 then calls write-inode -> ntfs_write_inode -> map_mft_record() ->
158 read_cache_page() for the page (in page cache of table of inodes
159 $MFT, inode 0) containing the on-disk inode. This page has
160 PageUptodate() clear because of Process 1 (see above) so
161 read_cache_page() blocks when it tries to take the page lock for the
162 page so it can call ntfs_read_page().
163 Thus Process 1 is holding the page lock on the page containing the
164 on-disk inode X and it is waiting on the inode X to be unlocked in
165 ifind() so it can write the page out and then unlock the page.
166 And Process 2 is holding the inode lock on inode X and is waiting for
167 the page to be unlocked so it can call ntfs_readpage() or discover
168 that Process 1 set PageUptodate() again and use the page.
169 Thus we have a deadlock due to ifind() waiting on the inode lock.
170 The solution: The fix is to use the newly introduced
171 ilookup5_nowait() which does not wait on the inode's lock and hence
172 avoids the deadlock. This is safe as we do not care about the VFS
173 inode and only use the fact that it is in the VFS inode cache and the
174 fact that the vfs and ntfs inodes are one struct in memory to find
175 the ntfs inode in memory if present. Also, the ntfs inode has its
176 own locking so it does not matter if the vfs inode is locked.
177 - Fix bug in mft record writing where we forgot to set the device in
178 the buffers when mapping them after the VM had discarded them.
179 Thanks to Martin MOKREJÅ  for the bug report.
34 180
352.1.22 - Many bug and race fixes and error handling improvements. 1812.1.22 - Many bug and race fixes and error handling improvements.
36 182
@@ -1037,7 +1183,7 @@ tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/
1037 - Further runlist merging work. (Richard Russon) 1183 - Further runlist merging work. (Richard Russon)
1038 - Backwards compatibility for gcc-2.95. (Richard Russon) 1184 - Backwards compatibility for gcc-2.95. (Richard Russon)
1039 - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. 1185 - Update to kernel 2.5.5-pre1 and rediff the now tiny patch.
1040 - Convert to new file system declaration using ->ntfs_get_sb() and 1186 - Convert to new filesystem declaration using ->ntfs_get_sb() and
1041 replacing ntfs_read_super() with ntfs_fill_super(). 1187 replacing ntfs_read_super() with ntfs_fill_super().
1042 - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index 1188 - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index
1043 overflow on 32-bit architectures. 1189 overflow on 32-bit architectures.
@@ -1333,7 +1479,7 @@ tng-0.0.1 - The first useful version.
1333 The driver is now actually useful! Yey. (-: It undoubtedly has got bugs 1479 The driver is now actually useful! Yey. (-: It undoubtedly has got bugs
1334 though and it doesn't implement accesssing compressed files yet. Also, 1480 though and it doesn't implement accesssing compressed files yet. Also,
1335 accessing files with attribute list attributes is not implemented yet 1481 accessing files with attribute list attributes is not implemented yet
1336 either. But for small or simple file systems it should work and allow 1482 either. But for small or simple filesystems it should work and allow
1337 you to list directories, use stat on directory entries and the file 1483 you to list directories, use stat on directory entries and the file
1338 system, open, read, mmap and llseek around in files. A big mile stone 1484 system, open, read, mmap and llseek around in files. A big mile stone
1339 has been reached! 1485 has been reached!
@@ -1341,7 +1487,7 @@ tng-0.0.1 - The first useful version.
1341tng-0.0.0 - Initial version tag. 1487tng-0.0.0 - Initial version tag.
1342 1488
1343 Initial driver implementation. The driver can mount and umount simple 1489 Initial driver implementation. The driver can mount and umount simple
1344 NTFS file systems (i.e. ones without attribute lists in the system 1490 NTFS filesystems (i.e. ones without attribute lists in the system
1345 files). If the mount fails there might be problems in the error handling 1491 files). If the mount fails there might be problems in the error handling
1346 code paths, so be warned. Otherwise it seems to be loading the system 1492 code paths, so be warned. Otherwise it seems to be loading the system
1347 files nicely and the mft record read mapping/unmapping seems to be 1493 files nicely and the mft record read mapping/unmapping seems to be
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 7b66381a0b0f..f083f27d8b69 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.22\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
@@ -15,5 +15,5 @@ endif
15ifeq ($(CONFIG_NTFS_RW),y) 15ifeq ($(CONFIG_NTFS_RW),y)
16EXTRA_CFLAGS += -DNTFS_RW 16EXTRA_CFLAGS += -DNTFS_RW
17 17
18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o 18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
19endif 19endif
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 45d56e41ed98..78adad7a988d 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
2 * aops.c - NTFS kernel address space operations and page cache handling. 2 * aops.c - NTFS kernel address space operations and page cache handling.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -66,19 +66,22 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
66 ni = NTFS_I(page->mapping->host); 66 ni = NTFS_I(page->mapping->host);
67 67
68 if (likely(uptodate)) { 68 if (likely(uptodate)) {
69 s64 file_ofs; 69 s64 file_ofs, initialized_size;
70 70
71 set_buffer_uptodate(bh); 71 set_buffer_uptodate(bh);
72 72
73 file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + 73 file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
74 bh_offset(bh); 74 bh_offset(bh);
75 read_lock_irqsave(&ni->size_lock, flags);
76 initialized_size = ni->initialized_size;
77 read_unlock_irqrestore(&ni->size_lock, flags);
75 /* Check for the current buffer head overflowing. */ 78 /* Check for the current buffer head overflowing. */
76 if (file_ofs + bh->b_size > ni->initialized_size) { 79 if (file_ofs + bh->b_size > initialized_size) {
77 char *addr; 80 char *addr;
78 int ofs = 0; 81 int ofs = 0;
79 82
80 if (file_ofs < ni->initialized_size) 83 if (file_ofs < initialized_size)
81 ofs = ni->initialized_size - file_ofs; 84 ofs = initialized_size - file_ofs;
82 addr = kmap_atomic(page, KM_BIO_SRC_IRQ); 85 addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
83 memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); 86 memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
84 flush_dcache_page(page); 87 flush_dcache_page(page);
@@ -132,7 +135,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
132 i * rec_size), rec_size); 135 i * rec_size), rec_size);
133 flush_dcache_page(page); 136 flush_dcache_page(page);
134 kunmap_atomic(addr, KM_BIO_SRC_IRQ); 137 kunmap_atomic(addr, KM_BIO_SRC_IRQ);
135 if (likely(!PageError(page) && page_uptodate)) 138 if (likely(page_uptodate && !PageError(page)))
136 SetPageUptodate(page); 139 SetPageUptodate(page);
137 } 140 }
138 unlock_page(page); 141 unlock_page(page);
@@ -168,6 +171,7 @@ static int ntfs_read_block(struct page *page)
168 runlist_element *rl; 171 runlist_element *rl;
169 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 172 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
170 sector_t iblock, lblock, zblock; 173 sector_t iblock, lblock, zblock;
174 unsigned long flags;
171 unsigned int blocksize, vcn_ofs; 175 unsigned int blocksize, vcn_ofs;
172 int i, nr; 176 int i, nr;
173 unsigned char blocksize_bits; 177 unsigned char blocksize_bits;
@@ -190,8 +194,10 @@ static int ntfs_read_block(struct page *page)
190 } 194 }
191 195
192 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 196 iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
197 read_lock_irqsave(&ni->size_lock, flags);
193 lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; 198 lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
194 zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; 199 zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
200 read_unlock_irqrestore(&ni->size_lock, flags);
195 201
196 /* Loop through all the buffers in the page. */ 202 /* Loop through all the buffers in the page. */
197 rl = NULL; 203 rl = NULL;
@@ -258,7 +264,8 @@ lock_retry_remap:
258 goto lock_retry_remap; 264 goto lock_retry_remap;
259 rl = NULL; 265 rl = NULL;
260 lcn = err; 266 lcn = err;
261 } 267 } else if (!rl)
268 up_read(&ni->runlist.lock);
262 /* Hard error, zero out region. */ 269 /* Hard error, zero out region. */
263 bh->b_blocknr = -1; 270 bh->b_blocknr = -1;
264 SetPageError(page); 271 SetPageError(page);
@@ -341,14 +348,15 @@ handle_zblock:
341 */ 348 */
342static int ntfs_readpage(struct file *file, struct page *page) 349static int ntfs_readpage(struct file *file, struct page *page)
343{ 350{
344 loff_t i_size;
345 ntfs_inode *ni, *base_ni; 351 ntfs_inode *ni, *base_ni;
346 u8 *kaddr; 352 u8 *kaddr;
347 ntfs_attr_search_ctx *ctx; 353 ntfs_attr_search_ctx *ctx;
348 MFT_RECORD *mrec; 354 MFT_RECORD *mrec;
355 unsigned long flags;
349 u32 attr_len; 356 u32 attr_len;
350 int err = 0; 357 int err = 0;
351 358
359retry_readpage:
352 BUG_ON(!PageLocked(page)); 360 BUG_ON(!PageLocked(page));
353 /* 361 /*
354 * This can potentially happen because we clear PageUptodate() during 362 * This can potentially happen because we clear PageUptodate() during
@@ -383,9 +391,9 @@ static int ntfs_readpage(struct file *file, struct page *page)
383 * Attribute is resident, implying it is not compressed or encrypted. 391 * Attribute is resident, implying it is not compressed or encrypted.
384 * This also means the attribute is smaller than an mft record and 392 * This also means the attribute is smaller than an mft record and
385 * hence smaller than a page, so can simply zero out any pages with 393 * hence smaller than a page, so can simply zero out any pages with
386 * index above 0. We can also do this if the file size is 0. 394 * index above 0.
387 */ 395 */
388 if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) { 396 if (unlikely(page->index > 0)) {
389 kaddr = kmap_atomic(page, KM_USER0); 397 kaddr = kmap_atomic(page, KM_USER0);
390 memset(kaddr, 0, PAGE_CACHE_SIZE); 398 memset(kaddr, 0, PAGE_CACHE_SIZE);
391 flush_dcache_page(page); 399 flush_dcache_page(page);
@@ -402,6 +410,14 @@ static int ntfs_readpage(struct file *file, struct page *page)
402 err = PTR_ERR(mrec); 410 err = PTR_ERR(mrec);
403 goto err_out; 411 goto err_out;
404 } 412 }
413 /*
414 * If a parallel write made the attribute non-resident, drop the mft
415 * record and retry the readpage.
416 */
417 if (unlikely(NInoNonResident(ni))) {
418 unmap_mft_record(base_ni);
419 goto retry_readpage;
420 }
405 ctx = ntfs_attr_get_search_ctx(base_ni, mrec); 421 ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
406 if (unlikely(!ctx)) { 422 if (unlikely(!ctx)) {
407 err = -ENOMEM; 423 err = -ENOMEM;
@@ -412,9 +428,10 @@ static int ntfs_readpage(struct file *file, struct page *page)
412 if (unlikely(err)) 428 if (unlikely(err))
413 goto put_unm_err_out; 429 goto put_unm_err_out;
414 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 430 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
415 i_size = i_size_read(VFS_I(ni)); 431 read_lock_irqsave(&ni->size_lock, flags);
416 if (unlikely(attr_len > i_size)) 432 if (unlikely(attr_len > ni->initialized_size))
417 attr_len = i_size; 433 attr_len = ni->initialized_size;
434 read_unlock_irqrestore(&ni->size_lock, flags);
418 kaddr = kmap_atomic(page, KM_USER0); 435 kaddr = kmap_atomic(page, KM_USER0);
419 /* Copy the data to the page. */ 436 /* Copy the data to the page. */
420 memcpy(kaddr, (u8*)ctx->attr + 437 memcpy(kaddr, (u8*)ctx->attr +
@@ -463,12 +480,15 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
463{ 480{
464 VCN vcn; 481 VCN vcn;
465 LCN lcn; 482 LCN lcn;
483 s64 initialized_size;
484 loff_t i_size;
466 sector_t block, dblock, iblock; 485 sector_t block, dblock, iblock;
467 struct inode *vi; 486 struct inode *vi;
468 ntfs_inode *ni; 487 ntfs_inode *ni;
469 ntfs_volume *vol; 488 ntfs_volume *vol;
470 runlist_element *rl; 489 runlist_element *rl;
471 struct buffer_head *bh, *head; 490 struct buffer_head *bh, *head;
491 unsigned long flags;
472 unsigned int blocksize, vcn_ofs; 492 unsigned int blocksize, vcn_ofs;
473 int err; 493 int err;
474 BOOL need_end_writeback; 494 BOOL need_end_writeback;
@@ -510,11 +530,16 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
510 /* The first block in the page. */ 530 /* The first block in the page. */
511 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 531 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
512 532
533 read_lock_irqsave(&ni->size_lock, flags);
534 i_size = i_size_read(vi);
535 initialized_size = ni->initialized_size;
536 read_unlock_irqrestore(&ni->size_lock, flags);
537
513 /* The first out of bounds block for the data size. */ 538 /* The first out of bounds block for the data size. */
514 dblock = (vi->i_size + blocksize - 1) >> blocksize_bits; 539 dblock = (i_size + blocksize - 1) >> blocksize_bits;
515 540
516 /* The last (fully or partially) initialized block. */ 541 /* The last (fully or partially) initialized block. */
517 iblock = ni->initialized_size >> blocksize_bits; 542 iblock = initialized_size >> blocksize_bits;
518 543
519 /* 544 /*
520 * Be very careful. We have no exclusion from __set_page_dirty_buffers 545 * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -559,7 +584,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
559 584
560 /* Make sure we have enough initialized size. */ 585 /* Make sure we have enough initialized size. */
561 if (unlikely((block >= iblock) && 586 if (unlikely((block >= iblock) &&
562 (ni->initialized_size < vi->i_size))) { 587 (initialized_size < i_size))) {
563 /* 588 /*
564 * If this page is fully outside initialized size, zero 589 * If this page is fully outside initialized size, zero
565 * out all pages between the current initialized size 590 * out all pages between the current initialized size
@@ -666,7 +691,8 @@ lock_retry_remap:
666 goto lock_retry_remap; 691 goto lock_retry_remap;
667 rl = NULL; 692 rl = NULL;
668 lcn = err; 693 lcn = err;
669 } 694 } else if (!rl)
695 up_read(&ni->runlist.lock);
670 /* Failed to map the buffer, even after retrying. */ 696 /* Failed to map the buffer, even after retrying. */
671 bh->b_blocknr = -1; 697 bh->b_blocknr = -1;
672 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, " 698 ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
@@ -801,17 +827,15 @@ static int ntfs_write_mst_block(struct page *page,
801 ntfs_inode *ni = NTFS_I(vi); 827 ntfs_inode *ni = NTFS_I(vi);
802 ntfs_volume *vol = ni->vol; 828 ntfs_volume *vol = ni->vol;
803 u8 *kaddr; 829 u8 *kaddr;
804 unsigned char bh_size_bits = vi->i_blkbits;
805 unsigned int bh_size = 1 << bh_size_bits;
806 unsigned int rec_size = ni->itype.index.block_size; 830 unsigned int rec_size = ni->itype.index.block_size;
807 ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size]; 831 ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
808 struct buffer_head *bh, *head, *tbh, *rec_start_bh; 832 struct buffer_head *bh, *head, *tbh, *rec_start_bh;
809 int max_bhs = PAGE_CACHE_SIZE / bh_size; 833 struct buffer_head *bhs[MAX_BUF_PER_PAGE];
810 struct buffer_head *bhs[max_bhs];
811 runlist_element *rl; 834 runlist_element *rl;
812 int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2; 835 int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
813 unsigned rec_size_bits; 836 unsigned bh_size, rec_size_bits;
814 BOOL sync, is_mft, page_is_dirty, rec_is_dirty; 837 BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
838 unsigned char bh_size_bits;
815 839
816 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " 840 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
817 "0x%lx.", vi->i_ino, ni->type, page->index); 841 "0x%lx.", vi->i_ino, ni->type, page->index);
@@ -826,7 +850,11 @@ static int ntfs_write_mst_block(struct page *page,
826 */ 850 */
827 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || 851 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
828 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); 852 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
853 bh_size_bits = vi->i_blkbits;
854 bh_size = 1 << bh_size_bits;
855 max_bhs = PAGE_CACHE_SIZE / bh_size;
829 BUG_ON(!max_bhs); 856 BUG_ON(!max_bhs);
857 BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
830 858
831 /* Were we called for sync purposes? */ 859 /* Were we called for sync purposes? */
832 sync = (wbc->sync_mode == WB_SYNC_ALL); 860 sync = (wbc->sync_mode == WB_SYNC_ALL);
@@ -846,7 +874,7 @@ static int ntfs_write_mst_block(struct page *page,
846 (PAGE_CACHE_SHIFT - bh_size_bits); 874 (PAGE_CACHE_SHIFT - bh_size_bits);
847 875
848 /* The first out of bounds block for the data size. */ 876 /* The first out of bounds block for the data size. */
849 dblock = (vi->i_size + bh_size - 1) >> bh_size_bits; 877 dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
850 878
851 rl = NULL; 879 rl = NULL;
852 err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0; 880 err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
@@ -858,6 +886,7 @@ static int ntfs_write_mst_block(struct page *page,
858 if (likely(block < rec_block)) { 886 if (likely(block < rec_block)) {
859 if (unlikely(block >= dblock)) { 887 if (unlikely(block >= dblock)) {
860 clear_buffer_dirty(bh); 888 clear_buffer_dirty(bh);
889 set_buffer_uptodate(bh);
861 continue; 890 continue;
862 } 891 }
863 /* 892 /*
@@ -895,6 +924,7 @@ static int ntfs_write_mst_block(struct page *page,
895 LCN lcn; 924 LCN lcn;
896 unsigned int vcn_ofs; 925 unsigned int vcn_ofs;
897 926
927 bh->b_bdev = vol->sb->s_bdev;
898 /* Obtain the vcn and offset of the current block. */ 928 /* Obtain the vcn and offset of the current block. */
899 vcn = (VCN)block << bh_size_bits; 929 vcn = (VCN)block << bh_size_bits;
900 vcn_ofs = vcn & vol->cluster_size_mask; 930 vcn_ofs = vcn & vol->cluster_size_mask;
@@ -938,8 +968,11 @@ lock_retry_remap:
938 if (err2 == -ENOMEM) 968 if (err2 == -ENOMEM)
939 page_is_dirty = TRUE; 969 page_is_dirty = TRUE;
940 lcn = err2; 970 lcn = err2;
941 } else 971 } else {
942 err2 = -EIO; 972 err2 = -EIO;
973 if (!rl)
974 up_read(&ni->runlist.lock);
975 }
943 /* Hard error. Abort writing this record. */ 976 /* Hard error. Abort writing this record. */
944 if (!err || err == -ENOMEM) 977 if (!err || err == -ENOMEM)
945 err = err2; 978 err = err2;
@@ -949,7 +982,8 @@ lock_retry_remap:
949 "attribute type 0x%x) because " 982 "attribute type 0x%x) because "
950 "its location on disk could " 983 "its location on disk could "
951 "not be determined (error " 984 "not be determined (error "
952 "code %lli).", (s64)block << 985 "code %lli).",
986 (long long)block <<
953 bh_size_bits >> 987 bh_size_bits >>
954 vol->mft_record_size_bits, 988 vol->mft_record_size_bits,
955 ni->mft_no, ni->type, 989 ni->mft_no, ni->type,
@@ -1223,19 +1257,17 @@ done:
1223static int ntfs_writepage(struct page *page, struct writeback_control *wbc) 1257static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1224{ 1258{
1225 loff_t i_size; 1259 loff_t i_size;
1226 struct inode *vi; 1260 struct inode *vi = page->mapping->host;
1227 ntfs_inode *ni, *base_ni; 1261 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1228 char *kaddr; 1262 char *kaddr;
1229 ntfs_attr_search_ctx *ctx; 1263 ntfs_attr_search_ctx *ctx = NULL;
1230 MFT_RECORD *m; 1264 MFT_RECORD *m = NULL;
1231 u32 attr_len; 1265 u32 attr_len;
1232 int err; 1266 int err;
1233 1267
1268retry_writepage:
1234 BUG_ON(!PageLocked(page)); 1269 BUG_ON(!PageLocked(page));
1235
1236 vi = page->mapping->host;
1237 i_size = i_size_read(vi); 1270 i_size = i_size_read(vi);
1238
1239 /* Is the page fully outside i_size? (truncate in progress) */ 1271 /* Is the page fully outside i_size? (truncate in progress) */
1240 if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> 1272 if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
1241 PAGE_CACHE_SHIFT)) { 1273 PAGE_CACHE_SHIFT)) {
@@ -1248,8 +1280,6 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1248 ntfs_debug("Write outside i_size - truncated?"); 1280 ntfs_debug("Write outside i_size - truncated?");
1249 return 0; 1281 return 0;
1250 } 1282 }
1251 ni = NTFS_I(vi);
1252
1253 /* NInoNonResident() == NInoIndexAllocPresent() */ 1283 /* NInoNonResident() == NInoIndexAllocPresent() */
1254 if (NInoNonResident(ni)) { 1284 if (NInoNonResident(ni)) {
1255 /* 1285 /*
@@ -1326,6 +1356,14 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1326 ctx = NULL; 1356 ctx = NULL;
1327 goto err_out; 1357 goto err_out;
1328 } 1358 }
1359 /*
1360 * If a parallel write made the attribute non-resident, drop the mft
1361 * record and retry the writepage.
1362 */
1363 if (unlikely(NInoNonResident(ni))) {
1364 unmap_mft_record(base_ni);
1365 goto retry_writepage;
1366 }
1329 ctx = ntfs_attr_get_search_ctx(base_ni, m); 1367 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1330 if (unlikely(!ctx)) { 1368 if (unlikely(!ctx)) {
1331 err = -ENOMEM; 1369 err = -ENOMEM;
@@ -1367,15 +1405,12 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1367 */ 1405 */
1368 1406
1369 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); 1407 attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
1370 i_size = i_size_read(VFS_I(ni)); 1408 i_size = i_size_read(vi);
1371 kaddr = kmap_atomic(page, KM_USER0);
1372 if (unlikely(attr_len > i_size)) { 1409 if (unlikely(attr_len > i_size)) {
1373 /* Zero out of bounds area in the mft record. */
1374 memset((u8*)ctx->attr + le16_to_cpu(
1375 ctx->attr->data.resident.value_offset) +
1376 i_size, 0, attr_len - i_size);
1377 attr_len = i_size; 1410 attr_len = i_size;
1411 ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
1378 } 1412 }
1413 kaddr = kmap_atomic(page, KM_USER0);
1379 /* Copy the data from the page to the mft record. */ 1414 /* Copy the data from the page to the mft record. */
1380 memcpy((u8*)ctx->attr + 1415 memcpy((u8*)ctx->attr +
1381 le16_to_cpu(ctx->attr->data.resident.value_offset), 1416 le16_to_cpu(ctx->attr->data.resident.value_offset),
@@ -1405,8 +1440,10 @@ err_out:
1405 err = 0; 1440 err = 0;
1406 } else { 1441 } else {
1407 ntfs_error(vi->i_sb, "Resident attribute write failed with " 1442 ntfs_error(vi->i_sb, "Resident attribute write failed with "
1408 "error %i. Setting page error flag.", err); 1443 "error %i.", err);
1409 SetPageError(page); 1444 SetPageError(page);
1445 NVolSetErrors(ni->vol);
1446 make_bad_inode(vi);
1410 } 1447 }
1411 unlock_page(page); 1448 unlock_page(page);
1412 if (ctx) 1449 if (ctx)
@@ -1425,12 +1462,15 @@ static int ntfs_prepare_nonresident_write(struct page *page,
1425{ 1462{
1426 VCN vcn; 1463 VCN vcn;
1427 LCN lcn; 1464 LCN lcn;
1465 s64 initialized_size;
1466 loff_t i_size;
1428 sector_t block, ablock, iblock; 1467 sector_t block, ablock, iblock;
1429 struct inode *vi; 1468 struct inode *vi;
1430 ntfs_inode *ni; 1469 ntfs_inode *ni;
1431 ntfs_volume *vol; 1470 ntfs_volume *vol;
1432 runlist_element *rl; 1471 runlist_element *rl;
1433 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; 1472 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
1473 unsigned long flags;
1434 unsigned int vcn_ofs, block_start, block_end, blocksize; 1474 unsigned int vcn_ofs, block_start, block_end, blocksize;
1435 int err; 1475 int err;
1436 BOOL is_retry; 1476 BOOL is_retry;
@@ -1462,16 +1502,20 @@ static int ntfs_prepare_nonresident_write(struct page *page,
1462 /* The first block in the page. */ 1502 /* The first block in the page. */
1463 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); 1503 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
1464 1504
1505 read_lock_irqsave(&ni->size_lock, flags);
1465 /* 1506 /*
1466 * The first out of bounds block for the allocated size. No need to 1507 * The first out of bounds block for the allocated size. No need to
1467 * round up as allocated_size is in multiples of cluster size and the 1508 * round up as allocated_size is in multiples of cluster size and the
1468 * minimum cluster size is 512 bytes, which is equal to the smallest 1509 * minimum cluster size is 512 bytes, which is equal to the smallest
1469 * blocksize. 1510 * blocksize.
1470 */ 1511 */
1471 ablock = ni->allocated_size >> blocksize_bits; 1512 ablock = ni->allocated_size >> blocksize_bits;
1513 i_size = i_size_read(vi);
1514 initialized_size = ni->initialized_size;
1515 read_unlock_irqrestore(&ni->size_lock, flags);
1472 1516
1473 /* The last (fully or partially) initialized block. */ 1517 /* The last (fully or partially) initialized block. */
1474 iblock = ni->initialized_size >> blocksize_bits; 1518 iblock = initialized_size >> blocksize_bits;
1475 1519
1476 /* Loop through all the buffers in the page. */ 1520 /* Loop through all the buffers in the page. */
1477 block_start = 0; 1521 block_start = 0;
@@ -1518,7 +1562,7 @@ static int ntfs_prepare_nonresident_write(struct page *page,
1518 * request, i.e. block < ablock is true. 1562 * request, i.e. block < ablock is true.
1519 */ 1563 */
1520 if (unlikely((block >= iblock) && 1564 if (unlikely((block >= iblock) &&
1521 (ni->initialized_size < vi->i_size))) { 1565 (initialized_size < i_size))) {
1522 /* 1566 /*
1523 * If this page is fully outside initialized size, zero 1567 * If this page is fully outside initialized size, zero
1524 * out all pages between the current initialized size 1568 * out all pages between the current initialized size
@@ -1622,6 +1666,8 @@ lock_retry_remap:
1622 "not supported yet. " 1666 "not supported yet. "
1623 "Sorry."); 1667 "Sorry.");
1624 err = -EOPNOTSUPP; 1668 err = -EOPNOTSUPP;
1669 if (!rl)
1670 up_read(&ni->runlist.lock);
1625 goto err_out; 1671 goto err_out;
1626 } else if (!is_retry && 1672 } else if (!is_retry &&
1627 lcn == LCN_RL_NOT_MAPPED) { 1673 lcn == LCN_RL_NOT_MAPPED) {
@@ -1636,7 +1682,8 @@ lock_retry_remap:
1636 goto lock_retry_remap; 1682 goto lock_retry_remap;
1637 rl = NULL; 1683 rl = NULL;
1638 lcn = err; 1684 lcn = err;
1639 } 1685 } else if (!rl)
1686 up_read(&ni->runlist.lock);
1640 /* 1687 /*
1641 * Failed to map the buffer, even after 1688 * Failed to map the buffer, even after
1642 * retrying. 1689 * retrying.
@@ -1797,6 +1844,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1797 unsigned from, unsigned to) 1844 unsigned from, unsigned to)
1798{ 1845{
1799 s64 new_size; 1846 s64 new_size;
1847 loff_t i_size;
1800 struct inode *vi = page->mapping->host; 1848 struct inode *vi = page->mapping->host;
1801 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); 1849 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1802 ntfs_volume *vol = ni->vol; 1850 ntfs_volume *vol = ni->vol;
@@ -1868,14 +1916,8 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1868 BUG_ON(page_has_buffers(page)); 1916 BUG_ON(page_has_buffers(page));
1869 new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; 1917 new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
1870 /* If we do not need to resize the attribute allocation we are done. */ 1918 /* If we do not need to resize the attribute allocation we are done. */
1871 if (new_size <= vi->i_size) 1919 if (new_size <= i_size_read(vi))
1872 goto done; 1920 goto done;
1873
1874 // FIXME: We abort for now as this code is not safe.
1875 ntfs_error(vi->i_sb, "Changing the file size is not supported yet. "
1876 "Sorry.");
1877 return -EOPNOTSUPP;
1878
1879 /* Map, pin, and lock the (base) mft record. */ 1921 /* Map, pin, and lock the (base) mft record. */
1880 if (!NInoAttr(ni)) 1922 if (!NInoAttr(ni))
1881 base_ni = ni; 1923 base_ni = ni;
@@ -1904,7 +1946,15 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1904 a = ctx->attr; 1946 a = ctx->attr;
1905 /* The total length of the attribute value. */ 1947 /* The total length of the attribute value. */
1906 attr_len = le32_to_cpu(a->data.resident.value_length); 1948 attr_len = le32_to_cpu(a->data.resident.value_length);
1907 BUG_ON(vi->i_size != attr_len); 1949 /* Fix an eventual previous failure of ntfs_commit_write(). */
1950 i_size = i_size_read(vi);
1951 if (unlikely(attr_len > i_size)) {
1952 attr_len = i_size;
1953 a->data.resident.value_length = cpu_to_le32(attr_len);
1954 }
1955 /* If we do not need to resize the attribute allocation we are done. */
1956 if (new_size <= attr_len)
1957 goto done_unm;
1908 /* Check if new size is allowed in $AttrDef. */ 1958 /* Check if new size is allowed in $AttrDef. */
1909 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); 1959 err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
1910 if (unlikely(err)) { 1960 if (unlikely(err)) {
@@ -1962,6 +2012,7 @@ static int ntfs_prepare_write(struct file *file, struct page *page,
1962 } 2012 }
1963 flush_dcache_mft_record_page(ctx->ntfs_ino); 2013 flush_dcache_mft_record_page(ctx->ntfs_ino);
1964 mark_mft_record_dirty(ctx->ntfs_ino); 2014 mark_mft_record_dirty(ctx->ntfs_ino);
2015done_unm:
1965 ntfs_attr_put_search_ctx(ctx); 2016 ntfs_attr_put_search_ctx(ctx);
1966 unmap_mft_record(base_ni); 2017 unmap_mft_record(base_ni);
1967 /* 2018 /*
@@ -2047,7 +2098,7 @@ static int ntfs_commit_nonresident_write(struct page *page,
2047 * now we know ntfs_prepare_write() would have failed in the write 2098 * now we know ntfs_prepare_write() would have failed in the write
2048 * exceeds i_size case, so this will never trigger which is fine. 2099 * exceeds i_size case, so this will never trigger which is fine.
2049 */ 2100 */
2050 if (pos > vi->i_size) { 2101 if (pos > i_size_read(vi)) {
2051 ntfs_error(vi->i_sb, "Writing beyond the existing file size is " 2102 ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
2052 "not supported yet. Sorry."); 2103 "not supported yet. Sorry.");
2053 return -EOPNOTSUPP; 2104 return -EOPNOTSUPP;
@@ -2183,9 +2234,13 @@ static int ntfs_commit_write(struct file *file, struct page *page,
2183 } 2234 }
2184 kunmap_atomic(kaddr, KM_USER0); 2235 kunmap_atomic(kaddr, KM_USER0);
2185 /* Update i_size if necessary. */ 2236 /* Update i_size if necessary. */
2186 if (vi->i_size < attr_len) { 2237 if (i_size_read(vi) < attr_len) {
2238 unsigned long flags;
2239
2240 write_lock_irqsave(&ni->size_lock, flags);
2187 ni->allocated_size = ni->initialized_size = attr_len; 2241 ni->allocated_size = ni->initialized_size = attr_len;
2188 i_size_write(vi, attr_len); 2242 i_size_write(vi, attr_len);
2243 write_unlock_irqrestore(&ni->size_lock, flags);
2189 } 2244 }
2190 /* Mark the mft record dirty, so it gets written back. */ 2245 /* Mark the mft record dirty, so it gets written back. */
2191 flush_dcache_mft_record_page(ctx->ntfs_ino); 2246 flush_dcache_mft_record_page(ctx->ntfs_ino);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1ff7f90a18b0..cd0f9e740b14 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. 2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -21,88 +21,217 @@
21 */ 21 */
22 22
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/swap.h>
24 25
25#include "attrib.h" 26#include "attrib.h"
26#include "debug.h" 27#include "debug.h"
27#include "layout.h" 28#include "layout.h"
29#include "lcnalloc.h"
30#include "malloc.h"
28#include "mft.h" 31#include "mft.h"
29#include "ntfs.h" 32#include "ntfs.h"
30#include "types.h" 33#include "types.h"
31 34
32/** 35/**
33 * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode 36 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
34 * @ni: ntfs inode for which to map (part of) a runlist 37 * @ni: ntfs inode for which to map (part of) a runlist
35 * @vcn: map runlist part containing this vcn 38 * @vcn: map runlist part containing this vcn
36 * 39 *
37 * Map the part of a runlist containing the @vcn of the ntfs inode @ni. 40 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
38 * 41 *
39 * Return 0 on success and -errno on error. 42 * Return 0 on success and -errno on error. There is one special error code
43 * which is not an error as such. This is -ENOENT. It means that @vcn is out
44 * of bounds of the runlist.
40 * 45 *
41 * Locking: - The runlist must be unlocked on entry and is unlocked on return. 46 * Locking: - The runlist must be locked for writing.
42 * - This function takes the lock for writing and modifies the runlist. 47 * - This function modifies the runlist.
43 */ 48 */
44int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) 49int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
45{ 50{
51 VCN end_vcn;
46 ntfs_inode *base_ni; 52 ntfs_inode *base_ni;
53 MFT_RECORD *m;
54 ATTR_RECORD *a;
47 ntfs_attr_search_ctx *ctx; 55 ntfs_attr_search_ctx *ctx;
48 MFT_RECORD *mrec; 56 runlist_element *rl;
49 int err = 0; 57 int err = 0;
50 58
51 ntfs_debug("Mapping runlist part containing vcn 0x%llx.", 59 ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
52 (unsigned long long)vcn); 60 (unsigned long long)vcn);
53
54 if (!NInoAttr(ni)) 61 if (!NInoAttr(ni))
55 base_ni = ni; 62 base_ni = ni;
56 else 63 else
57 base_ni = ni->ext.base_ntfs_ino; 64 base_ni = ni->ext.base_ntfs_ino;
58 65 m = map_mft_record(base_ni);
59 mrec = map_mft_record(base_ni); 66 if (IS_ERR(m))
60 if (IS_ERR(mrec)) 67 return PTR_ERR(m);
61 return PTR_ERR(mrec); 68 ctx = ntfs_attr_get_search_ctx(base_ni, m);
62 ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
63 if (unlikely(!ctx)) { 69 if (unlikely(!ctx)) {
64 err = -ENOMEM; 70 err = -ENOMEM;
65 goto err_out; 71 goto err_out;
66 } 72 }
67 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 73 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
68 CASE_SENSITIVE, vcn, NULL, 0, ctx); 74 CASE_SENSITIVE, vcn, NULL, 0, ctx);
69 if (unlikely(err)) 75 if (unlikely(err)) {
70 goto put_err_out; 76 if (err == -ENOENT)
77 err = -EIO;
78 goto err_out;
79 }
80 a = ctx->attr;
81 /*
82 * Only decompress the mapping pairs if @vcn is inside it. Otherwise
83 * we get into problems when we try to map an out of bounds vcn because
84 * we then try to map the already mapped runlist fragment and
85 * ntfs_mapping_pairs_decompress() fails.
86 */
87 end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
88 if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1))
89 end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
90 if (unlikely(vcn >= end_vcn)) {
91 err = -ENOENT;
92 goto err_out;
93 }
94 rl = ntfs_mapping_pairs_decompress(ni->vol, a, ni->runlist.rl);
95 if (IS_ERR(rl))
96 err = PTR_ERR(rl);
97 else
98 ni->runlist.rl = rl;
99err_out:
100 if (likely(ctx))
101 ntfs_attr_put_search_ctx(ctx);
102 unmap_mft_record(base_ni);
103 return err;
104}
105
106/**
107 * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
108 * @ni: ntfs inode for which to map (part of) a runlist
109 * @vcn: map runlist part containing this vcn
110 *
111 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
112 *
113 * Return 0 on success and -errno on error. There is one special error code
114 * which is not an error as such. This is -ENOENT. It means that @vcn is out
115 * of bounds of the runlist.
116 *
117 * Locking: - The runlist must be unlocked on entry and is unlocked on return.
118 * - This function takes the runlist lock for writing and modifies the
119 * runlist.
120 */
121int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
122{
123 int err = 0;
71 124
72 down_write(&ni->runlist.lock); 125 down_write(&ni->runlist.lock);
73 /* Make sure someone else didn't do the work while we were sleeping. */ 126 /* Make sure someone else didn't do the work while we were sleeping. */
74 if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= 127 if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
75 LCN_RL_NOT_MAPPED)) { 128 LCN_RL_NOT_MAPPED))
76 runlist_element *rl; 129 err = ntfs_map_runlist_nolock(ni, vcn);
130 up_write(&ni->runlist.lock);
131 return err;
132}
77 133
78 rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr, 134/**
79 ni->runlist.rl); 135 * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
80 if (IS_ERR(rl)) 136 * @ni: ntfs inode of the attribute whose runlist to search
81 err = PTR_ERR(rl); 137 * @vcn: vcn to convert
82 else 138 * @write_locked: true if the runlist is locked for writing
83 ni->runlist.rl = rl; 139 *
140 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
141 * described by the ntfs inode @ni and return the corresponding logical cluster
142 * number (lcn).
143 *
144 * If the @vcn is not mapped yet, the attempt is made to map the attribute
145 * extent containing the @vcn and the vcn to lcn conversion is retried.
146 *
147 * If @write_locked is true the caller has locked the runlist for writing and
148 * if false for reading.
149 *
150 * Since lcns must be >= 0, we use negative return codes with special meaning:
151 *
152 * Return code Meaning / Description
153 * ==========================================
154 * LCN_HOLE Hole / not allocated on disk.
155 * LCN_ENOENT There is no such vcn in the runlist, i.e. @vcn is out of bounds.
156 * LCN_ENOMEM Not enough memory to map runlist.
157 * LCN_EIO Critical error (runlist/file is corrupt, i/o error, etc).
158 *
159 * Locking: - The runlist must be locked on entry and is left locked on return.
160 * - If @write_locked is FALSE, i.e. the runlist is locked for reading,
161 * the lock may be dropped inside the function so you cannot rely on
162 * the runlist still being the same when this function returns.
163 */
164LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
165 const BOOL write_locked)
166{
167 LCN lcn;
168 BOOL is_retry = FALSE;
169
170 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
171 ni->mft_no, (unsigned long long)vcn,
172 write_locked ? "write" : "read");
173 BUG_ON(!ni);
174 BUG_ON(!NInoNonResident(ni));
175 BUG_ON(vcn < 0);
176retry_remap:
177 /* Convert vcn to lcn. If that fails map the runlist and retry once. */
178 lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
179 if (likely(lcn >= LCN_HOLE)) {
180 ntfs_debug("Done, lcn 0x%llx.", (long long)lcn);
181 return lcn;
84 } 182 }
85 up_write(&ni->runlist.lock); 183 if (lcn != LCN_RL_NOT_MAPPED) {
184 if (lcn != LCN_ENOENT)
185 lcn = LCN_EIO;
186 } else if (!is_retry) {
187 int err;
86 188
87put_err_out: 189 if (!write_locked) {
88 ntfs_attr_put_search_ctx(ctx); 190 up_read(&ni->runlist.lock);
89err_out: 191 down_write(&ni->runlist.lock);
90 unmap_mft_record(base_ni); 192 if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
91 return err; 193 LCN_RL_NOT_MAPPED)) {
194 up_write(&ni->runlist.lock);
195 down_read(&ni->runlist.lock);
196 goto retry_remap;
197 }
198 }
199 err = ntfs_map_runlist_nolock(ni, vcn);
200 if (!write_locked) {
201 up_write(&ni->runlist.lock);
202 down_read(&ni->runlist.lock);
203 }
204 if (likely(!err)) {
205 is_retry = TRUE;
206 goto retry_remap;
207 }
208 if (err == -ENOENT)
209 lcn = LCN_ENOENT;
210 else if (err == -ENOMEM)
211 lcn = LCN_ENOMEM;
212 else
213 lcn = LCN_EIO;
214 }
215 if (lcn != LCN_ENOENT)
216 ntfs_error(ni->vol->sb, "Failed with error code %lli.",
217 (long long)lcn);
218 return lcn;
92} 219}
93 220
94/** 221/**
95 * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode 222 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
96 * @ni: ntfs inode describing the runlist to search 223 * @ni: ntfs inode describing the runlist to search
97 * @vcn: vcn to find 224 * @vcn: vcn to find
98 * @need_write: if false, lock for reading and if true, lock for writing 225 * @write_locked: true if the runlist is locked for writing
99 * 226 *
100 * Find the virtual cluster number @vcn in the runlist described by the ntfs 227 * Find the virtual cluster number @vcn in the runlist described by the ntfs
101 * inode @ni and return the address of the runlist element containing the @vcn. 228 * inode @ni and return the address of the runlist element containing the @vcn.
102 * The runlist is left locked and the caller has to unlock it. If @need_write 229 *
103 * is true, the runlist is locked for writing and if @need_write is false, the 230 * If the @vcn is not mapped yet, the attempt is made to map the attribute
104 * runlist is locked for reading. In the error case, the runlist is not left 231 * extent containing the @vcn and the vcn to lcn conversion is retried.
105 * locked. 232 *
233 * If @write_locked is true the caller has locked the runlist for writing and
234 * if false for reading.
106 * 235 *
107 * Note you need to distinguish between the lcn of the returned runlist element 236 * Note you need to distinguish between the lcn of the returned runlist element
108 * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on 237 * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on
@@ -118,34 +247,29 @@ err_out:
118 * -ENOMEM - Not enough memory to map runlist. 247 * -ENOMEM - Not enough memory to map runlist.
119 * -EIO - Critical error (runlist/file is corrupt, i/o error, etc). 248 * -EIO - Critical error (runlist/file is corrupt, i/o error, etc).
120 * 249 *
121 * Locking: - The runlist must be unlocked on entry. 250 * Locking: - The runlist must be locked on entry and is left locked on return.
122 * - On failing return, the runlist is unlocked. 251 * - If @write_locked is FALSE, i.e. the runlist is locked for reading,
123 * - On successful return, the runlist is locked. If @need_write us 252 * the lock may be dropped inside the function so you cannot rely on
124 * true, it is locked for writing. Otherwise is is locked for 253 * the runlist still being the same when this function returns.
125 * reading.
126 */ 254 */
127runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, 255runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
128 const BOOL need_write) 256 const BOOL write_locked)
129{ 257{
130 runlist_element *rl; 258 runlist_element *rl;
131 int err = 0; 259 int err = 0;
132 BOOL is_retry = FALSE; 260 BOOL is_retry = FALSE;
133 261
134 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.", 262 ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
135 ni->mft_no, (unsigned long long)vcn, 263 ni->mft_no, (unsigned long long)vcn,
136 !need_write ? "read" : "writ"); 264 write_locked ? "write" : "read");
137 BUG_ON(!ni); 265 BUG_ON(!ni);
138 BUG_ON(!NInoNonResident(ni)); 266 BUG_ON(!NInoNonResident(ni));
139 BUG_ON(vcn < 0); 267 BUG_ON(vcn < 0);
140lock_retry_remap: 268retry_remap:
141 if (!need_write)
142 down_read(&ni->runlist.lock);
143 else
144 down_write(&ni->runlist.lock);
145 rl = ni->runlist.rl; 269 rl = ni->runlist.rl;
146 if (likely(rl && vcn >= rl[0].vcn)) { 270 if (likely(rl && vcn >= rl[0].vcn)) {
147 while (likely(rl->length)) { 271 while (likely(rl->length)) {
148 if (likely(vcn < rl[1].vcn)) { 272 if (unlikely(vcn < rl[1].vcn)) {
149 if (likely(rl->lcn >= LCN_HOLE)) { 273 if (likely(rl->lcn >= LCN_HOLE)) {
150 ntfs_debug("Done."); 274 ntfs_debug("Done.");
151 return rl; 275 return rl;
@@ -161,30 +285,41 @@ lock_retry_remap:
161 err = -EIO; 285 err = -EIO;
162 } 286 }
163 } 287 }
164 if (!need_write)
165 up_read(&ni->runlist.lock);
166 else
167 up_write(&ni->runlist.lock);
168 if (!err && !is_retry) { 288 if (!err && !is_retry) {
169 /* 289 /*
170 * The @vcn is in an unmapped region, map the runlist and 290 * The @vcn is in an unmapped region, map the runlist and
171 * retry. 291 * retry.
172 */ 292 */
173 err = ntfs_map_runlist(ni, vcn); 293 if (!write_locked) {
294 up_read(&ni->runlist.lock);
295 down_write(&ni->runlist.lock);
296 if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) !=
297 LCN_RL_NOT_MAPPED)) {
298 up_write(&ni->runlist.lock);
299 down_read(&ni->runlist.lock);
300 goto retry_remap;
301 }
302 }
303 err = ntfs_map_runlist_nolock(ni, vcn);
304 if (!write_locked) {
305 up_write(&ni->runlist.lock);
306 down_read(&ni->runlist.lock);
307 }
174 if (likely(!err)) { 308 if (likely(!err)) {
175 is_retry = TRUE; 309 is_retry = TRUE;
176 goto lock_retry_remap; 310 goto retry_remap;
177 } 311 }
178 /* 312 /*
179 * -EINVAL and -ENOENT coming from a failed mapping attempt are 313 * -EINVAL coming from a failed mapping attempt is equivalent
180 * equivalent to i/o errors for us as they should not happen in 314 * to i/o error for us as it should not happen in our code
181 * our code paths. 315 * paths.
182 */ 316 */
183 if (err == -EINVAL || err == -ENOENT) 317 if (err == -EINVAL)
184 err = -EIO; 318 err = -EIO;
185 } else if (!err) 319 } else if (!err)
186 err = -EIO; 320 err = -EIO;
187 ntfs_error(ni->vol->sb, "Failed with error code %i.", err); 321 if (err != -ENOENT)
322 ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
188 return ERR_PTR(err); 323 return ERR_PTR(err);
189} 324}
190 325
@@ -870,15 +1005,14 @@ int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
870static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx, 1005static inline void ntfs_attr_init_search_ctx(ntfs_attr_search_ctx *ctx,
871 ntfs_inode *ni, MFT_RECORD *mrec) 1006 ntfs_inode *ni, MFT_RECORD *mrec)
872{ 1007{
873 ctx->mrec = mrec; 1008 *ctx = (ntfs_attr_search_ctx) {
874 /* Sanity checks are performed elsewhere. */ 1009 .mrec = mrec,
875 ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset)); 1010 /* Sanity checks are performed elsewhere. */
876 ctx->is_first = TRUE; 1011 .attr = (ATTR_RECORD*)((u8*)mrec +
877 ctx->ntfs_ino = ni; 1012 le16_to_cpu(mrec->attrs_offset)),
878 ctx->al_entry = NULL; 1013 .is_first = TRUE,
879 ctx->base_ntfs_ino = NULL; 1014 .ntfs_ino = ni,
880 ctx->base_mrec = NULL; 1015 };
881 ctx->base_attr = NULL;
882} 1016}
883 1017
884/** 1018/**
@@ -945,6 +1079,8 @@ void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx)
945 return; 1079 return;
946} 1080}
947 1081
1082#ifdef NTFS_RW
1083
948/** 1084/**
949 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file 1085 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file
950 * @vol: ntfs volume to which the attribute belongs 1086 * @vol: ntfs volume to which the attribute belongs
@@ -1024,27 +1160,21 @@ int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type,
1024 * Check whether the attribute of @type on the ntfs volume @vol is allowed to 1160 * Check whether the attribute of @type on the ntfs volume @vol is allowed to
1025 * be non-resident. This information is obtained from $AttrDef system file. 1161 * be non-resident. This information is obtained from $AttrDef system file.
1026 * 1162 *
1027 * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, or 1163 * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, and
1028 * -ENOENT if the attribute is not listed in $AttrDef. 1164 * -ENOENT if the attribute is not listed in $AttrDef.
1029 */ 1165 */
1030int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type) 1166int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1031{ 1167{
1032 ATTR_DEF *ad; 1168 ATTR_DEF *ad;
1033 1169
1034 /*
1035 * $DATA is always allowed to be non-resident even if $AttrDef does not
1036 * specify this in the flags of the $DATA attribute definition record.
1037 */
1038 if (type == AT_DATA)
1039 return 0;
1040 /* Find the attribute definition record in $AttrDef. */ 1170 /* Find the attribute definition record in $AttrDef. */
1041 ad = ntfs_attr_find_in_attrdef(vol, type); 1171 ad = ntfs_attr_find_in_attrdef(vol, type);
1042 if (unlikely(!ad)) 1172 if (unlikely(!ad))
1043 return -ENOENT; 1173 return -ENOENT;
1044 /* Check the flags and return the result. */ 1174 /* Check the flags and return the result. */
1045 if (ad->flags & CAN_BE_NON_RESIDENT) 1175 if (ad->flags & ATTR_DEF_RESIDENT)
1046 return 0; 1176 return -EPERM;
1047 return -EPERM; 1177 return 0;
1048} 1178}
1049 1179
1050/** 1180/**
@@ -1067,9 +1197,9 @@ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1067 */ 1197 */
1068int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type) 1198int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
1069{ 1199{
1070 if (type != AT_INDEX_ALLOCATION && type != AT_EA) 1200 if (type == AT_INDEX_ALLOCATION || type == AT_EA)
1071 return 0; 1201 return -EPERM;
1072 return -EPERM; 1202 return 0;
1073} 1203}
1074 1204
1075/** 1205/**
@@ -1117,6 +1247,328 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1117} 1247}
1118 1248
1119/** 1249/**
1250 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
1251 * @ni: ntfs inode describing the attribute to convert
1252 *
1253 * Convert the resident ntfs attribute described by the ntfs inode @ni to a
1254 * non-resident one.
1255 *
1256 * Return 0 on success and -errno on error. The following error return codes
1257 * are defined:
1258 * -EPERM - The attribute is not allowed to be non-resident.
1259 * -ENOMEM - Not enough memory.
1260 * -ENOSPC - Not enough disk space.
1261 * -EINVAL - Attribute not defined on the volume.
1262 * -EIO - I/o error or other error.
1263 * Note that -ENOSPC is also returned in the case that there is not enough
1264 * space in the mft record to do the conversion. This can happen when the mft
1265 * record is already very full. The caller is responsible for trying to make
1266 * space in the mft record and trying again. FIXME: Do we need a separate
1267 * error return code for this kind of -ENOSPC or is it always worth trying
1268 * again in case the attribute may then fit in a resident state so no need to
1269 * make it non-resident at all? Ho-hum... (AIA)
1270 *
1271 * NOTE to self: No changes in the attribute list are required to move from
1272 * a resident to a non-resident attribute.
1273 *
1274 * Locking: - The caller must hold i_sem on the inode.
1275 */
1276int ntfs_attr_make_non_resident(ntfs_inode *ni)
1277{
1278 s64 new_size;
1279 struct inode *vi = VFS_I(ni);
1280 ntfs_volume *vol = ni->vol;
1281 ntfs_inode *base_ni;
1282 MFT_RECORD *m;
1283 ATTR_RECORD *a;
1284 ntfs_attr_search_ctx *ctx;
1285 struct page *page;
1286 runlist_element *rl;
1287 u8 *kaddr;
1288 unsigned long flags;
1289 int mp_size, mp_ofs, name_ofs, arec_size, err, err2;
1290 u32 attr_size;
1291 u8 old_res_attr_flags;
1292
1293 /* Check that the attribute is allowed to be non-resident. */
1294 err = ntfs_attr_can_be_non_resident(vol, ni->type);
1295 if (unlikely(err)) {
1296 if (err == -EPERM)
1297 ntfs_debug("Attribute is not allowed to be "
1298 "non-resident.");
1299 else
1300 ntfs_debug("Attribute not defined on the NTFS "
1301 "volume!");
1302 return err;
1303 }
1304 /*
1305 * The size needs to be aligned to a cluster boundary for allocation
1306 * purposes.
1307 */
1308 new_size = (i_size_read(vi) + vol->cluster_size - 1) &
1309 ~(vol->cluster_size - 1);
1310 if (new_size > 0) {
1311 runlist_element *rl2;
1312
1313 /*
1314 * Will need the page later and since the page lock nests
1315 * outside all ntfs locks, we need to get the page now.
1316 */
1317 page = find_or_create_page(vi->i_mapping, 0,
1318 mapping_gfp_mask(vi->i_mapping));
1319 if (unlikely(!page))
1320 return -ENOMEM;
1321 /* Start by allocating clusters to hold the attribute value. */
1322 rl = ntfs_cluster_alloc(vol, 0, new_size >>
1323 vol->cluster_size_bits, -1, DATA_ZONE);
1324 if (IS_ERR(rl)) {
1325 err = PTR_ERR(rl);
1326 ntfs_debug("Failed to allocate cluster%s, error code "
1327 "%i.", (new_size >>
1328 vol->cluster_size_bits) > 1 ? "s" : "",
1329 err);
1330 goto page_err_out;
1331 }
1332 /* Change the runlist terminator to LCN_ENOENT. */
1333 rl2 = rl;
1334 while (rl2->length)
1335 rl2++;
1336 BUG_ON(rl2->lcn != LCN_RL_NOT_MAPPED);
1337 rl2->lcn = LCN_ENOENT;
1338 } else {
1339 rl = NULL;
1340 page = NULL;
1341 }
1342 /* Determine the size of the mapping pairs array. */
1343 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0, -1);
1344 if (unlikely(mp_size < 0)) {
1345 err = mp_size;
1346 ntfs_debug("Failed to get size for mapping pairs array, error "
1347 "code %i.", err);
1348 goto rl_err_out;
1349 }
1350 down_write(&ni->runlist.lock);
1351 if (!NInoAttr(ni))
1352 base_ni = ni;
1353 else
1354 base_ni = ni->ext.base_ntfs_ino;
1355 m = map_mft_record(base_ni);
1356 if (IS_ERR(m)) {
1357 err = PTR_ERR(m);
1358 m = NULL;
1359 ctx = NULL;
1360 goto err_out;
1361 }
1362 ctx = ntfs_attr_get_search_ctx(base_ni, m);
1363 if (unlikely(!ctx)) {
1364 err = -ENOMEM;
1365 goto err_out;
1366 }
1367 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1368 CASE_SENSITIVE, 0, NULL, 0, ctx);
1369 if (unlikely(err)) {
1370 if (err == -ENOENT)
1371 err = -EIO;
1372 goto err_out;
1373 }
1374 m = ctx->mrec;
1375 a = ctx->attr;
1376 BUG_ON(NInoNonResident(ni));
1377 BUG_ON(a->non_resident);
1378 /*
1379 * Calculate new offsets for the name and the mapping pairs array.
1380 * We assume the attribute is not compressed or sparse.
1381 */
1382 name_ofs = (offsetof(ATTR_REC,
1383 data.non_resident.compressed_size) + 7) & ~7;
1384 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1385 /*
1386 * Determine the size of the resident part of the now non-resident
1387 * attribute record.
1388 */
1389 arec_size = (mp_ofs + mp_size + 7) & ~7;
1390 /*
1391 * If the page is not uptodate bring it uptodate by copying from the
1392 * attribute value.
1393 */
1394 attr_size = le32_to_cpu(a->data.resident.value_length);
1395 BUG_ON(attr_size != i_size_read(vi));
1396 if (page && !PageUptodate(page)) {
1397 kaddr = kmap_atomic(page, KM_USER0);
1398 memcpy(kaddr, (u8*)a +
1399 le16_to_cpu(a->data.resident.value_offset),
1400 attr_size);
1401 memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
1402 kunmap_atomic(kaddr, KM_USER0);
1403 flush_dcache_page(page);
1404 SetPageUptodate(page);
1405 }
1406 /* Backup the attribute flag. */
1407 old_res_attr_flags = a->data.resident.flags;
1408 /* Resize the resident part of the attribute record. */
1409 err = ntfs_attr_record_resize(m, a, arec_size);
1410 if (unlikely(err))
1411 goto err_out;
1412 /*
1413 * Convert the resident part of the attribute record to describe a
1414 * non-resident attribute.
1415 */
1416 a->non_resident = 1;
1417 /* Move the attribute name if it exists and update the offset. */
1418 if (a->name_length)
1419 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1420 a->name_length * sizeof(ntfschar));
1421 a->name_offset = cpu_to_le16(name_ofs);
1422 /*
1423 * FIXME: For now just clear all of these as we do not support them
1424 * when writing.
1425 */
1426 a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
1427 ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
1428 /* Setup the fields specific to non-resident attributes. */
1429 a->data.non_resident.lowest_vcn = 0;
1430 a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
1431 vol->cluster_size_bits);
1432 a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
1433 a->data.non_resident.compression_unit = 0;
1434 memset(&a->data.non_resident.reserved, 0,
1435 sizeof(a->data.non_resident.reserved));
1436 a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
1437 a->data.non_resident.data_size =
1438 a->data.non_resident.initialized_size =
1439 cpu_to_sle64(attr_size);
1440 /* Generate the mapping pairs array into the attribute record. */
1441 err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
1442 arec_size - mp_ofs, rl, 0, -1, NULL);
1443 if (unlikely(err)) {
1444 ntfs_debug("Failed to build mapping pairs, error code %i.",
1445 err);
1446 goto undo_err_out;
1447 }
1448 /* Setup the in-memory attribute structure to be non-resident. */
1449 /*
1450 * FIXME: For now just clear all of these as we do not support them
1451 * when writing.
1452 */
1453 NInoClearSparse(ni);
1454 NInoClearEncrypted(ni);
1455 NInoClearCompressed(ni);
1456 ni->runlist.rl = rl;
1457 write_lock_irqsave(&ni->size_lock, flags);
1458 ni->allocated_size = new_size;
1459 write_unlock_irqrestore(&ni->size_lock, flags);
1460 /*
1461 * This needs to be last since the address space operations ->readpage
1462 * and ->writepage can run concurrently with us as they are not
1463 * serialized on i_sem. Note, we are not allowed to fail once we flip
1464 * this switch, which is another reason to do this last.
1465 */
1466 NInoSetNonResident(ni);
1467 /* Mark the mft record dirty, so it gets written back. */
1468 flush_dcache_mft_record_page(ctx->ntfs_ino);
1469 mark_mft_record_dirty(ctx->ntfs_ino);
1470 ntfs_attr_put_search_ctx(ctx);
1471 unmap_mft_record(base_ni);
1472 up_write(&ni->runlist.lock);
1473 if (page) {
1474 set_page_dirty(page);
1475 unlock_page(page);
1476 mark_page_accessed(page);
1477 page_cache_release(page);
1478 }
1479 ntfs_debug("Done.");
1480 return 0;
1481undo_err_out:
1482 /* Convert the attribute back into a resident attribute. */
1483 a->non_resident = 0;
1484 /* Move the attribute name if it exists and update the offset. */
1485 name_ofs = (offsetof(ATTR_RECORD, data.resident.reserved) +
1486 sizeof(a->data.resident.reserved) + 7) & ~7;
1487 if (a->name_length)
1488 memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
1489 a->name_length * sizeof(ntfschar));
1490 mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
1491 a->name_offset = cpu_to_le16(name_ofs);
1492 arec_size = (mp_ofs + attr_size + 7) & ~7;
1493 /* Resize the resident part of the attribute record. */
1494 err2 = ntfs_attr_record_resize(m, a, arec_size);
1495 if (unlikely(err2)) {
1496 /*
1497 * This cannot happen (well if memory corruption is at work it
1498 * could happen in theory), but deal with it as well as we can.
1499 * If the old size is too small, truncate the attribute,
1500 * otherwise simply give it a larger allocated size.
1501 * FIXME: Should check whether chkdsk complains when the
1502 * allocated size is much bigger than the resident value size.
1503 */
1504 arec_size = le32_to_cpu(a->length);
1505 if ((mp_ofs + attr_size) > arec_size) {
1506 err2 = attr_size;
1507 attr_size = arec_size - mp_ofs;
1508 ntfs_error(vol->sb, "Failed to undo partial resident "
1509 "to non-resident attribute "
1510 "conversion. Truncating inode 0x%lx, "
1511 "attribute type 0x%x from %i bytes to "
1512 "%i bytes to maintain metadata "
1513 "consistency. THIS MEANS YOU ARE "
1514 "LOSING %i BYTES DATA FROM THIS %s.",
1515 vi->i_ino,
1516 (unsigned)le32_to_cpu(ni->type),
1517 err2, attr_size, err2 - attr_size,
1518 ((ni->type == AT_DATA) &&
1519 !ni->name_len) ? "FILE": "ATTRIBUTE");
1520 write_lock_irqsave(&ni->size_lock, flags);
1521 ni->initialized_size = attr_size;
1522 i_size_write(vi, attr_size);
1523 write_unlock_irqrestore(&ni->size_lock, flags);
1524 }
1525 }
1526 /* Setup the fields specific to resident attributes. */
1527 a->data.resident.value_length = cpu_to_le32(attr_size);
1528 a->data.resident.value_offset = cpu_to_le16(mp_ofs);
1529 a->data.resident.flags = old_res_attr_flags;
1530 memset(&a->data.resident.reserved, 0,
1531 sizeof(a->data.resident.reserved));
1532 /* Copy the data from the page back to the attribute value. */
1533 if (page) {
1534 kaddr = kmap_atomic(page, KM_USER0);
1535 memcpy((u8*)a + mp_ofs, kaddr, attr_size);
1536 kunmap_atomic(kaddr, KM_USER0);
1537 }
1538 /* Setup the allocated size in the ntfs inode in case it changed. */
1539 write_lock_irqsave(&ni->size_lock, flags);
1540 ni->allocated_size = arec_size - mp_ofs;
1541 write_unlock_irqrestore(&ni->size_lock, flags);
1542 /* Mark the mft record dirty, so it gets written back. */
1543 flush_dcache_mft_record_page(ctx->ntfs_ino);
1544 mark_mft_record_dirty(ctx->ntfs_ino);
1545err_out:
1546 if (ctx)
1547 ntfs_attr_put_search_ctx(ctx);
1548 if (m)
1549 unmap_mft_record(base_ni);
1550 ni->runlist.rl = NULL;
1551 up_write(&ni->runlist.lock);
1552rl_err_out:
1553 if (rl) {
1554 if (ntfs_cluster_free_from_rl(vol, rl) < 0) {
1555 ntfs_error(vol->sb, "Failed to release allocated "
1556 "cluster(s) in error code path. Run "
1557 "chkdsk to recover the lost "
1558 "cluster(s).");
1559 NVolSetErrors(vol);
1560 }
1561 ntfs_free(rl);
1562page_err_out:
1563 unlock_page(page);
1564 page_cache_release(page);
1565 }
1566 if (err == -EINVAL)
1567 err = -EIO;
1568 return err;
1569}
1570
1571/**
1120 * ntfs_attr_set - fill (a part of) an attribute with a byte 1572 * ntfs_attr_set - fill (a part of) an attribute with a byte
1121 * @ni: ntfs inode describing the attribute to fill 1573 * @ni: ntfs inode describing the attribute to fill
1122 * @ofs: offset inside the attribute at which to start to fill 1574 * @ofs: offset inside the attribute at which to start to fill
@@ -1127,6 +1579,10 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
1127 * byte offset @ofs inside the attribute with the constant byte @val. 1579 * byte offset @ofs inside the attribute with the constant byte @val.
1128 * 1580 *
1129 * This function is effectively like memset() applied to an ntfs attribute. 1581 * This function is effectively like memset() applied to an ntfs attribute.
1582 * Note thie function actually only operates on the page cache pages belonging
1583 * to the ntfs attribute and it marks them dirty after doing the memset().
1584 * Thus it relies on the vm dirty page write code paths to cause the modified
1585 * pages to be written to the mft record/disk.
1130 * 1586 *
1131 * Return 0 on success and -errno on error. An error code of -ESPIPE means 1587 * Return 0 on success and -errno on error. An error code of -ESPIPE means
1132 * that @ofs + @cnt were outside the end of the attribute and no write was 1588 * that @ofs + @cnt were outside the end of the attribute and no write was
@@ -1155,7 +1611,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
1155 end = ofs + cnt; 1611 end = ofs + cnt;
1156 end_ofs = end & ~PAGE_CACHE_MASK; 1612 end_ofs = end & ~PAGE_CACHE_MASK;
1157 /* If the end is outside the inode size return -ESPIPE. */ 1613 /* If the end is outside the inode size return -ESPIPE. */
1158 if (unlikely(end > VFS_I(ni)->i_size)) { 1614 if (unlikely(end > i_size_read(VFS_I(ni)))) {
1159 ntfs_error(vol->sb, "Request exceeds end of attribute."); 1615 ntfs_error(vol->sb, "Request exceeds end of attribute.");
1160 return -ESPIPE; 1616 return -ESPIPE;
1161 } 1617 }
@@ -1256,3 +1712,5 @@ done:
1256 ntfs_debug("Done."); 1712 ntfs_debug("Done.");
1257 return 0; 1713 return 0;
1258} 1714}
1715
1716#endif /* NTFS_RW */
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index e0c2c6c81bc0..0e4ac6d3c0e7 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -2,7 +2,7 @@
2 * attrib.h - Defines for attribute handling in NTFS Linux kernel driver. 2 * attrib.h - Defines for attribute handling in NTFS Linux kernel driver.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -60,10 +60,14 @@ typedef struct {
60 ATTR_RECORD *base_attr; 60 ATTR_RECORD *base_attr;
61} ntfs_attr_search_ctx; 61} ntfs_attr_search_ctx;
62 62
63extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
63extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); 64extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
64 65
65extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, 66extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
66 const BOOL need_write); 67 const BOOL write_locked);
68
69extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni,
70 const VCN vcn, const BOOL write_locked);
67 71
68int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, 72int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
69 const u32 name_len, const IGNORE_CASE_BOOL ic, 73 const u32 name_len, const IGNORE_CASE_BOOL ic,
@@ -85,6 +89,8 @@ extern ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni,
85 MFT_RECORD *mrec); 89 MFT_RECORD *mrec);
86extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx); 90extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx);
87 91
92#ifdef NTFS_RW
93
88extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol, 94extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol,
89 const ATTR_TYPE type, const s64 size); 95 const ATTR_TYPE type, const s64 size);
90extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, 96extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol,
@@ -94,7 +100,11 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
94 100
95extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); 101extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
96 102
103extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
104
97extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, 105extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt,
98 const u8 val); 106 const u8 val);
99 107
108#endif /* NTFS_RW */
109
100#endif /* _LINUX_NTFS_ATTRIB_H */ 110#endif /* _LINUX_NTFS_ATTRIB_H */
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index ee5ae706f861..6d265cfd49aa 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -96,13 +96,14 @@ void free_compression_buffers(void)
96/** 96/**
97 * zero_partial_compressed_page - zero out of bounds compressed page region 97 * zero_partial_compressed_page - zero out of bounds compressed page region
98 */ 98 */
99static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page) 99static void zero_partial_compressed_page(struct page *page,
100 const s64 initialized_size)
100{ 101{
101 u8 *kp = page_address(page); 102 u8 *kp = page_address(page);
102 unsigned int kp_ofs; 103 unsigned int kp_ofs;
103 104
104 ntfs_debug("Zeroing page region outside initialized size."); 105 ntfs_debug("Zeroing page region outside initialized size.");
105 if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) { 106 if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) {
106 /* 107 /*
107 * FIXME: Using clear_page() will become wrong when we get 108 * FIXME: Using clear_page() will become wrong when we get
108 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. 109 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
@@ -110,7 +111,7 @@ static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
110 clear_page(kp); 111 clear_page(kp);
111 return; 112 return;
112 } 113 }
113 kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK; 114 kp_ofs = initialized_size & ~PAGE_CACHE_MASK;
114 memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs); 115 memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
115 return; 116 return;
116} 117}
@@ -118,12 +119,12 @@ static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page)
118/** 119/**
119 * handle_bounds_compressed_page - test for&handle out of bounds compressed page 120 * handle_bounds_compressed_page - test for&handle out of bounds compressed page
120 */ 121 */
121static inline void handle_bounds_compressed_page(ntfs_inode *ni, 122static inline void handle_bounds_compressed_page(struct page *page,
122 struct page *page) 123 const loff_t i_size, const s64 initialized_size)
123{ 124{
124 if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) && 125 if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) &&
125 (ni->initialized_size < VFS_I(ni)->i_size)) 126 (initialized_size < i_size))
126 zero_partial_compressed_page(ni, page); 127 zero_partial_compressed_page(page, initialized_size);
127 return; 128 return;
128} 129}
129 130
@@ -138,6 +139,8 @@ static inline void handle_bounds_compressed_page(ntfs_inode *ni,
138 * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT) 139 * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT)
139 * @cb_start: compression block to decompress (IN) 140 * @cb_start: compression block to decompress (IN)
140 * @cb_size: size of compression block @cb_start in bytes (IN) 141 * @cb_size: size of compression block @cb_start in bytes (IN)
142 * @i_size: file size when we started the read (IN)
143 * @initialized_size: initialized file size when we started the read (IN)
141 * 144 *
142 * The caller must have disabled preemption. ntfs_decompress() reenables it when 145 * The caller must have disabled preemption. ntfs_decompress() reenables it when
143 * the critical section is finished. 146 * the critical section is finished.
@@ -165,7 +168,8 @@ static inline void handle_bounds_compressed_page(ntfs_inode *ni,
165static int ntfs_decompress(struct page *dest_pages[], int *dest_index, 168static int ntfs_decompress(struct page *dest_pages[], int *dest_index,
166 int *dest_ofs, const int dest_max_index, const int dest_max_ofs, 169 int *dest_ofs, const int dest_max_index, const int dest_max_ofs,
167 const int xpage, char *xpage_done, u8 *const cb_start, 170 const int xpage, char *xpage_done, u8 *const cb_start,
168 const u32 cb_size) 171 const u32 cb_size, const loff_t i_size,
172 const s64 initialized_size)
169{ 173{
170 /* 174 /*
171 * Pointers into the compressed data, i.e. the compression block (cb), 175 * Pointers into the compressed data, i.e. the compression block (cb),
@@ -219,9 +223,6 @@ return_error:
219 spin_unlock(&ntfs_cb_lock); 223 spin_unlock(&ntfs_cb_lock);
220 /* Second stage: finalize completed pages. */ 224 /* Second stage: finalize completed pages. */
221 if (nr_completed_pages > 0) { 225 if (nr_completed_pages > 0) {
222 struct page *page = dest_pages[completed_pages[0]];
223 ntfs_inode *ni = NTFS_I(page->mapping->host);
224
225 for (i = 0; i < nr_completed_pages; i++) { 226 for (i = 0; i < nr_completed_pages; i++) {
226 int di = completed_pages[i]; 227 int di = completed_pages[i];
227 228
@@ -230,7 +231,8 @@ return_error:
230 * If we are outside the initialized size, zero 231 * If we are outside the initialized size, zero
231 * the out of bounds page range. 232 * the out of bounds page range.
232 */ 233 */
233 handle_bounds_compressed_page(ni, dp); 234 handle_bounds_compressed_page(dp, i_size,
235 initialized_size);
234 flush_dcache_page(dp); 236 flush_dcache_page(dp);
235 kunmap(dp); 237 kunmap(dp);
236 SetPageUptodate(dp); 238 SetPageUptodate(dp);
@@ -478,12 +480,14 @@ return_overflow:
478 */ 480 */
479int ntfs_read_compressed_block(struct page *page) 481int ntfs_read_compressed_block(struct page *page)
480{ 482{
483 loff_t i_size;
484 s64 initialized_size;
481 struct address_space *mapping = page->mapping; 485 struct address_space *mapping = page->mapping;
482 ntfs_inode *ni = NTFS_I(mapping->host); 486 ntfs_inode *ni = NTFS_I(mapping->host);
483 ntfs_volume *vol = ni->vol; 487 ntfs_volume *vol = ni->vol;
484 struct super_block *sb = vol->sb; 488 struct super_block *sb = vol->sb;
485 runlist_element *rl; 489 runlist_element *rl;
486 unsigned long block_size = sb->s_blocksize; 490 unsigned long flags, block_size = sb->s_blocksize;
487 unsigned char block_size_bits = sb->s_blocksize_bits; 491 unsigned char block_size_bits = sb->s_blocksize_bits;
488 u8 *cb, *cb_pos, *cb_end; 492 u8 *cb, *cb_pos, *cb_end;
489 struct buffer_head **bhs; 493 struct buffer_head **bhs;
@@ -552,8 +556,12 @@ int ntfs_read_compressed_block(struct page *page)
552 * The remaining pages need to be allocated and inserted into the page 556 * The remaining pages need to be allocated and inserted into the page
553 * cache, alignment guarantees keep all the below much simpler. (-8 557 * cache, alignment guarantees keep all the below much simpler. (-8
554 */ 558 */
555 max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> 559 read_lock_irqsave(&ni->size_lock, flags);
556 PAGE_CACHE_SHIFT) - offset; 560 i_size = i_size_read(VFS_I(ni));
561 initialized_size = ni->initialized_size;
562 read_unlock_irqrestore(&ni->size_lock, flags);
563 max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
564 offset;
557 if (nr_pages < max_page) 565 if (nr_pages < max_page)
558 max_page = nr_pages; 566 max_page = nr_pages;
559 for (i = 0; i < max_page; i++, offset++) { 567 for (i = 0; i < max_page; i++, offset++) {
@@ -824,7 +832,8 @@ lock_retry_remap:
824 * If we are outside the initialized size, zero 832 * If we are outside the initialized size, zero
825 * the out of bounds page range. 833 * the out of bounds page range.
826 */ 834 */
827 handle_bounds_compressed_page(ni, page); 835 handle_bounds_compressed_page(page, i_size,
836 initialized_size);
828 flush_dcache_page(page); 837 flush_dcache_page(page);
829 kunmap(page); 838 kunmap(page);
830 SetPageUptodate(page); 839 SetPageUptodate(page);
@@ -847,7 +856,8 @@ lock_retry_remap:
847 ntfs_debug("Found compressed compression block."); 856 ntfs_debug("Found compressed compression block.");
848 err = ntfs_decompress(pages, &cur_page, &cur_ofs, 857 err = ntfs_decompress(pages, &cur_page, &cur_ofs,
849 cb_max_page, cb_max_ofs, xpage, &xpage_done, 858 cb_max_page, cb_max_ofs, xpage, &xpage_done,
850 cb_pos, cb_size - (cb_pos - cb)); 859 cb_pos, cb_size - (cb_pos - cb), i_size,
860 initialized_size);
851 /* 861 /*
852 * We can sleep from now on, lock already dropped by 862 * We can sleep from now on, lock already dropped by
853 * ntfs_decompress(). 863 * ntfs_decompress().
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 6fb6bb5e3723..807150e2c2b9 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -164,14 +164,17 @@ void ntfs_debug_dump_runlist(const runlist_element *rl)
164 if (index > -LCN_ENOENT - 1) 164 if (index > -LCN_ENOENT - 1)
165 index = 3; 165 index = 3;
166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", 166 printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n",
167 (rl + i)->vcn, lcn_str[index], 167 (long long)(rl + i)->vcn, lcn_str[index],
168 (rl + i)->length, (rl + i)->length ? 168 (long long)(rl + i)->length,
169 "" : " (runlist end)"); 169 (rl + i)->length ? "" :
170 " (runlist end)");
170 } else 171 } else
171 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", 172 printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n",
172 (rl + i)->vcn, (rl + i)->lcn, 173 (long long)(rl + i)->vcn,
173 (rl + i)->length, (rl + i)->length ? 174 (long long)(rl + i)->lcn,
174 "" : " (runlist end)"); 175 (long long)(rl + i)->length,
176 (rl + i)->length ? "" :
177 " (runlist end)");
175 if (!(rl + i)->length) 178 if (!(rl + i)->length)
176 break; 179 break;
177 } 180 }
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 93577561cdbe..46779471c542 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. 2 * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -183,8 +183,7 @@ found_it:
183 name->len = 0; 183 name->len = 0;
184 *res = name; 184 *res = name;
185 } else { 185 } else {
186 if (name) 186 kfree(name);
187 kfree(name);
188 *res = NULL; 187 *res = NULL;
189 } 188 }
190 mref = le64_to_cpu(ie->data.dir.indexed_file); 189 mref = le64_to_cpu(ie->data.dir.indexed_file);
@@ -444,8 +443,7 @@ found_it2:
444 name->len = 0; 443 name->len = 0;
445 *res = name; 444 *res = name;
446 } else { 445 } else {
447 if (name) 446 kfree(name);
448 kfree(name);
449 *res = NULL; 447 *res = NULL;
450 } 448 }
451 mref = le64_to_cpu(ie->data.dir.indexed_file); 449 mref = le64_to_cpu(ie->data.dir.indexed_file);
@@ -610,7 +608,7 @@ dir_err_out:
610// TODO: (AIA) 608// TODO: (AIA)
611// The algorithm embedded in this code will be required for the time when we 609// The algorithm embedded in this code will be required for the time when we
612// want to support adding of entries to directories, where we require correct 610// want to support adding of entries to directories, where we require correct
613// collation of file names in order not to cause corruption of the file system. 611// collation of file names in order not to cause corruption of the filesystem.
614 612
615/** 613/**
616 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 614 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
@@ -1101,7 +1099,7 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1101static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 1099static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1102{ 1100{
1103 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; 1101 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1104 loff_t fpos; 1102 loff_t fpos, i_size;
1105 struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; 1103 struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode;
1106 struct super_block *sb = vdir->i_sb; 1104 struct super_block *sb = vdir->i_sb;
1107 ntfs_inode *ndir = NTFS_I(vdir); 1105 ntfs_inode *ndir = NTFS_I(vdir);
@@ -1122,7 +1120,8 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1122 vdir->i_ino, fpos); 1120 vdir->i_ino, fpos);
1123 rc = err = 0; 1121 rc = err = 0;
1124 /* Are we at end of dir yet? */ 1122 /* Are we at end of dir yet? */
1125 if (fpos >= vdir->i_size + vol->mft_record_size) 1123 i_size = i_size_read(vdir);
1124 if (fpos >= i_size + vol->mft_record_size)
1126 goto done; 1125 goto done;
1127 /* Emulate . and .. for all directories. */ 1126 /* Emulate . and .. for all directories. */
1128 if (!fpos) { 1127 if (!fpos) {
@@ -1264,7 +1263,7 @@ skip_index_root:
1264 bmp_mapping = bmp_vi->i_mapping; 1263 bmp_mapping = bmp_vi->i_mapping;
1265 /* Get the starting bitmap bit position and sanity check it. */ 1264 /* Get the starting bitmap bit position and sanity check it. */
1266 bmp_pos = ia_pos >> ndir->itype.index.block_size_bits; 1265 bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
1267 if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) { 1266 if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
1268 ntfs_error(sb, "Current index allocation position exceeds " 1267 ntfs_error(sb, "Current index allocation position exceeds "
1269 "index bitmap size."); 1268 "index bitmap size.");
1270 goto err_out; 1269 goto err_out;
@@ -1301,7 +1300,7 @@ find_next_index_buffer:
1301 goto get_next_bmp_page; 1300 goto get_next_bmp_page;
1302 } 1301 }
1303 /* If we have reached the end of the bitmap, we are done. */ 1302 /* If we have reached the end of the bitmap, we are done. */
1304 if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size)) 1303 if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
1305 goto unm_EOD; 1304 goto unm_EOD;
1306 ia_pos = (bmp_pos + cur_bmp_pos) << 1305 ia_pos = (bmp_pos + cur_bmp_pos) <<
1307 ndir->itype.index.block_size_bits; 1306 ndir->itype.index.block_size_bits;
@@ -1309,7 +1308,8 @@ find_next_index_buffer:
1309 ntfs_debug("Handling index buffer 0x%llx.", 1308 ntfs_debug("Handling index buffer 0x%llx.",
1310 (unsigned long long)bmp_pos + cur_bmp_pos); 1309 (unsigned long long)bmp_pos + cur_bmp_pos);
1311 /* If the current index buffer is in the same page we reuse the page. */ 1310 /* If the current index buffer is in the same page we reuse the page. */
1312 if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) { 1311 if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
1312 (ia_pos & (s64)PAGE_CACHE_MASK)) {
1313 prev_ia_pos = ia_pos; 1313 prev_ia_pos = ia_pos;
1314 if (likely(ia_page != NULL)) { 1314 if (likely(ia_page != NULL)) {
1315 unlock_page(ia_page); 1315 unlock_page(ia_page);
@@ -1441,7 +1441,7 @@ unm_EOD:
1441 ntfs_unmap_page(bmp_page); 1441 ntfs_unmap_page(bmp_page);
1442EOD: 1442EOD:
1443 /* We are finished, set fpos to EOD. */ 1443 /* We are finished, set fpos to EOD. */
1444 fpos = vdir->i_size + vol->mft_record_size; 1444 fpos = i_size + vol->mft_record_size;
1445abort: 1445abort:
1446 kfree(name); 1446 kfree(name);
1447done: 1447done:
@@ -1461,10 +1461,8 @@ err_out:
1461 unlock_page(ia_page); 1461 unlock_page(ia_page);
1462 ntfs_unmap_page(ia_page); 1462 ntfs_unmap_page(ia_page);
1463 } 1463 }
1464 if (ir) 1464 kfree(ir);
1465 kfree(ir); 1465 kfree(name);
1466 if (name)
1467 kfree(name);
1468 if (ctx) 1466 if (ctx)
1469 ntfs_attr_put_search_ctx(ctx); 1467 ntfs_attr_put_search_ctx(ctx);
1470 if (m) 1468 if (m)
@@ -1495,7 +1493,7 @@ err_out:
1495static int ntfs_dir_open(struct inode *vi, struct file *filp) 1493static int ntfs_dir_open(struct inode *vi, struct file *filp)
1496{ 1494{
1497 if (sizeof(unsigned long) < 8) { 1495 if (sizeof(unsigned long) < 8) {
1498 if (vi->i_size > MAX_LFS_FILESIZE) 1496 if (i_size_read(vi) > MAX_LFS_FILESIZE)
1499 return -EFBIG; 1497 return -EFBIG;
1500 } 1498 }
1501 return 0; 1499 return 0;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db8713ea0d27..e0f530ce6b99 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -47,7 +47,7 @@
47static int ntfs_file_open(struct inode *vi, struct file *filp) 47static int ntfs_file_open(struct inode *vi, struct file *filp)
48{ 48{
49 if (sizeof(unsigned long) < 8) { 49 if (sizeof(unsigned long) < 8) {
50 if (vi->i_size > MAX_LFS_FILESIZE) 50 if (i_size_read(vi) > MAX_LFS_FILESIZE)
51 return -EFBIG; 51 return -EFBIG;
52 } 52 }
53 return generic_file_open(vi, filp); 53 return generic_file_open(vi, filp);
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 71bd2cd7a4d9..11fd5307d780 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * index.c - NTFS kernel index handling. Part of the Linux-NTFS project. 2 * index.c - NTFS kernel index handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2004 Anton Altaparmakov 4 * Copyright (c) 2004-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -39,18 +39,8 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
39 ntfs_index_context *ictx; 39 ntfs_index_context *ictx;
40 40
41 ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS); 41 ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS);
42 if (ictx) { 42 if (ictx)
43 ictx->idx_ni = idx_ni; 43 *ictx = (ntfs_index_context){ .idx_ni = idx_ni };
44 ictx->entry = NULL;
45 ictx->data = NULL;
46 ictx->data_len = 0;
47 ictx->is_in_root = 0;
48 ictx->ir = NULL;
49 ictx->actx = NULL;
50 ictx->base_ni = NULL;
51 ictx->ia = NULL;
52 ictx->page = NULL;
53 }
54 return ictx; 44 return ictx;
55} 45}
56 46
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 31840ba0b38c..886214a77f90 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. 2 * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -174,7 +174,7 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
174 174
175 vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode, 175 vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode,
176 (set_t)ntfs_init_locked_inode, &na); 176 (set_t)ntfs_init_locked_inode, &na);
177 if (!vi) 177 if (unlikely(!vi))
178 return ERR_PTR(-ENOMEM); 178 return ERR_PTR(-ENOMEM);
179 179
180 err = 0; 180 err = 0;
@@ -188,7 +188,7 @@ struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
188 * There is no point in keeping bad inodes around if the failure was 188 * There is no point in keeping bad inodes around if the failure was
189 * due to ENOMEM. We want to be able to retry again later. 189 * due to ENOMEM. We want to be able to retry again later.
190 */ 190 */
191 if (err == -ENOMEM) { 191 if (unlikely(err == -ENOMEM)) {
192 iput(vi); 192 iput(vi);
193 vi = ERR_PTR(err); 193 vi = ERR_PTR(err);
194 } 194 }
@@ -235,7 +235,7 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
235 235
236 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, 236 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
237 (set_t)ntfs_init_locked_inode, &na); 237 (set_t)ntfs_init_locked_inode, &na);
238 if (!vi) 238 if (unlikely(!vi))
239 return ERR_PTR(-ENOMEM); 239 return ERR_PTR(-ENOMEM);
240 240
241 err = 0; 241 err = 0;
@@ -250,7 +250,7 @@ struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
250 * simplifies things in that we never need to check for bad attribute 250 * simplifies things in that we never need to check for bad attribute
251 * inodes elsewhere. 251 * inodes elsewhere.
252 */ 252 */
253 if (err) { 253 if (unlikely(err)) {
254 iput(vi); 254 iput(vi);
255 vi = ERR_PTR(err); 255 vi = ERR_PTR(err);
256 } 256 }
@@ -290,7 +290,7 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
290 290
291 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, 291 vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode,
292 (set_t)ntfs_init_locked_inode, &na); 292 (set_t)ntfs_init_locked_inode, &na);
293 if (!vi) 293 if (unlikely(!vi))
294 return ERR_PTR(-ENOMEM); 294 return ERR_PTR(-ENOMEM);
295 295
296 err = 0; 296 err = 0;
@@ -305,7 +305,7 @@ struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
305 * simplifies things in that we never need to check for bad index 305 * simplifies things in that we never need to check for bad index
306 * inodes elsewhere. 306 * inodes elsewhere.
307 */ 307 */
308 if (err) { 308 if (unlikely(err)) {
309 iput(vi); 309 iput(vi);
310 vi = ERR_PTR(err); 310 vi = ERR_PTR(err);
311 } 311 }
@@ -317,8 +317,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
317 ntfs_inode *ni; 317 ntfs_inode *ni;
318 318
319 ntfs_debug("Entering."); 319 ntfs_debug("Entering.");
320 ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache, 320 ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS);
321 SLAB_NOFS);
322 if (likely(ni != NULL)) { 321 if (likely(ni != NULL)) {
323 ni->state = 0; 322 ni->state = 0;
324 return VFS_I(ni); 323 return VFS_I(ni);
@@ -343,7 +342,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
343 ntfs_inode *ni; 342 ntfs_inode *ni;
344 343
345 ntfs_debug("Entering."); 344 ntfs_debug("Entering.");
346 ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); 345 ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
347 if (likely(ni != NULL)) { 346 if (likely(ni != NULL)) {
348 ni->state = 0; 347 ni->state = 0;
349 return ni; 348 return ni;
@@ -376,6 +375,7 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
376void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) 375void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
377{ 376{
378 ntfs_debug("Entering."); 377 ntfs_debug("Entering.");
378 rwlock_init(&ni->size_lock);
379 ni->initialized_size = ni->allocated_size = 0; 379 ni->initialized_size = ni->allocated_size = 0;
380 ni->seq_no = 0; 380 ni->seq_no = 0;
381 atomic_set(&ni->count, 1); 381 atomic_set(&ni->count, 1);
@@ -524,6 +524,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
524 ntfs_volume *vol = NTFS_SB(vi->i_sb); 524 ntfs_volume *vol = NTFS_SB(vi->i_sb);
525 ntfs_inode *ni; 525 ntfs_inode *ni;
526 MFT_RECORD *m; 526 MFT_RECORD *m;
527 ATTR_RECORD *a;
527 STANDARD_INFORMATION *si; 528 STANDARD_INFORMATION *si;
528 ntfs_attr_search_ctx *ctx; 529 ntfs_attr_search_ctx *ctx;
529 int err = 0; 530 int err = 0;
@@ -632,9 +633,10 @@ static int ntfs_read_locked_inode(struct inode *vi)
632 } 633 }
633 goto unm_err_out; 634 goto unm_err_out;
634 } 635 }
636 a = ctx->attr;
635 /* Get the standard information attribute value. */ 637 /* Get the standard information attribute value. */
636 si = (STANDARD_INFORMATION*)((char*)ctx->attr + 638 si = (STANDARD_INFORMATION*)((u8*)a +
637 le16_to_cpu(ctx->attr->data.resident.value_offset)); 639 le16_to_cpu(a->data.resident.value_offset));
638 640
639 /* Transfer information from the standard information into vi. */ 641 /* Transfer information from the standard information into vi. */
640 /* 642 /*
@@ -673,15 +675,16 @@ static int ntfs_read_locked_inode(struct inode *vi)
673 goto skip_attr_list_load; 675 goto skip_attr_list_load;
674 ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); 676 ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
675 NInoSetAttrList(ni); 677 NInoSetAttrList(ni);
676 if (ctx->attr->flags & ATTR_IS_ENCRYPTED || 678 a = ctx->attr;
677 ctx->attr->flags & ATTR_COMPRESSION_MASK || 679 if (a->flags & ATTR_IS_ENCRYPTED ||
678 ctx->attr->flags & ATTR_IS_SPARSE) { 680 a->flags & ATTR_COMPRESSION_MASK ||
681 a->flags & ATTR_IS_SPARSE) {
679 ntfs_error(vi->i_sb, "Attribute list attribute is " 682 ntfs_error(vi->i_sb, "Attribute list attribute is "
680 "compressed/encrypted/sparse."); 683 "compressed/encrypted/sparse.");
681 goto unm_err_out; 684 goto unm_err_out;
682 } 685 }
683 /* Now allocate memory for the attribute list. */ 686 /* Now allocate memory for the attribute list. */
684 ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); 687 ni->attr_list_size = (u32)ntfs_attr_size(a);
685 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); 688 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
686 if (!ni->attr_list) { 689 if (!ni->attr_list) {
687 ntfs_error(vi->i_sb, "Not enough memory to allocate " 690 ntfs_error(vi->i_sb, "Not enough memory to allocate "
@@ -689,9 +692,9 @@ static int ntfs_read_locked_inode(struct inode *vi)
689 err = -ENOMEM; 692 err = -ENOMEM;
690 goto unm_err_out; 693 goto unm_err_out;
691 } 694 }
692 if (ctx->attr->non_resident) { 695 if (a->non_resident) {
693 NInoSetAttrListNonResident(ni); 696 NInoSetAttrListNonResident(ni);
694 if (ctx->attr->data.non_resident.lowest_vcn) { 697 if (a->data.non_resident.lowest_vcn) {
695 ntfs_error(vi->i_sb, "Attribute list has non " 698 ntfs_error(vi->i_sb, "Attribute list has non "
696 "zero lowest_vcn."); 699 "zero lowest_vcn.");
697 goto unm_err_out; 700 goto unm_err_out;
@@ -701,7 +704,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
701 * exclusive access to the inode at this time. 704 * exclusive access to the inode at this time.
702 */ 705 */
703 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, 706 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
704 ctx->attr, NULL); 707 a, NULL);
705 if (IS_ERR(ni->attr_list_rl.rl)) { 708 if (IS_ERR(ni->attr_list_rl.rl)) {
706 err = PTR_ERR(ni->attr_list_rl.rl); 709 err = PTR_ERR(ni->attr_list_rl.rl);
707 ni->attr_list_rl.rl = NULL; 710 ni->attr_list_rl.rl = NULL;
@@ -712,27 +715,26 @@ static int ntfs_read_locked_inode(struct inode *vi)
712 /* Now load the attribute list. */ 715 /* Now load the attribute list. */
713 if ((err = load_attribute_list(vol, &ni->attr_list_rl, 716 if ((err = load_attribute_list(vol, &ni->attr_list_rl,
714 ni->attr_list, ni->attr_list_size, 717 ni->attr_list, ni->attr_list_size,
715 sle64_to_cpu(ctx->attr->data. 718 sle64_to_cpu(a->data.non_resident.
716 non_resident.initialized_size)))) { 719 initialized_size)))) {
717 ntfs_error(vi->i_sb, "Failed to load " 720 ntfs_error(vi->i_sb, "Failed to load "
718 "attribute list attribute."); 721 "attribute list attribute.");
719 goto unm_err_out; 722 goto unm_err_out;
720 } 723 }
721 } else /* if (!ctx.attr->non_resident) */ { 724 } else /* if (!a->non_resident) */ {
722 if ((u8*)ctx->attr + le16_to_cpu( 725 if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
723 ctx->attr->data.resident.value_offset) + 726 + le32_to_cpu(
724 le32_to_cpu( 727 a->data.resident.value_length) >
725 ctx->attr->data.resident.value_length) >
726 (u8*)ctx->mrec + vol->mft_record_size) { 728 (u8*)ctx->mrec + vol->mft_record_size) {
727 ntfs_error(vi->i_sb, "Corrupt attribute list " 729 ntfs_error(vi->i_sb, "Corrupt attribute list "
728 "in inode."); 730 "in inode.");
729 goto unm_err_out; 731 goto unm_err_out;
730 } 732 }
731 /* Now copy the attribute list. */ 733 /* Now copy the attribute list. */
732 memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( 734 memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
733 ctx->attr->data.resident.value_offset), 735 a->data.resident.value_offset),
734 le32_to_cpu( 736 le32_to_cpu(
735 ctx->attr->data.resident.value_length)); 737 a->data.resident.value_length));
736 } 738 }
737 } 739 }
738skip_attr_list_load: 740skip_attr_list_load:
@@ -741,10 +743,11 @@ skip_attr_list_load:
741 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes. 743 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
742 */ 744 */
743 if (S_ISDIR(vi->i_mode)) { 745 if (S_ISDIR(vi->i_mode)) {
746 loff_t bvi_size;
744 struct inode *bvi; 747 struct inode *bvi;
745 ntfs_inode *bni; 748 ntfs_inode *bni;
746 INDEX_ROOT *ir; 749 INDEX_ROOT *ir;
747 char *ir_end, *index_end; 750 u8 *ir_end, *index_end;
748 751
749 /* It is a directory, find index root attribute. */ 752 /* It is a directory, find index root attribute. */
750 ntfs_attr_reinit_search_ctx(ctx); 753 ntfs_attr_reinit_search_ctx(ctx);
@@ -760,17 +763,16 @@ skip_attr_list_load:
760 } 763 }
761 goto unm_err_out; 764 goto unm_err_out;
762 } 765 }
766 a = ctx->attr;
763 /* Set up the state. */ 767 /* Set up the state. */
764 if (unlikely(ctx->attr->non_resident)) { 768 if (unlikely(a->non_resident)) {
765 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not " 769 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
766 "resident."); 770 "resident.");
767 goto unm_err_out; 771 goto unm_err_out;
768 } 772 }
769 /* Ensure the attribute name is placed before the value. */ 773 /* Ensure the attribute name is placed before the value. */
770 if (unlikely(ctx->attr->name_length && 774 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
771 (le16_to_cpu(ctx->attr->name_offset) >= 775 le16_to_cpu(a->data.resident.value_offset)))) {
772 le16_to_cpu(ctx->attr->data.resident.
773 value_offset)))) {
774 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is " 776 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
775 "placed after the attribute value."); 777 "placed after the attribute value.");
776 goto unm_err_out; 778 goto unm_err_out;
@@ -781,28 +783,27 @@ skip_attr_list_load:
781 * encrypted. However index root cannot be both compressed and 783 * encrypted. However index root cannot be both compressed and
782 * encrypted. 784 * encrypted.
783 */ 785 */
784 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) 786 if (a->flags & ATTR_COMPRESSION_MASK)
785 NInoSetCompressed(ni); 787 NInoSetCompressed(ni);
786 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 788 if (a->flags & ATTR_IS_ENCRYPTED) {
787 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 789 if (a->flags & ATTR_COMPRESSION_MASK) {
788 ntfs_error(vi->i_sb, "Found encrypted and " 790 ntfs_error(vi->i_sb, "Found encrypted and "
789 "compressed attribute."); 791 "compressed attribute.");
790 goto unm_err_out; 792 goto unm_err_out;
791 } 793 }
792 NInoSetEncrypted(ni); 794 NInoSetEncrypted(ni);
793 } 795 }
794 if (ctx->attr->flags & ATTR_IS_SPARSE) 796 if (a->flags & ATTR_IS_SPARSE)
795 NInoSetSparse(ni); 797 NInoSetSparse(ni);
796 ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu( 798 ir = (INDEX_ROOT*)((u8*)a +
797 ctx->attr->data.resident.value_offset)); 799 le16_to_cpu(a->data.resident.value_offset));
798 ir_end = (char*)ir + le32_to_cpu( 800 ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
799 ctx->attr->data.resident.value_length); 801 if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
800 if (ir_end > (char*)ctx->mrec + vol->mft_record_size) {
801 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " 802 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
802 "corrupt."); 803 "corrupt.");
803 goto unm_err_out; 804 goto unm_err_out;
804 } 805 }
805 index_end = (char*)&ir->index + 806 index_end = (u8*)&ir->index +
806 le32_to_cpu(ir->index.index_length); 807 le32_to_cpu(ir->index.index_length);
807 if (index_end > ir_end) { 808 if (index_end > ir_end) {
808 ntfs_error(vi->i_sb, "Directory index is corrupt."); 809 ntfs_error(vi->i_sb, "Directory index is corrupt.");
@@ -889,7 +890,8 @@ skip_attr_list_load:
889 "attribute."); 890 "attribute.");
890 goto unm_err_out; 891 goto unm_err_out;
891 } 892 }
892 if (!ctx->attr->non_resident) { 893 a = ctx->attr;
894 if (!a->non_resident) {
893 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 895 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
894 "is resident."); 896 "is resident.");
895 goto unm_err_out; 897 goto unm_err_out;
@@ -898,42 +900,40 @@ skip_attr_list_load:
898 * Ensure the attribute name is placed before the mapping pairs 900 * Ensure the attribute name is placed before the mapping pairs
899 * array. 901 * array.
900 */ 902 */
901 if (unlikely(ctx->attr->name_length && 903 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
902 (le16_to_cpu(ctx->attr->name_offset) >= 904 le16_to_cpu(
903 le16_to_cpu(ctx->attr->data.non_resident. 905 a->data.non_resident.mapping_pairs_offset)))) {
904 mapping_pairs_offset)))) {
905 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name " 906 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
906 "is placed after the mapping pairs " 907 "is placed after the mapping pairs "
907 "array."); 908 "array.");
908 goto unm_err_out; 909 goto unm_err_out;
909 } 910 }
910 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 911 if (a->flags & ATTR_IS_ENCRYPTED) {
911 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 912 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
912 "is encrypted."); 913 "is encrypted.");
913 goto unm_err_out; 914 goto unm_err_out;
914 } 915 }
915 if (ctx->attr->flags & ATTR_IS_SPARSE) { 916 if (a->flags & ATTR_IS_SPARSE) {
916 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 917 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
917 "is sparse."); 918 "is sparse.");
918 goto unm_err_out; 919 goto unm_err_out;
919 } 920 }
920 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 921 if (a->flags & ATTR_COMPRESSION_MASK) {
921 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " 922 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
922 "is compressed."); 923 "is compressed.");
923 goto unm_err_out; 924 goto unm_err_out;
924 } 925 }
925 if (ctx->attr->data.non_resident.lowest_vcn) { 926 if (a->data.non_resident.lowest_vcn) {
926 ntfs_error(vi->i_sb, "First extent of " 927 ntfs_error(vi->i_sb, "First extent of "
927 "$INDEX_ALLOCATION attribute has non " 928 "$INDEX_ALLOCATION attribute has non "
928 "zero lowest_vcn."); 929 "zero lowest_vcn.");
929 goto unm_err_out; 930 goto unm_err_out;
930 } 931 }
931 vi->i_size = sle64_to_cpu( 932 vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
932 ctx->attr->data.non_resident.data_size);
933 ni->initialized_size = sle64_to_cpu( 933 ni->initialized_size = sle64_to_cpu(
934 ctx->attr->data.non_resident.initialized_size); 934 a->data.non_resident.initialized_size);
935 ni->allocated_size = sle64_to_cpu( 935 ni->allocated_size = sle64_to_cpu(
936 ctx->attr->data.non_resident.allocated_size); 936 a->data.non_resident.allocated_size);
937 /* 937 /*
938 * We are done with the mft record, so we release it. Otherwise 938 * We are done with the mft record, so we release it. Otherwise
939 * we would deadlock in ntfs_attr_iget(). 939 * we would deadlock in ntfs_attr_iget().
@@ -958,11 +958,12 @@ skip_attr_list_load:
958 goto unm_err_out; 958 goto unm_err_out;
959 } 959 }
960 /* Consistency check bitmap size vs. index allocation size. */ 960 /* Consistency check bitmap size vs. index allocation size. */
961 if ((bvi->i_size << 3) < (vi->i_size >> 961 bvi_size = i_size_read(bvi);
962 if ((bvi_size << 3) < (vi->i_size >>
962 ni->itype.index.block_size_bits)) { 963 ni->itype.index.block_size_bits)) {
963 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) " 964 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
964 "for index allocation (0x%llx).", 965 "for index allocation (0x%llx).",
965 bvi->i_size << 3, vi->i_size); 966 bvi_size << 3, vi->i_size);
966 goto unm_err_out; 967 goto unm_err_out;
967 } 968 }
968skip_large_dir_stuff: 969skip_large_dir_stuff:
@@ -1010,87 +1011,92 @@ skip_large_dir_stuff:
1010 ntfs_error(vi->i_sb, "$DATA attribute is missing."); 1011 ntfs_error(vi->i_sb, "$DATA attribute is missing.");
1011 goto unm_err_out; 1012 goto unm_err_out;
1012 } 1013 }
1014 a = ctx->attr;
1013 /* Setup the state. */ 1015 /* Setup the state. */
1014 if (ctx->attr->non_resident) { 1016 if (a->non_resident) {
1015 NInoSetNonResident(ni); 1017 NInoSetNonResident(ni);
1016 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1018 if (a->flags & (ATTR_COMPRESSION_MASK |
1017 NInoSetCompressed(ni); 1019 ATTR_IS_SPARSE)) {
1018 if (vol->cluster_size > 4096) { 1020 if (a->flags & ATTR_COMPRESSION_MASK) {
1019 ntfs_error(vi->i_sb, "Found " 1021 NInoSetCompressed(ni);
1020 "compressed data but " 1022 if (vol->cluster_size > 4096) {
1021 "compression is disabled due " 1023 ntfs_error(vi->i_sb, "Found "
1022 "to cluster size (%i) > 4kiB.", 1024 "compressed data but "
1023 vol->cluster_size); 1025 "compression is "
1024 goto unm_err_out; 1026 "disabled due to "
1025 } 1027 "cluster size (%i) > "
1026 if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) 1028 "4kiB.",
1027 != ATTR_IS_COMPRESSED) { 1029 vol->cluster_size);
1028 ntfs_error(vi->i_sb, "Found " 1030 goto unm_err_out;
1029 "unknown compression method or " 1031 }
1030 "corrupt file."); 1032 if ((a->flags & ATTR_COMPRESSION_MASK)
1031 goto unm_err_out; 1033 != ATTR_IS_COMPRESSED) {
1034 ntfs_error(vi->i_sb, "Found "
1035 "unknown compression "
1036 "method or corrupt "
1037 "file.");
1038 goto unm_err_out;
1039 }
1032 } 1040 }
1033 ni->itype.compressed.block_clusters = 1U << 1041 if (a->flags & ATTR_IS_SPARSE)
1034 ctx->attr->data.non_resident. 1042 NInoSetSparse(ni);
1035 compression_unit; 1043 if (a->data.non_resident.compression_unit !=
1036 if (ctx->attr->data.non_resident. 1044 4) {
1037 compression_unit != 4) {
1038 ntfs_error(vi->i_sb, "Found " 1045 ntfs_error(vi->i_sb, "Found "
1039 "nonstandard compression unit " 1046 "nonstandard compression unit "
1040 "(%u instead of 4). Cannot " 1047 "(%u instead of 4). Cannot "
1041 "handle this.", 1048 "handle this.",
1042 ctx->attr->data.non_resident. 1049 a->data.non_resident.
1043 compression_unit); 1050 compression_unit);
1044 err = -EOPNOTSUPP; 1051 err = -EOPNOTSUPP;
1045 goto unm_err_out; 1052 goto unm_err_out;
1046 } 1053 }
1054 ni->itype.compressed.block_clusters = 1U <<
1055 a->data.non_resident.
1056 compression_unit;
1047 ni->itype.compressed.block_size = 1U << ( 1057 ni->itype.compressed.block_size = 1U << (
1048 ctx->attr->data.non_resident. 1058 a->data.non_resident.
1049 compression_unit + 1059 compression_unit +
1050 vol->cluster_size_bits); 1060 vol->cluster_size_bits);
1051 ni->itype.compressed.block_size_bits = ffs( 1061 ni->itype.compressed.block_size_bits = ffs(
1052 ni->itype.compressed.block_size) - 1; 1062 ni->itype.compressed.
1063 block_size) - 1;
1064 ni->itype.compressed.size = sle64_to_cpu(
1065 a->data.non_resident.
1066 compressed_size);
1053 } 1067 }
1054 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 1068 if (a->flags & ATTR_IS_ENCRYPTED) {
1055 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1069 if (a->flags & ATTR_COMPRESSION_MASK) {
1056 ntfs_error(vi->i_sb, "Found encrypted " 1070 ntfs_error(vi->i_sb, "Found encrypted "
1057 "and compressed data."); 1071 "and compressed data.");
1058 goto unm_err_out; 1072 goto unm_err_out;
1059 } 1073 }
1060 NInoSetEncrypted(ni); 1074 NInoSetEncrypted(ni);
1061 } 1075 }
1062 if (ctx->attr->flags & ATTR_IS_SPARSE) 1076 if (a->data.non_resident.lowest_vcn) {
1063 NInoSetSparse(ni);
1064 if (ctx->attr->data.non_resident.lowest_vcn) {
1065 ntfs_error(vi->i_sb, "First extent of $DATA " 1077 ntfs_error(vi->i_sb, "First extent of $DATA "
1066 "attribute has non zero " 1078 "attribute has non zero "
1067 "lowest_vcn."); 1079 "lowest_vcn.");
1068 goto unm_err_out; 1080 goto unm_err_out;
1069 } 1081 }
1070 /* Setup all the sizes. */
1071 vi->i_size = sle64_to_cpu( 1082 vi->i_size = sle64_to_cpu(
1072 ctx->attr->data.non_resident.data_size); 1083 a->data.non_resident.data_size);
1073 ni->initialized_size = sle64_to_cpu( 1084 ni->initialized_size = sle64_to_cpu(
1074 ctx->attr->data.non_resident. 1085 a->data.non_resident.initialized_size);
1075 initialized_size);
1076 ni->allocated_size = sle64_to_cpu( 1086 ni->allocated_size = sle64_to_cpu(
1077 ctx->attr->data.non_resident. 1087 a->data.non_resident.allocated_size);
1078 allocated_size);
1079 if (NInoCompressed(ni)) {
1080 ni->itype.compressed.size = sle64_to_cpu(
1081 ctx->attr->data.non_resident.
1082 compressed_size);
1083 }
1084 } else { /* Resident attribute. */ 1088 } else { /* Resident attribute. */
1085 /* 1089 vi->i_size = ni->initialized_size = le32_to_cpu(
1086 * Make all sizes equal for simplicity in read code 1090 a->data.resident.value_length);
1087 * paths. FIXME: Need to keep this in mind when 1091 ni->allocated_size = le32_to_cpu(a->length) -
1088 * converting to non-resident attribute in write code 1092 le16_to_cpu(
1089 * path. (Probably only affects truncate().) 1093 a->data.resident.value_offset);
1090 */ 1094 if (vi->i_size > ni->allocated_size) {
1091 vi->i_size = ni->initialized_size = ni->allocated_size = 1095 ntfs_error(vi->i_sb, "Resident data attribute "
1092 le32_to_cpu( 1096 "is corrupt (size exceeds "
1093 ctx->attr->data.resident.value_length); 1097 "allocation).");
1098 goto unm_err_out;
1099 }
1094 } 1100 }
1095no_data_attr_special_case: 1101no_data_attr_special_case:
1096 /* We are done with the mft record, so we release it. */ 1102 /* We are done with the mft record, so we release it. */
@@ -1117,11 +1123,10 @@ no_data_attr_special_case:
1117 * sizes of all non-resident attributes present to give us the Linux 1123 * sizes of all non-resident attributes present to give us the Linux
1118 * correct size that should go into i_blocks (after division by 512). 1124 * correct size that should go into i_blocks (after division by 512).
1119 */ 1125 */
1120 if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni)) 1126 if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
1121 vi->i_blocks = ni->allocated_size >> 9;
1122 else
1123 vi->i_blocks = ni->itype.compressed.size >> 9; 1127 vi->i_blocks = ni->itype.compressed.size >> 9;
1124 1128 else
1129 vi->i_blocks = ni->allocated_size >> 9;
1125 ntfs_debug("Done."); 1130 ntfs_debug("Done.");
1126 return 0; 1131 return 0;
1127 1132
@@ -1166,6 +1171,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1166 ntfs_volume *vol = NTFS_SB(vi->i_sb); 1171 ntfs_volume *vol = NTFS_SB(vi->i_sb);
1167 ntfs_inode *ni, *base_ni; 1172 ntfs_inode *ni, *base_ni;
1168 MFT_RECORD *m; 1173 MFT_RECORD *m;
1174 ATTR_RECORD *a;
1169 ntfs_attr_search_ctx *ctx; 1175 ntfs_attr_search_ctx *ctx;
1170 int err = 0; 1176 int err = 0;
1171 1177
@@ -1200,24 +1206,21 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1200 err = -ENOMEM; 1206 err = -ENOMEM;
1201 goto unm_err_out; 1207 goto unm_err_out;
1202 } 1208 }
1203
1204 /* Find the attribute. */ 1209 /* Find the attribute. */
1205 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1210 err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1206 CASE_SENSITIVE, 0, NULL, 0, ctx); 1211 CASE_SENSITIVE, 0, NULL, 0, ctx);
1207 if (unlikely(err)) 1212 if (unlikely(err))
1208 goto unm_err_out; 1213 goto unm_err_out;
1209 1214 a = ctx->attr;
1210 if (!ctx->attr->non_resident) { 1215 if (!a->non_resident) {
1211 /* Ensure the attribute name is placed before the value. */ 1216 /* Ensure the attribute name is placed before the value. */
1212 if (unlikely(ctx->attr->name_length && 1217 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1213 (le16_to_cpu(ctx->attr->name_offset) >= 1218 le16_to_cpu(a->data.resident.value_offset)))) {
1214 le16_to_cpu(ctx->attr->data.resident.
1215 value_offset)))) {
1216 ntfs_error(vol->sb, "Attribute name is placed after " 1219 ntfs_error(vol->sb, "Attribute name is placed after "
1217 "the attribute value."); 1220 "the attribute value.");
1218 goto unm_err_out; 1221 goto unm_err_out;
1219 } 1222 }
1220 if (NInoMstProtected(ni) || ctx->attr->flags) { 1223 if (NInoMstProtected(ni) || a->flags) {
1221 ntfs_error(vi->i_sb, "Found mst protected attribute " 1224 ntfs_error(vi->i_sb, "Found mst protected attribute "
1222 "or attribute with non-zero flags but " 1225 "or attribute with non-zero flags but "
1223 "the attribute is resident. Please " 1226 "the attribute is resident. Please "
@@ -1225,85 +1228,95 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1225 "linux-ntfs-dev@lists.sourceforge.net"); 1228 "linux-ntfs-dev@lists.sourceforge.net");
1226 goto unm_err_out; 1229 goto unm_err_out;
1227 } 1230 }
1228 /* 1231 vi->i_size = ni->initialized_size = le32_to_cpu(
1229 * Resident attribute. Make all sizes equal for simplicity in 1232 a->data.resident.value_length);
1230 * read code paths. 1233 ni->allocated_size = le32_to_cpu(a->length) -
1231 */ 1234 le16_to_cpu(a->data.resident.value_offset);
1232 vi->i_size = ni->initialized_size = ni->allocated_size = 1235 if (vi->i_size > ni->allocated_size) {
1233 le32_to_cpu(ctx->attr->data.resident.value_length); 1236 ntfs_error(vi->i_sb, "Resident attribute is corrupt "
1237 "(size exceeds allocation).");
1238 goto unm_err_out;
1239 }
1234 } else { 1240 } else {
1235 NInoSetNonResident(ni); 1241 NInoSetNonResident(ni);
1236 /* 1242 /*
1237 * Ensure the attribute name is placed before the mapping pairs 1243 * Ensure the attribute name is placed before the mapping pairs
1238 * array. 1244 * array.
1239 */ 1245 */
1240 if (unlikely(ctx->attr->name_length && 1246 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1241 (le16_to_cpu(ctx->attr->name_offset) >= 1247 le16_to_cpu(
1242 le16_to_cpu(ctx->attr->data.non_resident. 1248 a->data.non_resident.mapping_pairs_offset)))) {
1243 mapping_pairs_offset)))) {
1244 ntfs_error(vol->sb, "Attribute name is placed after " 1249 ntfs_error(vol->sb, "Attribute name is placed after "
1245 "the mapping pairs array."); 1250 "the mapping pairs array.");
1246 goto unm_err_out; 1251 goto unm_err_out;
1247 } 1252 }
1248 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1253 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1254 if (a->flags & ATTR_COMPRESSION_MASK) {
1255 NInoSetCompressed(ni);
1256 if ((ni->type != AT_DATA) || (ni->type ==
1257 AT_DATA && ni->name_len)) {
1258 ntfs_error(vi->i_sb, "Found compressed "
1259 "non-data or named "
1260 "data attribute. "
1261 "Please report you "
1262 "saw this message to "
1263 "linux-ntfs-dev@lists."
1264 "sourceforge.net");
1265 goto unm_err_out;
1266 }
1267 if (vol->cluster_size > 4096) {
1268 ntfs_error(vi->i_sb, "Found compressed "
1269 "attribute but "
1270 "compression is "
1271 "disabled due to "
1272 "cluster size (%i) > "
1273 "4kiB.",
1274 vol->cluster_size);
1275 goto unm_err_out;
1276 }
1277 if ((a->flags & ATTR_COMPRESSION_MASK) !=
1278 ATTR_IS_COMPRESSED) {
1279 ntfs_error(vi->i_sb, "Found unknown "
1280 "compression method.");
1281 goto unm_err_out;
1282 }
1283 }
1249 if (NInoMstProtected(ni)) { 1284 if (NInoMstProtected(ni)) {
1250 ntfs_error(vi->i_sb, "Found mst protected " 1285 ntfs_error(vi->i_sb, "Found mst protected "
1251 "attribute but the attribute " 1286 "attribute but the attribute "
1252 "is compressed. Please report " 1287 "is %s. Please report you "
1253 "you saw this message to " 1288 "saw this message to "
1254 "linux-ntfs-dev@lists."
1255 "sourceforge.net");
1256 goto unm_err_out;
1257 }
1258 NInoSetCompressed(ni);
1259 if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1260 ni->name_len)) {
1261 ntfs_error(vi->i_sb, "Found compressed "
1262 "non-data or named data "
1263 "attribute. Please report "
1264 "you saw this message to "
1265 "linux-ntfs-dev@lists." 1289 "linux-ntfs-dev@lists."
1266 "sourceforge.net"); 1290 "sourceforge.net",
1267 goto unm_err_out; 1291 NInoCompressed(ni) ?
1268 } 1292 "compressed" : "sparse");
1269 if (vol->cluster_size > 4096) {
1270 ntfs_error(vi->i_sb, "Found compressed "
1271 "attribute but compression is "
1272 "disabled due to cluster size "
1273 "(%i) > 4kiB.",
1274 vol->cluster_size);
1275 goto unm_err_out; 1293 goto unm_err_out;
1276 } 1294 }
1277 if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) 1295 if (a->flags & ATTR_IS_SPARSE)
1278 != ATTR_IS_COMPRESSED) { 1296 NInoSetSparse(ni);
1279 ntfs_error(vi->i_sb, "Found unknown " 1297 if (a->data.non_resident.compression_unit != 4) {
1280 "compression method.");
1281 goto unm_err_out;
1282 }
1283 ni->itype.compressed.block_clusters = 1U <<
1284 ctx->attr->data.non_resident.
1285 compression_unit;
1286 if (ctx->attr->data.non_resident.compression_unit !=
1287 4) {
1288 ntfs_error(vi->i_sb, "Found nonstandard " 1298 ntfs_error(vi->i_sb, "Found nonstandard "
1289 "compression unit (%u instead " 1299 "compression unit (%u instead "
1290 "of 4). Cannot handle this.", 1300 "of 4). Cannot handle this.",
1291 ctx->attr->data.non_resident. 1301 a->data.non_resident.
1292 compression_unit); 1302 compression_unit);
1293 err = -EOPNOTSUPP; 1303 err = -EOPNOTSUPP;
1294 goto unm_err_out; 1304 goto unm_err_out;
1295 } 1305 }
1306 ni->itype.compressed.block_clusters = 1U <<
1307 a->data.non_resident.compression_unit;
1296 ni->itype.compressed.block_size = 1U << ( 1308 ni->itype.compressed.block_size = 1U << (
1297 ctx->attr->data.non_resident. 1309 a->data.non_resident.compression_unit +
1298 compression_unit +
1299 vol->cluster_size_bits); 1310 vol->cluster_size_bits);
1300 ni->itype.compressed.block_size_bits = ffs( 1311 ni->itype.compressed.block_size_bits = ffs(
1301 ni->itype.compressed.block_size) - 1; 1312 ni->itype.compressed.block_size) - 1;
1313 ni->itype.compressed.size = sle64_to_cpu(
1314 a->data.non_resident.compressed_size);
1302 } 1315 }
1303 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 1316 if (a->flags & ATTR_IS_ENCRYPTED) {
1304 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1317 if (a->flags & ATTR_COMPRESSION_MASK) {
1305 ntfs_error(vi->i_sb, "Found encrypted " 1318 ntfs_error(vi->i_sb, "Found encrypted and "
1306 "and compressed data."); 1319 "compressed data.");
1307 goto unm_err_out; 1320 goto unm_err_out;
1308 } 1321 }
1309 if (NInoMstProtected(ni)) { 1322 if (NInoMstProtected(ni)) {
@@ -1317,37 +1330,17 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1317 } 1330 }
1318 NInoSetEncrypted(ni); 1331 NInoSetEncrypted(ni);
1319 } 1332 }
1320 if (ctx->attr->flags & ATTR_IS_SPARSE) { 1333 if (a->data.non_resident.lowest_vcn) {
1321 if (NInoMstProtected(ni)) {
1322 ntfs_error(vi->i_sb, "Found mst protected "
1323 "attribute but the attribute "
1324 "is sparse. Please report "
1325 "you saw this message to "
1326 "linux-ntfs-dev@lists."
1327 "sourceforge.net");
1328 goto unm_err_out;
1329 }
1330 NInoSetSparse(ni);
1331 }
1332 if (ctx->attr->data.non_resident.lowest_vcn) {
1333 ntfs_error(vi->i_sb, "First extent of attribute has " 1334 ntfs_error(vi->i_sb, "First extent of attribute has "
1334 "non-zero lowest_vcn."); 1335 "non-zero lowest_vcn.");
1335 goto unm_err_out; 1336 goto unm_err_out;
1336 } 1337 }
1337 /* Setup all the sizes. */ 1338 vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
1338 vi->i_size = sle64_to_cpu(
1339 ctx->attr->data.non_resident.data_size);
1340 ni->initialized_size = sle64_to_cpu( 1339 ni->initialized_size = sle64_to_cpu(
1341 ctx->attr->data.non_resident.initialized_size); 1340 a->data.non_resident.initialized_size);
1342 ni->allocated_size = sle64_to_cpu( 1341 ni->allocated_size = sle64_to_cpu(
1343 ctx->attr->data.non_resident.allocated_size); 1342 a->data.non_resident.allocated_size);
1344 if (NInoCompressed(ni)) {
1345 ni->itype.compressed.size = sle64_to_cpu(
1346 ctx->attr->data.non_resident.
1347 compressed_size);
1348 }
1349 } 1343 }
1350
1351 /* Setup the operations for this attribute inode. */ 1344 /* Setup the operations for this attribute inode. */
1352 vi->i_op = NULL; 1345 vi->i_op = NULL;
1353 vi->i_fop = NULL; 1346 vi->i_fop = NULL;
@@ -1355,12 +1348,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1355 vi->i_mapping->a_ops = &ntfs_mst_aops; 1348 vi->i_mapping->a_ops = &ntfs_mst_aops;
1356 else 1349 else
1357 vi->i_mapping->a_ops = &ntfs_aops; 1350 vi->i_mapping->a_ops = &ntfs_aops;
1358 1351 if (NInoCompressed(ni) || NInoSparse(ni))
1359 if (!NInoCompressed(ni))
1360 vi->i_blocks = ni->allocated_size >> 9;
1361 else
1362 vi->i_blocks = ni->itype.compressed.size >> 9; 1352 vi->i_blocks = ni->itype.compressed.size >> 9;
1363 1353 else
1354 vi->i_blocks = ni->allocated_size >> 9;
1364 /* 1355 /*
1365 * Make sure the base inode doesn't go away and attach it to the 1356 * Make sure the base inode doesn't go away and attach it to the
1366 * attribute inode. 1357 * attribute inode.
@@ -1429,10 +1420,12 @@ err_out:
1429 */ 1420 */
1430static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) 1421static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1431{ 1422{
1423 loff_t bvi_size;
1432 ntfs_volume *vol = NTFS_SB(vi->i_sb); 1424 ntfs_volume *vol = NTFS_SB(vi->i_sb);
1433 ntfs_inode *ni, *base_ni, *bni; 1425 ntfs_inode *ni, *base_ni, *bni;
1434 struct inode *bvi; 1426 struct inode *bvi;
1435 MFT_RECORD *m; 1427 MFT_RECORD *m;
1428 ATTR_RECORD *a;
1436 ntfs_attr_search_ctx *ctx; 1429 ntfs_attr_search_ctx *ctx;
1437 INDEX_ROOT *ir; 1430 INDEX_ROOT *ir;
1438 u8 *ir_end, *index_end; 1431 u8 *ir_end, *index_end;
@@ -1474,30 +1467,28 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1474 "missing."); 1467 "missing.");
1475 goto unm_err_out; 1468 goto unm_err_out;
1476 } 1469 }
1470 a = ctx->attr;
1477 /* Set up the state. */ 1471 /* Set up the state. */
1478 if (unlikely(ctx->attr->non_resident)) { 1472 if (unlikely(a->non_resident)) {
1479 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident."); 1473 ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1480 goto unm_err_out; 1474 goto unm_err_out;
1481 } 1475 }
1482 /* Ensure the attribute name is placed before the value. */ 1476 /* Ensure the attribute name is placed before the value. */
1483 if (unlikely(ctx->attr->name_length && 1477 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1484 (le16_to_cpu(ctx->attr->name_offset) >= 1478 le16_to_cpu(a->data.resident.value_offset)))) {
1485 le16_to_cpu(ctx->attr->data.resident.
1486 value_offset)))) {
1487 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed " 1479 ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
1488 "after the attribute value."); 1480 "after the attribute value.");
1489 goto unm_err_out; 1481 goto unm_err_out;
1490 } 1482 }
1491 /* Compressed/encrypted/sparse index root is not allowed. */ 1483 /* Compressed/encrypted/sparse index root is not allowed. */
1492 if (ctx->attr->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | 1484 if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
1493 ATTR_IS_SPARSE)) { 1485 ATTR_IS_SPARSE)) {
1494 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " 1486 ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
1495 "root attribute."); 1487 "root attribute.");
1496 goto unm_err_out; 1488 goto unm_err_out;
1497 } 1489 }
1498 ir = (INDEX_ROOT*)((u8*)ctx->attr + 1490 ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset));
1499 le16_to_cpu(ctx->attr->data.resident.value_offset)); 1491 ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
1500 ir_end = (u8*)ir + le32_to_cpu(ctx->attr->data.resident.value_length);
1501 if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) { 1492 if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
1502 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt."); 1493 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
1503 goto unm_err_out; 1494 goto unm_err_out;
@@ -1570,7 +1561,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1570 "$INDEX_ALLOCATION attribute."); 1561 "$INDEX_ALLOCATION attribute.");
1571 goto unm_err_out; 1562 goto unm_err_out;
1572 } 1563 }
1573 if (!ctx->attr->non_resident) { 1564 if (!a->non_resident) {
1574 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " 1565 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1575 "resident."); 1566 "resident.");
1576 goto unm_err_out; 1567 goto unm_err_out;
@@ -1578,37 +1569,36 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1578 /* 1569 /*
1579 * Ensure the attribute name is placed before the mapping pairs array. 1570 * Ensure the attribute name is placed before the mapping pairs array.
1580 */ 1571 */
1581 if (unlikely(ctx->attr->name_length && (le16_to_cpu( 1572 if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1582 ctx->attr->name_offset) >= le16_to_cpu( 1573 le16_to_cpu(
1583 ctx->attr->data.non_resident.mapping_pairs_offset)))) { 1574 a->data.non_resident.mapping_pairs_offset)))) {
1584 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is " 1575 ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
1585 "placed after the mapping pairs array."); 1576 "placed after the mapping pairs array.");
1586 goto unm_err_out; 1577 goto unm_err_out;
1587 } 1578 }
1588 if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { 1579 if (a->flags & ATTR_IS_ENCRYPTED) {
1589 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " 1580 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1590 "encrypted."); 1581 "encrypted.");
1591 goto unm_err_out; 1582 goto unm_err_out;
1592 } 1583 }
1593 if (ctx->attr->flags & ATTR_IS_SPARSE) { 1584 if (a->flags & ATTR_IS_SPARSE) {
1594 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse."); 1585 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1595 goto unm_err_out; 1586 goto unm_err_out;
1596 } 1587 }
1597 if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { 1588 if (a->flags & ATTR_COMPRESSION_MASK) {
1598 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " 1589 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1599 "compressed."); 1590 "compressed.");
1600 goto unm_err_out; 1591 goto unm_err_out;
1601 } 1592 }
1602 if (ctx->attr->data.non_resident.lowest_vcn) { 1593 if (a->data.non_resident.lowest_vcn) {
1603 ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION " 1594 ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
1604 "attribute has non zero lowest_vcn."); 1595 "attribute has non zero lowest_vcn.");
1605 goto unm_err_out; 1596 goto unm_err_out;
1606 } 1597 }
1607 vi->i_size = sle64_to_cpu(ctx->attr->data.non_resident.data_size); 1598 vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
1608 ni->initialized_size = sle64_to_cpu( 1599 ni->initialized_size = sle64_to_cpu(
1609 ctx->attr->data.non_resident.initialized_size); 1600 a->data.non_resident.initialized_size);
1610 ni->allocated_size = sle64_to_cpu( 1601 ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size);
1611 ctx->attr->data.non_resident.allocated_size);
1612 /* 1602 /*
1613 * We are done with the mft record, so we release it. Otherwise 1603 * We are done with the mft record, so we release it. Otherwise
1614 * we would deadlock in ntfs_attr_iget(). 1604 * we would deadlock in ntfs_attr_iget().
@@ -1632,10 +1622,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1632 goto iput_unm_err_out; 1622 goto iput_unm_err_out;
1633 } 1623 }
1634 /* Consistency check bitmap size vs. index allocation size. */ 1624 /* Consistency check bitmap size vs. index allocation size. */
1635 if ((bvi->i_size << 3) < (vi->i_size >> 1625 bvi_size = i_size_read(bvi);
1636 ni->itype.index.block_size_bits)) { 1626 if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1637 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for " 1627 ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
1638 "index allocation (0x%llx).", bvi->i_size << 3, 1628 "index allocation (0x%llx).", bvi_size << 3,
1639 vi->i_size); 1629 vi->i_size);
1640 goto iput_unm_err_out; 1630 goto iput_unm_err_out;
1641 } 1631 }
@@ -1646,7 +1636,6 @@ skip_large_index_stuff:
1646 vi->i_fop = NULL; 1636 vi->i_fop = NULL;
1647 vi->i_mapping->a_ops = &ntfs_mst_aops; 1637 vi->i_mapping->a_ops = &ntfs_mst_aops;
1648 vi->i_blocks = ni->allocated_size >> 9; 1638 vi->i_blocks = ni->allocated_size >> 9;
1649
1650 /* 1639 /*
1651 * Make sure the base inode doesn't go away and attach it to the 1640 * Make sure the base inode doesn't go away and attach it to the
1652 * index inode. 1641 * index inode.
@@ -1712,7 +1701,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1712 struct buffer_head *bh; 1701 struct buffer_head *bh;
1713 ntfs_inode *ni; 1702 ntfs_inode *ni;
1714 MFT_RECORD *m = NULL; 1703 MFT_RECORD *m = NULL;
1715 ATTR_RECORD *attr; 1704 ATTR_RECORD *a;
1716 ntfs_attr_search_ctx *ctx; 1705 ntfs_attr_search_ctx *ctx;
1717 unsigned int i, nr_blocks; 1706 unsigned int i, nr_blocks;
1718 int err; 1707 int err;
@@ -1727,10 +1716,10 @@ int ntfs_read_inode_mount(struct inode *vi)
1727 /* Setup the data attribute. It is special as it is mst protected. */ 1716 /* Setup the data attribute. It is special as it is mst protected. */
1728 NInoSetNonResident(ni); 1717 NInoSetNonResident(ni);
1729 NInoSetMstProtected(ni); 1718 NInoSetMstProtected(ni);
1719 NInoSetSparseDisabled(ni);
1730 ni->type = AT_DATA; 1720 ni->type = AT_DATA;
1731 ni->name = NULL; 1721 ni->name = NULL;
1732 ni->name_len = 0; 1722 ni->name_len = 0;
1733
1734 /* 1723 /*
1735 * This sets up our little cheat allowing us to reuse the async read io 1724 * This sets up our little cheat allowing us to reuse the async read io
1736 * completion handler for directories. 1725 * completion handler for directories.
@@ -1808,9 +1797,10 @@ int ntfs_read_inode_mount(struct inode *vi)
1808 1797
1809 ntfs_debug("Attribute list attribute found in $MFT."); 1798 ntfs_debug("Attribute list attribute found in $MFT.");
1810 NInoSetAttrList(ni); 1799 NInoSetAttrList(ni);
1811 if (ctx->attr->flags & ATTR_IS_ENCRYPTED || 1800 a = ctx->attr;
1812 ctx->attr->flags & ATTR_COMPRESSION_MASK || 1801 if (a->flags & ATTR_IS_ENCRYPTED ||
1813 ctx->attr->flags & ATTR_IS_SPARSE) { 1802 a->flags & ATTR_COMPRESSION_MASK ||
1803 a->flags & ATTR_IS_SPARSE) {
1814 ntfs_error(sb, "Attribute list attribute is " 1804 ntfs_error(sb, "Attribute list attribute is "
1815 "compressed/encrypted/sparse. Not " 1805 "compressed/encrypted/sparse. Not "
1816 "allowed. $MFT is corrupt. You should " 1806 "allowed. $MFT is corrupt. You should "
@@ -1818,16 +1808,16 @@ int ntfs_read_inode_mount(struct inode *vi)
1818 goto put_err_out; 1808 goto put_err_out;
1819 } 1809 }
1820 /* Now allocate memory for the attribute list. */ 1810 /* Now allocate memory for the attribute list. */
1821 ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); 1811 ni->attr_list_size = (u32)ntfs_attr_size(a);
1822 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); 1812 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
1823 if (!ni->attr_list) { 1813 if (!ni->attr_list) {
1824 ntfs_error(sb, "Not enough memory to allocate buffer " 1814 ntfs_error(sb, "Not enough memory to allocate buffer "
1825 "for attribute list."); 1815 "for attribute list.");
1826 goto put_err_out; 1816 goto put_err_out;
1827 } 1817 }
1828 if (ctx->attr->non_resident) { 1818 if (a->non_resident) {
1829 NInoSetAttrListNonResident(ni); 1819 NInoSetAttrListNonResident(ni);
1830 if (ctx->attr->data.non_resident.lowest_vcn) { 1820 if (a->data.non_resident.lowest_vcn) {
1831 ntfs_error(sb, "Attribute list has non zero " 1821 ntfs_error(sb, "Attribute list has non zero "
1832 "lowest_vcn. $MFT is corrupt. " 1822 "lowest_vcn. $MFT is corrupt. "
1833 "You should run chkdsk."); 1823 "You should run chkdsk.");
@@ -1835,7 +1825,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1835 } 1825 }
1836 /* Setup the runlist. */ 1826 /* Setup the runlist. */
1837 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, 1827 ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
1838 ctx->attr, NULL); 1828 a, NULL);
1839 if (IS_ERR(ni->attr_list_rl.rl)) { 1829 if (IS_ERR(ni->attr_list_rl.rl)) {
1840 err = PTR_ERR(ni->attr_list_rl.rl); 1830 err = PTR_ERR(ni->attr_list_rl.rl);
1841 ni->attr_list_rl.rl = NULL; 1831 ni->attr_list_rl.rl = NULL;
@@ -1847,7 +1837,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1847 /* Now load the attribute list. */ 1837 /* Now load the attribute list. */
1848 if ((err = load_attribute_list(vol, &ni->attr_list_rl, 1838 if ((err = load_attribute_list(vol, &ni->attr_list_rl,
1849 ni->attr_list, ni->attr_list_size, 1839 ni->attr_list, ni->attr_list_size,
1850 sle64_to_cpu(ctx->attr->data. 1840 sle64_to_cpu(a->data.
1851 non_resident.initialized_size)))) { 1841 non_resident.initialized_size)))) {
1852 ntfs_error(sb, "Failed to load attribute list " 1842 ntfs_error(sb, "Failed to load attribute list "
1853 "attribute with error code %i.", 1843 "attribute with error code %i.",
@@ -1855,20 +1845,20 @@ int ntfs_read_inode_mount(struct inode *vi)
1855 goto put_err_out; 1845 goto put_err_out;
1856 } 1846 }
1857 } else /* if (!ctx.attr->non_resident) */ { 1847 } else /* if (!ctx.attr->non_resident) */ {
1858 if ((u8*)ctx->attr + le16_to_cpu( 1848 if ((u8*)a + le16_to_cpu(
1859 ctx->attr->data.resident.value_offset) + 1849 a->data.resident.value_offset) +
1860 le32_to_cpu( 1850 le32_to_cpu(
1861 ctx->attr->data.resident.value_length) > 1851 a->data.resident.value_length) >
1862 (u8*)ctx->mrec + vol->mft_record_size) { 1852 (u8*)ctx->mrec + vol->mft_record_size) {
1863 ntfs_error(sb, "Corrupt attribute list " 1853 ntfs_error(sb, "Corrupt attribute list "
1864 "attribute."); 1854 "attribute.");
1865 goto put_err_out; 1855 goto put_err_out;
1866 } 1856 }
1867 /* Now copy the attribute list. */ 1857 /* Now copy the attribute list. */
1868 memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( 1858 memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
1869 ctx->attr->data.resident.value_offset), 1859 a->data.resident.value_offset),
1870 le32_to_cpu( 1860 le32_to_cpu(
1871 ctx->attr->data.resident.value_length)); 1861 a->data.resident.value_length));
1872 } 1862 }
1873 /* The attribute list is now setup in memory. */ 1863 /* The attribute list is now setup in memory. */
1874 /* 1864 /*
@@ -1934,25 +1924,25 @@ int ntfs_read_inode_mount(struct inode *vi)
1934 ntfs_attr_reinit_search_ctx(ctx); 1924 ntfs_attr_reinit_search_ctx(ctx);
1935 1925
1936 /* Now load all attribute extents. */ 1926 /* Now load all attribute extents. */
1937 attr = NULL; 1927 a = NULL;
1938 next_vcn = last_vcn = highest_vcn = 0; 1928 next_vcn = last_vcn = highest_vcn = 0;
1939 while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, 1929 while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
1940 ctx))) { 1930 ctx))) {
1941 runlist_element *nrl; 1931 runlist_element *nrl;
1942 1932
1943 /* Cache the current attribute. */ 1933 /* Cache the current attribute. */
1944 attr = ctx->attr; 1934 a = ctx->attr;
1945 /* $MFT must be non-resident. */ 1935 /* $MFT must be non-resident. */
1946 if (!attr->non_resident) { 1936 if (!a->non_resident) {
1947 ntfs_error(sb, "$MFT must be non-resident but a " 1937 ntfs_error(sb, "$MFT must be non-resident but a "
1948 "resident extent was found. $MFT is " 1938 "resident extent was found. $MFT is "
1949 "corrupt. Run chkdsk."); 1939 "corrupt. Run chkdsk.");
1950 goto put_err_out; 1940 goto put_err_out;
1951 } 1941 }
1952 /* $MFT must be uncompressed and unencrypted. */ 1942 /* $MFT must be uncompressed and unencrypted. */
1953 if (attr->flags & ATTR_COMPRESSION_MASK || 1943 if (a->flags & ATTR_COMPRESSION_MASK ||
1954 attr->flags & ATTR_IS_ENCRYPTED || 1944 a->flags & ATTR_IS_ENCRYPTED ||
1955 attr->flags & ATTR_IS_SPARSE) { 1945 a->flags & ATTR_IS_SPARSE) {
1956 ntfs_error(sb, "$MFT must be uncompressed, " 1946 ntfs_error(sb, "$MFT must be uncompressed, "
1957 "non-sparse, and unencrypted but a " 1947 "non-sparse, and unencrypted but a "
1958 "compressed/sparse/encrypted extent " 1948 "compressed/sparse/encrypted extent "
@@ -1966,7 +1956,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1966 * as we have exclusive access to the inode at this time and we 1956 * as we have exclusive access to the inode at this time and we
1967 * are a mount in progress task, too. 1957 * are a mount in progress task, too.
1968 */ 1958 */
1969 nrl = ntfs_mapping_pairs_decompress(vol, attr, ni->runlist.rl); 1959 nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
1970 if (IS_ERR(nrl)) { 1960 if (IS_ERR(nrl)) {
1971 ntfs_error(sb, "ntfs_mapping_pairs_decompress() " 1961 ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
1972 "failed with error code %ld. $MFT is " 1962 "failed with error code %ld. $MFT is "
@@ -1977,7 +1967,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1977 1967
1978 /* Are we in the first extent? */ 1968 /* Are we in the first extent? */
1979 if (!next_vcn) { 1969 if (!next_vcn) {
1980 if (attr->data.non_resident.lowest_vcn) { 1970 if (a->data.non_resident.lowest_vcn) {
1981 ntfs_error(sb, "First extent of $DATA " 1971 ntfs_error(sb, "First extent of $DATA "
1982 "attribute has non zero " 1972 "attribute has non zero "
1983 "lowest_vcn. $MFT is corrupt. " 1973 "lowest_vcn. $MFT is corrupt. "
@@ -1986,15 +1976,15 @@ int ntfs_read_inode_mount(struct inode *vi)
1986 } 1976 }
1987 /* Get the last vcn in the $DATA attribute. */ 1977 /* Get the last vcn in the $DATA attribute. */
1988 last_vcn = sle64_to_cpu( 1978 last_vcn = sle64_to_cpu(
1989 attr->data.non_resident.allocated_size) 1979 a->data.non_resident.allocated_size)
1990 >> vol->cluster_size_bits; 1980 >> vol->cluster_size_bits;
1991 /* Fill in the inode size. */ 1981 /* Fill in the inode size. */
1992 vi->i_size = sle64_to_cpu( 1982 vi->i_size = sle64_to_cpu(
1993 attr->data.non_resident.data_size); 1983 a->data.non_resident.data_size);
1994 ni->initialized_size = sle64_to_cpu(attr->data. 1984 ni->initialized_size = sle64_to_cpu(
1995 non_resident.initialized_size); 1985 a->data.non_resident.initialized_size);
1996 ni->allocated_size = sle64_to_cpu( 1986 ni->allocated_size = sle64_to_cpu(
1997 attr->data.non_resident.allocated_size); 1987 a->data.non_resident.allocated_size);
1998 /* 1988 /*
1999 * Verify the number of mft records does not exceed 1989 * Verify the number of mft records does not exceed
2000 * 2^32 - 1. 1990 * 2^32 - 1.
@@ -2051,7 +2041,7 @@ int ntfs_read_inode_mount(struct inode *vi)
2051 } 2041 }
2052 2042
2053 /* Get the lowest vcn for the next extent. */ 2043 /* Get the lowest vcn for the next extent. */
2054 highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); 2044 highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
2055 next_vcn = highest_vcn + 1; 2045 next_vcn = highest_vcn + 1;
2056 2046
2057 /* Only one extent or error, which we catch below. */ 2047 /* Only one extent or error, which we catch below. */
@@ -2060,7 +2050,7 @@ int ntfs_read_inode_mount(struct inode *vi)
2060 2050
2061 /* Avoid endless loops due to corruption. */ 2051 /* Avoid endless loops due to corruption. */
2062 if (next_vcn < sle64_to_cpu( 2052 if (next_vcn < sle64_to_cpu(
2063 attr->data.non_resident.lowest_vcn)) { 2053 a->data.non_resident.lowest_vcn)) {
2064 ntfs_error(sb, "$MFT has corrupt attribute list " 2054 ntfs_error(sb, "$MFT has corrupt attribute list "
2065 "attribute. Run chkdsk."); 2055 "attribute. Run chkdsk.");
2066 goto put_err_out; 2056 goto put_err_out;
@@ -2071,7 +2061,7 @@ int ntfs_read_inode_mount(struct inode *vi)
2071 "$MFT is corrupt. Run chkdsk."); 2061 "$MFT is corrupt. Run chkdsk.");
2072 goto put_err_out; 2062 goto put_err_out;
2073 } 2063 }
2074 if (!attr) { 2064 if (!a) {
2075 ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is " 2065 ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
2076 "corrupt. Run chkdsk."); 2066 "corrupt. Run chkdsk.");
2077 goto put_err_out; 2067 goto put_err_out;
@@ -2275,6 +2265,8 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
2275 seq_printf(sf, ",case_sensitive"); 2265 seq_printf(sf, ",case_sensitive");
2276 if (NVolShowSystemFiles(vol)) 2266 if (NVolShowSystemFiles(vol))
2277 seq_printf(sf, ",show_sys_files"); 2267 seq_printf(sf, ",show_sys_files");
2268 if (!NVolSparseEnabled(vol))
2269 seq_printf(sf, ",disable_sparse");
2278 for (i = 0; on_errors_arr[i].val; i++) { 2270 for (i = 0; on_errors_arr[i].val; i++) {
2279 if (on_errors_arr[i].val & vol->on_errors) 2271 if (on_errors_arr[i].val & vol->on_errors)
2280 seq_printf(sf, ",errors=%s", on_errors_arr[i].str); 2272 seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
@@ -2311,6 +2303,7 @@ int ntfs_truncate(struct inode *vi)
2311 ntfs_volume *vol = ni->vol; 2303 ntfs_volume *vol = ni->vol;
2312 ntfs_attr_search_ctx *ctx; 2304 ntfs_attr_search_ctx *ctx;
2313 MFT_RECORD *m; 2305 MFT_RECORD *m;
2306 ATTR_RECORD *a;
2314 const char *te = " Leaving file length out of sync with i_size."; 2307 const char *te = " Leaving file length out of sync with i_size.";
2315 int err; 2308 int err;
2316 2309
@@ -2347,14 +2340,15 @@ int ntfs_truncate(struct inode *vi)
2347 vi->i_ino, err); 2340 vi->i_ino, err);
2348 goto err_out; 2341 goto err_out;
2349 } 2342 }
2343 a = ctx->attr;
2350 /* If the size has not changed there is nothing to do. */ 2344 /* If the size has not changed there is nothing to do. */
2351 if (ntfs_attr_size(ctx->attr) == i_size_read(vi)) 2345 if (ntfs_attr_size(a) == i_size_read(vi))
2352 goto done; 2346 goto done;
2353 // TODO: Implement the truncate... 2347 // TODO: Implement the truncate...
2354 ntfs_error(vi->i_sb, "Inode size has changed but this is not " 2348 ntfs_error(vi->i_sb, "Inode size has changed but this is not "
2355 "implemented yet. Resetting inode size to old value. " 2349 "implemented yet. Resetting inode size to old value. "
2356 " This is most likely a bug in the ntfs driver!"); 2350 " This is most likely a bug in the ntfs driver!");
2357 i_size_write(vi, ntfs_attr_size(ctx->attr)); 2351 i_size_write(vi, ntfs_attr_size(a));
2358done: 2352done:
2359 ntfs_attr_put_search_ctx(ctx); 2353 ntfs_attr_put_search_ctx(ctx);
2360 unmap_mft_record(ni); 2354 unmap_mft_record(ni);
@@ -2515,18 +2509,18 @@ int ntfs_write_inode(struct inode *vi, int sync)
2515 nt = utc2ntfs(vi->i_mtime); 2509 nt = utc2ntfs(vi->i_mtime);
2516 if (si->last_data_change_time != nt) { 2510 if (si->last_data_change_time != nt) {
2517 ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " 2511 ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
2518 "new = 0x%llx", vi->i_ino, 2512 "new = 0x%llx", vi->i_ino, (long long)
2519 sle64_to_cpu(si->last_data_change_time), 2513 sle64_to_cpu(si->last_data_change_time),
2520 sle64_to_cpu(nt)); 2514 (long long)sle64_to_cpu(nt));
2521 si->last_data_change_time = nt; 2515 si->last_data_change_time = nt;
2522 modified = TRUE; 2516 modified = TRUE;
2523 } 2517 }
2524 nt = utc2ntfs(vi->i_ctime); 2518 nt = utc2ntfs(vi->i_ctime);
2525 if (si->last_mft_change_time != nt) { 2519 if (si->last_mft_change_time != nt) {
2526 ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " 2520 ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
2527 "new = 0x%llx", vi->i_ino, 2521 "new = 0x%llx", vi->i_ino, (long long)
2528 sle64_to_cpu(si->last_mft_change_time), 2522 sle64_to_cpu(si->last_mft_change_time),
2529 sle64_to_cpu(nt)); 2523 (long long)sle64_to_cpu(nt));
2530 si->last_mft_change_time = nt; 2524 si->last_mft_change_time = nt;
2531 modified = TRUE; 2525 modified = TRUE;
2532 } 2526 }
@@ -2534,8 +2528,8 @@ int ntfs_write_inode(struct inode *vi, int sync)
2534 if (si->last_access_time != nt) { 2528 if (si->last_access_time != nt) {
2535 ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " 2529 ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
2536 "new = 0x%llx", vi->i_ino, 2530 "new = 0x%llx", vi->i_ino,
2537 sle64_to_cpu(si->last_access_time), 2531 (long long)sle64_to_cpu(si->last_access_time),
2538 sle64_to_cpu(nt)); 2532 (long long)sle64_to_cpu(nt));
2539 si->last_access_time = nt; 2533 si->last_access_time = nt;
2540 modified = TRUE; 2534 modified = TRUE;
2541 } 2535 }
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 99580455f2ed..3de5c0231966 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -2,7 +2,7 @@
2 * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of 2 * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
3 * the Linux-NTFS project. 3 * the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -44,6 +44,7 @@ typedef struct _ntfs_inode ntfs_inode;
44 * fields already provided in the VFS inode. 44 * fields already provided in the VFS inode.
45 */ 45 */
46struct _ntfs_inode { 46struct _ntfs_inode {
47 rwlock_t size_lock; /* Lock serializing access to inode sizes. */
47 s64 initialized_size; /* Copy from the attribute record. */ 48 s64 initialized_size; /* Copy from the attribute record. */
48 s64 allocated_size; /* Copy from the attribute record. */ 49 s64 allocated_size; /* Copy from the attribute record. */
49 unsigned long state; /* NTFS specific flags describing this inode. 50 unsigned long state; /* NTFS specific flags describing this inode.
@@ -109,7 +110,7 @@ struct _ntfs_inode {
109 u8 block_size_bits; /* Log2 of the above. */ 110 u8 block_size_bits; /* Log2 of the above. */
110 u8 vcn_size_bits; /* Log2 of the above. */ 111 u8 vcn_size_bits; /* Log2 of the above. */
111 } index; 112 } index;
112 struct { /* It is a compressed file or an attribute inode. */ 113 struct { /* It is a compressed/sparse file/attribute inode. */
113 s64 size; /* Copy of compressed_size from 114 s64 size; /* Copy of compressed_size from
114 $DATA. */ 115 $DATA. */
115 u32 block_size; /* Size of a compression block 116 u32 block_size; /* Size of a compression block
@@ -165,6 +166,7 @@ typedef enum {
165 NI_Sparse, /* 1: Unnamed data attr is sparse (f). 166 NI_Sparse, /* 1: Unnamed data attr is sparse (f).
166 1: Create sparse files by default (d). 167 1: Create sparse files by default (d).
167 1: Attribute is sparse (a). */ 168 1: Attribute is sparse (a). */
169 NI_SparseDisabled, /* 1: May not create sparse regions. */
168 NI_TruncateFailed, /* 1: Last ntfs_truncate() call failed. */ 170 NI_TruncateFailed, /* 1: Last ntfs_truncate() call failed. */
169} ntfs_inode_state_bits; 171} ntfs_inode_state_bits;
170 172
@@ -217,6 +219,7 @@ NINO_FNS(IndexAllocPresent)
217NINO_FNS(Compressed) 219NINO_FNS(Compressed)
218NINO_FNS(Encrypted) 220NINO_FNS(Encrypted)
219NINO_FNS(Sparse) 221NINO_FNS(Sparse)
222NINO_FNS(SparseDisabled)
220NINO_FNS(TruncateFailed) 223NINO_FNS(TruncateFailed)
221 224
222/* 225/*
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 47b338999921..609ad1728ce4 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2,7 +2,7 @@
2 * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS 2 * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS
3 * project. 3 * project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -547,26 +547,44 @@ enum {
547 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), 547 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10),
548 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), 548 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11),
549 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), 549 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12),
550 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13) 550 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13),
551}; 551};
552 552
553typedef le32 COLLATION_RULE; 553typedef le32 COLLATION_RULE;
554 554
555/* 555/*
556 * The flags (32-bit) describing attribute properties in the attribute 556 * The flags (32-bit) describing attribute properties in the attribute
557 * definition structure. FIXME: This information is from Regis's information 557 * definition structure. FIXME: This information is based on Regis's
558 * and, according to him, it is not certain and probably incomplete. 558 * information and, according to him, it is not certain and probably
559 * The INDEXABLE flag is fairly certainly correct as only the file name 559 * incomplete. The INDEXABLE flag is fairly certainly correct as only the file
560 * attribute has this flag set and this is the only attribute indexed in NT4. 560 * name attribute has this flag set and this is the only attribute indexed in
561 * NT4.
561 */ 562 */
562enum { 563enum {
563 INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be 564 ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be
564 indexed. */ 565 indexed. */
565 NEED_TO_REGENERATE = const_cpu_to_le32(0x40), /* Need to regenerate 566 ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type
566 during regeneration 567 can be present multiple times in the
567 phase. */ 568 mft records of an inode. */
568 CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be 569 ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value
569 non-resident. */ 570 must contain at least one non-zero
571 byte. */
572 ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be
573 indexed and the attribute value must be
574 unique for the attribute type in all of
575 the mft records of an inode. */
576 ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be
577 named and the name must be unique for
578 the attribute type in all of the mft
579 records of an inode. */
580 ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be
581 resident. */
582 ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log
583 modifications to this attribute,
584 regardless of whether it is resident or
585 non-resident. Without this, only log
586 modifications if the attribute is
587 resident. */
570}; 588};
571 589
572typedef le32 ATTR_DEF_FLAGS; 590typedef le32 ATTR_DEF_FLAGS;
@@ -749,10 +767,11 @@ typedef struct {
749 record header aligned to 8-byte boundary. */ 767 record header aligned to 8-byte boundary. */
750/* 34*/ u8 compression_unit; /* The compression unit expressed 768/* 34*/ u8 compression_unit; /* The compression unit expressed
751 as the log to the base 2 of the number of 769 as the log to the base 2 of the number of
752 clusters in a compression unit. 0 means not 770 clusters in a compression unit. 0 means not
753 compressed. (This effectively limits the 771 compressed. (This effectively limits the
754 compression unit size to be a power of two 772 compression unit size to be a power of two
755 clusters.) WinNT4 only uses a value of 4. */ 773 clusters.) WinNT4 only uses a value of 4.
774 Sparse files also have this set to 4. */
756/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */ 775/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */
757/* The sizes below are only used when lowest_vcn is zero, as otherwise it would 776/* The sizes below are only used when lowest_vcn is zero, as otherwise it would
758 be difficult to keep them up-to-date.*/ 777 be difficult to keep them up-to-date.*/
@@ -772,10 +791,10 @@ typedef struct {
772 data_size. */ 791 data_size. */
773/* sizeof(uncompressed attr) = 64*/ 792/* sizeof(uncompressed attr) = 64*/
774/* 64*/ sle64 compressed_size; /* Byte size of the attribute 793/* 64*/ sle64 compressed_size; /* Byte size of the attribute
775 value after compression. Only present when 794 value after compression. Only present when
776 compressed. Always is a multiple of the 795 compressed or sparse. Always is a multiple of
777 cluster size. Represents the actual amount of 796 the cluster size. Represents the actual amount
778 disk space being used on the disk. */ 797 of disk space being used on the disk. */
779/* sizeof(compressed attr) = 72*/ 798/* sizeof(compressed attr) = 72*/
780 } __attribute__ ((__packed__)) non_resident; 799 } __attribute__ ((__packed__)) non_resident;
781 } __attribute__ ((__packed__)) data; 800 } __attribute__ ((__packed__)) data;
@@ -834,7 +853,7 @@ enum {
834 /* Note, this is a copy of the corresponding bit from the mft record, 853 /* Note, this is a copy of the corresponding bit from the mft record,
835 telling us whether this file has a view index present (eg. object id 854 telling us whether this file has a view index present (eg. object id
836 index, quota index, one of the security indexes or the encrypting 855 index, quota index, one of the security indexes or the encrypting
837 file system related indexes). */ 856 filesystem related indexes). */
838}; 857};
839 858
840typedef le32 FILE_ATTR_FLAGS; 859typedef le32 FILE_ATTR_FLAGS;
@@ -917,20 +936,12 @@ typedef struct {
917 /* 56*/ le64 quota_charged; /* Byte size of the charge to 936 /* 56*/ le64 quota_charged; /* Byte size of the charge to
918 the quota for all streams of the file. Note: Is 937 the quota for all streams of the file. Note: Is
919 zero if quotas are disabled. */ 938 zero if quotas are disabled. */
920 /* 64*/ le64 usn; /* Last update sequence number 939 /* 64*/ leUSN usn; /* Last update sequence number
921 of the file. This is a direct index into the 940 of the file. This is a direct index into the
922 change (aka usn) journal file. It is zero if 941 transaction log file ($UsnJrnl). It is zero if
923 the usn journal is disabled. 942 the usn journal is disabled or this file has
924 NOTE: To disable the journal need to delete 943 not been subject to logging yet. See usnjrnl.h
925 the journal file itself and to then walk the 944 for details. */
926 whole mft and set all Usn entries in all mft
927 records to zero! (This can take a while!)
928 The journal is FILE_Extend/$UsnJrnl. Win2k
929 will recreate the journal and initiate
930 logging if necessary when mounting the
931 partition. This, in contrast to disabling the
932 journal is a very fast process, so the user
933 won't even notice it. */
934 } __attribute__ ((__packed__)) v3; 945 } __attribute__ ((__packed__)) v3;
935 /* sizeof() = 72 bytes (NTFS 3.x) */ 946 /* sizeof() = 72 bytes (NTFS 3.x) */
936 } __attribute__ ((__packed__)) ver; 947 } __attribute__ ((__packed__)) ver;
@@ -1893,7 +1904,7 @@ enum {
1893 VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), 1904 VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f),
1894 1905
1895 /* To make our life easier when checking if we must mount read-only. */ 1906 /* To make our life easier when checking if we must mount read-only. */
1896 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8037), 1907 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8027),
1897} __attribute__ ((__packed__)); 1908} __attribute__ ((__packed__));
1898 1909
1899typedef le16 VOLUME_FLAGS; 1910typedef le16 VOLUME_FLAGS;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 23fd911078b1..a4bc07616e5d 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. 2 * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2004 Anton Altaparmakov 4 * Copyright (c) 2004-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -60,7 +60,7 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
60 if (rl->lcn < 0) 60 if (rl->lcn < 0)
61 continue; 61 continue;
62 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); 62 err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length);
63 if (unlikely(err && (!ret || ret == ENOMEM) && ret != err)) 63 if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err))
64 ret = err; 64 ret = err;
65 } 65 }
66 ntfs_debug("Done."); 66 ntfs_debug("Done.");
@@ -140,6 +140,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
140 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; 140 LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
141 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; 141 LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
142 s64 clusters; 142 s64 clusters;
143 loff_t i_size;
143 struct inode *lcnbmp_vi; 144 struct inode *lcnbmp_vi;
144 runlist_element *rl = NULL; 145 runlist_element *rl = NULL;
145 struct address_space *mapping; 146 struct address_space *mapping;
@@ -249,6 +250,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
249 clusters = count; 250 clusters = count;
250 rlpos = rlsize = 0; 251 rlpos = rlsize = 0;
251 mapping = lcnbmp_vi->i_mapping; 252 mapping = lcnbmp_vi->i_mapping;
253 i_size = i_size_read(lcnbmp_vi);
252 while (1) { 254 while (1) {
253 ntfs_debug("Start of outer while loop: done_zones 0x%x, " 255 ntfs_debug("Start of outer while loop: done_zones 0x%x, "
254 "search_zone %i, pass %i, zone_start 0x%llx, " 256 "search_zone %i, pass %i, zone_start 0x%llx, "
@@ -263,7 +265,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
263 last_read_pos = bmp_pos >> 3; 265 last_read_pos = bmp_pos >> 3;
264 ntfs_debug("last_read_pos 0x%llx.", 266 ntfs_debug("last_read_pos 0x%llx.",
265 (unsigned long long)last_read_pos); 267 (unsigned long long)last_read_pos);
266 if (last_read_pos > lcnbmp_vi->i_size) { 268 if (last_read_pos > i_size) {
267 ntfs_debug("End of attribute reached. " 269 ntfs_debug("End of attribute reached. "
268 "Skipping to zone_pass_done."); 270 "Skipping to zone_pass_done.");
269 goto zone_pass_done; 271 goto zone_pass_done;
@@ -287,11 +289,11 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
287 buf_size = last_read_pos & ~PAGE_CACHE_MASK; 289 buf_size = last_read_pos & ~PAGE_CACHE_MASK;
288 buf = page_address(page) + buf_size; 290 buf = page_address(page) + buf_size;
289 buf_size = PAGE_CACHE_SIZE - buf_size; 291 buf_size = PAGE_CACHE_SIZE - buf_size;
290 if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size)) 292 if (unlikely(last_read_pos + buf_size > i_size))
291 buf_size = lcnbmp_vi->i_size - last_read_pos; 293 buf_size = i_size - last_read_pos;
292 buf_size <<= 3; 294 buf_size <<= 3;
293 lcn = bmp_pos & 7; 295 lcn = bmp_pos & 7;
294 bmp_pos &= ~7; 296 bmp_pos &= ~(LCN)7;
295 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, " 297 ntfs_debug("Before inner while loop: buf_size %i, lcn 0x%llx, "
296 "bmp_pos 0x%llx, need_writeback %i.", buf_size, 298 "bmp_pos 0x%llx, need_writeback %i.", buf_size,
297 (unsigned long long)lcn, 299 (unsigned long long)lcn,
@@ -309,7 +311,7 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
309 (unsigned int)*byte); 311 (unsigned int)*byte);
310 /* Skip full bytes. */ 312 /* Skip full bytes. */
311 if (*byte == 0xff) { 313 if (*byte == 0xff) {
312 lcn = (lcn + 8) & ~7; 314 lcn = (lcn + 8) & ~(LCN)7;
313 ntfs_debug("Continuing while loop 1."); 315 ntfs_debug("Continuing while loop 1.");
314 continue; 316 continue;
315 } 317 }
@@ -691,7 +693,7 @@ switch_to_data1_zone: search_zone = 2;
691 if (zone == MFT_ZONE || mft_zone_size <= 0) { 693 if (zone == MFT_ZONE || mft_zone_size <= 0) {
692 ntfs_debug("No free clusters left, going to out."); 694 ntfs_debug("No free clusters left, going to out.");
693 /* Really no more space left on device. */ 695 /* Really no more space left on device. */
694 err = ENOSPC; 696 err = -ENOSPC;
695 goto out; 697 goto out;
696 } /* zone == DATA_ZONE && mft_zone_size > 0 */ 698 } /* zone == DATA_ZONE && mft_zone_size > 0 */
697 ntfs_debug("Shrinking mft zone."); 699 ntfs_debug("Shrinking mft zone.");
@@ -755,13 +757,13 @@ out:
755 if (rl) { 757 if (rl) {
756 int err2; 758 int err2;
757 759
758 if (err == ENOSPC) 760 if (err == -ENOSPC)
759 ntfs_debug("Not enough space to complete allocation, " 761 ntfs_debug("Not enough space to complete allocation, "
760 "err ENOSPC, first free lcn 0x%llx, " 762 "err -ENOSPC, first free lcn 0x%llx, "
761 "could allocate up to 0x%llx " 763 "could allocate up to 0x%llx "
762 "clusters.", 764 "clusters.",
763 (unsigned long long)rl[0].lcn, 765 (unsigned long long)rl[0].lcn,
764 (unsigned long long)count - clusters); 766 (unsigned long long)(count - clusters));
765 /* Deallocate all allocated clusters. */ 767 /* Deallocate all allocated clusters. */
766 ntfs_debug("Attempting rollback..."); 768 ntfs_debug("Attempting rollback...");
767 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); 769 err2 = ntfs_cluster_free_from_rl_nolock(vol, rl);
@@ -773,10 +775,10 @@ out:
773 } 775 }
774 /* Free the runlist. */ 776 /* Free the runlist. */
775 ntfs_free(rl); 777 ntfs_free(rl);
776 } else if (err == ENOSPC) 778 } else if (err == -ENOSPC)
777 ntfs_debug("No space left at all, err = ENOSPC, " 779 ntfs_debug("No space left at all, err = -ENOSPC, first free "
778 "first free lcn = 0x%llx.", 780 "lcn = 0x%llx.",
779 (unsigned long long)vol->data1_zone_pos); 781 (long long)vol->data1_zone_pos);
780 up_write(&vol->lcnbmp_lock); 782 up_write(&vol->lcnbmp_lock);
781 return ERR_PTR(err); 783 return ERR_PTR(err);
782} 784}
@@ -846,8 +848,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
846 848
847 total_freed = real_freed = 0; 849 total_freed = real_freed = 0;
848 850
849 /* This returns with ni->runlist locked for reading on success. */ 851 down_read(&ni->runlist.lock);
850 rl = ntfs_find_vcn(ni, start_vcn, FALSE); 852 rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
851 if (IS_ERR(rl)) { 853 if (IS_ERR(rl)) {
852 if (!is_rollback) 854 if (!is_rollback)
853 ntfs_error(vol->sb, "Failed to find first runlist " 855 ntfs_error(vol->sb, "Failed to find first runlist "
@@ -861,7 +863,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
861 ntfs_error(vol->sb, "First runlist element has " 863 ntfs_error(vol->sb, "First runlist element has "
862 "invalid lcn, aborting."); 864 "invalid lcn, aborting.");
863 err = -EIO; 865 err = -EIO;
864 goto unl_err_out; 866 goto err_out;
865 } 867 }
866 /* Find the starting cluster inside the run that needs freeing. */ 868 /* Find the starting cluster inside the run that needs freeing. */
867 delta = start_vcn - rl->vcn; 869 delta = start_vcn - rl->vcn;
@@ -879,7 +881,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
879 if (!is_rollback) 881 if (!is_rollback)
880 ntfs_error(vol->sb, "Failed to clear first run " 882 ntfs_error(vol->sb, "Failed to clear first run "
881 "(error %i), aborting.", err); 883 "(error %i), aborting.", err);
882 goto unl_err_out; 884 goto err_out;
883 } 885 }
884 /* We have freed @to_free real clusters. */ 886 /* We have freed @to_free real clusters. */
885 real_freed = to_free; 887 real_freed = to_free;
@@ -899,30 +901,15 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
899 if (unlikely(rl->lcn < LCN_HOLE)) { 901 if (unlikely(rl->lcn < LCN_HOLE)) {
900 VCN vcn; 902 VCN vcn;
901 903
902 /* 904 /* Attempt to map runlist. */
903 * Attempt to map runlist, dropping runlist lock for
904 * the duration.
905 */
906 vcn = rl->vcn; 905 vcn = rl->vcn;
907 up_read(&ni->runlist.lock); 906 rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE);
908 err = ntfs_map_runlist(ni, vcn);
909 if (err) {
910 if (!is_rollback)
911 ntfs_error(vol->sb, "Failed to map "
912 "runlist fragment.");
913 if (err == -EINVAL || err == -ENOENT)
914 err = -EIO;
915 goto err_out;
916 }
917 /*
918 * This returns with ni->runlist locked for reading on
919 * success.
920 */
921 rl = ntfs_find_vcn(ni, vcn, FALSE);
922 if (IS_ERR(rl)) { 907 if (IS_ERR(rl)) {
923 err = PTR_ERR(rl); 908 err = PTR_ERR(rl);
924 if (!is_rollback) 909 if (!is_rollback)
925 ntfs_error(vol->sb, "Failed to find " 910 ntfs_error(vol->sb, "Failed to map "
911 "runlist fragment or "
912 "failed to find "
926 "subsequent runlist " 913 "subsequent runlist "
927 "element."); 914 "element.");
928 goto err_out; 915 goto err_out;
@@ -935,7 +922,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
935 (unsigned long long) 922 (unsigned long long)
936 rl->lcn); 923 rl->lcn);
937 err = -EIO; 924 err = -EIO;
938 goto unl_err_out; 925 goto err_out;
939 } 926 }
940 } 927 }
941 /* The number of clusters in this run that need freeing. */ 928 /* The number of clusters in this run that need freeing. */
@@ -951,7 +938,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
951 if (!is_rollback) 938 if (!is_rollback)
952 ntfs_error(vol->sb, "Failed to clear " 939 ntfs_error(vol->sb, "Failed to clear "
953 "subsequent run."); 940 "subsequent run.");
954 goto unl_err_out; 941 goto err_out;
955 } 942 }
956 /* We have freed @to_free real clusters. */ 943 /* We have freed @to_free real clusters. */
957 real_freed += to_free; 944 real_freed += to_free;
@@ -972,9 +959,8 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
972 /* We are done. Return the number of actually freed clusters. */ 959 /* We are done. Return the number of actually freed clusters. */
973 ntfs_debug("Done."); 960 ntfs_debug("Done.");
974 return real_freed; 961 return real_freed;
975unl_err_out:
976 up_read(&ni->runlist.lock);
977err_out: 962err_out:
963 up_read(&ni->runlist.lock);
978 if (is_rollback) 964 if (is_rollback)
979 return err; 965 return err;
980 /* If no real clusters were freed, no need to rollback. */ 966 /* If no real clusters were freed, no need to rollback. */
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 5e280abafab3..8edb8e20fb08 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. 2 * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2002-2004 Anton Altaparmakov 4 * Copyright (c) 2002-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -410,7 +410,7 @@ err_out:
410} 410}
411 411
412/** 412/**
413 * ntfs_ckeck_logfile - check in the journal if the volume is consistent 413 * ntfs_check_logfile - check the journal for consistency
414 * @log_vi: struct inode of loaded journal $LogFile to check 414 * @log_vi: struct inode of loaded journal $LogFile to check
415 * 415 *
416 * Check the $LogFile journal for consistency and return TRUE if it is 416 * Check the $LogFile journal for consistency and return TRUE if it is
@@ -443,7 +443,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
443 /* An empty $LogFile must have been clean before it got emptied. */ 443 /* An empty $LogFile must have been clean before it got emptied. */
444 if (NVolLogFileEmpty(vol)) 444 if (NVolLogFileEmpty(vol))
445 goto is_empty; 445 goto is_empty;
446 size = log_vi->i_size; 446 size = i_size_read(log_vi);
447 /* Make sure the file doesn't exceed the maximum allowed size. */ 447 /* Make sure the file doesn't exceed the maximum allowed size. */
448 if (size > MaxLogFileSize) 448 if (size > MaxLogFileSize)
449 size = MaxLogFileSize; 449 size = MaxLogFileSize;
@@ -464,7 +464,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
464 * optimize log_page_size and log_page_bits into constants. 464 * optimize log_page_size and log_page_bits into constants.
465 */ 465 */
466 log_page_bits = generic_ffs(log_page_size) - 1; 466 log_page_bits = generic_ffs(log_page_size) - 1;
467 size &= ~(log_page_size - 1); 467 size &= ~(s64)(log_page_size - 1);
468 /* 468 /*
469 * Ensure the log file is big enough to store at least the two restart 469 * Ensure the log file is big enough to store at least the two restart
470 * pages and the minimum number of log record pages. 470 * pages and the minimum number of log record pages.
@@ -689,7 +689,8 @@ BOOL ntfs_empty_logfile(struct inode *log_vi)
689 if (!NVolLogFileEmpty(vol)) { 689 if (!NVolLogFileEmpty(vol)) {
690 int err; 690 int err;
691 691
692 err = ntfs_attr_set(NTFS_I(log_vi), 0, log_vi->i_size, 0xff); 692 err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi),
693 0xff);
693 if (unlikely(err)) { 694 if (unlikely(err)) {
694 ntfs_error(vol->sb, "Failed to fill $LogFile with " 695 ntfs_error(vol->sb, "Failed to fill $LogFile with "
695 "0xff bytes (error code %i).", err); 696 "0xff bytes (error code %i).", err);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index dfa85ac2f8ba..317f7c679fd3 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -45,6 +45,7 @@
45 */ 45 */
46static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) 46static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
47{ 47{
48 loff_t i_size;
48 ntfs_volume *vol = ni->vol; 49 ntfs_volume *vol = ni->vol;
49 struct inode *mft_vi = vol->mft_ino; 50 struct inode *mft_vi = vol->mft_ino;
50 struct page *page; 51 struct page *page;
@@ -60,13 +61,14 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
60 index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; 61 index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
61 ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 62 ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
62 63
64 i_size = i_size_read(mft_vi);
63 /* The maximum valid index into the page cache for $MFT's data. */ 65 /* The maximum valid index into the page cache for $MFT's data. */
64 end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; 66 end_index = i_size >> PAGE_CACHE_SHIFT;
65 67
66 /* If the wanted index is out of bounds the mft record doesn't exist. */ 68 /* If the wanted index is out of bounds the mft record doesn't exist. */
67 if (unlikely(index >= end_index)) { 69 if (unlikely(index >= end_index)) {
68 if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) < 70 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
69 ofs + vol->mft_record_size) { 71 vol->mft_record_size) {
70 page = ERR_PTR(-ENOENT); 72 page = ERR_PTR(-ENOENT);
71 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " 73 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, "
72 "which is beyond the end of the mft. " 74 "which is beyond the end of the mft. "
@@ -285,7 +287,7 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
285 } 287 }
286 unmap_mft_record(ni); 288 unmap_mft_record(ni);
287 ntfs_error(base_ni->vol->sb, "Found stale extent mft " 289 ntfs_error(base_ni->vol->sb, "Found stale extent mft "
288 "reference! Corrupt file system. " 290 "reference! Corrupt filesystem. "
289 "Run chkdsk."); 291 "Run chkdsk.");
290 return ERR_PTR(-EIO); 292 return ERR_PTR(-EIO);
291 } 293 }
@@ -316,7 +318,7 @@ map_err_out:
316 /* Verify the sequence number if it is present. */ 318 /* Verify the sequence number if it is present. */
317 if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { 319 if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
318 ntfs_error(base_ni->vol->sb, "Found stale extent mft " 320 ntfs_error(base_ni->vol->sb, "Found stale extent mft "
319 "reference! Corrupt file system. Run chkdsk."); 321 "reference! Corrupt filesystem. Run chkdsk.");
320 destroy_ni = TRUE; 322 destroy_ni = TRUE;
321 m = ERR_PTR(-EIO); 323 m = ERR_PTR(-EIO);
322 goto unm_err_out; 324 goto unm_err_out;
@@ -531,6 +533,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
531 LCN lcn; 533 LCN lcn;
532 unsigned int vcn_ofs; 534 unsigned int vcn_ofs;
533 535
536 bh->b_bdev = vol->sb->s_bdev;
534 /* Obtain the vcn and offset of the current block. */ 537 /* Obtain the vcn and offset of the current block. */
535 vcn = ((VCN)mft_no << vol->mft_record_size_bits) + 538 vcn = ((VCN)mft_no << vol->mft_record_size_bits) +
536 (block_start - m_start); 539 (block_start - m_start);
@@ -723,6 +726,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
723 LCN lcn; 726 LCN lcn;
724 unsigned int vcn_ofs; 727 unsigned int vcn_ofs;
725 728
729 bh->b_bdev = vol->sb->s_bdev;
726 /* Obtain the vcn and offset of the current block. */ 730 /* Obtain the vcn and offset of the current block. */
727 vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) + 731 vcn = ((VCN)ni->mft_no << vol->mft_record_size_bits) +
728 (block_start - m_start); 732 (block_start - m_start);
@@ -946,20 +950,23 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
946 na.name_len = 0; 950 na.name_len = 0;
947 na.type = AT_UNUSED; 951 na.type = AT_UNUSED;
948 /* 952 /*
949 * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from here or 953 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
950 * we deadlock because the inode is already locked by the kernel 954 * we get here for it rather often.
951 * (fs/fs-writeback.c::__sync_single_inode()) and ilookup5() waits
952 * until the inode is unlocked before returning it and it never gets
953 * unlocked because ntfs_should_write_mft_record() never returns. )-:
954 * Fortunately, we have inode 0 pinned in icache for the duration of
955 * the mount so we can access it directly.
956 */ 955 */
957 if (!mft_no) { 956 if (!mft_no) {
958 /* Balance the below iput(). */ 957 /* Balance the below iput(). */
959 vi = igrab(mft_vi); 958 vi = igrab(mft_vi);
960 BUG_ON(vi != mft_vi); 959 BUG_ON(vi != mft_vi);
961 } else 960 } else {
962 vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na); 961 /*
962 * Have to use ilookup5_nowait() since ilookup5() waits for the
963 * inode lock which causes ntfs to deadlock when a concurrent
964 * inode write via the inode dirty code paths and the page
965 * dirty code path of the inode dirty code path when writing
966 * $MFT occurs.
967 */
968 vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na);
969 }
963 if (vi) { 970 if (vi) {
964 ntfs_debug("Base inode 0x%lx is in icache.", mft_no); 971 ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
965 /* The inode is in icache. */ 972 /* The inode is in icache. */
@@ -1014,7 +1021,13 @@ BOOL ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
1014 na.mft_no = MREF_LE(m->base_mft_record); 1021 na.mft_no = MREF_LE(m->base_mft_record);
1015 ntfs_debug("Mft record 0x%lx is an extent record. Looking for base " 1022 ntfs_debug("Mft record 0x%lx is an extent record. Looking for base "
1016 "inode 0x%lx in icache.", mft_no, na.mft_no); 1023 "inode 0x%lx in icache.", mft_no, na.mft_no);
1017 vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode, &na); 1024 if (!na.mft_no) {
1025 /* Balance the below iput(). */
1026 vi = igrab(mft_vi);
1027 BUG_ON(vi != mft_vi);
1028 } else
1029 vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode,
1030 &na);
1018 if (!vi) { 1031 if (!vi) {
1019 /* 1032 /*
1020 * The base inode is not in icache, write this extent mft 1033 * The base inode is not in icache, write this extent mft
@@ -1121,6 +1134,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1121 ntfs_inode *base_ni) 1134 ntfs_inode *base_ni)
1122{ 1135{
1123 s64 pass_end, ll, data_pos, pass_start, ofs, bit; 1136 s64 pass_end, ll, data_pos, pass_start, ofs, bit;
1137 unsigned long flags;
1124 struct address_space *mftbmp_mapping; 1138 struct address_space *mftbmp_mapping;
1125 u8 *buf, *byte; 1139 u8 *buf, *byte;
1126 struct page *page; 1140 struct page *page;
@@ -1134,9 +1148,13 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1134 * Set the end of the pass making sure we do not overflow the mft 1148 * Set the end of the pass making sure we do not overflow the mft
1135 * bitmap. 1149 * bitmap.
1136 */ 1150 */
1151 read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
1137 pass_end = NTFS_I(vol->mft_ino)->allocated_size >> 1152 pass_end = NTFS_I(vol->mft_ino)->allocated_size >>
1138 vol->mft_record_size_bits; 1153 vol->mft_record_size_bits;
1154 read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags);
1155 read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
1139 ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; 1156 ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3;
1157 read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags);
1140 if (pass_end > ll) 1158 if (pass_end > ll)
1141 pass_end = ll; 1159 pass_end = ll;
1142 pass = 1; 1160 pass = 1;
@@ -1263,6 +1281,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1263{ 1281{
1264 LCN lcn; 1282 LCN lcn;
1265 s64 ll; 1283 s64 ll;
1284 unsigned long flags;
1266 struct page *page; 1285 struct page *page;
1267 ntfs_inode *mft_ni, *mftbmp_ni; 1286 ntfs_inode *mft_ni, *mftbmp_ni;
1268 runlist_element *rl, *rl2 = NULL; 1287 runlist_element *rl, *rl2 = NULL;
@@ -1284,17 +1303,20 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1284 /* 1303 /*
1285 * Determine the last lcn of the mft bitmap. The allocated size of the 1304 * Determine the last lcn of the mft bitmap. The allocated size of the
1286 * mft bitmap cannot be zero so we are ok to do this. 1305 * mft bitmap cannot be zero so we are ok to do this.
1287 * ntfs_find_vcn() returns the runlist locked on success.
1288 */ 1306 */
1289 rl = ntfs_find_vcn(mftbmp_ni, (mftbmp_ni->allocated_size - 1) >> 1307 down_write(&mftbmp_ni->runlist.lock);
1290 vol->cluster_size_bits, TRUE); 1308 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
1309 ll = mftbmp_ni->allocated_size;
1310 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1311 rl = ntfs_attr_find_vcn_nolock(mftbmp_ni,
1312 (ll - 1) >> vol->cluster_size_bits, TRUE);
1291 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { 1313 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
1314 up_write(&mftbmp_ni->runlist.lock);
1292 ntfs_error(vol->sb, "Failed to determine last allocated " 1315 ntfs_error(vol->sb, "Failed to determine last allocated "
1293 "cluster of mft bitmap attribute."); 1316 "cluster of mft bitmap attribute.");
1294 if (!IS_ERR(rl)) { 1317 if (!IS_ERR(rl))
1295 up_write(&mftbmp_ni->runlist.lock);
1296 ret = -EIO; 1318 ret = -EIO;
1297 } else 1319 else
1298 ret = PTR_ERR(rl); 1320 ret = PTR_ERR(rl);
1299 return ret; 1321 return ret;
1300 } 1322 }
@@ -1396,7 +1418,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1396 BUG_ON(ll < rl2->vcn); 1418 BUG_ON(ll < rl2->vcn);
1397 BUG_ON(ll >= rl2->vcn + rl2->length); 1419 BUG_ON(ll >= rl2->vcn + rl2->length);
1398 /* Get the size for the new mapping pairs array for this extent. */ 1420 /* Get the size for the new mapping pairs array for this extent. */
1399 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); 1421 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
1400 if (unlikely(mp_size <= 0)) { 1422 if (unlikely(mp_size <= 0)) {
1401 ntfs_error(vol->sb, "Get size for mapping pairs failed for " 1423 ntfs_error(vol->sb, "Get size for mapping pairs failed for "
1402 "mft bitmap attribute extent."); 1424 "mft bitmap attribute extent.");
@@ -1418,6 +1440,8 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1418 // TODO: Deal with this by moving this extent to a new mft 1440 // TODO: Deal with this by moving this extent to a new mft
1419 // record or by starting a new extent in a new mft record or by 1441 // record or by starting a new extent in a new mft record or by
1420 // moving other attributes out of this mft record. 1442 // moving other attributes out of this mft record.
1443 // Note: It will need to be a special mft record and if none of
1444 // those are available it gets rather complicated...
1421 ntfs_error(vol->sb, "Not enough space in this mft record to " 1445 ntfs_error(vol->sb, "Not enough space in this mft record to "
1422 "accomodate extended mft bitmap attribute " 1446 "accomodate extended mft bitmap attribute "
1423 "extent. Cannot handle this yet."); 1447 "extent. Cannot handle this yet.");
@@ -1428,7 +1452,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1428 /* Generate the mapping pairs array directly into the attr record. */ 1452 /* Generate the mapping pairs array directly into the attr record. */
1429 ret = ntfs_mapping_pairs_build(vol, (u8*)a + 1453 ret = ntfs_mapping_pairs_build(vol, (u8*)a +
1430 le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1454 le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
1431 mp_size, rl2, ll, NULL); 1455 mp_size, rl2, ll, -1, NULL);
1432 if (unlikely(ret)) { 1456 if (unlikely(ret)) {
1433 ntfs_error(vol->sb, "Failed to build mapping pairs array for " 1457 ntfs_error(vol->sb, "Failed to build mapping pairs array for "
1434 "mft bitmap attribute."); 1458 "mft bitmap attribute.");
@@ -1458,9 +1482,11 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1458 } 1482 }
1459 a = ctx->attr; 1483 a = ctx->attr;
1460 } 1484 }
1485 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1461 mftbmp_ni->allocated_size += vol->cluster_size; 1486 mftbmp_ni->allocated_size += vol->cluster_size;
1462 a->data.non_resident.allocated_size = 1487 a->data.non_resident.allocated_size =
1463 cpu_to_sle64(mftbmp_ni->allocated_size); 1488 cpu_to_sle64(mftbmp_ni->allocated_size);
1489 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1464 /* Ensure the changes make it to disk. */ 1490 /* Ensure the changes make it to disk. */
1465 flush_dcache_mft_record_page(ctx->ntfs_ino); 1491 flush_dcache_mft_record_page(ctx->ntfs_ino);
1466 mark_mft_record_dirty(ctx->ntfs_ino); 1492 mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1476,7 +1502,9 @@ restore_undo_alloc:
1476 0, ctx)) { 1502 0, ctx)) {
1477 ntfs_error(vol->sb, "Failed to find last attribute extent of " 1503 ntfs_error(vol->sb, "Failed to find last attribute extent of "
1478 "mft bitmap attribute.%s", es); 1504 "mft bitmap attribute.%s", es);
1505 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1479 mftbmp_ni->allocated_size += vol->cluster_size; 1506 mftbmp_ni->allocated_size += vol->cluster_size;
1507 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1480 ntfs_attr_put_search_ctx(ctx); 1508 ntfs_attr_put_search_ctx(ctx);
1481 unmap_mft_record(mft_ni); 1509 unmap_mft_record(mft_ni);
1482 up_write(&mftbmp_ni->runlist.lock); 1510 up_write(&mftbmp_ni->runlist.lock);
@@ -1512,7 +1540,7 @@ undo_alloc:
1512 a->data.non_resident.mapping_pairs_offset), 1540 a->data.non_resident.mapping_pairs_offset),
1513 old_alen - le16_to_cpu( 1541 old_alen - le16_to_cpu(
1514 a->data.non_resident.mapping_pairs_offset), 1542 a->data.non_resident.mapping_pairs_offset),
1515 rl2, ll, NULL)) { 1543 rl2, ll, -1, NULL)) {
1516 ntfs_error(vol->sb, "Failed to restore mapping pairs " 1544 ntfs_error(vol->sb, "Failed to restore mapping pairs "
1517 "array.%s", es); 1545 "array.%s", es);
1518 NVolSetErrors(vol); 1546 NVolSetErrors(vol);
@@ -1550,6 +1578,7 @@ undo_alloc:
1550static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) 1578static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
1551{ 1579{
1552 s64 old_data_size, old_initialized_size; 1580 s64 old_data_size, old_initialized_size;
1581 unsigned long flags;
1553 struct inode *mftbmp_vi; 1582 struct inode *mftbmp_vi;
1554 ntfs_inode *mft_ni, *mftbmp_ni; 1583 ntfs_inode *mft_ni, *mftbmp_ni;
1555 ntfs_attr_search_ctx *ctx; 1584 ntfs_attr_search_ctx *ctx;
@@ -1583,7 +1612,8 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
1583 goto put_err_out; 1612 goto put_err_out;
1584 } 1613 }
1585 a = ctx->attr; 1614 a = ctx->attr;
1586 old_data_size = mftbmp_vi->i_size; 1615 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1616 old_data_size = i_size_read(mftbmp_vi);
1587 old_initialized_size = mftbmp_ni->initialized_size; 1617 old_initialized_size = mftbmp_ni->initialized_size;
1588 /* 1618 /*
1589 * We can simply update the initialized_size before filling the space 1619 * We can simply update the initialized_size before filling the space
@@ -1593,11 +1623,12 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol)
1593 mftbmp_ni->initialized_size += 8; 1623 mftbmp_ni->initialized_size += 8;
1594 a->data.non_resident.initialized_size = 1624 a->data.non_resident.initialized_size =
1595 cpu_to_sle64(mftbmp_ni->initialized_size); 1625 cpu_to_sle64(mftbmp_ni->initialized_size);
1596 if (mftbmp_ni->initialized_size > mftbmp_vi->i_size) { 1626 if (mftbmp_ni->initialized_size > old_data_size) {
1597 mftbmp_vi->i_size = mftbmp_ni->initialized_size; 1627 i_size_write(mftbmp_vi, mftbmp_ni->initialized_size);
1598 a->data.non_resident.data_size = 1628 a->data.non_resident.data_size =
1599 cpu_to_sle64(mftbmp_vi->i_size); 1629 cpu_to_sle64(mftbmp_ni->initialized_size);
1600 } 1630 }
1631 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1601 /* Ensure the changes make it to disk. */ 1632 /* Ensure the changes make it to disk. */
1602 flush_dcache_mft_record_page(ctx->ntfs_ino); 1633 flush_dcache_mft_record_page(ctx->ntfs_ino);
1603 mark_mft_record_dirty(ctx->ntfs_ino); 1634 mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1636,22 +1667,28 @@ unm_err_out:
1636 goto err_out; 1667 goto err_out;
1637 } 1668 }
1638 a = ctx->attr; 1669 a = ctx->attr;
1670 write_lock_irqsave(&mftbmp_ni->size_lock, flags);
1639 mftbmp_ni->initialized_size = old_initialized_size; 1671 mftbmp_ni->initialized_size = old_initialized_size;
1640 a->data.non_resident.initialized_size = 1672 a->data.non_resident.initialized_size =
1641 cpu_to_sle64(old_initialized_size); 1673 cpu_to_sle64(old_initialized_size);
1642 if (mftbmp_vi->i_size != old_data_size) { 1674 if (i_size_read(mftbmp_vi) != old_data_size) {
1643 mftbmp_vi->i_size = old_data_size; 1675 i_size_write(mftbmp_vi, old_data_size);
1644 a->data.non_resident.data_size = cpu_to_sle64(old_data_size); 1676 a->data.non_resident.data_size = cpu_to_sle64(old_data_size);
1645 } 1677 }
1678 write_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1646 flush_dcache_mft_record_page(ctx->ntfs_ino); 1679 flush_dcache_mft_record_page(ctx->ntfs_ino);
1647 mark_mft_record_dirty(ctx->ntfs_ino); 1680 mark_mft_record_dirty(ctx->ntfs_ino);
1648 ntfs_attr_put_search_ctx(ctx); 1681 ntfs_attr_put_search_ctx(ctx);
1649 unmap_mft_record(mft_ni); 1682 unmap_mft_record(mft_ni);
1683#ifdef DEBUG
1684 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
1650 ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " 1685 ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, "
1651 "data_size 0x%llx, initialized_size 0x%llx.", 1686 "data_size 0x%llx, initialized_size 0x%llx.",
1652 (long long)mftbmp_ni->allocated_size, 1687 (long long)mftbmp_ni->allocated_size,
1653 (long long)mftbmp_vi->i_size, 1688 (long long)i_size_read(mftbmp_vi),
1654 (long long)mftbmp_ni->initialized_size); 1689 (long long)mftbmp_ni->initialized_size);
1690 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
1691#endif /* DEBUG */
1655err_out: 1692err_out:
1656 return ret; 1693 return ret;
1657} 1694}
@@ -1679,7 +1716,8 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1679{ 1716{
1680 LCN lcn; 1717 LCN lcn;
1681 VCN old_last_vcn; 1718 VCN old_last_vcn;
1682 s64 min_nr, nr, ll = 0; 1719 s64 min_nr, nr, ll;
1720 unsigned long flags;
1683 ntfs_inode *mft_ni; 1721 ntfs_inode *mft_ni;
1684 runlist_element *rl, *rl2; 1722 runlist_element *rl, *rl2;
1685 ntfs_attr_search_ctx *ctx = NULL; 1723 ntfs_attr_search_ctx *ctx = NULL;
@@ -1695,23 +1733,25 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1695 * Determine the preferred allocation location, i.e. the last lcn of 1733 * Determine the preferred allocation location, i.e. the last lcn of
1696 * the mft data attribute. The allocated size of the mft data 1734 * the mft data attribute. The allocated size of the mft data
1697 * attribute cannot be zero so we are ok to do this. 1735 * attribute cannot be zero so we are ok to do this.
1698 * ntfs_find_vcn() returns the runlist locked on success.
1699 */ 1736 */
1700 rl = ntfs_find_vcn(mft_ni, (mft_ni->allocated_size - 1) >> 1737 down_write(&mft_ni->runlist.lock);
1701 vol->cluster_size_bits, TRUE); 1738 read_lock_irqsave(&mft_ni->size_lock, flags);
1739 ll = mft_ni->allocated_size;
1740 read_unlock_irqrestore(&mft_ni->size_lock, flags);
1741 rl = ntfs_attr_find_vcn_nolock(mft_ni,
1742 (ll - 1) >> vol->cluster_size_bits, TRUE);
1702 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { 1743 if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
1744 up_write(&mft_ni->runlist.lock);
1703 ntfs_error(vol->sb, "Failed to determine last allocated " 1745 ntfs_error(vol->sb, "Failed to determine last allocated "
1704 "cluster of mft data attribute."); 1746 "cluster of mft data attribute.");
1705 if (!IS_ERR(rl)) { 1747 if (!IS_ERR(rl))
1706 up_write(&mft_ni->runlist.lock);
1707 ret = -EIO; 1748 ret = -EIO;
1708 } else 1749 else
1709 ret = PTR_ERR(rl); 1750 ret = PTR_ERR(rl);
1710 return ret; 1751 return ret;
1711 } 1752 }
1712 lcn = rl->lcn + rl->length; 1753 lcn = rl->lcn + rl->length;
1713 ntfs_debug("Last lcn of mft data attribute is 0x%llx.", 1754 ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn);
1714 (long long)lcn);
1715 /* Minimum allocation is one mft record worth of clusters. */ 1755 /* Minimum allocation is one mft record worth of clusters. */
1716 min_nr = vol->mft_record_size >> vol->cluster_size_bits; 1756 min_nr = vol->mft_record_size >> vol->cluster_size_bits;
1717 if (!min_nr) 1757 if (!min_nr)
@@ -1721,12 +1761,13 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1721 if (!nr) 1761 if (!nr)
1722 nr = min_nr; 1762 nr = min_nr;
1723 /* Ensure we do not go above 2^32-1 mft records. */ 1763 /* Ensure we do not go above 2^32-1 mft records. */
1724 if (unlikely((mft_ni->allocated_size + 1764 read_lock_irqsave(&mft_ni->size_lock, flags);
1725 (nr << vol->cluster_size_bits)) >> 1765 ll = mft_ni->allocated_size;
1766 read_unlock_irqrestore(&mft_ni->size_lock, flags);
1767 if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
1726 vol->mft_record_size_bits >= (1ll << 32))) { 1768 vol->mft_record_size_bits >= (1ll << 32))) {
1727 nr = min_nr; 1769 nr = min_nr;
1728 if (unlikely((mft_ni->allocated_size + 1770 if (unlikely((ll + (nr << vol->cluster_size_bits)) >>
1729 (nr << vol->cluster_size_bits)) >>
1730 vol->mft_record_size_bits >= (1ll << 32))) { 1771 vol->mft_record_size_bits >= (1ll << 32))) {
1731 ntfs_warning(vol->sb, "Cannot allocate mft record " 1772 ntfs_warning(vol->sb, "Cannot allocate mft record "
1732 "because the maximum number of inodes " 1773 "because the maximum number of inodes "
@@ -1772,7 +1813,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1772 return PTR_ERR(rl); 1813 return PTR_ERR(rl);
1773 } 1814 }
1774 mft_ni->runlist.rl = rl; 1815 mft_ni->runlist.rl = rl;
1775 ntfs_debug("Allocated %lli clusters.", nr); 1816 ntfs_debug("Allocated %lli clusters.", (long long)nr);
1776 /* Find the last run in the new runlist. */ 1817 /* Find the last run in the new runlist. */
1777 for (; rl[1].length; rl++) 1818 for (; rl[1].length; rl++)
1778 ; 1819 ;
@@ -1808,7 +1849,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1808 BUG_ON(ll < rl2->vcn); 1849 BUG_ON(ll < rl2->vcn);
1809 BUG_ON(ll >= rl2->vcn + rl2->length); 1850 BUG_ON(ll >= rl2->vcn + rl2->length);
1810 /* Get the size for the new mapping pairs array for this extent. */ 1851 /* Get the size for the new mapping pairs array for this extent. */
1811 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll); 1852 mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, -1);
1812 if (unlikely(mp_size <= 0)) { 1853 if (unlikely(mp_size <= 0)) {
1813 ntfs_error(vol->sb, "Get size for mapping pairs failed for " 1854 ntfs_error(vol->sb, "Get size for mapping pairs failed for "
1814 "mft data attribute extent."); 1855 "mft data attribute extent.");
@@ -1832,7 +1873,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1832 // moving other attributes out of this mft record. 1873 // moving other attributes out of this mft record.
1833 // Note: Use the special reserved mft records and ensure that 1874 // Note: Use the special reserved mft records and ensure that
1834 // this extent is not required to find the mft record in 1875 // this extent is not required to find the mft record in
1835 // question. 1876 // question. If no free special records left we would need to
1877 // move an existing record away, insert ours in its place, and
1878 // then place the moved record into the newly allocated space
1879 // and we would then need to update all references to this mft
1880 // record appropriately. This is rather complicated...
1836 ntfs_error(vol->sb, "Not enough space in this mft record to " 1881 ntfs_error(vol->sb, "Not enough space in this mft record to "
1837 "accomodate extended mft data attribute " 1882 "accomodate extended mft data attribute "
1838 "extent. Cannot handle this yet."); 1883 "extent. Cannot handle this yet.");
@@ -1843,7 +1888,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1843 /* Generate the mapping pairs array directly into the attr record. */ 1888 /* Generate the mapping pairs array directly into the attr record. */
1844 ret = ntfs_mapping_pairs_build(vol, (u8*)a + 1889 ret = ntfs_mapping_pairs_build(vol, (u8*)a +
1845 le16_to_cpu(a->data.non_resident.mapping_pairs_offset), 1890 le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
1846 mp_size, rl2, ll, NULL); 1891 mp_size, rl2, ll, -1, NULL);
1847 if (unlikely(ret)) { 1892 if (unlikely(ret)) {
1848 ntfs_error(vol->sb, "Failed to build mapping pairs array of " 1893 ntfs_error(vol->sb, "Failed to build mapping pairs array of "
1849 "mft data attribute."); 1894 "mft data attribute.");
@@ -1875,9 +1920,11 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1875 } 1920 }
1876 a = ctx->attr; 1921 a = ctx->attr;
1877 } 1922 }
1923 write_lock_irqsave(&mft_ni->size_lock, flags);
1878 mft_ni->allocated_size += nr << vol->cluster_size_bits; 1924 mft_ni->allocated_size += nr << vol->cluster_size_bits;
1879 a->data.non_resident.allocated_size = 1925 a->data.non_resident.allocated_size =
1880 cpu_to_sle64(mft_ni->allocated_size); 1926 cpu_to_sle64(mft_ni->allocated_size);
1927 write_unlock_irqrestore(&mft_ni->size_lock, flags);
1881 /* Ensure the changes make it to disk. */ 1928 /* Ensure the changes make it to disk. */
1882 flush_dcache_mft_record_page(ctx->ntfs_ino); 1929 flush_dcache_mft_record_page(ctx->ntfs_ino);
1883 mark_mft_record_dirty(ctx->ntfs_ino); 1930 mark_mft_record_dirty(ctx->ntfs_ino);
@@ -1892,7 +1939,9 @@ restore_undo_alloc:
1892 CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { 1939 CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) {
1893 ntfs_error(vol->sb, "Failed to find last attribute extent of " 1940 ntfs_error(vol->sb, "Failed to find last attribute extent of "
1894 "mft data attribute.%s", es); 1941 "mft data attribute.%s", es);
1942 write_lock_irqsave(&mft_ni->size_lock, flags);
1895 mft_ni->allocated_size += nr << vol->cluster_size_bits; 1943 mft_ni->allocated_size += nr << vol->cluster_size_bits;
1944 write_unlock_irqrestore(&mft_ni->size_lock, flags);
1896 ntfs_attr_put_search_ctx(ctx); 1945 ntfs_attr_put_search_ctx(ctx);
1897 unmap_mft_record(mft_ni); 1946 unmap_mft_record(mft_ni);
1898 up_write(&mft_ni->runlist.lock); 1947 up_write(&mft_ni->runlist.lock);
@@ -1921,7 +1970,7 @@ undo_alloc:
1921 a->data.non_resident.mapping_pairs_offset), 1970 a->data.non_resident.mapping_pairs_offset),
1922 old_alen - le16_to_cpu( 1971 old_alen - le16_to_cpu(
1923 a->data.non_resident.mapping_pairs_offset), 1972 a->data.non_resident.mapping_pairs_offset),
1924 rl2, ll, NULL)) { 1973 rl2, ll, -1, NULL)) {
1925 ntfs_error(vol->sb, "Failed to restore mapping pairs " 1974 ntfs_error(vol->sb, "Failed to restore mapping pairs "
1926 "array.%s", es); 1975 "array.%s", es);
1927 NVolSetErrors(vol); 1976 NVolSetErrors(vol);
@@ -1991,7 +2040,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
1991 "reports this as corruption, please email " 2040 "reports this as corruption, please email "
1992 "linux-ntfs-dev@lists.sourceforge.net stating " 2041 "linux-ntfs-dev@lists.sourceforge.net stating "
1993 "that you saw this message and that the " 2042 "that you saw this message and that the "
1994 "modified file system created was corrupt. " 2043 "modified filesystem created was corrupt. "
1995 "Thank you."); 2044 "Thank you.");
1996 } 2045 }
1997 /* Set the update sequence number to 1. */ 2046 /* Set the update sequence number to 1. */
@@ -2036,6 +2085,7 @@ static int ntfs_mft_record_layout(const ntfs_volume *vol, const s64 mft_no,
2036 */ 2085 */
2037static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) 2086static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2038{ 2087{
2088 loff_t i_size;
2039 struct inode *mft_vi = vol->mft_ino; 2089 struct inode *mft_vi = vol->mft_ino;
2040 struct page *page; 2090 struct page *page;
2041 MFT_RECORD *m; 2091 MFT_RECORD *m;
@@ -2051,10 +2101,11 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2051 index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; 2101 index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
2052 ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2102 ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2053 /* The maximum valid index into the page cache for $MFT's data. */ 2103 /* The maximum valid index into the page cache for $MFT's data. */
2054 end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; 2104 i_size = i_size_read(mft_vi);
2105 end_index = i_size >> PAGE_CACHE_SHIFT;
2055 if (unlikely(index >= end_index)) { 2106 if (unlikely(index >= end_index)) {
2056 if (unlikely(index > end_index || ofs + vol->mft_record_size >= 2107 if (unlikely(index > end_index || ofs + vol->mft_record_size >=
2057 (mft_vi->i_size & ~PAGE_CACHE_MASK))) { 2108 (i_size & ~PAGE_CACHE_MASK))) {
2058 ntfs_error(vol->sb, "Tried to format non-existing mft " 2109 ntfs_error(vol->sb, "Tried to format non-existing mft "
2059 "record 0x%llx.", (long long)mft_no); 2110 "record 0x%llx.", (long long)mft_no);
2060 return -ENOENT; 2111 return -ENOENT;
@@ -2188,6 +2239,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2188 ntfs_inode *base_ni, MFT_RECORD **mrec) 2239 ntfs_inode *base_ni, MFT_RECORD **mrec)
2189{ 2240{
2190 s64 ll, bit, old_data_initialized, old_data_size; 2241 s64 ll, bit, old_data_initialized, old_data_size;
2242 unsigned long flags;
2191 struct inode *vi; 2243 struct inode *vi;
2192 struct page *page; 2244 struct page *page;
2193 ntfs_inode *mft_ni, *mftbmp_ni, *ni; 2245 ntfs_inode *mft_ni, *mftbmp_ni, *ni;
@@ -2237,9 +2289,13 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2237 * the first 24 mft records as they are special and whilst they may not 2289 * the first 24 mft records as they are special and whilst they may not
2238 * be in use, we do not allocate from them. 2290 * be in use, we do not allocate from them.
2239 */ 2291 */
2292 read_lock_irqsave(&mft_ni->size_lock, flags);
2240 ll = mft_ni->initialized_size >> vol->mft_record_size_bits; 2293 ll = mft_ni->initialized_size >> vol->mft_record_size_bits;
2241 if (mftbmp_ni->initialized_size << 3 > ll && 2294 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2242 mftbmp_ni->initialized_size > 3) { 2295 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2296 old_data_initialized = mftbmp_ni->initialized_size;
2297 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2298 if (old_data_initialized << 3 > ll && old_data_initialized > 3) {
2243 bit = ll; 2299 bit = ll;
2244 if (bit < 24) 2300 if (bit < 24)
2245 bit = 24; 2301 bit = 24;
@@ -2254,15 +2310,18 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2254 * mft record that we can allocate. 2310 * mft record that we can allocate.
2255 * Note: The smallest mft record we allocate is mft record 24. 2311 * Note: The smallest mft record we allocate is mft record 24.
2256 */ 2312 */
2257 bit = mftbmp_ni->initialized_size << 3; 2313 bit = old_data_initialized << 3;
2258 if (unlikely(bit >= (1ll << 32))) 2314 if (unlikely(bit >= (1ll << 32)))
2259 goto max_err_out; 2315 goto max_err_out;
2316 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2317 old_data_size = mftbmp_ni->allocated_size;
2260 ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " 2318 ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
2261 "data_size 0x%llx, initialized_size 0x%llx.", 2319 "data_size 0x%llx, initialized_size 0x%llx.",
2262 (long long)mftbmp_ni->allocated_size, 2320 (long long)old_data_size,
2263 (long long)vol->mftbmp_ino->i_size, 2321 (long long)i_size_read(vol->mftbmp_ino),
2264 (long long)mftbmp_ni->initialized_size); 2322 (long long)old_data_initialized);
2265 if (mftbmp_ni->initialized_size + 8 > mftbmp_ni->allocated_size) { 2323 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2324 if (old_data_initialized + 8 > old_data_size) {
2266 /* Need to extend bitmap by one more cluster. */ 2325 /* Need to extend bitmap by one more cluster. */
2267 ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); 2326 ntfs_debug("mftbmp: initialized_size + 8 > allocated_size.");
2268 err = ntfs_mft_bitmap_extend_allocation_nolock(vol); 2327 err = ntfs_mft_bitmap_extend_allocation_nolock(vol);
@@ -2270,12 +2329,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2270 up_write(&vol->mftbmp_lock); 2329 up_write(&vol->mftbmp_lock);
2271 goto err_out; 2330 goto err_out;
2272 } 2331 }
2332#ifdef DEBUG
2333 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2273 ntfs_debug("Status of mftbmp after allocation extension: " 2334 ntfs_debug("Status of mftbmp after allocation extension: "
2274 "allocated_size 0x%llx, data_size 0x%llx, " 2335 "allocated_size 0x%llx, data_size 0x%llx, "
2275 "initialized_size 0x%llx.", 2336 "initialized_size 0x%llx.",
2276 (long long)mftbmp_ni->allocated_size, 2337 (long long)mftbmp_ni->allocated_size,
2277 (long long)vol->mftbmp_ino->i_size, 2338 (long long)i_size_read(vol->mftbmp_ino),
2278 (long long)mftbmp_ni->initialized_size); 2339 (long long)mftbmp_ni->initialized_size);
2340 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2341#endif /* DEBUG */
2279 } 2342 }
2280 /* 2343 /*
2281 * We now have sufficient allocated space, extend the initialized_size 2344 * We now have sufficient allocated space, extend the initialized_size
@@ -2287,12 +2350,16 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2287 up_write(&vol->mftbmp_lock); 2350 up_write(&vol->mftbmp_lock);
2288 goto err_out; 2351 goto err_out;
2289 } 2352 }
2353#ifdef DEBUG
2354 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2290 ntfs_debug("Status of mftbmp after initialized extention: " 2355 ntfs_debug("Status of mftbmp after initialized extention: "
2291 "allocated_size 0x%llx, data_size 0x%llx, " 2356 "allocated_size 0x%llx, data_size 0x%llx, "
2292 "initialized_size 0x%llx.", 2357 "initialized_size 0x%llx.",
2293 (long long)mftbmp_ni->allocated_size, 2358 (long long)mftbmp_ni->allocated_size,
2294 (long long)vol->mftbmp_ino->i_size, 2359 (long long)i_size_read(vol->mftbmp_ino),
2295 (long long)mftbmp_ni->initialized_size); 2360 (long long)mftbmp_ni->initialized_size);
2361 read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
2362#endif /* DEBUG */
2296 ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); 2363 ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit);
2297found_free_rec: 2364found_free_rec:
2298 /* @bit is the found free mft record, allocate it in the mft bitmap. */ 2365 /* @bit is the found free mft record, allocate it in the mft bitmap. */
@@ -2314,7 +2381,10 @@ have_alloc_rec:
2314 * parallel allocation could allocate the same mft record as this one. 2381 * parallel allocation could allocate the same mft record as this one.
2315 */ 2382 */
2316 ll = (bit + 1) << vol->mft_record_size_bits; 2383 ll = (bit + 1) << vol->mft_record_size_bits;
2317 if (ll <= mft_ni->initialized_size) { 2384 read_lock_irqsave(&mft_ni->size_lock, flags);
2385 old_data_initialized = mft_ni->initialized_size;
2386 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2387 if (ll <= old_data_initialized) {
2318 ntfs_debug("Allocated mft record already initialized."); 2388 ntfs_debug("Allocated mft record already initialized.");
2319 goto mft_rec_already_initialized; 2389 goto mft_rec_already_initialized;
2320 } 2390 }
@@ -2325,26 +2395,30 @@ have_alloc_rec:
2325 * actually traversed more than once when a freshly formatted volume is 2395 * actually traversed more than once when a freshly formatted volume is
2326 * first written to so it optimizes away nicely in the common case. 2396 * first written to so it optimizes away nicely in the common case.
2327 */ 2397 */
2398 read_lock_irqsave(&mft_ni->size_lock, flags);
2328 ntfs_debug("Status of mft data before extension: " 2399 ntfs_debug("Status of mft data before extension: "
2329 "allocated_size 0x%llx, data_size 0x%llx, " 2400 "allocated_size 0x%llx, data_size 0x%llx, "
2330 "initialized_size 0x%llx.", 2401 "initialized_size 0x%llx.",
2331 (long long)mft_ni->allocated_size, 2402 (long long)mft_ni->allocated_size,
2332 (long long)vol->mft_ino->i_size, 2403 (long long)i_size_read(vol->mft_ino),
2333 (long long)mft_ni->initialized_size); 2404 (long long)mft_ni->initialized_size);
2334 while (ll > mft_ni->allocated_size) { 2405 while (ll > mft_ni->allocated_size) {
2406 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2335 err = ntfs_mft_data_extend_allocation_nolock(vol); 2407 err = ntfs_mft_data_extend_allocation_nolock(vol);
2336 if (unlikely(err)) { 2408 if (unlikely(err)) {
2337 ntfs_error(vol->sb, "Failed to extend mft data " 2409 ntfs_error(vol->sb, "Failed to extend mft data "
2338 "allocation."); 2410 "allocation.");
2339 goto undo_mftbmp_alloc_nolock; 2411 goto undo_mftbmp_alloc_nolock;
2340 } 2412 }
2413 read_lock_irqsave(&mft_ni->size_lock, flags);
2341 ntfs_debug("Status of mft data after allocation extension: " 2414 ntfs_debug("Status of mft data after allocation extension: "
2342 "allocated_size 0x%llx, data_size 0x%llx, " 2415 "allocated_size 0x%llx, data_size 0x%llx, "
2343 "initialized_size 0x%llx.", 2416 "initialized_size 0x%llx.",
2344 (long long)mft_ni->allocated_size, 2417 (long long)mft_ni->allocated_size,
2345 (long long)vol->mft_ino->i_size, 2418 (long long)i_size_read(vol->mft_ino),
2346 (long long)mft_ni->initialized_size); 2419 (long long)mft_ni->initialized_size);
2347 } 2420 }
2421 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2348 /* 2422 /*
2349 * Extend mft data initialized size (and data size of course) to reach 2423 * Extend mft data initialized size (and data size of course) to reach
2350 * the allocated mft record, formatting the mft records allong the way. 2424 * the allocated mft record, formatting the mft records allong the way.
@@ -2352,6 +2426,7 @@ have_alloc_rec:
2352 * needed by ntfs_mft_record_format(). We will update the attribute 2426 * needed by ntfs_mft_record_format(). We will update the attribute
2353 * record itself in one fell swoop later on. 2427 * record itself in one fell swoop later on.
2354 */ 2428 */
2429 write_lock_irqsave(&mft_ni->size_lock, flags);
2355 old_data_initialized = mft_ni->initialized_size; 2430 old_data_initialized = mft_ni->initialized_size;
2356 old_data_size = vol->mft_ino->i_size; 2431 old_data_size = vol->mft_ino->i_size;
2357 while (ll > mft_ni->initialized_size) { 2432 while (ll > mft_ni->initialized_size) {
@@ -2360,8 +2435,9 @@ have_alloc_rec:
2360 new_initialized_size = mft_ni->initialized_size + 2435 new_initialized_size = mft_ni->initialized_size +
2361 vol->mft_record_size; 2436 vol->mft_record_size;
2362 mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; 2437 mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits;
2363 if (new_initialized_size > vol->mft_ino->i_size) 2438 if (new_initialized_size > i_size_read(vol->mft_ino))
2364 vol->mft_ino->i_size = new_initialized_size; 2439 i_size_write(vol->mft_ino, new_initialized_size);
2440 write_unlock_irqrestore(&mft_ni->size_lock, flags);
2365 ntfs_debug("Initializing mft record 0x%llx.", 2441 ntfs_debug("Initializing mft record 0x%llx.",
2366 (long long)mft_no); 2442 (long long)mft_no);
2367 err = ntfs_mft_record_format(vol, mft_no); 2443 err = ntfs_mft_record_format(vol, mft_no);
@@ -2369,8 +2445,10 @@ have_alloc_rec:
2369 ntfs_error(vol->sb, "Failed to format mft record."); 2445 ntfs_error(vol->sb, "Failed to format mft record.");
2370 goto undo_data_init; 2446 goto undo_data_init;
2371 } 2447 }
2448 write_lock_irqsave(&mft_ni->size_lock, flags);
2372 mft_ni->initialized_size = new_initialized_size; 2449 mft_ni->initialized_size = new_initialized_size;
2373 } 2450 }
2451 write_unlock_irqrestore(&mft_ni->size_lock, flags);
2374 record_formatted = TRUE; 2452 record_formatted = TRUE;
2375 /* Update the mft data attribute record to reflect the new sizes. */ 2453 /* Update the mft data attribute record to reflect the new sizes. */
2376 m = map_mft_record(mft_ni); 2454 m = map_mft_record(mft_ni);
@@ -2396,22 +2474,27 @@ have_alloc_rec:
2396 goto undo_data_init; 2474 goto undo_data_init;
2397 } 2475 }
2398 a = ctx->attr; 2476 a = ctx->attr;
2477 read_lock_irqsave(&mft_ni->size_lock, flags);
2399 a->data.non_resident.initialized_size = 2478 a->data.non_resident.initialized_size =
2400 cpu_to_sle64(mft_ni->initialized_size); 2479 cpu_to_sle64(mft_ni->initialized_size);
2401 a->data.non_resident.data_size = cpu_to_sle64(vol->mft_ino->i_size); 2480 a->data.non_resident.data_size =
2481 cpu_to_sle64(i_size_read(vol->mft_ino));
2482 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2402 /* Ensure the changes make it to disk. */ 2483 /* Ensure the changes make it to disk. */
2403 flush_dcache_mft_record_page(ctx->ntfs_ino); 2484 flush_dcache_mft_record_page(ctx->ntfs_ino);
2404 mark_mft_record_dirty(ctx->ntfs_ino); 2485 mark_mft_record_dirty(ctx->ntfs_ino);
2405 ntfs_attr_put_search_ctx(ctx); 2486 ntfs_attr_put_search_ctx(ctx);
2406 unmap_mft_record(mft_ni); 2487 unmap_mft_record(mft_ni);
2488 read_lock_irqsave(&mft_ni->size_lock, flags);
2407 ntfs_debug("Status of mft data after mft record initialization: " 2489 ntfs_debug("Status of mft data after mft record initialization: "
2408 "allocated_size 0x%llx, data_size 0x%llx, " 2490 "allocated_size 0x%llx, data_size 0x%llx, "
2409 "initialized_size 0x%llx.", 2491 "initialized_size 0x%llx.",
2410 (long long)mft_ni->allocated_size, 2492 (long long)mft_ni->allocated_size,
2411 (long long)vol->mft_ino->i_size, 2493 (long long)i_size_read(vol->mft_ino),
2412 (long long)mft_ni->initialized_size); 2494 (long long)mft_ni->initialized_size);
2413 BUG_ON(vol->mft_ino->i_size > mft_ni->allocated_size); 2495 BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size);
2414 BUG_ON(mft_ni->initialized_size > vol->mft_ino->i_size); 2496 BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino));
2497 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2415mft_rec_already_initialized: 2498mft_rec_already_initialized:
2416 /* 2499 /*
2417 * We can finally drop the mft bitmap lock as the mft data attribute 2500 * We can finally drop the mft bitmap lock as the mft data attribute
@@ -2652,8 +2735,10 @@ mft_rec_already_initialized:
2652 *mrec = m; 2735 *mrec = m;
2653 return ni; 2736 return ni;
2654undo_data_init: 2737undo_data_init:
2738 write_lock_irqsave(&mft_ni->size_lock, flags);
2655 mft_ni->initialized_size = old_data_initialized; 2739 mft_ni->initialized_size = old_data_initialized;
2656 vol->mft_ino->i_size = old_data_size; 2740 i_size_write(vol->mft_ino, old_data_size);
2741 write_unlock_irqrestore(&mft_ni->size_lock, flags);
2657 goto undo_mftbmp_alloc_nolock; 2742 goto undo_mftbmp_alloc_nolock;
2658undo_mftbmp_alloc: 2743undo_mftbmp_alloc:
2659 down_write(&vol->mftbmp_lock); 2744 down_write(&vol->mftbmp_lock);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 7c7e13b43b2e..351dbc3b6e40 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -153,8 +153,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
153 ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with " 153 ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with "
154 "error code %li.", dent_ino, 154 "error code %li.", dent_ino,
155 PTR_ERR(dent_inode)); 155 PTR_ERR(dent_inode));
156 if (name) 156 kfree(name);
157 kfree(name);
158 /* Return the error code. */ 157 /* Return the error code. */
159 return (struct dentry *)dent_inode; 158 return (struct dentry *)dent_inode;
160 } 159 }
@@ -380,7 +379,7 @@ struct inode_operations ntfs_dir_inode_ops = {
380 * Return the dentry of the parent directory on success or the error code on 379 * Return the dentry of the parent directory on success or the error code on
381 * error (IS_ERR() is true). 380 * error (IS_ERR() is true).
382 */ 381 */
383struct dentry *ntfs_get_parent(struct dentry *child_dent) 382static struct dentry *ntfs_get_parent(struct dentry *child_dent)
384{ 383{
385 struct inode *vi = child_dent->d_inode; 384 struct inode *vi = child_dent->d_inode;
386 ntfs_inode *ni = NTFS_I(vi); 385 ntfs_inode *ni = NTFS_I(vi);
@@ -465,7 +464,7 @@ try_next:
465 * 464 *
466 * Return the dentry on success or the error code on error (IS_ERR() is true). 465 * Return the dentry on success or the error code on error (IS_ERR() is true).
467 */ 466 */
468struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) 467static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
469{ 468{
470 struct inode *vi; 469 struct inode *vi;
471 struct dentry *dent; 470 struct dentry *dent;
@@ -496,3 +495,30 @@ struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh)
496 ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); 495 ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen);
497 return dent; 496 return dent;
498} 497}
498
499/**
500 * Export operations allowing NFS exporting of mounted NTFS partitions.
501 *
502 * We use the default ->decode_fh() and ->encode_fh() for now. Note that they
503 * use 32 bits to store the inode number which is an unsigned long so on 64-bit
504 * architectures is usually 64 bits so it would all fail horribly on huge
505 * volumes. I guess we need to define our own encode and decode fh functions
506 * that store 64-bit inode numbers at some point but for now we will ignore the
507 * problem...
508 *
509 * We also use the default ->get_name() helper (used by ->decode_fh() via
510 * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
511 * independent.
512 *
513 * The default ->get_parent() just returns -EACCES so we have to provide our
514 * own and the default ->get_dentry() is incompatible with NTFS due to not
515 * allowing the inode number 0 which is used in NTFS for the system file $MFT
516 * and due to using iget() whereas NTFS needs ntfs_iget().
517 */
518struct export_operations ntfs_export_ops = {
519 .get_parent = ntfs_get_parent, /* Find the parent of a given
520 directory. */
521 .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode
522 given a file handle
523 sub-fragment. */
524};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 720ffb71bab8..446b5014115c 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -2,7 +2,7 @@
2 * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS 2 * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS
3 * project. 3 * project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (C) 2002 Richard Russon 6 * Copyright (C) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -31,6 +31,7 @@
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/nls.h> 32#include <linux/nls.h>
33#include <linux/smp.h> 33#include <linux/smp.h>
34#include <linux/pagemap.h>
34 35
35#include "types.h" 36#include "types.h"
36#include "volume.h" 37#include "volume.h"
@@ -41,6 +42,9 @@ typedef enum {
41 NTFS_BLOCK_SIZE_BITS = 9, 42 NTFS_BLOCK_SIZE_BITS = 9,
42 NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */ 43 NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */
43 NTFS_MAX_NAME_LEN = 255, 44 NTFS_MAX_NAME_LEN = 255,
45 NTFS_MAX_ATTR_NAME_LEN = 255,
46 NTFS_MAX_CLUSTER_SIZE = 64 * 1024, /* 64kiB */
47 NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE,
44} NTFS_CONSTANTS; 48} NTFS_CONSTANTS;
45 49
46/* Global variables. */ 50/* Global variables. */
@@ -65,6 +69,8 @@ extern struct inode_operations ntfs_dir_inode_ops;
65extern struct file_operations ntfs_empty_file_ops; 69extern struct file_operations ntfs_empty_file_ops;
66extern struct inode_operations ntfs_empty_inode_ops; 70extern struct inode_operations ntfs_empty_inode_ops;
67 71
72extern struct export_operations ntfs_export_ops;
73
68/** 74/**
69 * NTFS_SB - return the ntfs volume given a vfs super block 75 * NTFS_SB - return the ntfs volume given a vfs super block
70 * @sb: VFS super block 76 * @sb: VFS super block
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 8438fb1da219..758855b0414e 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. 2 * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -59,7 +59,7 @@ static inline void ntfs_rl_mc(runlist_element *dstbase, int dst,
59 * 59 *
60 * As the runlists grow, more memory will be required. To prevent the 60 * As the runlists grow, more memory will be required. To prevent the
61 * kernel having to allocate and reallocate large numbers of small bits of 61 * kernel having to allocate and reallocate large numbers of small bits of
62 * memory, this function returns and entire page of memory. 62 * memory, this function returns an entire page of memory.
63 * 63 *
64 * It is up to the caller to serialize access to the runlist @rl. 64 * It is up to the caller to serialize access to the runlist @rl.
65 * 65 *
@@ -113,8 +113,11 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
113 BUG_ON(!dst); 113 BUG_ON(!dst);
114 BUG_ON(!src); 114 BUG_ON(!src);
115 115
116 if ((dst->lcn < 0) || (src->lcn < 0)) /* Are we merging holes? */ 116 if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */
117 if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
118 return TRUE;
117 return FALSE; 119 return FALSE;
120 }
118 if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ 121 if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
119 return FALSE; 122 return FALSE;
120 if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ 123 if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
@@ -855,30 +858,42 @@ mpa_err:
855 if (!attr->data.non_resident.lowest_vcn) { 858 if (!attr->data.non_resident.lowest_vcn) {
856 VCN max_cluster; 859 VCN max_cluster;
857 860
858 max_cluster = (sle64_to_cpu( 861 max_cluster = ((sle64_to_cpu(
859 attr->data.non_resident.allocated_size) + 862 attr->data.non_resident.allocated_size) +
860 vol->cluster_size - 1) >> 863 vol->cluster_size - 1) >>
861 vol->cluster_size_bits; 864 vol->cluster_size_bits) - 1;
862 /* 865 /*
863 * If there is a difference between the highest_vcn and the 866 * A highest_vcn of zero means this is a single extent
864 * highest cluster, the runlist is either corrupt or, more 867 * attribute so simply terminate the runlist with LCN_ENOENT).
865 * likely, there are more extents following this one.
866 */ 868 */
867 if (deltaxcn < --max_cluster) { 869 if (deltaxcn) {
868 ntfs_debug("More extents to follow; deltaxcn = 0x%llx, " 870 /*
869 "max_cluster = 0x%llx", 871 * If there is a difference between the highest_vcn and
870 (unsigned long long)deltaxcn, 872 * the highest cluster, the runlist is either corrupt
871 (unsigned long long)max_cluster); 873 * or, more likely, there are more extents following
872 rl[rlpos].vcn = vcn; 874 * this one.
873 vcn += rl[rlpos].length = max_cluster - deltaxcn; 875 */
874 rl[rlpos].lcn = LCN_RL_NOT_MAPPED; 876 if (deltaxcn < max_cluster) {
875 rlpos++; 877 ntfs_debug("More extents to follow; deltaxcn "
876 } else if (unlikely(deltaxcn > max_cluster)) { 878 "= 0x%llx, max_cluster = "
877 ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = " 879 "0x%llx",
878 "0x%llx, max_cluster = 0x%llx", 880 (unsigned long long)deltaxcn,
879 (unsigned long long)deltaxcn, 881 (unsigned long long)
880 (unsigned long long)max_cluster); 882 max_cluster);
881 goto mpa_err; 883 rl[rlpos].vcn = vcn;
884 vcn += rl[rlpos].length = max_cluster -
885 deltaxcn;
886 rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
887 rlpos++;
888 } else if (unlikely(deltaxcn > max_cluster)) {
889 ntfs_error(vol->sb, "Corrupt attribute. "
890 "deltaxcn = 0x%llx, "
891 "max_cluster = 0x%llx",
892 (unsigned long long)deltaxcn,
893 (unsigned long long)
894 max_cluster);
895 goto mpa_err;
896 }
882 } 897 }
883 rl[rlpos].lcn = LCN_ENOENT; 898 rl[rlpos].lcn = LCN_ENOENT;
884 } else /* Not the base extent. There may be more extents to follow. */ 899 } else /* Not the base extent. There may be more extents to follow. */
@@ -918,17 +933,18 @@ err_out:
918 * 933 *
919 * It is up to the caller to serialize access to the runlist @rl. 934 * It is up to the caller to serialize access to the runlist @rl.
920 * 935 *
921 * Since lcns must be >= 0, we use negative return values with special meaning: 936 * Since lcns must be >= 0, we use negative return codes with special meaning:
922 * 937 *
923 * Return value Meaning / Description 938 * Return code Meaning / Description
924 * ================================================== 939 * ==================================================
925 * -1 = LCN_HOLE Hole / not allocated on disk. 940 * LCN_HOLE Hole / not allocated on disk.
926 * -2 = LCN_RL_NOT_MAPPED This is part of the runlist which has not been 941 * LCN_RL_NOT_MAPPED This is part of the runlist which has not been
927 * inserted into the runlist yet. 942 * inserted into the runlist yet.
928 * -3 = LCN_ENOENT There is no such vcn in the attribute. 943 * LCN_ENOENT There is no such vcn in the attribute.
929 * 944 *
930 * Locking: - The caller must have locked the runlist (for reading or writing). 945 * Locking: - The caller must have locked the runlist (for reading or writing).
931 * - This function does not touch the lock. 946 * - This function does not touch the lock, nor does it modify the
947 * runlist.
932 */ 948 */
933LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn) 949LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
934{ 950{
@@ -964,6 +980,39 @@ LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn)
964 return LCN_ENOENT; 980 return LCN_ENOENT;
965} 981}
966 982
983#ifdef NTFS_RW
984
985/**
986 * ntfs_rl_find_vcn_nolock - find a vcn in a runlist
987 * @rl: runlist to search
988 * @vcn: vcn to find
989 *
990 * Find the virtual cluster number @vcn in the runlist @rl and return the
991 * address of the runlist element containing the @vcn on success.
992 *
993 * Return NULL if @rl is NULL or @vcn is in an unmapped part/out of bounds of
994 * the runlist.
995 *
996 * Locking: The runlist must be locked on entry.
997 */
998runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl, const VCN vcn)
999{
1000 BUG_ON(vcn < 0);
1001 if (unlikely(!rl || vcn < rl[0].vcn))
1002 return NULL;
1003 while (likely(rl->length)) {
1004 if (unlikely(vcn < rl[1].vcn)) {
1005 if (likely(rl->lcn >= LCN_HOLE))
1006 return rl;
1007 return NULL;
1008 }
1009 rl++;
1010 }
1011 if (likely(rl->lcn == LCN_ENOENT))
1012 return rl;
1013 return NULL;
1014}
1015
967/** 1016/**
968 * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number 1017 * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number
969 * @n: number for which to get the number of bytes for 1018 * @n: number for which to get the number of bytes for
@@ -999,10 +1048,17 @@ static inline int ntfs_get_nr_significant_bytes(const s64 n)
999 * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array 1048 * ntfs_get_size_for_mapping_pairs - get bytes needed for mapping pairs array
1000 * @vol: ntfs volume (needed for the ntfs version) 1049 * @vol: ntfs volume (needed for the ntfs version)
1001 * @rl: locked runlist to determine the size of the mapping pairs of 1050 * @rl: locked runlist to determine the size of the mapping pairs of
1002 * @start_vcn: vcn at which to start the mapping pairs array 1051 * @first_vcn: first vcn which to include in the mapping pairs array
1052 * @last_vcn: last vcn which to include in the mapping pairs array
1003 * 1053 *
1004 * Walk the locked runlist @rl and calculate the size in bytes of the mapping 1054 * Walk the locked runlist @rl and calculate the size in bytes of the mapping
1005 * pairs array corresponding to the runlist @rl, starting at vcn @start_vcn. 1055 * pairs array corresponding to the runlist @rl, starting at vcn @first_vcn and
1056 * finishing with vcn @last_vcn.
1057 *
1058 * A @last_vcn of -1 means end of runlist and in that case the size of the
1059 * mapping pairs array corresponding to the runlist starting at vcn @first_vcn
1060 * and finishing at the end of the runlist is determined.
1061 *
1006 * This for example allows us to allocate a buffer of the right size when 1062 * This for example allows us to allocate a buffer of the right size when
1007 * building the mapping pairs array. 1063 * building the mapping pairs array.
1008 * 1064 *
@@ -1018,34 +1074,50 @@ static inline int ntfs_get_nr_significant_bytes(const s64 n)
1018 * remains locked throughout, and is left locked upon return. 1074 * remains locked throughout, and is left locked upon return.
1019 */ 1075 */
1020int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, 1076int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1021 const runlist_element *rl, const VCN start_vcn) 1077 const runlist_element *rl, const VCN first_vcn,
1078 const VCN last_vcn)
1022{ 1079{
1023 LCN prev_lcn; 1080 LCN prev_lcn;
1024 int rls; 1081 int rls;
1082 BOOL the_end = FALSE;
1025 1083
1026 BUG_ON(start_vcn < 0); 1084 BUG_ON(first_vcn < 0);
1085 BUG_ON(last_vcn < -1);
1086 BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
1027 if (!rl) { 1087 if (!rl) {
1028 BUG_ON(start_vcn); 1088 BUG_ON(first_vcn);
1089 BUG_ON(last_vcn > 0);
1029 return 1; 1090 return 1;
1030 } 1091 }
1031 /* Skip to runlist element containing @start_vcn. */ 1092 /* Skip to runlist element containing @first_vcn. */
1032 while (rl->length && start_vcn >= rl[1].vcn) 1093 while (rl->length && first_vcn >= rl[1].vcn)
1033 rl++; 1094 rl++;
1034 if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn) 1095 if (unlikely((!rl->length && first_vcn > rl->vcn) ||
1096 first_vcn < rl->vcn))
1035 return -EINVAL; 1097 return -EINVAL;
1036 prev_lcn = 0; 1098 prev_lcn = 0;
1037 /* Always need the termining zero byte. */ 1099 /* Always need the termining zero byte. */
1038 rls = 1; 1100 rls = 1;
1039 /* Do the first partial run if present. */ 1101 /* Do the first partial run if present. */
1040 if (start_vcn > rl->vcn) { 1102 if (first_vcn > rl->vcn) {
1041 s64 delta; 1103 s64 delta, length = rl->length;
1042 1104
1043 /* We know rl->length != 0 already. */ 1105 /* We know rl->length != 0 already. */
1044 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1106 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1045 goto err_out; 1107 goto err_out;
1046 delta = start_vcn - rl->vcn; 1108 /*
1109 * If @stop_vcn is given and finishes inside this run, cap the
1110 * run length.
1111 */
1112 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1113 s64 s1 = last_vcn + 1;
1114 if (unlikely(rl[1].vcn > s1))
1115 length = s1 - rl->vcn;
1116 the_end = TRUE;
1117 }
1118 delta = first_vcn - rl->vcn;
1047 /* Header byte + length. */ 1119 /* Header byte + length. */
1048 rls += 1 + ntfs_get_nr_significant_bytes(rl->length - delta); 1120 rls += 1 + ntfs_get_nr_significant_bytes(length - delta);
1049 /* 1121 /*
1050 * If the logical cluster number (lcn) denotes a hole and we 1122 * If the logical cluster number (lcn) denotes a hole and we
1051 * are on NTFS 3.0+, we don't store it at all, i.e. we need 1123 * are on NTFS 3.0+, we don't store it at all, i.e. we need
@@ -1053,9 +1125,9 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1053 * Note: this assumes that on NTFS 1.2-, holes are stored with 1125 * Note: this assumes that on NTFS 1.2-, holes are stored with
1054 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1). 1126 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
1055 */ 1127 */
1056 if (rl->lcn >= 0 || vol->major_ver < 3) { 1128 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1057 prev_lcn = rl->lcn; 1129 prev_lcn = rl->lcn;
1058 if (rl->lcn >= 0) 1130 if (likely(rl->lcn >= 0))
1059 prev_lcn += delta; 1131 prev_lcn += delta;
1060 /* Change in lcn. */ 1132 /* Change in lcn. */
1061 rls += ntfs_get_nr_significant_bytes(prev_lcn); 1133 rls += ntfs_get_nr_significant_bytes(prev_lcn);
@@ -1064,11 +1136,23 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1064 rl++; 1136 rl++;
1065 } 1137 }
1066 /* Do the full runs. */ 1138 /* Do the full runs. */
1067 for (; rl->length; rl++) { 1139 for (; rl->length && !the_end; rl++) {
1068 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1140 s64 length = rl->length;
1141
1142 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1069 goto err_out; 1143 goto err_out;
1144 /*
1145 * If @stop_vcn is given and finishes inside this run, cap the
1146 * run length.
1147 */
1148 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1149 s64 s1 = last_vcn + 1;
1150 if (unlikely(rl[1].vcn > s1))
1151 length = s1 - rl->vcn;
1152 the_end = TRUE;
1153 }
1070 /* Header byte + length. */ 1154 /* Header byte + length. */
1071 rls += 1 + ntfs_get_nr_significant_bytes(rl->length); 1155 rls += 1 + ntfs_get_nr_significant_bytes(length);
1072 /* 1156 /*
1073 * If the logical cluster number (lcn) denotes a hole and we 1157 * If the logical cluster number (lcn) denotes a hole and we
1074 * are on NTFS 3.0+, we don't store it at all, i.e. we need 1158 * are on NTFS 3.0+, we don't store it at all, i.e. we need
@@ -1076,7 +1160,7 @@ int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
1076 * Note: this assumes that on NTFS 1.2-, holes are stored with 1160 * Note: this assumes that on NTFS 1.2-, holes are stored with
1077 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1). 1161 * an lcn of -1 and not a delta_lcn of -1 (unless both are -1).
1078 */ 1162 */
1079 if (rl->lcn >= 0 || vol->major_ver < 3) { 1163 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1080 /* Change in lcn. */ 1164 /* Change in lcn. */
1081 rls += ntfs_get_nr_significant_bytes(rl->lcn - 1165 rls += ntfs_get_nr_significant_bytes(rl->lcn -
1082 prev_lcn); 1166 prev_lcn);
@@ -1119,7 +1203,7 @@ static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
1119 1203
1120 i = 0; 1204 i = 0;
1121 do { 1205 do {
1122 if (dst > dst_max) 1206 if (unlikely(dst > dst_max))
1123 goto err_out; 1207 goto err_out;
1124 *dst++ = l & 0xffll; 1208 *dst++ = l & 0xffll;
1125 l >>= 8; 1209 l >>= 8;
@@ -1128,12 +1212,12 @@ static inline int ntfs_write_significant_bytes(s8 *dst, const s8 *dst_max,
1128 j = (n >> 8 * (i - 1)) & 0xff; 1212 j = (n >> 8 * (i - 1)) & 0xff;
1129 /* If the sign bit is wrong, we need an extra byte. */ 1213 /* If the sign bit is wrong, we need an extra byte. */
1130 if (n < 0 && j >= 0) { 1214 if (n < 0 && j >= 0) {
1131 if (dst > dst_max) 1215 if (unlikely(dst > dst_max))
1132 goto err_out; 1216 goto err_out;
1133 i++; 1217 i++;
1134 *dst = (s8)-1; 1218 *dst = (s8)-1;
1135 } else if (n > 0 && j < 0) { 1219 } else if (n > 0 && j < 0) {
1136 if (dst > dst_max) 1220 if (unlikely(dst > dst_max))
1137 goto err_out; 1221 goto err_out;
1138 i++; 1222 i++;
1139 *dst = (s8)0; 1223 *dst = (s8)0;
@@ -1149,13 +1233,18 @@ err_out:
1149 * @dst: destination buffer to which to write the mapping pairs array 1233 * @dst: destination buffer to which to write the mapping pairs array
1150 * @dst_len: size of destination buffer @dst in bytes 1234 * @dst_len: size of destination buffer @dst in bytes
1151 * @rl: locked runlist for which to build the mapping pairs array 1235 * @rl: locked runlist for which to build the mapping pairs array
1152 * @start_vcn: vcn at which to start the mapping pairs array 1236 * @first_vcn: first vcn which to include in the mapping pairs array
1237 * @last_vcn: last vcn which to include in the mapping pairs array
1153 * @stop_vcn: first vcn outside destination buffer on success or -ENOSPC 1238 * @stop_vcn: first vcn outside destination buffer on success or -ENOSPC
1154 * 1239 *
1155 * Create the mapping pairs array from the locked runlist @rl, starting at vcn 1240 * Create the mapping pairs array from the locked runlist @rl, starting at vcn
1156 * @start_vcn and save the array in @dst. @dst_len is the size of @dst in 1241 * @first_vcn and finishing with vcn @last_vcn and save the array in @dst.
1157 * bytes and it should be at least equal to the value obtained by calling 1242 * @dst_len is the size of @dst in bytes and it should be at least equal to the
1158 * ntfs_get_size_for_mapping_pairs(). 1243 * value obtained by calling ntfs_get_size_for_mapping_pairs().
1244 *
1245 * A @last_vcn of -1 means end of runlist and in that case the mapping pairs
1246 * array corresponding to the runlist starting at vcn @first_vcn and finishing
1247 * at the end of the runlist is created.
1159 * 1248 *
1160 * If @rl is NULL, just write a single terminator byte to @dst. 1249 * If @rl is NULL, just write a single terminator byte to @dst.
1161 * 1250 *
@@ -1164,7 +1253,7 @@ err_out:
1164 * been filled with all the mapping pairs that will fit, thus it can be treated 1253 * been filled with all the mapping pairs that will fit, thus it can be treated
1165 * as partial success, in that a new attribute extent needs to be created or 1254 * as partial success, in that a new attribute extent needs to be created or
1166 * the next extent has to be used and the mapping pairs build has to be 1255 * the next extent has to be used and the mapping pairs build has to be
1167 * continued with @start_vcn set to *@stop_vcn. 1256 * continued with @first_vcn set to *@stop_vcn.
1168 * 1257 *
1169 * Return 0 on success and -errno on error. The following error codes are 1258 * Return 0 on success and -errno on error. The following error codes are
1170 * defined: 1259 * defined:
@@ -1178,27 +1267,32 @@ err_out:
1178 */ 1267 */
1179int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, 1268int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1180 const int dst_len, const runlist_element *rl, 1269 const int dst_len, const runlist_element *rl,
1181 const VCN start_vcn, VCN *const stop_vcn) 1270 const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn)
1182{ 1271{
1183 LCN prev_lcn; 1272 LCN prev_lcn;
1184 s8 *dst_max, *dst_next; 1273 s8 *dst_max, *dst_next;
1185 int err = -ENOSPC; 1274 int err = -ENOSPC;
1275 BOOL the_end = FALSE;
1186 s8 len_len, lcn_len; 1276 s8 len_len, lcn_len;
1187 1277
1188 BUG_ON(start_vcn < 0); 1278 BUG_ON(first_vcn < 0);
1279 BUG_ON(last_vcn < -1);
1280 BUG_ON(last_vcn >= 0 && first_vcn > last_vcn);
1189 BUG_ON(dst_len < 1); 1281 BUG_ON(dst_len < 1);
1190 if (!rl) { 1282 if (!rl) {
1191 BUG_ON(start_vcn); 1283 BUG_ON(first_vcn);
1284 BUG_ON(last_vcn > 0);
1192 if (stop_vcn) 1285 if (stop_vcn)
1193 *stop_vcn = 0; 1286 *stop_vcn = 0;
1194 /* Terminator byte. */ 1287 /* Terminator byte. */
1195 *dst = 0; 1288 *dst = 0;
1196 return 0; 1289 return 0;
1197 } 1290 }
1198 /* Skip to runlist element containing @start_vcn. */ 1291 /* Skip to runlist element containing @first_vcn. */
1199 while (rl->length && start_vcn >= rl[1].vcn) 1292 while (rl->length && first_vcn >= rl[1].vcn)
1200 rl++; 1293 rl++;
1201 if ((!rl->length && start_vcn > rl->vcn) || start_vcn < rl->vcn) 1294 if (unlikely((!rl->length && first_vcn > rl->vcn) ||
1295 first_vcn < rl->vcn))
1202 return -EINVAL; 1296 return -EINVAL;
1203 /* 1297 /*
1204 * @dst_max is used for bounds checking in 1298 * @dst_max is used for bounds checking in
@@ -1207,17 +1301,27 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1207 dst_max = dst + dst_len - 1; 1301 dst_max = dst + dst_len - 1;
1208 prev_lcn = 0; 1302 prev_lcn = 0;
1209 /* Do the first partial run if present. */ 1303 /* Do the first partial run if present. */
1210 if (start_vcn > rl->vcn) { 1304 if (first_vcn > rl->vcn) {
1211 s64 delta; 1305 s64 delta, length = rl->length;
1212 1306
1213 /* We know rl->length != 0 already. */ 1307 /* We know rl->length != 0 already. */
1214 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1308 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1215 goto err_out; 1309 goto err_out;
1216 delta = start_vcn - rl->vcn; 1310 /*
1311 * If @stop_vcn is given and finishes inside this run, cap the
1312 * run length.
1313 */
1314 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1315 s64 s1 = last_vcn + 1;
1316 if (unlikely(rl[1].vcn > s1))
1317 length = s1 - rl->vcn;
1318 the_end = TRUE;
1319 }
1320 delta = first_vcn - rl->vcn;
1217 /* Write length. */ 1321 /* Write length. */
1218 len_len = ntfs_write_significant_bytes(dst + 1, dst_max, 1322 len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
1219 rl->length - delta); 1323 length - delta);
1220 if (len_len < 0) 1324 if (unlikely(len_len < 0))
1221 goto size_err; 1325 goto size_err;
1222 /* 1326 /*
1223 * If the logical cluster number (lcn) denotes a hole and we 1327 * If the logical cluster number (lcn) denotes a hole and we
@@ -1228,19 +1332,19 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1228 * case on NT4. - We assume that we just need to write the lcn 1332 * case on NT4. - We assume that we just need to write the lcn
1229 * change until someone tells us otherwise... (AIA) 1333 * change until someone tells us otherwise... (AIA)
1230 */ 1334 */
1231 if (rl->lcn >= 0 || vol->major_ver < 3) { 1335 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1232 prev_lcn = rl->lcn; 1336 prev_lcn = rl->lcn;
1233 if (rl->lcn >= 0) 1337 if (likely(rl->lcn >= 0))
1234 prev_lcn += delta; 1338 prev_lcn += delta;
1235 /* Write change in lcn. */ 1339 /* Write change in lcn. */
1236 lcn_len = ntfs_write_significant_bytes(dst + 1 + 1340 lcn_len = ntfs_write_significant_bytes(dst + 1 +
1237 len_len, dst_max, prev_lcn); 1341 len_len, dst_max, prev_lcn);
1238 if (lcn_len < 0) 1342 if (unlikely(lcn_len < 0))
1239 goto size_err; 1343 goto size_err;
1240 } else 1344 } else
1241 lcn_len = 0; 1345 lcn_len = 0;
1242 dst_next = dst + len_len + lcn_len + 1; 1346 dst_next = dst + len_len + lcn_len + 1;
1243 if (dst_next > dst_max) 1347 if (unlikely(dst_next > dst_max))
1244 goto size_err; 1348 goto size_err;
1245 /* Update header byte. */ 1349 /* Update header byte. */
1246 *dst = lcn_len << 4 | len_len; 1350 *dst = lcn_len << 4 | len_len;
@@ -1250,13 +1354,25 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1250 rl++; 1354 rl++;
1251 } 1355 }
1252 /* Do the full runs. */ 1356 /* Do the full runs. */
1253 for (; rl->length; rl++) { 1357 for (; rl->length && !the_end; rl++) {
1254 if (rl->length < 0 || rl->lcn < LCN_HOLE) 1358 s64 length = rl->length;
1359
1360 if (unlikely(length < 0 || rl->lcn < LCN_HOLE))
1255 goto err_out; 1361 goto err_out;
1362 /*
1363 * If @stop_vcn is given and finishes inside this run, cap the
1364 * run length.
1365 */
1366 if (unlikely(last_vcn >= 0 && rl[1].vcn > last_vcn)) {
1367 s64 s1 = last_vcn + 1;
1368 if (unlikely(rl[1].vcn > s1))
1369 length = s1 - rl->vcn;
1370 the_end = TRUE;
1371 }
1256 /* Write length. */ 1372 /* Write length. */
1257 len_len = ntfs_write_significant_bytes(dst + 1, dst_max, 1373 len_len = ntfs_write_significant_bytes(dst + 1, dst_max,
1258 rl->length); 1374 length);
1259 if (len_len < 0) 1375 if (unlikely(len_len < 0))
1260 goto size_err; 1376 goto size_err;
1261 /* 1377 /*
1262 * If the logical cluster number (lcn) denotes a hole and we 1378 * If the logical cluster number (lcn) denotes a hole and we
@@ -1267,17 +1383,17 @@ int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
1267 * case on NT4. - We assume that we just need to write the lcn 1383 * case on NT4. - We assume that we just need to write the lcn
1268 * change until someone tells us otherwise... (AIA) 1384 * change until someone tells us otherwise... (AIA)
1269 */ 1385 */
1270 if (rl->lcn >= 0 || vol->major_ver < 3) { 1386 if (likely(rl->lcn >= 0 || vol->major_ver < 3)) {
1271 /* Write change in lcn. */ 1387 /* Write change in lcn. */
1272 lcn_len = ntfs_write_significant_bytes(dst + 1 + 1388 lcn_len = ntfs_write_significant_bytes(dst + 1 +
1273 len_len, dst_max, rl->lcn - prev_lcn); 1389 len_len, dst_max, rl->lcn - prev_lcn);
1274 if (lcn_len < 0) 1390 if (unlikely(lcn_len < 0))
1275 goto size_err; 1391 goto size_err;
1276 prev_lcn = rl->lcn; 1392 prev_lcn = rl->lcn;
1277 } else 1393 } else
1278 lcn_len = 0; 1394 lcn_len = 0;
1279 dst_next = dst + len_len + lcn_len + 1; 1395 dst_next = dst + len_len + lcn_len + 1;
1280 if (dst_next > dst_max) 1396 if (unlikely(dst_next > dst_max))
1281 goto size_err; 1397 goto size_err;
1282 /* Update header byte. */ 1398 /* Update header byte. */
1283 *dst = lcn_len << 4 | len_len; 1399 *dst = lcn_len << 4 | len_len;
@@ -1436,3 +1552,5 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
1436 ntfs_debug("Done."); 1552 ntfs_debug("Done.");
1437 return 0; 1553 return 0;
1438} 1554}
1555
1556#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index 7107fde59df9..aa0ee6540e7c 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -2,7 +2,7 @@
2 * runlist.h - Defines for runlist handling in NTFS Linux kernel driver. 2 * runlist.h - Defines for runlist handling in NTFS Linux kernel driver.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -66,6 +66,8 @@ typedef enum {
66 LCN_HOLE = -1, /* Keep this as highest value or die! */ 66 LCN_HOLE = -1, /* Keep this as highest value or die! */
67 LCN_RL_NOT_MAPPED = -2, 67 LCN_RL_NOT_MAPPED = -2,
68 LCN_ENOENT = -3, 68 LCN_ENOENT = -3,
69 LCN_ENOMEM = -4,
70 LCN_EIO = -5,
69} LCN_SPECIAL_VALUES; 71} LCN_SPECIAL_VALUES;
70 72
71extern runlist_element *ntfs_runlists_merge(runlist_element *drl, 73extern runlist_element *ntfs_runlists_merge(runlist_element *drl,
@@ -76,14 +78,22 @@ extern runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
76 78
77extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn); 79extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn);
78 80
81#ifdef NTFS_RW
82
83extern runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl,
84 const VCN vcn);
85
79extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, 86extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol,
80 const runlist_element *rl, const VCN start_vcn); 87 const runlist_element *rl, const VCN first_vcn,
88 const VCN last_vcn);
81 89
82extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst, 90extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
83 const int dst_len, const runlist_element *rl, 91 const int dst_len, const runlist_element *rl,
84 const VCN start_vcn, VCN *const stop_vcn); 92 const VCN first_vcn, const VCN last_vcn, VCN *const stop_vcn);
85 93
86extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, 94extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
87 runlist *const runlist, const s64 new_length); 95 runlist *const runlist, const s64 new_length);
88 96
97#endif /* NTFS_RW */
98
89#endif /* _LINUX_NTFS_RUNLIST_H */ 99#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 212a3d0f2073..41aa8eb6755b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. 2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * Copyright (c) 2001,2002 Richard Russon 5 * Copyright (c) 2001,2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -34,14 +34,16 @@
34#include "sysctl.h" 34#include "sysctl.h"
35#include "logfile.h" 35#include "logfile.h"
36#include "quota.h" 36#include "quota.h"
37#include "usnjrnl.h"
37#include "dir.h" 38#include "dir.h"
38#include "debug.h" 39#include "debug.h"
39#include "index.h" 40#include "index.h"
40#include "aops.h" 41#include "aops.h"
42#include "layout.h"
41#include "malloc.h" 43#include "malloc.h"
42#include "ntfs.h" 44#include "ntfs.h"
43 45
44/* Number of mounted file systems which have compression enabled. */ 46/* Number of mounted filesystems which have compression enabled. */
45static unsigned long ntfs_nr_compression_users; 47static unsigned long ntfs_nr_compression_users;
46 48
47/* A global default upcase table and a corresponding reference count. */ 49/* A global default upcase table and a corresponding reference count. */
@@ -102,7 +104,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
102 gid_t gid = (gid_t)-1; 104 gid_t gid = (gid_t)-1;
103 mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; 105 mode_t fmask = (mode_t)-1, dmask = (mode_t)-1;
104 int mft_zone_multiplier = -1, on_errors = -1; 106 int mft_zone_multiplier = -1, on_errors = -1;
105 int show_sys_files = -1, case_sensitive = -1; 107 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
106 struct nls_table *nls_map = NULL, *old_nls; 108 struct nls_table *nls_map = NULL, *old_nls;
107 109
108 /* I am lazy... (-8 */ 110 /* I am lazy... (-8 */
@@ -162,6 +164,7 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
162 else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) 164 else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
163 else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) 165 else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
164 else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) 166 else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive)
167 else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse)
165 else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, 168 else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors,
166 on_errors_arr) 169 on_errors_arr)
167 else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) 170 else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes"))
@@ -291,6 +294,21 @@ no_mount_options:
291 else 294 else
292 NVolClearCaseSensitive(vol); 295 NVolClearCaseSensitive(vol);
293 } 296 }
297 if (disable_sparse != -1) {
298 if (disable_sparse)
299 NVolClearSparseEnabled(vol);
300 else {
301 if (!NVolSparseEnabled(vol) &&
302 vol->major_ver && vol->major_ver < 3)
303 ntfs_warning(vol->sb, "Not enabling sparse "
304 "support due to NTFS volume "
305 "version %i.%i (need at least "
306 "version 3.0).", vol->major_ver,
307 vol->minor_ver);
308 else
309 NVolSetSparseEnabled(vol);
310 }
311 }
294 return TRUE; 312 return TRUE;
295needs_arg: 313needs_arg:
296 ntfs_error(vol->sb, "The %s option requires an argument.", p); 314 ntfs_error(vol->sb, "The %s option requires an argument.", p);
@@ -480,6 +498,12 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
480 NVolSetErrors(vol); 498 NVolSetErrors(vol);
481 return -EROFS; 499 return -EROFS;
482 } 500 }
501 if (!ntfs_stamp_usnjrnl(vol)) {
502 ntfs_error(sb, "Failed to stamp transation log "
503 "($UsnJrnl)%s", es);
504 NVolSetErrors(vol);
505 return -EROFS;
506 }
483 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { 507 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
484 /* Remounting read-only. */ 508 /* Remounting read-only. */
485 if (!NVolErrors(vol)) { 509 if (!NVolErrors(vol)) {
@@ -516,16 +540,19 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
516{ 540{
517 /* 541 /*
518 * Check that checksum == sum of u32 values from b to the checksum 542 * Check that checksum == sum of u32 values from b to the checksum
519 * field. If checksum is zero, no checking is done. 543 * field. If checksum is zero, no checking is done. We will work when
544 * the checksum test fails, since some utilities update the boot sector
545 * ignoring the checksum which leaves the checksum out-of-date. We
546 * report a warning if this is the case.
520 */ 547 */
521 if ((void*)b < (void*)&b->checksum && b->checksum) { 548 if ((void*)b < (void*)&b->checksum && b->checksum && !silent) {
522 le32 *u; 549 le32 *u;
523 u32 i; 550 u32 i;
524 551
525 for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u) 552 for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u)
526 i += le32_to_cpup(u); 553 i += le32_to_cpup(u);
527 if (le32_to_cpu(b->checksum) != i) 554 if (le32_to_cpu(b->checksum) != i)
528 goto not_ntfs; 555 ntfs_warning(sb, "Invalid boot sector checksum.");
529 } 556 }
530 /* Check OEMidentifier is "NTFS " */ 557 /* Check OEMidentifier is "NTFS " */
531 if (b->oem_id != magicNTFS) 558 if (b->oem_id != magicNTFS)
@@ -541,9 +568,9 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
541 default: 568 default:
542 goto not_ntfs; 569 goto not_ntfs;
543 } 570 }
544 /* Check the cluster size is not above 65536 bytes. */ 571 /* Check the cluster size is not above the maximum (64kiB). */
545 if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) * 572 if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) *
546 b->bpb.sectors_per_cluster > 0x10000) 573 b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE)
547 goto not_ntfs; 574 goto not_ntfs;
548 /* Check reserved/unused fields are really zero. */ 575 /* Check reserved/unused fields are really zero. */
549 if (le16_to_cpu(b->bpb.reserved_sectors) || 576 if (le16_to_cpu(b->bpb.reserved_sectors) ||
@@ -575,7 +602,7 @@ static BOOL is_boot_sector_ntfs(const struct super_block *sb,
575 * many BIOSes will refuse to boot from a bootsector if the magic is 602 * many BIOSes will refuse to boot from a bootsector if the magic is
576 * incorrect, so we emit a warning. 603 * incorrect, so we emit a warning.
577 */ 604 */
578 if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) 605 if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55))
579 ntfs_warning(sb, "Invalid end of sector marker."); 606 ntfs_warning(sb, "Invalid end of sector marker.");
580 return TRUE; 607 return TRUE;
581not_ntfs: 608not_ntfs:
@@ -967,6 +994,7 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
967 tmp_ni = NTFS_I(tmp_ino); 994 tmp_ni = NTFS_I(tmp_ino);
968 /* The $MFTMirr, like the $MFT is multi sector transfer protected. */ 995 /* The $MFTMirr, like the $MFT is multi sector transfer protected. */
969 NInoSetMstProtected(tmp_ni); 996 NInoSetMstProtected(tmp_ni);
997 NInoSetSparseDisabled(tmp_ni);
970 /* 998 /*
971 * Set up our little cheat allowing us to reuse the async read io 999 * Set up our little cheat allowing us to reuse the async read io
972 * completion handler for directories. 1000 * completion handler for directories.
@@ -990,12 +1018,12 @@ static BOOL load_and_init_mft_mirror(ntfs_volume *vol)
990 */ 1018 */
991static BOOL check_mft_mirror(ntfs_volume *vol) 1019static BOOL check_mft_mirror(ntfs_volume *vol)
992{ 1020{
993 unsigned long index;
994 struct super_block *sb = vol->sb; 1021 struct super_block *sb = vol->sb;
995 ntfs_inode *mirr_ni; 1022 ntfs_inode *mirr_ni;
996 struct page *mft_page, *mirr_page; 1023 struct page *mft_page, *mirr_page;
997 u8 *kmft, *kmirr; 1024 u8 *kmft, *kmirr;
998 runlist_element *rl, rl2[2]; 1025 runlist_element *rl, rl2[2];
1026 pgoff_t index;
999 int mrecs_per_page, i; 1027 int mrecs_per_page, i;
1000 1028
1001 ntfs_debug("Entering."); 1029 ntfs_debug("Entering.");
@@ -1122,11 +1150,130 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
1122 /* ntfs_check_logfile() will have displayed error output. */ 1150 /* ntfs_check_logfile() will have displayed error output. */
1123 return FALSE; 1151 return FALSE;
1124 } 1152 }
1153 NInoSetSparseDisabled(NTFS_I(tmp_ino));
1125 vol->logfile_ino = tmp_ino; 1154 vol->logfile_ino = tmp_ino;
1126 ntfs_debug("Done."); 1155 ntfs_debug("Done.");
1127 return TRUE; 1156 return TRUE;
1128} 1157}
1129 1158
1159#define NTFS_HIBERFIL_HEADER_SIZE 4096
1160
1161/**
1162 * check_windows_hibernation_status - check if Windows is suspended on a volume
1163 * @vol: ntfs super block of device to check
1164 *
1165 * Check if Windows is hibernated on the ntfs volume @vol. This is done by
1166 * looking for the file hiberfil.sys in the root directory of the volume. If
1167 * the file is not present Windows is definitely not suspended.
1168 *
1169 * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
1170 * definitely suspended (this volume is not the system volume). Caveat: on a
1171 * system with many volumes it is possible that the < 4kiB check is bogus but
1172 * for now this should do fine.
1173 *
1174 * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
1175 * hiberfil header (which is the first 4kiB). If this begins with "hibr",
1176 * Windows is definitely suspended. If it is completely full of zeroes,
1177 * Windows is definitely not hibernated. Any other case is treated as if
1178 * Windows is suspended. This caters for the above mentioned caveat of a
1179 * system with many volumes where no "hibr" magic would be present and there is
1180 * no zero header.
1181 *
1182 * Return 0 if Windows is not hibernated on the volume, >0 if Windows is
1183 * hibernated on the volume, and -errno on error.
1184 */
1185static int check_windows_hibernation_status(ntfs_volume *vol)
1186{
1187 MFT_REF mref;
1188 struct inode *vi;
1189 ntfs_inode *ni;
1190 struct page *page;
1191 u32 *kaddr, *kend;
1192 ntfs_name *name = NULL;
1193 int ret = 1;
1194 static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'),
1195 const_cpu_to_le16('i'), const_cpu_to_le16('b'),
1196 const_cpu_to_le16('e'), const_cpu_to_le16('r'),
1197 const_cpu_to_le16('f'), const_cpu_to_le16('i'),
1198 const_cpu_to_le16('l'), const_cpu_to_le16('.'),
1199 const_cpu_to_le16('s'), const_cpu_to_le16('y'),
1200 const_cpu_to_le16('s'), 0 };
1201
1202 ntfs_debug("Entering.");
1203 /*
1204 * Find the inode number for the hibernation file by looking up the
1205 * filename hiberfil.sys in the root directory.
1206 */
1207 down(&vol->root_ino->i_sem);
1208 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
1209 &name);
1210 up(&vol->root_ino->i_sem);
1211 if (IS_ERR_MREF(mref)) {
1212 ret = MREF_ERR(mref);
1213 /* If the file does not exist, Windows is not hibernated. */
1214 if (ret == -ENOENT) {
1215 ntfs_debug("hiberfil.sys not present. Windows is not "
1216 "hibernated on the volume.");
1217 return 0;
1218 }
1219 /* A real error occured. */
1220 ntfs_error(vol->sb, "Failed to find inode number for "
1221 "hiberfil.sys.");
1222 return ret;
1223 }
1224 /* We do not care for the type of match that was found. */
1225 kfree(name);
1226 /* Get the inode. */
1227 vi = ntfs_iget(vol->sb, MREF(mref));
1228 if (IS_ERR(vi) || is_bad_inode(vi)) {
1229 if (!IS_ERR(vi))
1230 iput(vi);
1231 ntfs_error(vol->sb, "Failed to load hiberfil.sys.");
1232 return IS_ERR(vi) ? PTR_ERR(vi) : -EIO;
1233 }
1234 if (unlikely(i_size_read(vi) < NTFS_HIBERFIL_HEADER_SIZE)) {
1235 ntfs_debug("hiberfil.sys is smaller than 4kiB (0x%llx). "
1236 "Windows is hibernated on the volume. This "
1237 "is not the system volume.", i_size_read(vi));
1238 goto iput_out;
1239 }
1240 ni = NTFS_I(vi);
1241 page = ntfs_map_page(vi->i_mapping, 0);
1242 if (IS_ERR(page)) {
1243 ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
1244 ret = PTR_ERR(page);
1245 goto iput_out;
1246 }
1247 kaddr = (u32*)page_address(page);
1248 if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) {
1249 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
1250 "hibernated on the volume. This is the "
1251 "system volume.");
1252 goto unm_iput_out;
1253 }
1254 kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
1255 do {
1256 if (unlikely(*kaddr)) {
1257 ntfs_debug("hiberfil.sys is larger than 4kiB "
1258 "(0x%llx), does not contain the "
1259 "\"hibr\" magic, and does not have a "
1260 "zero header. Windows is hibernated "
1261 "on the volume. This is not the "
1262 "system volume.", i_size_read(vi));
1263 goto unm_iput_out;
1264 }
1265 } while (++kaddr < kend);
1266 ntfs_debug("hiberfil.sys contains a zero header. Windows is not "
1267 "hibernated on the volume. This is the system "
1268 "volume.");
1269 ret = 0;
1270unm_iput_out:
1271 ntfs_unmap_page(page);
1272iput_out:
1273 iput(vi);
1274 return ret;
1275}
1276
1130/** 1277/**
1131 * load_and_init_quota - load and setup the quota file for a volume if present 1278 * load_and_init_quota - load and setup the quota file for a volume if present
1132 * @vol: ntfs super block describing device whose quota file to load 1279 * @vol: ntfs super block describing device whose quota file to load
@@ -1175,8 +1322,7 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1175 return FALSE; 1322 return FALSE;
1176 } 1323 }
1177 /* We do not care for the type of match that was found. */ 1324 /* We do not care for the type of match that was found. */
1178 if (name) 1325 kfree(name);
1179 kfree(name);
1180 /* Get the inode. */ 1326 /* Get the inode. */
1181 tmp_ino = ntfs_iget(vol->sb, MREF(mref)); 1327 tmp_ino = ntfs_iget(vol->sb, MREF(mref));
1182 if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { 1328 if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) {
@@ -1198,6 +1344,167 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1198} 1344}
1199 1345
1200/** 1346/**
1347 * load_and_init_usnjrnl - load and setup the transaction log if present
1348 * @vol: ntfs super block describing device whose usnjrnl file to load
1349 *
1350 * Return TRUE on success or FALSE on error.
1351 *
1352 * If $UsnJrnl is not present or in the process of being disabled, we set
1353 * NVolUsnJrnlStamped() and return success.
1354 *
1355 * If the $UsnJrnl $DATA/$J attribute has a size equal to the lowest valid usn,
1356 * i.e. transaction logging has only just been enabled or the journal has been
1357 * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
1358 * and return success.
1359 */
1360static BOOL load_and_init_usnjrnl(ntfs_volume *vol)
1361{
1362 MFT_REF mref;
1363 struct inode *tmp_ino;
1364 ntfs_inode *tmp_ni;
1365 struct page *page;
1366 ntfs_name *name = NULL;
1367 USN_HEADER *uh;
1368 static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'),
1369 const_cpu_to_le16('U'), const_cpu_to_le16('s'),
1370 const_cpu_to_le16('n'), const_cpu_to_le16('J'),
1371 const_cpu_to_le16('r'), const_cpu_to_le16('n'),
1372 const_cpu_to_le16('l'), 0 };
1373 static ntfschar Max[5] = { const_cpu_to_le16('$'),
1374 const_cpu_to_le16('M'), const_cpu_to_le16('a'),
1375 const_cpu_to_le16('x'), 0 };
1376 static ntfschar J[3] = { const_cpu_to_le16('$'),
1377 const_cpu_to_le16('J'), 0 };
1378
1379 ntfs_debug("Entering.");
1380 /*
1381 * Find the inode number for the transaction log file by looking up the
1382 * filename $UsnJrnl in the extended system files directory $Extend.
1383 */
1384 down(&vol->extend_ino->i_sem);
1385 mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
1386 &name);
1387 up(&vol->extend_ino->i_sem);
1388 if (IS_ERR_MREF(mref)) {
1389 /*
1390 * If the file does not exist, transaction logging is disabled,
1391 * just return success.
1392 */
1393 if (MREF_ERR(mref) == -ENOENT) {
1394 ntfs_debug("$UsnJrnl not present. Volume does not "
1395 "have transaction logging enabled.");
1396not_enabled:
1397 /*
1398 * No need to try to stamp the transaction log if
1399 * transaction logging is not enabled.
1400 */
1401 NVolSetUsnJrnlStamped(vol);
1402 return TRUE;
1403 }
1404 /* A real error occured. */
1405 ntfs_error(vol->sb, "Failed to find inode number for "
1406 "$UsnJrnl.");
1407 return FALSE;
1408 }
1409 /* We do not care for the type of match that was found. */
1410 kfree(name);
1411 /* Get the inode. */
1412 tmp_ino = ntfs_iget(vol->sb, MREF(mref));
1413 if (unlikely(IS_ERR(tmp_ino) || is_bad_inode(tmp_ino))) {
1414 if (!IS_ERR(tmp_ino))
1415 iput(tmp_ino);
1416 ntfs_error(vol->sb, "Failed to load $UsnJrnl.");
1417 return FALSE;
1418 }
1419 vol->usnjrnl_ino = tmp_ino;
1420 /*
1421 * If the transaction log is in the process of being deleted, we can
1422 * ignore it.
1423 */
1424 if (unlikely(vol->vol_flags & VOLUME_DELETE_USN_UNDERWAY)) {
1425 ntfs_debug("$UsnJrnl in the process of being disabled. "
1426 "Volume does not have transaction logging "
1427 "enabled.");
1428 goto not_enabled;
1429 }
1430 /* Get the $DATA/$Max attribute. */
1431 tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, Max, 4);
1432 if (IS_ERR(tmp_ino)) {
1433 ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$Max "
1434 "attribute.");
1435 return FALSE;
1436 }
1437 vol->usnjrnl_max_ino = tmp_ino;
1438 if (unlikely(i_size_read(tmp_ino) < sizeof(USN_HEADER))) {
1439 ntfs_error(vol->sb, "Found corrupt $UsnJrnl/$DATA/$Max "
1440 "attribute (size is 0x%llx but should be at "
1441 "least 0x%x bytes).", i_size_read(tmp_ino),
1442 sizeof(USN_HEADER));
1443 return FALSE;
1444 }
1445 /* Get the $DATA/$J attribute. */
1446 tmp_ino = ntfs_attr_iget(vol->usnjrnl_ino, AT_DATA, J, 2);
1447 if (IS_ERR(tmp_ino)) {
1448 ntfs_error(vol->sb, "Failed to load $UsnJrnl/$DATA/$J "
1449 "attribute.");
1450 return FALSE;
1451 }
1452 vol->usnjrnl_j_ino = tmp_ino;
1453 /* Verify $J is non-resident and sparse. */
1454 tmp_ni = NTFS_I(vol->usnjrnl_j_ino);
1455 if (unlikely(!NInoNonResident(tmp_ni) || !NInoSparse(tmp_ni))) {
1456 ntfs_error(vol->sb, "$UsnJrnl/$DATA/$J attribute is resident "
1457 "and/or not sparse.");
1458 return FALSE;
1459 }
1460 /* Read the USN_HEADER from $DATA/$Max. */
1461 page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
1462 if (IS_ERR(page)) {
1463 ntfs_error(vol->sb, "Failed to read from $UsnJrnl/$DATA/$Max "
1464 "attribute.");
1465 return FALSE;
1466 }
1467 uh = (USN_HEADER*)page_address(page);
1468 /* Sanity check the $Max. */
1469 if (unlikely(sle64_to_cpu(uh->allocation_delta) >
1470 sle64_to_cpu(uh->maximum_size))) {
1471 ntfs_error(vol->sb, "Allocation delta (0x%llx) exceeds "
1472 "maximum size (0x%llx). $UsnJrnl is corrupt.",
1473 (long long)sle64_to_cpu(uh->allocation_delta),
1474 (long long)sle64_to_cpu(uh->maximum_size));
1475 ntfs_unmap_page(page);
1476 return FALSE;
1477 }
1478 /*
1479 * If the transaction log has been stamped and nothing has been written
1480 * to it since, we do not need to stamp it.
1481 */
1482 if (unlikely(sle64_to_cpu(uh->lowest_valid_usn) >=
1483 i_size_read(vol->usnjrnl_j_ino))) {
1484 if (likely(sle64_to_cpu(uh->lowest_valid_usn) ==
1485 i_size_read(vol->usnjrnl_j_ino))) {
1486 ntfs_unmap_page(page);
1487 ntfs_debug("$UsnJrnl is enabled but nothing has been "
1488 "logged since it was last stamped. "
1489 "Treating this as if the volume does "
1490 "not have transaction logging "
1491 "enabled.");
1492 goto not_enabled;
1493 }
1494 ntfs_error(vol->sb, "$UsnJrnl has lowest valid usn (0x%llx) "
1495 "which is out of bounds (0x%llx). $UsnJrnl "
1496 "is corrupt.",
1497 (long long)sle64_to_cpu(uh->lowest_valid_usn),
1498 i_size_read(vol->usnjrnl_j_ino));
1499 ntfs_unmap_page(page);
1500 return FALSE;
1501 }
1502 ntfs_unmap_page(page);
1503 ntfs_debug("Done.");
1504 return TRUE;
1505}
1506
1507/**
1201 * load_and_init_attrdef - load the attribute definitions table for a volume 1508 * load_and_init_attrdef - load the attribute definitions table for a volume
1202 * @vol: ntfs super block describing device whose attrdef to load 1509 * @vol: ntfs super block describing device whose attrdef to load
1203 * 1510 *
@@ -1205,10 +1512,11 @@ static BOOL load_and_init_quota(ntfs_volume *vol)
1205 */ 1512 */
1206static BOOL load_and_init_attrdef(ntfs_volume *vol) 1513static BOOL load_and_init_attrdef(ntfs_volume *vol)
1207{ 1514{
1515 loff_t i_size;
1208 struct super_block *sb = vol->sb; 1516 struct super_block *sb = vol->sb;
1209 struct inode *ino; 1517 struct inode *ino;
1210 struct page *page; 1518 struct page *page;
1211 unsigned long index, max_index; 1519 pgoff_t index, max_index;
1212 unsigned int size; 1520 unsigned int size;
1213 1521
1214 ntfs_debug("Entering."); 1522 ntfs_debug("Entering.");
@@ -1219,14 +1527,16 @@ static BOOL load_and_init_attrdef(ntfs_volume *vol)
1219 iput(ino); 1527 iput(ino);
1220 goto failed; 1528 goto failed;
1221 } 1529 }
1530 NInoSetSparseDisabled(NTFS_I(ino));
1222 /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */ 1531 /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */
1223 if (!ino->i_size || ino->i_size > 0x7fffffff) 1532 i_size = i_size_read(ino);
1533 if (i_size <= 0 || i_size > 0x7fffffff)
1224 goto iput_failed; 1534 goto iput_failed;
1225 vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(ino->i_size); 1535 vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(i_size);
1226 if (!vol->attrdef) 1536 if (!vol->attrdef)
1227 goto iput_failed; 1537 goto iput_failed;
1228 index = 0; 1538 index = 0;
1229 max_index = ino->i_size >> PAGE_CACHE_SHIFT; 1539 max_index = i_size >> PAGE_CACHE_SHIFT;
1230 size = PAGE_CACHE_SIZE; 1540 size = PAGE_CACHE_SIZE;
1231 while (index < max_index) { 1541 while (index < max_index) {
1232 /* Read the attrdef table and copy it into the linear buffer. */ 1542 /* Read the attrdef table and copy it into the linear buffer. */
@@ -1239,12 +1549,12 @@ read_partial_attrdef_page:
1239 ntfs_unmap_page(page); 1549 ntfs_unmap_page(page);
1240 }; 1550 };
1241 if (size == PAGE_CACHE_SIZE) { 1551 if (size == PAGE_CACHE_SIZE) {
1242 size = ino->i_size & ~PAGE_CACHE_MASK; 1552 size = i_size & ~PAGE_CACHE_MASK;
1243 if (size) 1553 if (size)
1244 goto read_partial_attrdef_page; 1554 goto read_partial_attrdef_page;
1245 } 1555 }
1246 vol->attrdef_size = ino->i_size; 1556 vol->attrdef_size = i_size;
1247 ntfs_debug("Read %llu bytes from $AttrDef.", ino->i_size); 1557 ntfs_debug("Read %llu bytes from $AttrDef.", i_size);
1248 iput(ino); 1558 iput(ino);
1249 return TRUE; 1559 return TRUE;
1250free_iput_failed: 1560free_iput_failed:
@@ -1267,10 +1577,11 @@ failed:
1267 */ 1577 */
1268static BOOL load_and_init_upcase(ntfs_volume *vol) 1578static BOOL load_and_init_upcase(ntfs_volume *vol)
1269{ 1579{
1580 loff_t i_size;
1270 struct super_block *sb = vol->sb; 1581 struct super_block *sb = vol->sb;
1271 struct inode *ino; 1582 struct inode *ino;
1272 struct page *page; 1583 struct page *page;
1273 unsigned long index, max_index; 1584 pgoff_t index, max_index;
1274 unsigned int size; 1585 unsigned int size;
1275 int i, max; 1586 int i, max;
1276 1587
@@ -1286,14 +1597,15 @@ static BOOL load_and_init_upcase(ntfs_volume *vol)
1286 * The upcase size must not be above 64k Unicode characters, must not 1597 * The upcase size must not be above 64k Unicode characters, must not
1287 * be zero and must be a multiple of sizeof(ntfschar). 1598 * be zero and must be a multiple of sizeof(ntfschar).
1288 */ 1599 */
1289 if (!ino->i_size || ino->i_size & (sizeof(ntfschar) - 1) || 1600 i_size = i_size_read(ino);
1290 ino->i_size > 64ULL * 1024 * sizeof(ntfschar)) 1601 if (!i_size || i_size & (sizeof(ntfschar) - 1) ||
1602 i_size > 64ULL * 1024 * sizeof(ntfschar))
1291 goto iput_upcase_failed; 1603 goto iput_upcase_failed;
1292 vol->upcase = (ntfschar*)ntfs_malloc_nofs(ino->i_size); 1604 vol->upcase = (ntfschar*)ntfs_malloc_nofs(i_size);
1293 if (!vol->upcase) 1605 if (!vol->upcase)
1294 goto iput_upcase_failed; 1606 goto iput_upcase_failed;
1295 index = 0; 1607 index = 0;
1296 max_index = ino->i_size >> PAGE_CACHE_SHIFT; 1608 max_index = i_size >> PAGE_CACHE_SHIFT;
1297 size = PAGE_CACHE_SIZE; 1609 size = PAGE_CACHE_SIZE;
1298 while (index < max_index) { 1610 while (index < max_index) {
1299 /* Read the upcase table and copy it into the linear buffer. */ 1611 /* Read the upcase table and copy it into the linear buffer. */
@@ -1306,13 +1618,13 @@ read_partial_upcase_page:
1306 ntfs_unmap_page(page); 1618 ntfs_unmap_page(page);
1307 }; 1619 };
1308 if (size == PAGE_CACHE_SIZE) { 1620 if (size == PAGE_CACHE_SIZE) {
1309 size = ino->i_size & ~PAGE_CACHE_MASK; 1621 size = i_size & ~PAGE_CACHE_MASK;
1310 if (size) 1622 if (size)
1311 goto read_partial_upcase_page; 1623 goto read_partial_upcase_page;
1312 } 1624 }
1313 vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS; 1625 vol->upcase_len = i_size >> UCHAR_T_SIZE_BITS;
1314 ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).", 1626 ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).",
1315 ino->i_size, 64 * 1024 * sizeof(ntfschar)); 1627 i_size, 64 * 1024 * sizeof(ntfschar));
1316 iput(ino); 1628 iput(ino);
1317 down(&ntfs_lock); 1629 down(&ntfs_lock);
1318 if (!default_upcase) { 1630 if (!default_upcase) {
@@ -1376,6 +1688,9 @@ static BOOL load_system_files(ntfs_volume *vol)
1376 MFT_RECORD *m; 1688 MFT_RECORD *m;
1377 VOLUME_INFORMATION *vi; 1689 VOLUME_INFORMATION *vi;
1378 ntfs_attr_search_ctx *ctx; 1690 ntfs_attr_search_ctx *ctx;
1691#ifdef NTFS_RW
1692 int err;
1693#endif /* NTFS_RW */
1379 1694
1380 ntfs_debug("Entering."); 1695 ntfs_debug("Entering.");
1381#ifdef NTFS_RW 1696#ifdef NTFS_RW
@@ -1435,7 +1750,8 @@ static BOOL load_system_files(ntfs_volume *vol)
1435 iput(vol->lcnbmp_ino); 1750 iput(vol->lcnbmp_ino);
1436 goto bitmap_failed; 1751 goto bitmap_failed;
1437 } 1752 }
1438 if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) { 1753 NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
1754 if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
1439 iput(vol->lcnbmp_ino); 1755 iput(vol->lcnbmp_ino);
1440bitmap_failed: 1756bitmap_failed:
1441 ntfs_error(sb, "Failed to load $Bitmap."); 1757 ntfs_error(sb, "Failed to load $Bitmap.");
@@ -1486,6 +1802,12 @@ get_ctx_vol_failed:
1486 unmap_mft_record(NTFS_I(vol->vol_ino)); 1802 unmap_mft_record(NTFS_I(vol->vol_ino));
1487 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, 1803 printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
1488 vol->minor_ver); 1804 vol->minor_ver);
1805 if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
1806 ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
1807 "volume version %i.%i (need at least version "
1808 "3.0).", vol->major_ver, vol->minor_ver);
1809 NVolClearSparseEnabled(vol);
1810 }
1489#ifdef NTFS_RW 1811#ifdef NTFS_RW
1490 /* Make sure that no unsupported volume flags are set. */ 1812 /* Make sure that no unsupported volume flags are set. */
1491 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { 1813 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -1545,6 +1867,50 @@ get_ctx_vol_failed:
1545 /* This will prevent a read-write remount. */ 1867 /* This will prevent a read-write remount. */
1546 NVolSetErrors(vol); 1868 NVolSetErrors(vol);
1547 } 1869 }
1870#endif /* NTFS_RW */
1871 /* Get the root directory inode so we can do path lookups. */
1872 vol->root_ino = ntfs_iget(sb, FILE_root);
1873 if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
1874 if (!IS_ERR(vol->root_ino))
1875 iput(vol->root_ino);
1876 ntfs_error(sb, "Failed to load root directory.");
1877 goto iput_logfile_err_out;
1878 }
1879#ifdef NTFS_RW
1880 /*
1881 * Check if Windows is suspended to disk on the target volume. If it
1882 * is hibernated, we must not write *anything* to the disk so set
1883 * NVolErrors() without setting the dirty volume flag and mount
1884 * read-only. This will prevent read-write remounting and it will also
1885 * prevent all writes.
1886 */
1887 err = check_windows_hibernation_status(vol);
1888 if (unlikely(err)) {
1889 static const char *es1a = "Failed to determine if Windows is "
1890 "hibernated";
1891 static const char *es1b = "Windows is hibernated";
1892 static const char *es2 = ". Run chkdsk.";
1893 const char *es1;
1894
1895 es1 = err < 0 ? es1a : es1b;
1896 /* If a read-write mount, convert it to a read-only mount. */
1897 if (!(sb->s_flags & MS_RDONLY)) {
1898 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
1899 ON_ERRORS_CONTINUE))) {
1900 ntfs_error(sb, "%s and neither on_errors="
1901 "continue nor on_errors="
1902 "remount-ro was specified%s",
1903 es1, es2);
1904 goto iput_root_err_out;
1905 }
1906 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
1907 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1908 } else
1909 ntfs_warning(sb, "%s. Will not be able to remount "
1910 "read-write%s", es1, es2);
1911 /* This will prevent a read-write remount. */
1912 NVolSetErrors(vol);
1913 }
1548 /* If (still) a read-write mount, mark the volume dirty. */ 1914 /* If (still) a read-write mount, mark the volume dirty. */
1549 if (!(sb->s_flags & MS_RDONLY) && 1915 if (!(sb->s_flags & MS_RDONLY) &&
1550 ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { 1916 ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
@@ -1558,7 +1924,7 @@ get_ctx_vol_failed:
1558 ntfs_error(sb, "%s and neither on_errors=continue nor " 1924 ntfs_error(sb, "%s and neither on_errors=continue nor "
1559 "on_errors=remount-ro was specified%s", 1925 "on_errors=remount-ro was specified%s",
1560 es1, es2); 1926 es1, es2);
1561 goto iput_logfile_err_out; 1927 goto iput_root_err_out;
1562 } 1928 }
1563 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1929 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1564 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1930 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1585,7 +1951,7 @@ get_ctx_vol_failed:
1585 ntfs_error(sb, "%s and neither on_errors=continue nor " 1951 ntfs_error(sb, "%s and neither on_errors=continue nor "
1586 "on_errors=remount-ro was specified%s", 1952 "on_errors=remount-ro was specified%s",
1587 es1, es2); 1953 es1, es2);
1588 goto iput_logfile_err_out; 1954 goto iput_root_err_out;
1589 } 1955 }
1590 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1956 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1591 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1957 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1604,23 +1970,15 @@ get_ctx_vol_failed:
1604 ntfs_error(sb, "%s and neither on_errors=continue nor " 1970 ntfs_error(sb, "%s and neither on_errors=continue nor "
1605 "on_errors=remount-ro was specified%s", 1971 "on_errors=remount-ro was specified%s",
1606 es1, es2); 1972 es1, es2);
1607 goto iput_logfile_err_out; 1973 goto iput_root_err_out;
1608 } 1974 }
1609 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2); 1975 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
1610 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 1976 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
1611 NVolSetErrors(vol); 1977 NVolSetErrors(vol);
1612 } 1978 }
1613#endif /* NTFS_RW */ 1979#endif /* NTFS_RW */
1614 /* Get the root directory inode. */
1615 vol->root_ino = ntfs_iget(sb, FILE_root);
1616 if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) {
1617 if (!IS_ERR(vol->root_ino))
1618 iput(vol->root_ino);
1619 ntfs_error(sb, "Failed to load root directory.");
1620 goto iput_logfile_err_out;
1621 }
1622 /* If on NTFS versions before 3.0, we are done. */ 1980 /* If on NTFS versions before 3.0, we are done. */
1623 if (vol->major_ver < 3) 1981 if (unlikely(vol->major_ver < 3))
1624 return TRUE; 1982 return TRUE;
1625 /* NTFS 3.0+ specific initialization. */ 1983 /* NTFS 3.0+ specific initialization. */
1626 /* Get the security descriptors inode. */ 1984 /* Get the security descriptors inode. */
@@ -1631,7 +1989,7 @@ get_ctx_vol_failed:
1631 ntfs_error(sb, "Failed to load $Secure."); 1989 ntfs_error(sb, "Failed to load $Secure.");
1632 goto iput_root_err_out; 1990 goto iput_root_err_out;
1633 } 1991 }
1634 // FIXME: Initialize security. 1992 // TODO: Initialize security.
1635 /* Get the extended system files' directory inode. */ 1993 /* Get the extended system files' directory inode. */
1636 vol->extend_ino = ntfs_iget(sb, FILE_Extend); 1994 vol->extend_ino = ntfs_iget(sb, FILE_Extend);
1637 if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) { 1995 if (IS_ERR(vol->extend_ino) || is_bad_inode(vol->extend_ino)) {
@@ -1682,10 +2040,60 @@ get_ctx_vol_failed:
1682 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; 2040 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
1683 NVolSetErrors(vol); 2041 NVolSetErrors(vol);
1684 } 2042 }
1685 // TODO: Delete or checkpoint the $UsnJrnl if it exists. 2043 /*
2044 * Find the transaction log file ($UsnJrnl), load it if present, check
2045 * it, and set it up.
2046 */
2047 if (!load_and_init_usnjrnl(vol)) {
2048 static const char *es1 = "Failed to load $UsnJrnl";
2049 static const char *es2 = ". Run chkdsk.";
2050
2051 /* If a read-write mount, convert it to a read-only mount. */
2052 if (!(sb->s_flags & MS_RDONLY)) {
2053 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2054 ON_ERRORS_CONTINUE))) {
2055 ntfs_error(sb, "%s and neither on_errors="
2056 "continue nor on_errors="
2057 "remount-ro was specified%s",
2058 es1, es2);
2059 goto iput_usnjrnl_err_out;
2060 }
2061 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
2062 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2063 } else
2064 ntfs_warning(sb, "%s. Will not be able to remount "
2065 "read-write%s", es1, es2);
2066 /* This will prevent a read-write remount. */
2067 NVolSetErrors(vol);
2068 }
2069 /* If (still) a read-write mount, stamp the transaction log. */
2070 if (!(sb->s_flags & MS_RDONLY) && !ntfs_stamp_usnjrnl(vol)) {
2071 static const char *es1 = "Failed to stamp transaction log "
2072 "($UsnJrnl)";
2073 static const char *es2 = ". Run chkdsk.";
2074
2075 /* Convert to a read-only mount. */
2076 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2077 ON_ERRORS_CONTINUE))) {
2078 ntfs_error(sb, "%s and neither on_errors=continue nor "
2079 "on_errors=remount-ro was specified%s",
2080 es1, es2);
2081 goto iput_usnjrnl_err_out;
2082 }
2083 ntfs_error(sb, "%s. Mounting read-only%s", es1, es2);
2084 sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
2085 NVolSetErrors(vol);
2086 }
1686#endif /* NTFS_RW */ 2087#endif /* NTFS_RW */
1687 return TRUE; 2088 return TRUE;
1688#ifdef NTFS_RW 2089#ifdef NTFS_RW
2090iput_usnjrnl_err_out:
2091 if (vol->usnjrnl_j_ino)
2092 iput(vol->usnjrnl_j_ino);
2093 if (vol->usnjrnl_max_ino)
2094 iput(vol->usnjrnl_max_ino);
2095 if (vol->usnjrnl_ino)
2096 iput(vol->usnjrnl_ino);
1689iput_quota_err_out: 2097iput_quota_err_out:
1690 if (vol->quota_q_ino) 2098 if (vol->quota_q_ino)
1691 iput(vol->quota_q_ino); 2099 iput(vol->quota_q_ino);
@@ -1759,6 +2167,12 @@ static void ntfs_put_super(struct super_block *sb)
1759 2167
1760 /* NTFS 3.0+ specific. */ 2168 /* NTFS 3.0+ specific. */
1761 if (vol->major_ver >= 3) { 2169 if (vol->major_ver >= 3) {
2170 if (vol->usnjrnl_j_ino)
2171 ntfs_commit_inode(vol->usnjrnl_j_ino);
2172 if (vol->usnjrnl_max_ino)
2173 ntfs_commit_inode(vol->usnjrnl_max_ino);
2174 if (vol->usnjrnl_ino)
2175 ntfs_commit_inode(vol->usnjrnl_ino);
1762 if (vol->quota_q_ino) 2176 if (vol->quota_q_ino)
1763 ntfs_commit_inode(vol->quota_q_ino); 2177 ntfs_commit_inode(vol->quota_q_ino);
1764 if (vol->quota_ino) 2178 if (vol->quota_ino)
@@ -1814,6 +2228,18 @@ static void ntfs_put_super(struct super_block *sb)
1814 /* NTFS 3.0+ specific clean up. */ 2228 /* NTFS 3.0+ specific clean up. */
1815 if (vol->major_ver >= 3) { 2229 if (vol->major_ver >= 3) {
1816#ifdef NTFS_RW 2230#ifdef NTFS_RW
2231 if (vol->usnjrnl_j_ino) {
2232 iput(vol->usnjrnl_j_ino);
2233 vol->usnjrnl_j_ino = NULL;
2234 }
2235 if (vol->usnjrnl_max_ino) {
2236 iput(vol->usnjrnl_max_ino);
2237 vol->usnjrnl_max_ino = NULL;
2238 }
2239 if (vol->usnjrnl_ino) {
2240 iput(vol->usnjrnl_ino);
2241 vol->usnjrnl_ino = NULL;
2242 }
1817 if (vol->quota_q_ino) { 2243 if (vol->quota_q_ino) {
1818 iput(vol->quota_q_ino); 2244 iput(vol->quota_q_ino);
1819 vol->quota_q_ino = NULL; 2245 vol->quota_q_ino = NULL;
@@ -1959,8 +2385,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
1959 struct address_space *mapping = vol->lcnbmp_ino->i_mapping; 2385 struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
1960 filler_t *readpage = (filler_t*)mapping->a_ops->readpage; 2386 filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
1961 struct page *page; 2387 struct page *page;
1962 unsigned long index, max_index; 2388 pgoff_t index, max_index;
1963 unsigned int max_size;
1964 2389
1965 ntfs_debug("Entering."); 2390 ntfs_debug("Entering.");
1966 /* Serialize accesses to the cluster bitmap. */ 2391 /* Serialize accesses to the cluster bitmap. */
@@ -1972,11 +2397,10 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
1972 */ 2397 */
1973 max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> 2398 max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >>
1974 PAGE_CACHE_SHIFT; 2399 PAGE_CACHE_SHIFT;
1975 /* Use multiples of 4 bytes. */ 2400 /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
1976 max_size = PAGE_CACHE_SIZE >> 2; 2401 ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
1977 ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.", 2402 max_index, PAGE_CACHE_SIZE / 4);
1978 max_index, max_size); 2403 for (index = 0; index < max_index; index++) {
1979 for (index = 0UL; index < max_index; index++) {
1980 unsigned int i; 2404 unsigned int i;
1981 /* 2405 /*
1982 * Read the page from page cache, getting it from backing store 2406 * Read the page from page cache, getting it from backing store
@@ -2008,7 +2432,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2008 * the result as all out of range bytes are set to zero by 2432 * the result as all out of range bytes are set to zero by
2009 * ntfs_readpage(). 2433 * ntfs_readpage().
2010 */ 2434 */
2011 for (i = 0; i < max_size; i++) 2435 for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
2012 nr_free -= (s64)hweight32(kaddr[i]); 2436 nr_free -= (s64)hweight32(kaddr[i]);
2013 kunmap_atomic(kaddr, KM_USER0); 2437 kunmap_atomic(kaddr, KM_USER0);
2014 page_cache_release(page); 2438 page_cache_release(page);
@@ -2031,6 +2455,8 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2031/** 2455/**
2032 * __get_nr_free_mft_records - return the number of free inodes on a volume 2456 * __get_nr_free_mft_records - return the number of free inodes on a volume
2033 * @vol: ntfs volume for which to obtain free inode count 2457 * @vol: ntfs volume for which to obtain free inode count
2458 * @nr_free: number of mft records in filesystem
2459 * @max_index: maximum number of pages containing set bits
2034 * 2460 *
2035 * Calculate the number of free mft records (inodes) on the mounted NTFS 2461 * Calculate the number of free mft records (inodes) on the mounted NTFS
2036 * volume @vol. We actually calculate the number of mft records in use instead 2462 * volume @vol. We actually calculate the number of mft records in use instead
@@ -2043,32 +2469,20 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2043 * 2469 *
2044 * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing. 2470 * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing.
2045 */ 2471 */
2046static unsigned long __get_nr_free_mft_records(ntfs_volume *vol) 2472static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2473 s64 nr_free, const pgoff_t max_index)
2047{ 2474{
2048 s64 nr_free;
2049 u32 *kaddr; 2475 u32 *kaddr;
2050 struct address_space *mapping = vol->mftbmp_ino->i_mapping; 2476 struct address_space *mapping = vol->mftbmp_ino->i_mapping;
2051 filler_t *readpage = (filler_t*)mapping->a_ops->readpage; 2477 filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
2052 struct page *page; 2478 struct page *page;
2053 unsigned long index, max_index; 2479 pgoff_t index;
2054 unsigned int max_size;
2055 2480
2056 ntfs_debug("Entering."); 2481 ntfs_debug("Entering.");
2057 /* Number of mft records in file system (at this point in time). */ 2482 /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
2058 nr_free = vol->mft_ino->i_size >> vol->mft_record_size_bits;
2059 /*
2060 * Convert the maximum number of set bits into bytes rounded up, then
2061 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
2062 * have one full and one partial page max_index = 2.
2063 */
2064 max_index = ((((NTFS_I(vol->mft_ino)->initialized_size >>
2065 vol->mft_record_size_bits) + 7) >> 3) +
2066 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2067 /* Use multiples of 4 bytes. */
2068 max_size = PAGE_CACHE_SIZE >> 2;
2069 ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " 2483 ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
2070 "0x%x.", max_index, max_size); 2484 "0x%lx.", max_index, PAGE_CACHE_SIZE / 4);
2071 for (index = 0UL; index < max_index; index++) { 2485 for (index = 0; index < max_index; index++) {
2072 unsigned int i; 2486 unsigned int i;
2073 /* 2487 /*
2074 * Read the page from page cache, getting it from backing store 2488 * Read the page from page cache, getting it from backing store
@@ -2100,7 +2514,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
2100 * the result as all out of range bytes are set to zero by 2514 * the result as all out of range bytes are set to zero by
2101 * ntfs_readpage(). 2515 * ntfs_readpage().
2102 */ 2516 */
2103 for (i = 0; i < max_size; i++) 2517 for (i = 0; i < PAGE_CACHE_SIZE / 4; i++)
2104 nr_free -= (s64)hweight32(kaddr[i]); 2518 nr_free -= (s64)hweight32(kaddr[i]);
2105 kunmap_atomic(kaddr, KM_USER0); 2519 kunmap_atomic(kaddr, KM_USER0);
2106 page_cache_release(page); 2520 page_cache_release(page);
@@ -2134,8 +2548,11 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol)
2134 */ 2548 */
2135static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) 2549static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
2136{ 2550{
2137 ntfs_volume *vol = NTFS_SB(sb);
2138 s64 size; 2551 s64 size;
2552 ntfs_volume *vol = NTFS_SB(sb);
2553 ntfs_inode *mft_ni = NTFS_I(vol->mft_ino);
2554 pgoff_t max_index;
2555 unsigned long flags;
2139 2556
2140 ntfs_debug("Entering."); 2557 ntfs_debug("Entering.");
2141 /* Type of filesystem. */ 2558 /* Type of filesystem. */
@@ -2143,13 +2560,13 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
2143 /* Optimal transfer block size. */ 2560 /* Optimal transfer block size. */
2144 sfs->f_bsize = PAGE_CACHE_SIZE; 2561 sfs->f_bsize = PAGE_CACHE_SIZE;
2145 /* 2562 /*
2146 * Total data blocks in file system in units of f_bsize and since 2563 * Total data blocks in filesystem in units of f_bsize and since
2147 * inodes are also stored in data blocs ($MFT is a file) this is just 2564 * inodes are also stored in data blocs ($MFT is a file) this is just
2148 * the total clusters. 2565 * the total clusters.
2149 */ 2566 */
2150 sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> 2567 sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >>
2151 PAGE_CACHE_SHIFT; 2568 PAGE_CACHE_SHIFT;
2152 /* Free data blocks in file system in units of f_bsize. */ 2569 /* Free data blocks in filesystem in units of f_bsize. */
2153 size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> 2570 size = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
2154 PAGE_CACHE_SHIFT; 2571 PAGE_CACHE_SHIFT;
2155 if (size < 0LL) 2572 if (size < 0LL)
@@ -2158,17 +2575,27 @@ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs)
2158 sfs->f_bavail = sfs->f_bfree = size; 2575 sfs->f_bavail = sfs->f_bfree = size;
2159 /* Serialize accesses to the inode bitmap. */ 2576 /* Serialize accesses to the inode bitmap. */
2160 down_read(&vol->mftbmp_lock); 2577 down_read(&vol->mftbmp_lock);
2161 /* Number of inodes in file system (at this point in time). */ 2578 read_lock_irqsave(&mft_ni->size_lock, flags);
2162 sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits; 2579 size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
2580 /*
2581 * Convert the maximum number of set bits into bytes rounded up, then
2582 * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
2583 * have one full and one partial page max_index = 2.
2584 */
2585 max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
2586 + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
2587 read_unlock_irqrestore(&mft_ni->size_lock, flags);
2588 /* Number of inodes in filesystem (at this point in time). */
2589 sfs->f_files = size;
2163 /* Free inodes in fs (based on current total count). */ 2590 /* Free inodes in fs (based on current total count). */
2164 sfs->f_ffree = __get_nr_free_mft_records(vol); 2591 sfs->f_ffree = __get_nr_free_mft_records(vol, size, max_index);
2165 up_read(&vol->mftbmp_lock); 2592 up_read(&vol->mftbmp_lock);
2166 /* 2593 /*
2167 * File system id. This is extremely *nix flavour dependent and even 2594 * File system id. This is extremely *nix flavour dependent and even
2168 * within Linux itself all fs do their own thing. I interpret this to 2595 * within Linux itself all fs do their own thing. I interpret this to
2169 * mean a unique id associated with the mounted fs and not the id 2596 * mean a unique id associated with the mounted fs and not the id
2170 * associated with the file system driver, the latter is already given 2597 * associated with the filesystem driver, the latter is already given
2171 * by the file system type in sfs->f_type. Thus we use the 64-bit 2598 * by the filesystem type in sfs->f_type. Thus we use the 64-bit
2172 * volume serial number splitting it into two 32-bit parts. We enter 2599 * volume serial number splitting it into two 32-bit parts. We enter
2173 * the least significant 32-bits in f_fsid[0] and the most significant 2600 * the least significant 32-bits in f_fsid[0] and the most significant
2174 * 32-bits in f_fsid[1]. 2601 * 32-bits in f_fsid[1].
@@ -2219,53 +2646,19 @@ static struct super_operations ntfs_sops = {
2219 proc. */ 2646 proc. */
2220}; 2647};
2221 2648
2222
2223/** 2649/**
2224 * Declarations for NTFS specific export operations (fs/ntfs/namei.c). 2650 * ntfs_fill_super - mount an ntfs filesystem
2225 */ 2651 * @sb: super block of ntfs filesystem to mount
2226extern struct dentry *ntfs_get_parent(struct dentry *child_dent);
2227extern struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh);
2228
2229/**
2230 * Export operations allowing NFS exporting of mounted NTFS partitions.
2231 *
2232 * We use the default ->decode_fh() and ->encode_fh() for now. Note that they
2233 * use 32 bits to store the inode number which is an unsigned long so on 64-bit
2234 * architectures is usually 64 bits so it would all fail horribly on huge
2235 * volumes. I guess we need to define our own encode and decode fh functions
2236 * that store 64-bit inode numbers at some point but for now we will ignore the
2237 * problem...
2238 *
2239 * We also use the default ->get_name() helper (used by ->decode_fh() via
2240 * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs
2241 * independent.
2242 *
2243 * The default ->get_parent() just returns -EACCES so we have to provide our
2244 * own and the default ->get_dentry() is incompatible with NTFS due to not
2245 * allowing the inode number 0 which is used in NTFS for the system file $MFT
2246 * and due to using iget() whereas NTFS needs ntfs_iget().
2247 */
2248static struct export_operations ntfs_export_ops = {
2249 .get_parent = ntfs_get_parent, /* Find the parent of a given
2250 directory. */
2251 .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode
2252 given a file handle
2253 sub-fragment. */
2254};
2255
2256/**
2257 * ntfs_fill_super - mount an ntfs files system
2258 * @sb: super block of ntfs file system to mount
2259 * @opt: string containing the mount options 2652 * @opt: string containing the mount options
2260 * @silent: silence error output 2653 * @silent: silence error output
2261 * 2654 *
2262 * ntfs_fill_super() is called by the VFS to mount the device described by @sb 2655 * ntfs_fill_super() is called by the VFS to mount the device described by @sb
2263 * with the mount otions in @data with the NTFS file system. 2656 * with the mount otions in @data with the NTFS filesystem.
2264 * 2657 *
2265 * If @silent is true, remain silent even if errors are detected. This is used 2658 * If @silent is true, remain silent even if errors are detected. This is used
2266 * during bootup, when the kernel tries to mount the root file system with all 2659 * during bootup, when the kernel tries to mount the root filesystem with all
2267 * registered file systems one after the other until one succeeds. This implies 2660 * registered filesystems one after the other until one succeeds. This implies
2268 * that all file systems except the correct one will quite correctly and 2661 * that all filesystems except the correct one will quite correctly and
2269 * expectedly return an error, but nobody wants to see error messages when in 2662 * expectedly return an error, but nobody wants to see error messages when in
2270 * fact this is what is supposed to happen. 2663 * fact this is what is supposed to happen.
2271 * 2664 *
@@ -2292,39 +2685,25 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2292 return -ENOMEM; 2685 return -ENOMEM;
2293 } 2686 }
2294 /* Initialize ntfs_volume structure. */ 2687 /* Initialize ntfs_volume structure. */
2295 memset(vol, 0, sizeof(ntfs_volume)); 2688 *vol = (ntfs_volume) {
2296 vol->sb = sb; 2689 .sb = sb,
2297 vol->upcase = NULL; 2690 /*
2298 vol->attrdef = NULL; 2691 * Default is group and other don't have any access to files or
2299 vol->mft_ino = NULL; 2692 * directories while owner has full access. Further, files by
2300 vol->mftbmp_ino = NULL; 2693 * default are not executable but directories are of course
2694 * browseable.
2695 */
2696 .fmask = 0177,
2697 .dmask = 0077,
2698 };
2301 init_rwsem(&vol->mftbmp_lock); 2699 init_rwsem(&vol->mftbmp_lock);
2302#ifdef NTFS_RW
2303 vol->mftmirr_ino = NULL;
2304 vol->logfile_ino = NULL;
2305#endif /* NTFS_RW */
2306 vol->lcnbmp_ino = NULL;
2307 init_rwsem(&vol->lcnbmp_lock); 2700 init_rwsem(&vol->lcnbmp_lock);
2308 vol->vol_ino = NULL;
2309 vol->root_ino = NULL;
2310 vol->secure_ino = NULL;
2311 vol->extend_ino = NULL;
2312#ifdef NTFS_RW
2313 vol->quota_ino = NULL;
2314 vol->quota_q_ino = NULL;
2315#endif /* NTFS_RW */
2316 vol->nls_map = NULL;
2317
2318 /*
2319 * Default is group and other don't have any access to files or
2320 * directories while owner has full access. Further, files by default
2321 * are not executable but directories are of course browseable.
2322 */
2323 vol->fmask = 0177;
2324 vol->dmask = 0077;
2325 2701
2326 unlock_kernel(); 2702 unlock_kernel();
2327 2703
2704 /* By default, enable sparse support. */
2705 NVolSetSparseEnabled(vol);
2706
2328 /* Important to get the mount options dealt with now. */ 2707 /* Important to get the mount options dealt with now. */
2329 if (!parse_options(vol, (char*)opt)) 2708 if (!parse_options(vol, (char*)opt))
2330 goto err_out_now; 2709 goto err_out_now;
@@ -2347,7 +2726,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2347 } 2726 }
2348 2727
2349 /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ 2728 /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */
2350 vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS; 2729 vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
2730 NTFS_BLOCK_SIZE_BITS;
2351 2731
2352 /* Read the boot sector and return unlocked buffer head to it. */ 2732 /* Read the boot sector and return unlocked buffer head to it. */
2353 if (!(bh = read_ntfs_boot_sector(sb, silent))) { 2733 if (!(bh = read_ntfs_boot_sector(sb, silent))) {
@@ -2476,6 +2856,18 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2476 /* NTFS 3.0+ specific clean up. */ 2856 /* NTFS 3.0+ specific clean up. */
2477 if (vol->major_ver >= 3) { 2857 if (vol->major_ver >= 3) {
2478#ifdef NTFS_RW 2858#ifdef NTFS_RW
2859 if (vol->usnjrnl_j_ino) {
2860 iput(vol->usnjrnl_j_ino);
2861 vol->usnjrnl_j_ino = NULL;
2862 }
2863 if (vol->usnjrnl_max_ino) {
2864 iput(vol->usnjrnl_max_ino);
2865 vol->usnjrnl_max_ino = NULL;
2866 }
2867 if (vol->usnjrnl_ino) {
2868 iput(vol->usnjrnl_ino);
2869 vol->usnjrnl_ino = NULL;
2870 }
2479 if (vol->quota_q_ino) { 2871 if (vol->quota_q_ino) {
2480 iput(vol->quota_q_ino); 2872 iput(vol->quota_q_ino);
2481 vol->quota_q_ino = NULL; 2873 vol->quota_q_ino = NULL;
@@ -2581,7 +2973,7 @@ err_out_now:
2581 */ 2973 */
2582kmem_cache_t *ntfs_name_cache; 2974kmem_cache_t *ntfs_name_cache;
2583 2975
2584/* Slab caches for efficient allocation/deallocation of of inodes. */ 2976/* Slab caches for efficient allocation/deallocation of inodes. */
2585kmem_cache_t *ntfs_inode_cache; 2977kmem_cache_t *ntfs_inode_cache;
2586kmem_cache_t *ntfs_big_inode_cache; 2978kmem_cache_t *ntfs_big_inode_cache;
2587 2979
@@ -2705,7 +3097,7 @@ static int __init init_ntfs_fs(void)
2705 ntfs_debug("NTFS driver registered successfully."); 3097 ntfs_debug("NTFS driver registered successfully.");
2706 return 0; /* Success! */ 3098 return 0; /* Success! */
2707 } 3099 }
2708 printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n"); 3100 printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
2709 3101
2710sysctl_err_out: 3102sysctl_err_out:
2711 kmem_cache_destroy(ntfs_big_inode_cache); 3103 kmem_cache_destroy(ntfs_big_inode_cache);
@@ -2719,7 +3111,7 @@ actx_err_out:
2719 kmem_cache_destroy(ntfs_index_ctx_cache); 3111 kmem_cache_destroy(ntfs_index_ctx_cache);
2720ictx_err_out: 3112ictx_err_out:
2721 if (!err) { 3113 if (!err) {
2722 printk(KERN_CRIT "NTFS: Aborting NTFS file system driver " 3114 printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver "
2723 "registration...\n"); 3115 "registration...\n");
2724 err = -ENOMEM; 3116 err = -ENOMEM;
2725 } 3117 }
@@ -2759,7 +3151,7 @@ static void __exit exit_ntfs_fs(void)
2759} 3151}
2760 3152
2761MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); 3153MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>");
2762MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2004 Anton Altaparmakov"); 3154MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov");
2763MODULE_VERSION(NTFS_VERSION); 3155MODULE_VERSION(NTFS_VERSION);
2764MODULE_LICENSE("GPL"); 3156MODULE_LICENSE("GPL");
2765#ifdef DEBUG 3157#ifdef DEBUG
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 75067e4f3036..1c23138d00b3 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -3,7 +3,7 @@
3 * the Linux-NTFS project. Adapted from the old NTFS driver, 3 * the Linux-NTFS project. Adapted from the old NTFS driver,
4 * Copyright (C) 1997 Martin von Löwis, Régis Duchesne 4 * Copyright (C) 1997 Martin von Löwis, Régis Duchesne
5 * 5 *
6 * Copyright (c) 2002-2004 Anton Altaparmakov 6 * Copyright (c) 2002-2005 Anton Altaparmakov
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as published 9 * modify it under the terms of the GNU General Public License as published
@@ -67,7 +67,7 @@ int ntfs_sysctl(int add)
67 return -ENOMEM; 67 return -ENOMEM;
68#ifdef CONFIG_PROC_FS 68#ifdef CONFIG_PROC_FS
69 /* 69 /*
70 * If the proc file system is in use and we are a module, need 70 * If the proc filesystem is in use and we are a module, need
71 * to set the owner of our proc entry to our module. In the 71 * to set the owner of our proc entry to our module. In the
72 * non-modular case, THIS_MODULE is NULL, so this is ok. 72 * non-modular case, THIS_MODULE is NULL, so this is ok.
73 */ 73 */
diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h
index df749cc0aac8..c8064cae8f17 100644
--- a/fs/ntfs/sysctl.h
+++ b/fs/ntfs/sysctl.h
@@ -26,7 +26,7 @@
26 26
27#include <linux/config.h> 27#include <linux/config.h>
28 28
29#if (DEBUG && CONFIG_SYSCTL) 29#if defined(DEBUG) && defined(CONFIG_SYSCTL)
30 30
31extern int ntfs_sysctl(int add); 31extern int ntfs_sysctl(int add);
32 32
diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h
index a09a51dabe4e..01233989d5d1 100644
--- a/fs/ntfs/time.h
+++ b/fs/ntfs/time.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * time.h - NTFS time conversion functions. Part of the Linux-NTFS project. 2 * time.h - NTFS time conversion functions. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2004 Anton Altaparmakov 4 * Copyright (c) 2001-2005 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -87,7 +87,7 @@ static inline struct timespec ntfs2utc(const sle64 time)
87 struct timespec ts; 87 struct timespec ts;
88 88
89 /* Subtract the NTFS time offset. */ 89 /* Subtract the NTFS time offset. */
90 s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET; 90 u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET);
91 /* 91 /*
92 * Convert the time to 1-second intervals and the remainder to 92 * Convert the time to 1-second intervals and the remainder to
93 * 1-nano-second intervals. 93 * 1-nano-second intervals.
diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h
index 08a55aa53d4e..6e4a7e3343f2 100644
--- a/fs/ntfs/types.h
+++ b/fs/ntfs/types.h
@@ -2,7 +2,7 @@
2 * types.h - Defines for NTFS Linux kernel driver specific types. 2 * types.h - Defines for NTFS Linux kernel driver specific types.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published 8 * modify it under the terms of the GNU General Public License as published
@@ -53,6 +53,14 @@ typedef sle64 leLCN;
53typedef s64 LSN; 53typedef s64 LSN;
54typedef sle64 leLSN; 54typedef sle64 leLSN;
55 55
56/*
57 * The NTFS transaction log $UsnJrnl uses usn which are signed 64-bit values.
58 * We define our own type USN, to allow for type checking and better code
59 * readability.
60 */
61typedef s64 USN;
62typedef sle64 leUSN;
63
56typedef enum { 64typedef enum {
57 FALSE = 0, 65 FALSE = 0,
58 TRUE = 1 66 TRUE = 1
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 560b0ea255b0..19c42e231b44 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -264,7 +264,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
264 264
265 /* We don't trust outside sources. */ 265 /* We don't trust outside sources. */
266 if (ins) { 266 if (ins) {
267 ucs = (ntfschar*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); 267 ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
268 if (ucs) { 268 if (ucs) {
269 for (i = o = 0; i < ins_len; i += wc_len) { 269 for (i = o = 0; i < ins_len; i += wc_len) {
270 wc_len = nls->char2uni(ins + i, ins_len - i, 270 wc_len = nls->char2uni(ins + i, ins_len - i,
diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c
new file mode 100644
index 000000000000..77773240d139
--- /dev/null
+++ b/fs/ntfs/usnjrnl.c
@@ -0,0 +1,84 @@
1/*
2 * usnjrnl.h - NTFS kernel transaction log ($UsnJrnl) handling. Part of the
3 * Linux-NTFS project.
4 *
5 * Copyright (c) 2005 Anton Altaparmakov
6 *
7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published
9 * by the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program/include file is distributed in the hope that it will be
13 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program (in the main directory of the Linux-NTFS
19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifdef NTFS_RW
24
25#include <linux/fs.h>
26#include <linux/highmem.h>
27#include <linux/mm.h>
28
29#include "aops.h"
30#include "debug.h"
31#include "endian.h"
32#include "time.h"
33#include "types.h"
34#include "usnjrnl.h"
35#include "volume.h"
36
37/**
38 * ntfs_stamp_usnjrnl - stamp the transaction log ($UsnJrnl) on an ntfs volume
39 * @vol: ntfs volume on which to stamp the transaction log
40 *
41 * Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return
42 * TRUE on success and FALSE on error.
43 *
44 * This function assumes that the transaction log has already been loaded and
45 * consistency checked by a call to fs/ntfs/super.c::load_and_init_usnjrnl().
46 */
47BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol)
48{
49 ntfs_debug("Entering.");
50 if (likely(!NVolUsnJrnlStamped(vol))) {
51 sle64 stamp;
52 struct page *page;
53 USN_HEADER *uh;
54
55 page = ntfs_map_page(vol->usnjrnl_max_ino->i_mapping, 0);
56 if (IS_ERR(page)) {
57 ntfs_error(vol->sb, "Failed to read from "
58 "$UsnJrnl/$DATA/$Max attribute.");
59 return FALSE;
60 }
61 uh = (USN_HEADER*)page_address(page);
62 stamp = get_current_ntfs_time();
63 ntfs_debug("Stamping transaction log ($UsnJrnl): old "
64 "journal_id 0x%llx, old lowest_valid_usn "
65 "0x%llx, new journal_id 0x%llx, new "
66 "lowest_valid_usn 0x%llx.",
67 (long long)sle64_to_cpu(uh->journal_id),
68 (long long)sle64_to_cpu(uh->lowest_valid_usn),
69 (long long)sle64_to_cpu(stamp),
70 i_size_read(vol->usnjrnl_j_ino));
71 uh->lowest_valid_usn =
72 cpu_to_sle64(i_size_read(vol->usnjrnl_j_ino));
73 uh->journal_id = stamp;
74 flush_dcache_page(page);
75 set_page_dirty(page);
76 ntfs_unmap_page(page);
77 /* Set the flag so we do not have to do it again on remount. */
78 NVolSetUsnJrnlStamped(vol);
79 }
80 ntfs_debug("Done.");
81 return TRUE;
82}
83
84#endif /* NTFS_RW */
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
new file mode 100644
index 000000000000..ff988b0deb45
--- /dev/null
+++ b/fs/ntfs/usnjrnl.h
@@ -0,0 +1,205 @@
1/*
2 * usnjrnl.h - Defines for NTFS kernel transaction log ($UsnJrnl) handling.
3 * Part of the Linux-NTFS project.
4 *
5 * Copyright (c) 2005 Anton Altaparmakov
6 *
7 * This program/include file is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as published
9 * by the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program/include file is distributed in the hope that it will be
13 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program (in the main directory of the Linux-NTFS
19 * distribution in the file COPYING); if not, write to the Free Software
20 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22
23#ifndef _LINUX_NTFS_USNJRNL_H
24#define _LINUX_NTFS_USNJRNL_H
25
26#ifdef NTFS_RW
27
28#include "types.h"
29#include "endian.h"
30#include "layout.h"
31#include "volume.h"
32
33/*
34 * Transaction log ($UsnJrnl) organization:
35 *
36 * The transaction log records whenever a file is modified in any way. So for
37 * example it will record that file "blah" was written to at a particular time
 38 * but not what was written. It will record that a file was deleted or
39 * created, that a file was truncated, etc. See below for all the reason
40 * codes used.
41 *
42 * The transaction log is in the $Extend directory which is in the root
43 * directory of each volume. If it is not present it means transaction
44 * logging is disabled. If it is present it means transaction logging is
45 * either enabled or in the process of being disabled in which case we can
46 * ignore it as it will go away as soon as Windows gets its hands on it.
47 *
48 * To determine whether the transaction logging is enabled or in the process
49 * of being disabled, need to check the volume flags in the
50 * $VOLUME_INFORMATION attribute in the $Volume system file (which is present
51 * in the root directory and has a fixed mft record number, see layout.h).
52 * If the flag VOLUME_DELETE_USN_UNDERWAY is set it means the transaction log
53 * is in the process of being disabled and if this flag is clear it means the
54 * transaction log is enabled.
55 *
56 * The transaction log consists of two parts; the $DATA/$Max attribute as well
57 * as the $DATA/$J attribute. $Max is a header describing the transaction
58 * log whilst $J is the transaction log data itself as a sequence of variable
59 * sized USN_RECORDs (see below for all the structures).
60 *
61 * We do not care about transaction logging at this point in time but we still
62 * need to let windows know that the transaction log is out of date. To do
63 * this we need to stamp the transaction log. This involves setting the
64 * lowest_valid_usn field in the $DATA/$Max attribute to the usn to be used
65 * for the next added USN_RECORD to the $DATA/$J attribute as well as
66 * generating a new journal_id in $DATA/$Max.
67 *
68 * The journal_id is as of the current version (2.0) of the transaction log
69 * simply the 64-bit timestamp of when the journal was either created or last
70 * stamped.
71 *
72 * To determine the next usn there are two ways. The first is to parse
73 * $DATA/$J and to find the last USN_RECORD in it and to add its record_length
74 * to its usn (which is the byte offset in the $DATA/$J attribute). The
75 * second is simply to take the data size of the attribute. Since the usns
76 * are simply byte offsets into $DATA/$J, this is exactly the next usn. For
77 * obvious reasons we use the second method as it is much simpler and faster.
78 *
79 * As an aside, note that to actually disable the transaction log, one would
80 * need to set the VOLUME_DELETE_USN_UNDERWAY flag (see above), then go
81 * through all the mft records on the volume and set the usn field in their
82 * $STANDARD_INFORMATION attribute to zero. Once that is done, one would need
 83 * to delete the transaction log file, i.e. \$Extend\$UsnJrnl, and finally,
84 * one would need to clear the VOLUME_DELETE_USN_UNDERWAY flag.
85 *
86 * Note that if a volume is unmounted whilst the transaction log is being
87 * disabled, the process will continue the next time the volume is mounted.
88 * This is why we can safely mount read-write when we see a transaction log
89 * in the process of being deleted.
90 */
91
92/* Some $UsnJrnl related constants. */
93#define UsnJrnlMajorVer 2
94#define UsnJrnlMinorVer 0
95
96/*
97 * $DATA/$Max attribute. This is (always?) resident and has a fixed size of
98 * 32 bytes. It contains the header describing the transaction log.
99 */
100typedef struct {
101/*Ofs*/
102/* 0*/sle64 maximum_size; /* The maximum on-disk size of the $DATA/$J
103 attribute. */
104/* 8*/sle64 allocation_delta; /* Number of bytes by which to increase the
105 size of the $DATA/$J attribute. */
106/*0x10*/sle64 journal_id; /* Current id of the transaction log. */
107/*0x18*/leUSN lowest_valid_usn; /* Lowest valid usn in $DATA/$J for the
108 current journal_id. */
109/* sizeof() = 32 (0x20) bytes */
110} __attribute__ ((__packed__)) USN_HEADER;
111
112/*
113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the
114 * file since it was last opened. I think the names speak for themselves but
115 * if you disagree check out the descriptions in the Linux NTFS project NTFS
116 * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
117 */
118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
120 USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002),
121 USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004),
122 USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010),
123 USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020),
124 USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040),
125 USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100),
126 USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200),
127 USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400),
128 USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800),
129 USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000),
130 USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000),
131 USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000),
132 USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000),
133 USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000),
134 USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000),
135 USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000),
136 USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000),
137 USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000),
138 USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000),
139 USN_REASON_CLOSE = const_cpu_to_le32(0x80000000),
140};
141
142typedef le32 USN_REASON_FLAGS;
143
144/*
145 * Source info flags (32-bit). Information about the source of the change(s)
146 * to the file. For detailed descriptions of what these mean, see the Linux
147 * NTFS project NTFS documentation:
148 * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html
149 */
150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
152 USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002),
153 USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004),
154};
155
156typedef le32 USN_SOURCE_INFO_FLAGS;
157
158/*
159 * $DATA/$J attribute. This is always non-resident, is marked as sparse, and
 160 * is of variable size. It consists of a sequence of variable size
161 * USN_RECORDS. The minimum allocated_size is allocation_delta as
162 * specified in $DATA/$Max. When the maximum_size specified in $DATA/$Max is
163 * exceeded by more than allocation_delta bytes, allocation_delta bytes are
164 * allocated and appended to the $DATA/$J attribute and an equal number of
165 * bytes at the beginning of the attribute are freed and made sparse. Note the
166 * making sparse only happens at volume checkpoints and hence the actual
167 * $DATA/$J size can exceed maximum_size + allocation_delta temporarily.
168 */
169typedef struct {
170/*Ofs*/
171/* 0*/le32 length; /* Byte size of this record (8-byte
172 aligned). */
173/* 4*/le16 major_ver; /* Major version of the transaction log used
174 for this record. */
175/* 6*/le16 minor_ver; /* Minor version of the transaction log used
176 for this record. */
177/* 8*/leMFT_REF mft_reference;/* The mft reference of the file (or
178 directory) described by this record. */
179/*0x10*/leMFT_REF parent_directory;/* The mft reference of the parent
180 directory of the file described by this
181 record. */
182/*0x18*/leUSN usn; /* The usn of this record. Equals the offset
183 within the $DATA/$J attribute. */
184/*0x20*/sle64 time; /* Time when this record was created. */
185/*0x28*/USN_REASON_FLAGS reason;/* Reason flags (see above). */
186/*0x2c*/USN_SOURCE_INFO_FLAGS source_info;/* Source info flags (see above). */
187/*0x30*/le32 security_id; /* File security_id copied from
188 $STANDARD_INFORMATION. */
189/*0x34*/FILE_ATTR_FLAGS file_attributes; /* File attributes copied from
190 $STANDARD_INFORMATION or $FILE_NAME (not
191 sure which). */
192/*0x38*/le16 file_name_size; /* Size of the file name in bytes. */
193/*0x3a*/le16 file_name_offset; /* Offset to the file name in bytes from the
194 start of this record. */
195/*0x3c*/ntfschar file_name[0]; /* Use when creating only. When reading use
196 file_name_offset to determine the location
197 of the name. */
198/* sizeof() = 60 (0x3c) bytes */
199} __attribute__ ((__packed__)) USN_RECORD;
200
201extern BOOL ntfs_stamp_usnjrnl(ntfs_volume *vol);
202
203#endif /* NTFS_RW */
204
205#endif /* _LINUX_NTFS_USNJRNL_H */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 4b97fa8635a8..375cd20a9f61 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -2,7 +2,7 @@
2 * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part 2 * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
3 * of the Linux-NTFS project. 3 * of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2005 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -54,7 +54,7 @@ typedef struct {
54 mode_t dmask; /* The mask for directory 54 mode_t dmask; /* The mask for directory
55 permissions. */ 55 permissions. */
56 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ 56 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */
57 u8 on_errors; /* What to do on file system errors. */ 57 u8 on_errors; /* What to do on filesystem errors. */
58 /* NTFS bootsector provided information. */ 58 /* NTFS bootsector provided information. */
59 u16 sector_size; /* in bytes */ 59 u16 sector_size; /* in bytes */
60 u8 sector_size_bits; /* log2(sector_size) */ 60 u8 sector_size_bits; /* log2(sector_size) */
@@ -125,6 +125,10 @@ typedef struct {
125 /* $Quota stuff is NTFS3.0+ specific. Unused/NULL otherwise. */ 125 /* $Quota stuff is NTFS3.0+ specific. Unused/NULL otherwise. */
126 struct inode *quota_ino; /* The VFS inode of $Quota. */ 126 struct inode *quota_ino; /* The VFS inode of $Quota. */
127 struct inode *quota_q_ino; /* Attribute inode for $Quota/$Q. */ 127 struct inode *quota_q_ino; /* Attribute inode for $Quota/$Q. */
128 /* $UsnJrnl stuff is NTFS3.0+ specific. Unused/NULL otherwise. */
129 struct inode *usnjrnl_ino; /* The VFS inode of $UsnJrnl. */
130 struct inode *usnjrnl_max_ino; /* Attribute inode for $UsnJrnl/$Max. */
131 struct inode *usnjrnl_j_ino; /* Attribute inode for $UsnJrnl/$J. */
128#endif /* NTFS_RW */ 132#endif /* NTFS_RW */
129 struct nls_table *nls_map; 133 struct nls_table *nls_map;
130} ntfs_volume; 134} ntfs_volume;
@@ -141,6 +145,8 @@ typedef enum {
141 file names in WIN32 namespace. */ 145 file names in WIN32 namespace. */
142 NV_LogFileEmpty, /* 1: $LogFile journal is empty. */ 146 NV_LogFileEmpty, /* 1: $LogFile journal is empty. */
143 NV_QuotaOutOfDate, /* 1: $Quota is out of date. */ 147 NV_QuotaOutOfDate, /* 1: $Quota is out of date. */
148 NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */
149 NV_SparseEnabled, /* 1: May create sparse files. */
144} ntfs_volume_flags; 150} ntfs_volume_flags;
145 151
146/* 152/*
@@ -167,5 +173,7 @@ NVOL_FNS(ShowSystemFiles)
167NVOL_FNS(CaseSensitive) 173NVOL_FNS(CaseSensitive)
168NVOL_FNS(LogFileEmpty) 174NVOL_FNS(LogFileEmpty)
169NVOL_FNS(QuotaOutOfDate) 175NVOL_FNS(QuotaOutOfDate)
176NVOL_FNS(UsnJrnlStamped)
177NVOL_FNS(SparseEnabled)
170 178
171#endif /* _LINUX_NTFS_VOLUME_H */ 179#endif /* _LINUX_NTFS_VOLUME_H */
diff --git a/fs/open.c b/fs/open.c
index 963bd81a44c8..32bf05e2996d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -10,7 +10,7 @@
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/smp_lock.h> 11#include <linux/smp_lock.h>
12#include <linux/quotaops.h> 12#include <linux/quotaops.h>
13#include <linux/dnotify.h> 13#include <linux/fsnotify.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/tty.h> 16#include <linux/tty.h>
@@ -21,6 +21,7 @@
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/personality.h>
24#include <linux/pagemap.h> 25#include <linux/pagemap.h>
25#include <linux/syscalls.h> 26#include <linux/syscalls.h>
26 27
@@ -807,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
807 808
808 /* NB: we're sure to have correct a_ops only after f_op->open */ 809 /* NB: we're sure to have correct a_ops only after f_op->open */
809 if (f->f_flags & O_DIRECT) { 810 if (f->f_flags & O_DIRECT) {
810 if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) { 811 if (!f->f_mapping->a_ops ||
812 ((!f->f_mapping->a_ops->direct_IO) &&
813 (!f->f_mapping->a_ops->get_xip_page))) {
811 fput(f); 814 fput(f);
812 f = ERR_PTR(-EINVAL); 815 f = ERR_PTR(-EINVAL);
813 } 816 }
@@ -933,31 +936,28 @@ EXPORT_SYMBOL(fd_install);
933asmlinkage long sys_open(const char __user * filename, int flags, int mode) 936asmlinkage long sys_open(const char __user * filename, int flags, int mode)
934{ 937{
935 char * tmp; 938 char * tmp;
936 int fd, error; 939 int fd;
940
941 if (force_o_largefile())
942 flags |= O_LARGEFILE;
937 943
938#if BITS_PER_LONG != 32
939 flags |= O_LARGEFILE;
940#endif
941 tmp = getname(filename); 944 tmp = getname(filename);
942 fd = PTR_ERR(tmp); 945 fd = PTR_ERR(tmp);
943 if (!IS_ERR(tmp)) { 946 if (!IS_ERR(tmp)) {
944 fd = get_unused_fd(); 947 fd = get_unused_fd();
945 if (fd >= 0) { 948 if (fd >= 0) {
946 struct file *f = filp_open(tmp, flags, mode); 949 struct file *f = filp_open(tmp, flags, mode);
947 error = PTR_ERR(f); 950 if (IS_ERR(f)) {
948 if (IS_ERR(f)) 951 put_unused_fd(fd);
949 goto out_error; 952 fd = PTR_ERR(f);
950 fd_install(fd, f); 953 } else {
954 fsnotify_open(f->f_dentry);
955 fd_install(fd, f);
956 }
951 } 957 }
952out:
953 putname(tmp); 958 putname(tmp);
954 } 959 }
955 return fd; 960 return fd;
956
957out_error:
958 put_unused_fd(fd);
959 fd = error;
960 goto out;
961} 961}
962EXPORT_SYMBOL_GPL(sys_open); 962EXPORT_SYMBOL_GPL(sys_open);
963 963
@@ -980,23 +980,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode)
980 */ 980 */
981int filp_close(struct file *filp, fl_owner_t id) 981int filp_close(struct file *filp, fl_owner_t id)
982{ 982{
983 int retval; 983 int retval = 0;
984
985 /* Report and clear outstanding errors */
986 retval = filp->f_error;
987 if (retval)
988 filp->f_error = 0;
989 984
990 if (!file_count(filp)) { 985 if (!file_count(filp)) {
991 printk(KERN_ERR "VFS: Close: file count is 0\n"); 986 printk(KERN_ERR "VFS: Close: file count is 0\n");
992 return retval; 987 return 0;
993 } 988 }
994 989
995 if (filp->f_op && filp->f_op->flush) { 990 if (filp->f_op && filp->f_op->flush)
996 int err = filp->f_op->flush(filp); 991 retval = filp->f_op->flush(filp);
997 if (!retval)
998 retval = err;
999 }
1000 992
1001 dnotify_flush(filp, id); 993 dnotify_flush(filp, id);
1002 locks_remove_posix(filp, id); 994 locks_remove_posix(filp, id);
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index 4c83c17969e1..66d5cc26fafb 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -17,4 +17,3 @@ obj-$(CONFIG_SUN_PARTITION) += sun.o
17obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o 17obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
18obj-$(CONFIG_IBM_PARTITION) += ibm.o 18obj-$(CONFIG_IBM_PARTITION) += ibm.o
19obj-$(CONFIG_EFI_PARTITION) += efi.o 19obj-$(CONFIG_EFI_PARTITION) += efi.o
20obj-$(CONFIG_NEC98_PARTITION) += nec98.o msdos.o
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 2cab98a9a621..77e178f13162 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -79,9 +79,6 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
79#ifdef CONFIG_LDM_PARTITION 79#ifdef CONFIG_LDM_PARTITION
80 ldm_partition, /* this must come before msdos */ 80 ldm_partition, /* this must come before msdos */
81#endif 81#endif
82#ifdef CONFIG_NEC98_PARTITION
83 nec98_partition, /* must be come before `msdos_partition' */
84#endif
85#ifdef CONFIG_MSDOS_PARTITION 82#ifdef CONFIG_MSDOS_PARTITION
86 msdos_partition, 83 msdos_partition,
87#endif 84#endif
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 43adcc68e471..17ae8ecd9e8b 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -30,7 +30,3 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
30 30
31extern int warn_no_part; 31extern int warn_no_part;
32 32
33extern void parse_bsd(struct parsed_partitions *state,
34 struct block_device *bdev, u32 offset, u32 size,
35 int origin, char *flavour, int max_partitions);
36
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 584a27b2bbd5..9935d254186e 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -202,12 +202,12 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
202#endif 202#endif
203} 203}
204 204
205#if defined(CONFIG_BSD_DISKLABEL) || defined(CONFIG_NEC98_PARTITION) 205#if defined(CONFIG_BSD_DISKLABEL)
206/* 206/*
207 * Create devices for BSD partitions listed in a disklabel, under a 207 * Create devices for BSD partitions listed in a disklabel, under a
208 * dos-like partition. See parse_extended() for more information. 208 * dos-like partition. See parse_extended() for more information.
209 */ 209 */
210void 210static void
211parse_bsd(struct parsed_partitions *state, struct block_device *bdev, 211parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
212 u32 offset, u32 size, int origin, char *flavour, 212 u32 offset, u32 size, int origin, char *flavour,
213 int max_partitions) 213 int max_partitions)
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 738b9b602932..7431d7ba2d09 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -11,4 +11,5 @@ proc-y += inode.o root.o base.o generic.o array.o \
11 kmsg.o proc_tty.o proc_misc.o 11 kmsg.o proc_tty.o proc_misc.o
12 12
13proc-$(CONFIG_PROC_KCORE) += kcore.o 13proc-$(CONFIG_PROC_KCORE) += kcore.o
14proc-$(CONFIG_PROC_VMCORE) += vmcore.o
14proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o 15proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31903aadd96..491f2d9f89ac 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task)
314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) 314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
315 goto out; 315 goto out;
316 rmb(); 316 rmb();
317 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) 317 if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
318 goto out; 318 goto out;
319 if (security_ptrace(current, task)) 319 if (security_ptrace(current, task))
320 goto out; 320 goto out;
@@ -890,7 +890,7 @@ static struct file_operations proc_seccomp_operations = {
890}; 890};
891#endif /* CONFIG_SECCOMP */ 891#endif /* CONFIG_SECCOMP */
892 892
893static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 893static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
894{ 894{
895 struct inode *inode = dentry->d_inode; 895 struct inode *inode = dentry->d_inode;
896 int error = -EACCES; 896 int error = -EACCES;
@@ -907,7 +907,7 @@ static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
907 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 907 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
908 nd->last_type = LAST_BIND; 908 nd->last_type = LAST_BIND;
909out: 909out:
910 return error; 910 return ERR_PTR(error);
911} 911}
912 912
913static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 913static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
@@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task)
1113 if (mm) 1113 if (mm)
1114 dumpable = mm->dumpable; 1114 dumpable = mm->dumpable;
1115 task_unlock(task); 1115 task_unlock(task);
1116 return dumpable; 1116 if(dumpable == 1)
1117 return 1;
1118 return 0;
1117} 1119}
1118 1120
1119 1121
@@ -1690,11 +1692,11 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1690 return vfs_readlink(dentry,buffer,buflen,tmp); 1692 return vfs_readlink(dentry,buffer,buflen,tmp);
1691} 1693}
1692 1694
1693static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1695static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1694{ 1696{
1695 char tmp[30]; 1697 char tmp[30];
1696 sprintf(tmp, "%d", current->tgid); 1698 sprintf(tmp, "%d", current->tgid);
1697 return vfs_follow_link(nd,tmp); 1699 return ERR_PTR(vfs_follow_link(nd,tmp));
1698} 1700}
1699 1701
1700static struct inode_operations proc_self_inode_operations = { 1702static struct inode_operations proc_self_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6c6315d04028..abe8920313fb 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -329,10 +329,10 @@ static void release_inode_number(unsigned int inum)
329 spin_unlock(&proc_inum_lock); 329 spin_unlock(&proc_inum_lock);
330} 330}
331 331
332static int proc_follow_link(struct dentry *dentry, struct nameidata *nd) 332static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
333{ 333{
334 nd_set_link(nd, PDE(dentry->d_inode)->data); 334 nd_set_link(nd, PDE(dentry->d_inode)->data);
335 return 0; 335 return NULL;
336} 336}
337 337
338static struct inode_operations proc_link_inode_operations = { 338static struct inode_operations proc_link_inode_operations = {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 63a9fbf1ac51..a3453555a94e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -44,6 +44,7 @@
44#include <linux/jiffies.h> 44#include <linux/jiffies.h>
45#include <linux/sysrq.h> 45#include <linux/sysrq.h>
46#include <linux/vmalloc.h> 46#include <linux/vmalloc.h>
47#include <linux/crash_dump.h>
47#include <asm/uaccess.h> 48#include <asm/uaccess.h>
48#include <asm/pgtable.h> 49#include <asm/pgtable.h>
49#include <asm/io.h> 50#include <asm/io.h>
@@ -451,7 +452,7 @@ static int devices_read_proc(char *page, char **start, off_t off,
451 int count, int *eof, void *data) 452 int count, int *eof, void *data)
452{ 453{
453 int len = get_chrdev_list(page); 454 int len = get_chrdev_list(page);
454 len += get_blkdev_list(page+len); 455 len += get_blkdev_list(page+len, len);
455 return proc_calc_metrics(page, start, off, count, eof, len); 456 return proc_calc_metrics(page, start, off, count, eof, len);
456} 457}
457 458
@@ -618,6 +619,11 @@ void __init proc_misc_init(void)
618 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 619 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
619 } 620 }
620#endif 621#endif
622#ifdef CONFIG_PROC_VMCORE
623 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
624 if (proc_vmcore)
625 proc_vmcore->proc_fops = &proc_vmcore_operations;
626#endif
621#ifdef CONFIG_MAGIC_SYSRQ 627#ifdef CONFIG_MAGIC_SYSRQ
622 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); 628 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
623 if (entry) 629 if (entry)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
new file mode 100644
index 000000000000..3b2e7b69e63a
--- /dev/null
+++ b/fs/proc/vmcore.c
@@ -0,0 +1,669 @@
1/*
2 * fs/proc/vmcore.c Interface for accessing the crash
3 * dump from the system's previous life.
4 * Heavily borrowed from fs/proc/kcore.c
5 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
6 * Copyright (C) IBM Corporation, 2004. All rights reserved
7 *
8 */
9
10#include <linux/config.h>
11#include <linux/mm.h>
12#include <linux/proc_fs.h>
13#include <linux/user.h>
14#include <linux/a.out.h>
15#include <linux/elf.h>
16#include <linux/elfcore.h>
17#include <linux/proc_fs.h>
18#include <linux/highmem.h>
19#include <linux/bootmem.h>
20#include <linux/init.h>
21#include <linux/crash_dump.h>
22#include <linux/list.h>
23#include <asm/uaccess.h>
24#include <asm/io.h>
25
26/* List representing chunks of contiguous memory areas and their offsets in
27 * vmcore file.
28 */
29static LIST_HEAD(vmcore_list);
30
31/* Stores the pointer to the buffer containing kernel elf core headers. */
32static char *elfcorebuf;
33static size_t elfcorebuf_sz;
34
35/* Total size of vmcore file. */
36static u64 vmcore_size;
37
38struct proc_dir_entry *proc_vmcore = NULL;
39
40/* Reads a page from the oldmem device from given offset. */
41static ssize_t read_from_oldmem(char *buf, size_t count,
42 loff_t *ppos, int userbuf)
43{
44 unsigned long pfn, offset;
45 size_t nr_bytes;
46 ssize_t read = 0, tmp;
47
48 if (!count)
49 return 0;
50
51 offset = (unsigned long)(*ppos % PAGE_SIZE);
52 pfn = (unsigned long)(*ppos / PAGE_SIZE);
53 if (pfn > saved_max_pfn)
54 return -EINVAL;
55
56 do {
57 if (count > (PAGE_SIZE - offset))
58 nr_bytes = PAGE_SIZE - offset;
59 else
60 nr_bytes = count;
61
62 tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf);
63 if (tmp < 0)
64 return tmp;
65 *ppos += nr_bytes;
66 count -= nr_bytes;
67 buf += nr_bytes;
68 read += nr_bytes;
69 ++pfn;
70 offset = 0;
71 } while (count);
72
73 return read;
74}
75
 76/* Maps vmcore file offset to respective physical address in memory. */
77static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list,
78 struct vmcore **m_ptr)
79{
80 struct vmcore *m;
81 u64 paddr;
82
83 list_for_each_entry(m, vc_list, list) {
84 u64 start, end;
85 start = m->offset;
86 end = m->offset + m->size - 1;
87 if (offset >= start && offset <= end) {
88 paddr = m->paddr + offset - start;
89 *m_ptr = m;
90 return paddr;
91 }
92 }
93 *m_ptr = NULL;
94 return 0;
95}
96
97/* Read from the ELF header and then the crash dump. On error, negative value is
98 * returned otherwise number of bytes read are returned.
99 */
100static ssize_t read_vmcore(struct file *file, char __user *buffer,
101 size_t buflen, loff_t *fpos)
102{
103 ssize_t acc = 0, tmp;
104 size_t tsz, nr_bytes;
105 u64 start;
106 struct vmcore *curr_m = NULL;
107
108 if (buflen == 0 || *fpos >= vmcore_size)
109 return 0;
110
111 /* trim buflen to not go beyond EOF */
112 if (buflen > vmcore_size - *fpos)
113 buflen = vmcore_size - *fpos;
114
115 /* Read ELF core header */
116 if (*fpos < elfcorebuf_sz) {
117 tsz = elfcorebuf_sz - *fpos;
118 if (buflen < tsz)
119 tsz = buflen;
120 if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
121 return -EFAULT;
122 buflen -= tsz;
123 *fpos += tsz;
124 buffer += tsz;
125 acc += tsz;
126
127 /* leave now if filled buffer already */
128 if (buflen == 0)
129 return acc;
130 }
131
132 start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
133 if (!curr_m)
134 return -EINVAL;
135 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
136 tsz = buflen;
137
138 /* Calculate left bytes in current memory segment. */
139 nr_bytes = (curr_m->size - (start - curr_m->paddr));
140 if (tsz > nr_bytes)
141 tsz = nr_bytes;
142
143 while (buflen) {
144 tmp = read_from_oldmem(buffer, tsz, &start, 1);
145 if (tmp < 0)
146 return tmp;
147 buflen -= tsz;
148 *fpos += tsz;
149 buffer += tsz;
150 acc += tsz;
151 if (start >= (curr_m->paddr + curr_m->size)) {
152 if (curr_m->list.next == &vmcore_list)
153 return acc; /*EOF*/
154 curr_m = list_entry(curr_m->list.next,
155 struct vmcore, list);
156 start = curr_m->paddr;
157 }
158 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
159 tsz = buflen;
160 /* Calculate left bytes in current memory segment. */
161 nr_bytes = (curr_m->size - (start - curr_m->paddr));
162 if (tsz > nr_bytes)
163 tsz = nr_bytes;
164 }
165 return acc;
166}
167
168static int open_vmcore(struct inode *inode, struct file *filp)
169{
170 return 0;
171}
172
173struct file_operations proc_vmcore_operations = {
174 .read = read_vmcore,
175 .open = open_vmcore,
176};
177
178static struct vmcore* __init get_new_element(void)
179{
180 struct vmcore *p;
181
182 p = kmalloc(sizeof(*p), GFP_KERNEL);
183 if (p)
184 memset(p, 0, sizeof(*p));
185 return p;
186}
187
188static u64 __init get_vmcore_size_elf64(char *elfptr)
189{
190 int i;
191 u64 size;
192 Elf64_Ehdr *ehdr_ptr;
193 Elf64_Phdr *phdr_ptr;
194
195 ehdr_ptr = (Elf64_Ehdr *)elfptr;
196 phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
197 size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr));
198 for (i = 0; i < ehdr_ptr->e_phnum; i++) {
199 size += phdr_ptr->p_memsz;
200 phdr_ptr++;
201 }
202 return size;
203}
204
205static u64 __init get_vmcore_size_elf32(char *elfptr)
206{
207 int i;
208 u64 size;
209 Elf32_Ehdr *ehdr_ptr;
210 Elf32_Phdr *phdr_ptr;
211
212 ehdr_ptr = (Elf32_Ehdr *)elfptr;
213 phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
214 size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr));
215 for (i = 0; i < ehdr_ptr->e_phnum; i++) {
216 size += phdr_ptr->p_memsz;
217 phdr_ptr++;
218 }
219 return size;
220}
221
222/* Merges all the PT_NOTE headers into one. */
223static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
224 struct list_head *vc_list)
225{
226 int i, nr_ptnote=0, rc=0;
227 char *tmp;
228 Elf64_Ehdr *ehdr_ptr;
229 Elf64_Phdr phdr, *phdr_ptr;
230 Elf64_Nhdr *nhdr_ptr;
231 u64 phdr_sz = 0, note_off;
232
233 ehdr_ptr = (Elf64_Ehdr *)elfptr;
234 phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
235 for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
236 int j;
237 void *notes_section;
238 struct vmcore *new;
239 u64 offset, max_sz, sz, real_sz = 0;
240 if (phdr_ptr->p_type != PT_NOTE)
241 continue;
242 nr_ptnote++;
243 max_sz = phdr_ptr->p_memsz;
244 offset = phdr_ptr->p_offset;
245 notes_section = kmalloc(max_sz, GFP_KERNEL);
246 if (!notes_section)
247 return -ENOMEM;
248 rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
249 if (rc < 0) {
250 kfree(notes_section);
251 return rc;
252 }
253 nhdr_ptr = notes_section;
254 for (j = 0; j < max_sz; j += sz) {
255 if (nhdr_ptr->n_namesz == 0)
256 break;
257 sz = sizeof(Elf64_Nhdr) +
258 ((nhdr_ptr->n_namesz + 3) & ~3) +
259 ((nhdr_ptr->n_descsz + 3) & ~3);
260 real_sz += sz;
261 nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
262 }
263
264 /* Add this contiguous chunk of notes section to vmcore list.*/
265 new = get_new_element();
266 if (!new) {
267 kfree(notes_section);
268 return -ENOMEM;
269 }
270 new->paddr = phdr_ptr->p_offset;
271 new->size = real_sz;
272 list_add_tail(&new->list, vc_list);
273 phdr_sz += real_sz;
274 kfree(notes_section);
275 }
276
277 /* Prepare merged PT_NOTE program header. */
278 phdr.p_type = PT_NOTE;
279 phdr.p_flags = 0;
280 note_off = sizeof(Elf64_Ehdr) +
281 (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
282 phdr.p_offset = note_off;
283 phdr.p_vaddr = phdr.p_paddr = 0;
284 phdr.p_filesz = phdr.p_memsz = phdr_sz;
285 phdr.p_align = 0;
286
287 /* Add merged PT_NOTE program header*/
288 tmp = elfptr + sizeof(Elf64_Ehdr);
289 memcpy(tmp, &phdr, sizeof(phdr));
290 tmp += sizeof(phdr);
291
292 /* Remove unwanted PT_NOTE program headers. */
293 i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
294 *elfsz = *elfsz - i;
295 memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
296
297 /* Modify e_phnum to reflect merged headers. */
298 ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
299
300 return 0;
301}
302
303/* Merges all the PT_NOTE headers into one. */
304static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
305 struct list_head *vc_list)
306{
307 int i, nr_ptnote=0, rc=0;
308 char *tmp;
309 Elf32_Ehdr *ehdr_ptr;
310 Elf32_Phdr phdr, *phdr_ptr;
311 Elf32_Nhdr *nhdr_ptr;
312 u64 phdr_sz = 0, note_off;
313
314 ehdr_ptr = (Elf32_Ehdr *)elfptr;
315 phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
316 for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
317 int j;
318 void *notes_section;
319 struct vmcore *new;
320 u64 offset, max_sz, sz, real_sz = 0;
321 if (phdr_ptr->p_type != PT_NOTE)
322 continue;
323 nr_ptnote++;
324 max_sz = phdr_ptr->p_memsz;
325 offset = phdr_ptr->p_offset;
326 notes_section = kmalloc(max_sz, GFP_KERNEL);
327 if (!notes_section)
328 return -ENOMEM;
329 rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
330 if (rc < 0) {
331 kfree(notes_section);
332 return rc;
333 }
334 nhdr_ptr = notes_section;
335 for (j = 0; j < max_sz; j += sz) {
336 if (nhdr_ptr->n_namesz == 0)
337 break;
338 sz = sizeof(Elf32_Nhdr) +
339 ((nhdr_ptr->n_namesz + 3) & ~3) +
340 ((nhdr_ptr->n_descsz + 3) & ~3);
341 real_sz += sz;
342 nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
343 }
344
345 /* Add this contiguous chunk of notes section to vmcore list.*/
346 new = get_new_element();
347 if (!new) {
348 kfree(notes_section);
349 return -ENOMEM;
350 }
351 new->paddr = phdr_ptr->p_offset;
352 new->size = real_sz;
353 list_add_tail(&new->list, vc_list);
354 phdr_sz += real_sz;
355 kfree(notes_section);
356 }
357
358 /* Prepare merged PT_NOTE program header. */
359 phdr.p_type = PT_NOTE;
360 phdr.p_flags = 0;
361 note_off = sizeof(Elf32_Ehdr) +
362 (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
363 phdr.p_offset = note_off;
364 phdr.p_vaddr = phdr.p_paddr = 0;
365 phdr.p_filesz = phdr.p_memsz = phdr_sz;
366 phdr.p_align = 0;
367
368 /* Add merged PT_NOTE program header*/
369 tmp = elfptr + sizeof(Elf32_Ehdr);
370 memcpy(tmp, &phdr, sizeof(phdr));
371 tmp += sizeof(phdr);
372
373 /* Remove unwanted PT_NOTE program headers. */
374 i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
375 *elfsz = *elfsz - i;
376 memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
377
378 /* Modify e_phnum to reflect merged headers. */
379 ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
380
381 return 0;
382}
383
384/* Add memory chunks represented by program headers to vmcore list. Also update
385 * the new offset fields of exported program headers. */
386static int __init process_ptload_program_headers_elf64(char *elfptr,
387 size_t elfsz,
388 struct list_head *vc_list)
389{
390 int i;
391 Elf64_Ehdr *ehdr_ptr;
392 Elf64_Phdr *phdr_ptr;
393 loff_t vmcore_off;
394 struct vmcore *new;
395
396 ehdr_ptr = (Elf64_Ehdr *)elfptr;
397 phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
398
399 /* First program header is PT_NOTE header. */
400 vmcore_off = sizeof(Elf64_Ehdr) +
401 (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) +
402 phdr_ptr->p_memsz; /* Note sections */
403
404 for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
405 if (phdr_ptr->p_type != PT_LOAD)
406 continue;
407
408 /* Add this contiguous chunk of memory to vmcore list.*/
409 new = get_new_element();
410 if (!new)
411 return -ENOMEM;
412 new->paddr = phdr_ptr->p_offset;
413 new->size = phdr_ptr->p_memsz;
414 list_add_tail(&new->list, vc_list);
415
416 /* Update the program header offset. */
417 phdr_ptr->p_offset = vmcore_off;
418 vmcore_off = vmcore_off + phdr_ptr->p_memsz;
419 }
420 return 0;
421}
422
423static int __init process_ptload_program_headers_elf32(char *elfptr,
424 size_t elfsz,
425 struct list_head *vc_list)
426{
427 int i;
428 Elf32_Ehdr *ehdr_ptr;
429 Elf32_Phdr *phdr_ptr;
430 loff_t vmcore_off;
431 struct vmcore *new;
432
433 ehdr_ptr = (Elf32_Ehdr *)elfptr;
434 phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
435
436 /* First program header is PT_NOTE header. */
437 vmcore_off = sizeof(Elf32_Ehdr) +
438 (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) +
439 phdr_ptr->p_memsz; /* Note sections */
440
441 for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
442 if (phdr_ptr->p_type != PT_LOAD)
443 continue;
444
445 /* Add this contiguous chunk of memory to vmcore list.*/
446 new = get_new_element();
447 if (!new)
448 return -ENOMEM;
449 new->paddr = phdr_ptr->p_offset;
450 new->size = phdr_ptr->p_memsz;
451 list_add_tail(&new->list, vc_list);
452
453 /* Update the program header offset */
454 phdr_ptr->p_offset = vmcore_off;
455 vmcore_off = vmcore_off + phdr_ptr->p_memsz;
456 }
457 return 0;
458}
459
460/* Sets offset fields of vmcore elements. */
461static void __init set_vmcore_list_offsets_elf64(char *elfptr,
462 struct list_head *vc_list)
463{
464 loff_t vmcore_off;
465 Elf64_Ehdr *ehdr_ptr;
466 struct vmcore *m;
467
468 ehdr_ptr = (Elf64_Ehdr *)elfptr;
469
470 /* Skip Elf header and program headers. */
471 vmcore_off = sizeof(Elf64_Ehdr) +
472 (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr);
473
474 list_for_each_entry(m, vc_list, list) {
475 m->offset = vmcore_off;
476 vmcore_off += m->size;
477 }
478}
479
480/* Sets offset fields of vmcore elements. */
481static void __init set_vmcore_list_offsets_elf32(char *elfptr,
482 struct list_head *vc_list)
483{
484 loff_t vmcore_off;
485 Elf32_Ehdr *ehdr_ptr;
486 struct vmcore *m;
487
488 ehdr_ptr = (Elf32_Ehdr *)elfptr;
489
490 /* Skip Elf header and program headers. */
491 vmcore_off = sizeof(Elf32_Ehdr) +
492 (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr);
493
494 list_for_each_entry(m, vc_list, list) {
495 m->offset = vmcore_off;
496 vmcore_off += m->size;
497 }
498}
499
500static int __init parse_crash_elf64_headers(void)
501{
502 int rc=0;
503 Elf64_Ehdr ehdr;
504 u64 addr;
505
506 addr = elfcorehdr_addr;
507
508 /* Read Elf header */
509 rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0);
510 if (rc < 0)
511 return rc;
512
513 /* Do some basic Verification. */
514 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
515 (ehdr.e_type != ET_CORE) ||
516 !elf_check_arch(&ehdr) ||
517 ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
518 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
519 ehdr.e_version != EV_CURRENT ||
520 ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
521 ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
522 ehdr.e_phnum == 0) {
523 printk(KERN_WARNING "Warning: Core image elf header is not"
524 "sane\n");
525 return -EINVAL;
526 }
527
528 /* Read in all elf headers. */
529 elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr);
530 elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
531 if (!elfcorebuf)
532 return -ENOMEM;
533 addr = elfcorehdr_addr;
534 rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
535 if (rc < 0) {
536 kfree(elfcorebuf);
537 return rc;
538 }
539
540 /* Merge all PT_NOTE headers into one. */
541 rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
542 if (rc) {
543 kfree(elfcorebuf);
544 return rc;
545 }
546 rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
547 &vmcore_list);
548 if (rc) {
549 kfree(elfcorebuf);
550 return rc;
551 }
552 set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list);
553 return 0;
554}
555
556static int __init parse_crash_elf32_headers(void)
557{
558 int rc=0;
559 Elf32_Ehdr ehdr;
560 u64 addr;
561
562 addr = elfcorehdr_addr;
563
564 /* Read Elf header */
565 rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0);
566 if (rc < 0)
567 return rc;
568
569 /* Do some basic Verification. */
570 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
571 (ehdr.e_type != ET_CORE) ||
572 !elf_check_arch(&ehdr) ||
573 ehdr.e_ident[EI_CLASS] != ELFCLASS32||
574 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
575 ehdr.e_version != EV_CURRENT ||
576 ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
577 ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
578 ehdr.e_phnum == 0) {
579 printk(KERN_WARNING "Warning: Core image elf header is not"
580 "sane\n");
581 return -EINVAL;
582 }
583
584 /* Read in all elf headers. */
585 elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
586 elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
587 if (!elfcorebuf)
588 return -ENOMEM;
589 addr = elfcorehdr_addr;
590 rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
591 if (rc < 0) {
592 kfree(elfcorebuf);
593 return rc;
594 }
595
596 /* Merge all PT_NOTE headers into one. */
597 rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
598 if (rc) {
599 kfree(elfcorebuf);
600 return rc;
601 }
602 rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
603 &vmcore_list);
604 if (rc) {
605 kfree(elfcorebuf);
606 return rc;
607 }
608 set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list);
609 return 0;
610}
611
612static int __init parse_crash_elf_headers(void)
613{
614 unsigned char e_ident[EI_NIDENT];
615 u64 addr;
616 int rc=0;
617
618 addr = elfcorehdr_addr;
619 rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0);
620 if (rc < 0)
621 return rc;
622 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
623 printk(KERN_WARNING "Warning: Core image elf header"
624 " not found\n");
625 return -EINVAL;
626 }
627
628 if (e_ident[EI_CLASS] == ELFCLASS64) {
629 rc = parse_crash_elf64_headers();
630 if (rc)
631 return rc;
632
633 /* Determine vmcore size. */
634 vmcore_size = get_vmcore_size_elf64(elfcorebuf);
635 } else if (e_ident[EI_CLASS] == ELFCLASS32) {
636 rc = parse_crash_elf32_headers();
637 if (rc)
638 return rc;
639
640 /* Determine vmcore size. */
641 vmcore_size = get_vmcore_size_elf32(elfcorebuf);
642 } else {
643 printk(KERN_WARNING "Warning: Core image elf header is not"
644 " sane\n");
645 return -EINVAL;
646 }
647 return 0;
648}
649
650/* Init function for vmcore module. */
651static int __init vmcore_init(void)
652{
653 int rc = 0;
654
655 /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
656 if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX))
657 return rc;
658 rc = parse_crash_elf_headers();
659 if (rc) {
660 printk(KERN_WARNING "Kdump: vmcore not initialized\n");
661 return rc;
662 }
663
664 /* Initialize /proc/vmcore size if proc is already up. */
665 if (proc_vmcore)
666 proc_vmcore->size = vmcore_size;
667 return 0;
668}
669module_init(vmcore_init)
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index cd66147cca04..7a8f5595c26f 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
61 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; 61 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
62 else { 62 else {
63 le = (struct qnx4_link_info*)de; 63 le = (struct qnx4_link_info*)de;
64 ino = ( le->dl_inode_blk - 1 ) * 64 ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
65 QNX4_INODES_PER_BLOCK + 65 QNX4_INODES_PER_BLOCK +
66 le->dl_inode_ndx; 66 le->dl_inode_ndx;
67 } 67 }
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index aa92d6b76a9a..b79162a35478 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
236 struct buffer_head *bh = NULL; 236 struct buffer_head *bh = NULL;
237 struct qnx4_xblk *xblk = NULL; 237 struct qnx4_xblk *xblk = NULL;
238 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); 238 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
239 qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); 239 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
240 240
241 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { 241 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
242 // iblock is in the first extent. This is easy. 242 // iblock is in the first extent. This is easy.
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
372 printk("qnx4: unable to read the superblock\n"); 372 printk("qnx4: unable to read the superblock\n");
373 goto outnobh; 373 goto outnobh;
374 } 374 }
375 if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) { 375 if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
376 if (!silent) 376 if (!silent)
377 printk("qnx4: wrong fsid in superblock.\n"); 377 printk("qnx4: wrong fsid in superblock.\n");
378 goto out; 378 goto out;
diff --git a/fs/quota.c b/fs/quota.c
index 3f0333a51a23..f5d1cff55196 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
149 return error; 149 return error;
150} 150}
151 151
152static struct super_block *get_super_to_sync(int type)
153{
154 struct list_head *head;
155 int cnt, dirty;
156
157restart:
158 spin_lock(&sb_lock);
159 list_for_each(head, &super_blocks) {
160 struct super_block *sb = list_entry(head, struct super_block, s_list);
161
162 /* This test just improves performance so it needn't be reliable... */
163 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
164 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
165 && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
166 dirty = 1;
167 if (!dirty)
168 continue;
169 sb->s_count++;
170 spin_unlock(&sb_lock);
171 down_read(&sb->s_umount);
172 if (!sb->s_root) {
173 drop_super(sb);
174 goto restart;
175 }
176 return sb;
177 }
178 spin_unlock(&sb_lock);
179 return NULL;
180}
181
182static void quota_sync_sb(struct super_block *sb, int type) 152static void quota_sync_sb(struct super_block *sb, int type)
183{ 153{
184 int cnt; 154 int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
219 189
220void sync_dquots(struct super_block *sb, int type) 190void sync_dquots(struct super_block *sb, int type)
221{ 191{
192 int cnt, dirty;
193
222 if (sb) { 194 if (sb) {
223 if (sb->s_qcop->quota_sync) 195 if (sb->s_qcop->quota_sync)
224 quota_sync_sb(sb, type); 196 quota_sync_sb(sb, type);
197 return;
225 } 198 }
226 else { 199
227 while ((sb = get_super_to_sync(type)) != NULL) { 200 spin_lock(&sb_lock);
228 if (sb->s_qcop->quota_sync) 201restart:
229 quota_sync_sb(sb, type); 202 list_for_each_entry(sb, &super_blocks, s_list) {
230 drop_super(sb); 203 /* This test just improves performance so it needn't be reliable... */
231 } 204 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
205 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
206 && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
207 dirty = 1;
208 if (!dirty)
209 continue;
210 sb->s_count++;
211 spin_unlock(&sb_lock);
212 down_read(&sb->s_umount);
213 if (sb->s_root && sb->s_qcop->quota_sync)
214 quota_sync_sb(sb, type);
215 up_read(&sb->s_umount);
216 spin_lock(&sb_lock);
217 if (__put_super_and_need_restart(sb))
218 goto restart;
232 } 219 }
220 spin_unlock(&sb_lock);
233} 221}
234 222
235/* Copy parameters and call proper function */ 223/* Copy parameters and call proper function */
diff --git a/fs/read_write.c b/fs/read_write.c
index c4c2bee373ed..563abd09b5c8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -10,7 +10,7 @@
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/dnotify.h> 13#include <linux/fsnotify.h>
14#include <linux/security.h> 14#include <linux/security.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
@@ -203,6 +203,16 @@ Einval:
203 return -EINVAL; 203 return -EINVAL;
204} 204}
205 205
206static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
207{
208 set_current_state(TASK_UNINTERRUPTIBLE);
209 if (!kiocbIsKicked(iocb))
210 schedule();
211 else
212 kiocbClearKicked(iocb);
213 __set_current_state(TASK_RUNNING);
214}
215
206ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 216ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
207{ 217{
208 struct kiocb kiocb; 218 struct kiocb kiocb;
@@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
210 220
211 init_sync_kiocb(&kiocb, filp); 221 init_sync_kiocb(&kiocb, filp);
212 kiocb.ki_pos = *ppos; 222 kiocb.ki_pos = *ppos;
213 ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos); 223 while (-EIOCBRETRY ==
224 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
225 wait_on_retry_sync_kiocb(&kiocb);
226
214 if (-EIOCBQUEUED == ret) 227 if (-EIOCBQUEUED == ret)
215 ret = wait_on_sync_kiocb(&kiocb); 228 ret = wait_on_sync_kiocb(&kiocb);
216 *ppos = kiocb.ki_pos; 229 *ppos = kiocb.ki_pos;
@@ -239,7 +252,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
239 else 252 else
240 ret = do_sync_read(file, buf, count, pos); 253 ret = do_sync_read(file, buf, count, pos);
241 if (ret > 0) { 254 if (ret > 0) {
242 dnotify_parent(file->f_dentry, DN_ACCESS); 255 fsnotify_access(file->f_dentry);
243 current->rchar += ret; 256 current->rchar += ret;
244 } 257 }
245 current->syscr++; 258 current->syscr++;
@@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
258 271
259 init_sync_kiocb(&kiocb, filp); 272 init_sync_kiocb(&kiocb, filp);
260 kiocb.ki_pos = *ppos; 273 kiocb.ki_pos = *ppos;
261 ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos); 274 while (-EIOCBRETRY ==
275 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
276 wait_on_retry_sync_kiocb(&kiocb);
277
262 if (-EIOCBQUEUED == ret) 278 if (-EIOCBQUEUED == ret)
263 ret = wait_on_sync_kiocb(&kiocb); 279 ret = wait_on_sync_kiocb(&kiocb);
264 *ppos = kiocb.ki_pos; 280 *ppos = kiocb.ki_pos;
@@ -287,7 +303,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
287 else 303 else
288 ret = do_sync_write(file, buf, count, pos); 304 ret = do_sync_write(file, buf, count, pos);
289 if (ret > 0) { 305 if (ret > 0) {
290 dnotify_parent(file->f_dentry, DN_MODIFY); 306 fsnotify_modify(file->f_dentry);
291 current->wchar += ret; 307 current->wchar += ret;
292 } 308 }
293 current->syscw++; 309 current->syscw++;
@@ -523,9 +539,12 @@ static ssize_t do_readv_writev(int type, struct file *file,
523out: 539out:
524 if (iov != iovstack) 540 if (iov != iovstack)
525 kfree(iov); 541 kfree(iov);
526 if ((ret + (type == READ)) > 0) 542 if ((ret + (type == READ)) > 0) {
527 dnotify_parent(file->f_dentry, 543 if (type == READ)
528 (type == READ) ? DN_ACCESS : DN_MODIFY); 544 fsnotify_access(file->f_dentry);
545 else
546 fsnotify_modify(file->f_dentry);
547 }
529 return ret; 548 return ret;
530Efault: 549Efault:
531 ret = -EFAULT; 550 ret = -EFAULT;
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 49c479c9454a..909f71e9a30f 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -46,1125 +46,1221 @@
46#define TEST_OPTION(optname, s) \ 46#define TEST_OPTION(optname, s) \
47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) 47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48 48
49static inline void get_bit_address (struct super_block * s, 49static inline void get_bit_address(struct super_block *s,
50 b_blocknr_t block, int * bmap_nr, int * offset) 50 b_blocknr_t block, int *bmap_nr, int *offset)
51{ 51{
52 /* It is in the bitmap block number equal to the block 52 /* It is in the bitmap block number equal to the block
53 * number divided by the number of bits in a block. */ 53 * number divided by the number of bits in a block. */
54 *bmap_nr = block / (s->s_blocksize << 3); 54 *bmap_nr = block / (s->s_blocksize << 3);
55 /* Within that bitmap block it is located at bit offset *offset. */ 55 /* Within that bitmap block it is located at bit offset *offset. */
56 *offset = block & ((s->s_blocksize << 3) - 1 ); 56 *offset = block & ((s->s_blocksize << 3) - 1);
57 return; 57 return;
58} 58}
59 59
60#ifdef CONFIG_REISERFS_CHECK 60#ifdef CONFIG_REISERFS_CHECK
61int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value) 61int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
62{ 62{
63 int i, j; 63 int i, j;
64 64
65 if (block == 0 || block >= SB_BLOCK_COUNT (s)) { 65 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
66 reiserfs_warning (s, "vs-4010: is_reusable: block number is out of range %lu (%u)", 66 reiserfs_warning(s,
67 block, SB_BLOCK_COUNT (s)); 67 "vs-4010: is_reusable: block number is out of range %lu (%u)",
68 return 0; 68 block, SB_BLOCK_COUNT(s));
69 } 69 return 0;
70
71 /* it can't be one of the bitmap blocks */
72 for (i = 0; i < SB_BMAP_NR (s); i ++)
73 if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) {
74 reiserfs_warning (s, "vs: 4020: is_reusable: "
75 "bitmap block %lu(%u) can't be freed or reused",
76 block, SB_BMAP_NR (s));
77 return 0;
78 } 70 }
79
80 get_bit_address (s, block, &i, &j);
81 71
82 if (i >= SB_BMAP_NR (s)) { 72 /* it can't be one of the bitmap blocks */
83 reiserfs_warning (s, "vs-4030: is_reusable: there is no so many bitmap blocks: " 73 for (i = 0; i < SB_BMAP_NR(s); i++)
84 "block=%lu, bitmap_nr=%d", block, i); 74 if (block == SB_AP_BITMAP(s)[i].bh->b_blocknr) {
85 return 0; 75 reiserfs_warning(s, "vs: 4020: is_reusable: "
86 } 76 "bitmap block %lu(%u) can't be freed or reused",
77 block, SB_BMAP_NR(s));
78 return 0;
79 }
87 80
88 if ((bit_value == 0 && 81 get_bit_address(s, block, &i, &j);
89 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
90 (bit_value == 1 &&
91 reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) {
92 reiserfs_warning (s, "vs-4040: is_reusable: corresponding bit of block %lu does not "
93 "match required value (i==%d, j==%d) test_bit==%d",
94 block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data));
95 82
96 return 0; 83 if (i >= SB_BMAP_NR(s)) {
97 } 84 reiserfs_warning(s,
85 "vs-4030: is_reusable: there is no so many bitmap blocks: "
86 "block=%lu, bitmap_nr=%d", block, i);
87 return 0;
88 }
98 89
99 if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { 90 if ((bit_value == 0 &&
100 reiserfs_warning (s, "vs-4050: is_reusable: this is root block (%u), " 91 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
101 "it must be busy", SB_ROOT_BLOCK (s)); 92 (bit_value == 1 &&
102 return 0; 93 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data) == 0)) {
103 } 94 reiserfs_warning(s,
95 "vs-4040: is_reusable: corresponding bit of block %lu does not "
96 "match required value (i==%d, j==%d) test_bit==%d",
97 block, i, j, reiserfs_test_le_bit(j,
98 SB_AP_BITMAP
99 (s)[i].bh->
100 b_data));
101
102 return 0;
103 }
104 104
105 return 1; 105 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
106 reiserfs_warning(s,
107 "vs-4050: is_reusable: this is root block (%u), "
108 "it must be busy", SB_ROOT_BLOCK(s));
109 return 0;
110 }
111
112 return 1;
106} 113}
107#endif /* CONFIG_REISERFS_CHECK */ 114#endif /* CONFIG_REISERFS_CHECK */
108 115
109/* searches in journal structures for a given block number (bmap, off). If block 116/* searches in journal structures for a given block number (bmap, off). If block
110 is found in reiserfs journal it suggests next free block candidate to test. */ 117 is found in reiserfs journal it suggests next free block candidate to test. */
111static inline int is_block_in_journal (struct super_block * s, int bmap, int 118static inline int is_block_in_journal(struct super_block *s, int bmap, int
112off, int *next) 119 off, int *next)
113{ 120{
114 b_blocknr_t tmp; 121 b_blocknr_t tmp;
115 122
116 if (reiserfs_in_journal (s, bmap, off, 1, &tmp)) { 123 if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
117 if (tmp) { /* hint supplied */ 124 if (tmp) { /* hint supplied */
118 *next = tmp; 125 *next = tmp;
119 PROC_INFO_INC( s, scan_bitmap.in_journal_hint ); 126 PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
120 } else { 127 } else {
121 (*next) = off + 1; /* inc offset to avoid looping. */ 128 (*next) = off + 1; /* inc offset to avoid looping. */
122 PROC_INFO_INC( s, scan_bitmap.in_journal_nohint ); 129 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
130 }
131 PROC_INFO_INC(s, scan_bitmap.retry);
132 return 1;
123 } 133 }
124 PROC_INFO_INC( s, scan_bitmap.retry ); 134 return 0;
125 return 1;
126 }
127 return 0;
128} 135}
129 136
130/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap 137/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
131 * block; */ 138 * block; */
132static int scan_bitmap_block (struct reiserfs_transaction_handle *th, 139static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
133 int bmap_n, int *beg, int boundary, int min, int max, int unfm) 140 int bmap_n, int *beg, int boundary, int min,
141 int max, int unfm)
134{ 142{
135 struct super_block *s = th->t_super; 143 struct super_block *s = th->t_super;
136 struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n]; 144 struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
137 int end, next; 145 int end, next;
138 int org = *beg; 146 int org = *beg;
139 147
140 BUG_ON (!th->t_trans_id); 148 BUG_ON(!th->t_trans_id);
141 149
142 RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)",bmap_n, SB_BMAP_NR (s) - 1); 150 RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
143 PROC_INFO_INC( s, scan_bitmap.bmap ); 151 bmap_n, SB_BMAP_NR(s) - 1);
152 PROC_INFO_INC(s, scan_bitmap.bmap);
144/* this is unclear and lacks comments, explain how journal bitmaps 153/* this is unclear and lacks comments, explain how journal bitmaps
145 work here for the reader. Convey a sense of the design here. What 154 work here for the reader. Convey a sense of the design here. What
146 is a window? */ 155 is a window? */
147/* - I mean `a window of zero bits' as in description of this function - Zam. */ 156/* - I mean `a window of zero bits' as in description of this function - Zam. */
148
149 if ( !bi ) {
150 reiserfs_warning (s, "NULL bitmap info pointer for bitmap %d", bmap_n);
151 return 0;
152 }
153 if (buffer_locked (bi->bh)) {
154 PROC_INFO_INC( s, scan_bitmap.wait );
155 __wait_on_buffer (bi->bh);
156 }
157
158 while (1) {
159 cont:
160 if (bi->free_count < min)
161 return 0; // No free blocks in this bitmap
162
163 /* search for a first zero bit -- beggining of a window */
164 *beg = reiserfs_find_next_zero_le_bit
165 ((unsigned long*)(bi->bh->b_data), boundary, *beg);
166
167 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
168 * cannot contain a zero window of minimum size */
169 return 0;
170 }
171 157
172 if (unfm && is_block_in_journal(s,bmap_n, *beg, beg)) 158 if (!bi) {
173 continue; 159 reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d",
174 /* first zero bit found; we check next bits */ 160 bmap_n);
175 for (end = *beg + 1;; end ++) { 161 return 0;
176 if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) { 162 }
177 next = end; 163 if (buffer_locked(bi->bh)) {
178 break; 164 PROC_INFO_INC(s, scan_bitmap.wait);
179 } 165 __wait_on_buffer(bi->bh);
180 /* finding the other end of zero bit window requires looking into journal structures (in
181 * case of searching for free blocks for unformatted nodes) */
182 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
183 break;
184 } 166 }
185 167
186 /* now (*beg) points to beginning of zero bits window, 168 while (1) {
187 * (end) points to one bit after the window end */ 169 cont:
188 if (end - *beg >= min) { /* it seems we have found window of proper size */ 170 if (bi->free_count < min)
189 int i; 171 return 0; // No free blocks in this bitmap
190 reiserfs_prepare_for_journal (s, bi->bh, 1); 172
191 /* try to set all blocks used checking are they still free */ 173 /* search for a first zero bit -- beggining of a window */
192 for (i = *beg; i < end; i++) { 174 *beg = reiserfs_find_next_zero_le_bit
193 /* It seems that we should not check in journal again. */ 175 ((unsigned long *)(bi->bh->b_data), boundary, *beg);
194 if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) { 176
195 /* bit was set by another process 177 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
196 * while we slept in prepare_for_journal() */ 178 * cannot contain a zero window of minimum size */
197 PROC_INFO_INC( s, scan_bitmap.stolen ); 179 return 0;
198 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
199 * if length of this set is more or equal to `min' */
200 end = i;
201 break;
202 }
203 /* otherwise we clear all bit were set ... */
204 while (--i >= *beg)
205 reiserfs_test_and_clear_le_bit (i, bi->bh->b_data);
206 reiserfs_restore_prepared_buffer (s, bi->bh);
207 *beg = org;
208 /* ... and search again in current block from beginning */
209 goto cont;
210 } 180 }
211 }
212 bi->free_count -= (end - *beg);
213 journal_mark_dirty (th, s, bi->bh);
214 181
215 /* free block count calculation */ 182 if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
216 reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1); 183 continue;
217 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); 184 /* first zero bit found; we check next bits */
218 journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s)); 185 for (end = *beg + 1;; end++) {
186 if (end >= *beg + max || end >= boundary
187 || reiserfs_test_le_bit(end, bi->bh->b_data)) {
188 next = end;
189 break;
190 }
191 /* finding the other end of zero bit window requires looking into journal structures (in
192 * case of searching for free blocks for unformatted nodes) */
193 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
194 break;
195 }
219 196
220 return end - (*beg); 197 /* now (*beg) points to beginning of zero bits window,
221 } else { 198 * (end) points to one bit after the window end */
222 *beg = next; 199 if (end - *beg >= min) { /* it seems we have found window of proper size */
200 int i;
201 reiserfs_prepare_for_journal(s, bi->bh, 1);
202 /* try to set all blocks used checking are they still free */
203 for (i = *beg; i < end; i++) {
204 /* It seems that we should not check in journal again. */
205 if (reiserfs_test_and_set_le_bit
206 (i, bi->bh->b_data)) {
207 /* bit was set by another process
208 * while we slept in prepare_for_journal() */
209 PROC_INFO_INC(s, scan_bitmap.stolen);
210 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
211 * if length of this set is more or equal to `min' */
212 end = i;
213 break;
214 }
215 /* otherwise we clear all bit were set ... */
216 while (--i >= *beg)
217 reiserfs_test_and_clear_le_bit
218 (i, bi->bh->b_data);
219 reiserfs_restore_prepared_buffer(s,
220 bi->
221 bh);
222 *beg = org;
223 /* ... and search again in current block from beginning */
224 goto cont;
225 }
226 }
227 bi->free_count -= (end - *beg);
228 journal_mark_dirty(th, s, bi->bh);
229
230 /* free block count calculation */
231 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
232 1);
233 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
234 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
235
236 return end - (*beg);
237 } else {
238 *beg = next;
239 }
223 } 240 }
224 }
225} 241}
226 242
227static int bmap_hash_id(struct super_block *s, u32 id) { 243static int bmap_hash_id(struct super_block *s, u32 id)
228 char * hash_in = NULL; 244{
229 unsigned long hash; 245 char *hash_in = NULL;
230 unsigned bm; 246 unsigned long hash;
231 247 unsigned bm;
232 if (id <= 2) { 248
233 bm = 1; 249 if (id <= 2) {
234 } else { 250 bm = 1;
235 hash_in = (char *)(&id); 251 } else {
236 hash = keyed_hash(hash_in, 4); 252 hash_in = (char *)(&id);
237 bm = hash % SB_BMAP_NR(s); 253 hash = keyed_hash(hash_in, 4);
238 if (!bm) 254 bm = hash % SB_BMAP_NR(s);
239 bm = 1; 255 if (!bm)
240 } 256 bm = 1;
241 /* this can only be true when SB_BMAP_NR = 1 */ 257 }
242 if (bm >= SB_BMAP_NR(s)) 258 /* this can only be true when SB_BMAP_NR = 1 */
243 bm = 0; 259 if (bm >= SB_BMAP_NR(s))
244 return bm; 260 bm = 0;
261 return bm;
245} 262}
246 263
247/* 264/*
248 * hashes the id and then returns > 0 if the block group for the 265 * hashes the id and then returns > 0 if the block group for the
249 * corresponding hash is full 266 * corresponding hash is full
250 */ 267 */
251static inline int block_group_used(struct super_block *s, u32 id) { 268static inline int block_group_used(struct super_block *s, u32 id)
252 int bm; 269{
253 bm = bmap_hash_id(s, id); 270 int bm;
254 if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100) ) { 271 bm = bmap_hash_id(s, id);
255 return 0; 272 if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) {
256 } 273 return 0;
257 return 1; 274 }
275 return 1;
258} 276}
259 277
260/* 278/*
261 * the packing is returned in disk byte order 279 * the packing is returned in disk byte order
262 */ 280 */
263__le32 reiserfs_choose_packing(struct inode *dir) 281__le32 reiserfs_choose_packing(struct inode * dir)
264{ 282{
265 __le32 packing; 283 __le32 packing;
266 if (TEST_OPTION(packing_groups, dir->i_sb)) { 284 if (TEST_OPTION(packing_groups, dir->i_sb)) {
267 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); 285 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
268 /* 286 /*
269 * some versions of reiserfsck expect packing locality 1 to be 287 * some versions of reiserfsck expect packing locality 1 to be
270 * special 288 * special
271 */ 289 */
272 if (parent_dir == 1 || block_group_used(dir->i_sb,parent_dir)) 290 if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
273 packing = INODE_PKEY(dir)->k_objectid; 291 packing = INODE_PKEY(dir)->k_objectid;
274 else 292 else
275 packing = INODE_PKEY(dir)->k_dir_id; 293 packing = INODE_PKEY(dir)->k_dir_id;
276 } else 294 } else
277 packing = INODE_PKEY(dir)->k_objectid; 295 packing = INODE_PKEY(dir)->k_objectid;
278 return packing; 296 return packing;
279} 297}
280 298
281/* Tries to find contiguous zero bit window (given size) in given region of 299/* Tries to find contiguous zero bit window (given size) in given region of
282 * bitmap and place new blocks there. Returns number of allocated blocks. */ 300 * bitmap and place new blocks there. Returns number of allocated blocks. */
283static int scan_bitmap (struct reiserfs_transaction_handle *th, 301static int scan_bitmap(struct reiserfs_transaction_handle *th,
284 b_blocknr_t *start, b_blocknr_t finish, 302 b_blocknr_t * start, b_blocknr_t finish,
285 int min, int max, int unfm, unsigned long file_block) 303 int min, int max, int unfm, unsigned long file_block)
286{ 304{
287 int nr_allocated=0; 305 int nr_allocated = 0;
288 struct super_block * s = th->t_super; 306 struct super_block *s = th->t_super;
289 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr 307 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
290 * - Hans, it is not a block number - Zam. */ 308 * - Hans, it is not a block number - Zam. */
291 309
292 int bm, off; 310 int bm, off;
293 int end_bm, end_off; 311 int end_bm, end_off;
294 int off_max = s->s_blocksize << 3; 312 int off_max = s->s_blocksize << 3;
295 313
296 BUG_ON (!th->t_trans_id); 314 BUG_ON(!th->t_trans_id);
297 315
298 PROC_INFO_INC( s, scan_bitmap.call ); 316 PROC_INFO_INC(s, scan_bitmap.call);
299 if ( SB_FREE_BLOCKS(s) <= 0) 317 if (SB_FREE_BLOCKS(s) <= 0)
300 return 0; // No point in looking for more free blocks 318 return 0; // No point in looking for more free blocks
301 319
302 get_bit_address (s, *start, &bm, &off); 320 get_bit_address(s, *start, &bm, &off);
303 get_bit_address (s, finish, &end_bm, &end_off); 321 get_bit_address(s, finish, &end_bm, &end_off);
304 if (bm > SB_BMAP_NR(s)) 322 if (bm > SB_BMAP_NR(s))
305 return 0; 323 return 0;
306 if (end_bm > SB_BMAP_NR(s)) 324 if (end_bm > SB_BMAP_NR(s))
307 end_bm = SB_BMAP_NR(s); 325 end_bm = SB_BMAP_NR(s);
308 326
309 /* When the bitmap is more than 10% free, anyone can allocate. 327 /* When the bitmap is more than 10% free, anyone can allocate.
310 * When it's less than 10% free, only files that already use the 328 * When it's less than 10% free, only files that already use the
311 * bitmap are allowed. Once we pass 80% full, this restriction 329 * bitmap are allowed. Once we pass 80% full, this restriction
312 * is lifted. 330 * is lifted.
313 * 331 *
314 * We do this so that files that grow later still have space close to 332 * We do this so that files that grow later still have space close to
315 * their original allocation. This improves locality, and presumably 333 * their original allocation. This improves locality, and presumably
316 * performance as a result. 334 * performance as a result.
317 * 335 *
318 * This is only an allocation policy and does not make up for getting a 336 * This is only an allocation policy and does not make up for getting a
319 * bad hint. Decent hinting must be implemented for this to work well. 337 * bad hint. Decent hinting must be implemented for this to work well.
320 */ 338 */
321 if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) { 339 if (TEST_OPTION(skip_busy, s)
322 for (;bm < end_bm; bm++, off = 0) { 340 && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
323 if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 ) 341 for (; bm < end_bm; bm++, off = 0) {
324 nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); 342 if ((off && (!unfm || (file_block != 0)))
325 if (nr_allocated) 343 || SB_AP_BITMAP(s)[bm].free_count >
326 goto ret; 344 (s->s_blocksize << 3) / 10)
327 } 345 nr_allocated =
328 /* we know from above that start is a reasonable number */ 346 scan_bitmap_block(th, bm, &off, off_max,
329 get_bit_address (s, *start, &bm, &off); 347 min, max, unfm);
330 } 348 if (nr_allocated)
331 349 goto ret;
332 for (;bm < end_bm; bm++, off = 0) { 350 }
333 nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); 351 /* we know from above that start is a reasonable number */
334 if (nr_allocated) 352 get_bit_address(s, *start, &bm, &off);
335 goto ret; 353 }
336 } 354
337 355 for (; bm < end_bm; bm++, off = 0) {
338 nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); 356 nr_allocated =
339 357 scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
340 ret: 358 if (nr_allocated)
341 *start = bm * off_max + off; 359 goto ret;
342 return nr_allocated; 360 }
361
362 nr_allocated =
363 scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
364
365 ret:
366 *start = bm * off_max + off;
367 return nr_allocated;
343 368
344} 369}
345 370
346static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, 371static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
347 struct inode *inode, b_blocknr_t block, 372 struct inode *inode, b_blocknr_t block,
348 int for_unformatted) 373 int for_unformatted)
349{ 374{
350 struct super_block * s = th->t_super; 375 struct super_block *s = th->t_super;
351 struct reiserfs_super_block * rs; 376 struct reiserfs_super_block *rs;
352 struct buffer_head * sbh; 377 struct buffer_head *sbh;
353 struct reiserfs_bitmap_info *apbi; 378 struct reiserfs_bitmap_info *apbi;
354 int nr, offset; 379 int nr, offset;
355 380
356 BUG_ON (!th->t_trans_id); 381 BUG_ON(!th->t_trans_id);
357 382
358 PROC_INFO_INC( s, free_block ); 383 PROC_INFO_INC(s, free_block);
359 384
360 rs = SB_DISK_SUPER_BLOCK (s); 385 rs = SB_DISK_SUPER_BLOCK(s);
361 sbh = SB_BUFFER_WITH_SB (s); 386 sbh = SB_BUFFER_WITH_SB(s);
362 apbi = SB_AP_BITMAP(s); 387 apbi = SB_AP_BITMAP(s);
363 388
364 get_bit_address (s, block, &nr, &offset); 389 get_bit_address(s, block, &nr, &offset);
365 390
366 if (nr >= sb_bmap_nr (rs)) { 391 if (nr >= sb_bmap_nr(rs)) {
367 reiserfs_warning (s, "vs-4075: reiserfs_free_block: " 392 reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
368 "block %lu is out of range on %s", 393 "block %lu is out of range on %s",
369 block, reiserfs_bdevname (s)); 394 block, reiserfs_bdevname(s));
370 return; 395 return;
371 } 396 }
372 397
373 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ; 398 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1);
374 399
375 /* clear bit for the given block in bit map */ 400 /* clear bit for the given block in bit map */
376 if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) { 401 if (!reiserfs_test_and_clear_le_bit(offset, apbi[nr].bh->b_data)) {
377 reiserfs_warning (s, "vs-4080: reiserfs_free_block: " 402 reiserfs_warning(s, "vs-4080: reiserfs_free_block: "
378 "free_block (%s:%lu)[dev:blocknr]: bit already cleared", 403 "free_block (%s:%lu)[dev:blocknr]: bit already cleared",
379 reiserfs_bdevname (s), block); 404 reiserfs_bdevname(s), block);
380 } 405 }
381 apbi[nr].free_count ++; 406 apbi[nr].free_count++;
382 journal_mark_dirty (th, s, apbi[nr].bh); 407 journal_mark_dirty(th, s, apbi[nr].bh);
383 408
384 reiserfs_prepare_for_journal(s, sbh, 1) ; 409 reiserfs_prepare_for_journal(s, sbh, 1);
385 /* update super block */ 410 /* update super block */
386 set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 ); 411 set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
387 412
388 journal_mark_dirty (th, s, sbh); 413 journal_mark_dirty(th, s, sbh);
389 if (for_unformatted) 414 if (for_unformatted)
390 DQUOT_FREE_BLOCK_NODIRTY(inode, 1); 415 DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
391} 416}
392 417
393void reiserfs_free_block (struct reiserfs_transaction_handle *th, 418void reiserfs_free_block(struct reiserfs_transaction_handle *th,
394 struct inode *inode, b_blocknr_t block, 419 struct inode *inode, b_blocknr_t block,
395 int for_unformatted) 420 int for_unformatted)
396{ 421{
397 struct super_block * s = th->t_super; 422 struct super_block *s = th->t_super;
398 423
399 BUG_ON (!th->t_trans_id); 424 BUG_ON(!th->t_trans_id);
400 425
401 RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); 426 RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
402 RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block"); 427 RFALSE(is_reusable(s, block, 1) == 0,
403 /* mark it before we clear it, just in case */ 428 "vs-4071: can not free such block");
404 journal_mark_freed(th, s, block) ; 429 /* mark it before we clear it, just in case */
405 _reiserfs_free_block(th, inode, block, for_unformatted) ; 430 journal_mark_freed(th, s, block);
431 _reiserfs_free_block(th, inode, block, for_unformatted);
406} 432}
407 433
408/* preallocated blocks don't need to be run through journal_mark_freed */ 434/* preallocated blocks don't need to be run through journal_mark_freed */
409static void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th, 435static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
410 struct inode *inode, b_blocknr_t block) { 436 struct inode *inode, b_blocknr_t block)
411 RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device"); 437{
412 RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block"); 438 RFALSE(!th->t_super,
413 BUG_ON (!th->t_trans_id); 439 "vs-4060: trying to free block on nonexistent device");
414 _reiserfs_free_block(th, inode, block, 1) ; 440 RFALSE(is_reusable(th->t_super, block, 1) == 0,
441 "vs-4070: can not free such block");
442 BUG_ON(!th->t_trans_id);
443 _reiserfs_free_block(th, inode, block, 1);
415} 444}
416 445
417static void __discard_prealloc (struct reiserfs_transaction_handle * th, 446static void __discard_prealloc(struct reiserfs_transaction_handle *th,
418 struct reiserfs_inode_info *ei) 447 struct reiserfs_inode_info *ei)
419{ 448{
420 unsigned long save = ei->i_prealloc_block ; 449 unsigned long save = ei->i_prealloc_block;
421 int dirty = 0; 450 int dirty = 0;
422 struct inode *inode = &ei->vfs_inode; 451 struct inode *inode = &ei->vfs_inode;
423 BUG_ON (!th->t_trans_id); 452 BUG_ON(!th->t_trans_id);
424#ifdef CONFIG_REISERFS_CHECK 453#ifdef CONFIG_REISERFS_CHECK
425 if (ei->i_prealloc_count < 0) 454 if (ei->i_prealloc_count < 0)
426 reiserfs_warning (th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.", __FUNCTION__ ); 455 reiserfs_warning(th->t_super,
456 "zam-4001:%s: inode has negative prealloc blocks count.",
457 __FUNCTION__);
427#endif 458#endif
428 while (ei->i_prealloc_count > 0) { 459 while (ei->i_prealloc_count > 0) {
429 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); 460 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
430 ei->i_prealloc_block++; 461 ei->i_prealloc_block++;
431 ei->i_prealloc_count --; 462 ei->i_prealloc_count--;
432 dirty = 1; 463 dirty = 1;
433 } 464 }
434 if (dirty) 465 if (dirty)
435 reiserfs_update_sd(th, inode); 466 reiserfs_update_sd(th, inode);
436 ei->i_prealloc_block = save; 467 ei->i_prealloc_block = save;
437 list_del_init(&(ei->i_prealloc_list)); 468 list_del_init(&(ei->i_prealloc_list));
438} 469}
439 470
440/* FIXME: It should be inline function */ 471/* FIXME: It should be inline function */
441void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, 472void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
442 struct inode *inode) 473 struct inode *inode)
443{ 474{
444 struct reiserfs_inode_info *ei = REISERFS_I(inode); 475 struct reiserfs_inode_info *ei = REISERFS_I(inode);
445 BUG_ON (!th->t_trans_id); 476 BUG_ON(!th->t_trans_id);
446 if (ei->i_prealloc_count) 477 if (ei->i_prealloc_count)
447 __discard_prealloc(th, ei); 478 __discard_prealloc(th, ei);
448} 479}
449 480
450void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th) 481void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
451{ 482{
452 struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; 483 struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
453 484
454 BUG_ON (!th->t_trans_id); 485 BUG_ON(!th->t_trans_id);
455 486
456 while (!list_empty(plist)) { 487 while (!list_empty(plist)) {
457 struct reiserfs_inode_info *ei; 488 struct reiserfs_inode_info *ei;
458 ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list); 489 ei = list_entry(plist->next, struct reiserfs_inode_info,
490 i_prealloc_list);
459#ifdef CONFIG_REISERFS_CHECK 491#ifdef CONFIG_REISERFS_CHECK
460 if (!ei->i_prealloc_count) { 492 if (!ei->i_prealloc_count) {
461 reiserfs_warning (th->t_super, "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", __FUNCTION__); 493 reiserfs_warning(th->t_super,
462 } 494 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
495 __FUNCTION__);
496 }
463#endif 497#endif
464 __discard_prealloc(th, ei); 498 __discard_prealloc(th, ei);
465 } 499 }
466} 500}
467 501
468void reiserfs_init_alloc_options (struct super_block *s) 502void reiserfs_init_alloc_options(struct super_block *s)
469{ 503{
470 set_bit (_ALLOC_skip_busy, &SB_ALLOC_OPTS(s)); 504 set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
471 set_bit (_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s)); 505 set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
472 set_bit (_ALLOC_packing_groups, &SB_ALLOC_OPTS(s)); 506 set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
473} 507}
474 508
475/* block allocator related options are parsed here */ 509/* block allocator related options are parsed here */
476int reiserfs_parse_alloc_options(struct super_block * s, char * options) 510int reiserfs_parse_alloc_options(struct super_block *s, char *options)
477{ 511{
478 char * this_char, * value; 512 char *this_char, *value;
479 513
480 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ 514 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */
481 515
482 while ( (this_char = strsep (&options, ":")) != NULL ) { 516 while ((this_char = strsep(&options, ":")) != NULL) {
483 if ((value = strchr (this_char, '=')) != NULL) 517 if ((value = strchr(this_char, '=')) != NULL)
484 *value++ = 0; 518 *value++ = 0;
485 519
486 if (!strcmp(this_char, "concentrating_formatted_nodes")) { 520 if (!strcmp(this_char, "concentrating_formatted_nodes")) {
487 int temp; 521 int temp;
488 SET_OPTION(concentrating_formatted_nodes); 522 SET_OPTION(concentrating_formatted_nodes);
489 temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10; 523 temp = (value
490 if (temp <= 0 || temp > 100) { 524 && *value) ? simple_strtoul(value, &value,
491 REISERFS_SB(s)->s_alloc_options.border = 10; 525 0) : 10;
492 } else { 526 if (temp <= 0 || temp > 100) {
493 REISERFS_SB(s)->s_alloc_options.border = 100 / temp; 527 REISERFS_SB(s)->s_alloc_options.border = 10;
494 } 528 } else {
495 continue; 529 REISERFS_SB(s)->s_alloc_options.border =
496 } 530 100 / temp;
497 if (!strcmp(this_char, "displacing_large_files")) { 531 }
498 SET_OPTION(displacing_large_files); 532 continue;
499 REISERFS_SB(s)->s_alloc_options.large_file_size = 533 }
500 (value && *value) ? simple_strtoul (value, &value, 0) : 16; 534 if (!strcmp(this_char, "displacing_large_files")) {
501 continue; 535 SET_OPTION(displacing_large_files);
502 } 536 REISERFS_SB(s)->s_alloc_options.large_file_size =
503 if (!strcmp(this_char, "displacing_new_packing_localities")) { 537 (value
504 SET_OPTION(displacing_new_packing_localities); 538 && *value) ? simple_strtoul(value, &value, 0) : 16;
505 continue; 539 continue;
506 }; 540 }
507 541 if (!strcmp(this_char, "displacing_new_packing_localities")) {
508 if (!strcmp(this_char, "old_hashed_relocation")) { 542 SET_OPTION(displacing_new_packing_localities);
509 SET_OPTION(old_hashed_relocation); 543 continue;
510 continue; 544 };
511 } 545
546 if (!strcmp(this_char, "old_hashed_relocation")) {
547 SET_OPTION(old_hashed_relocation);
548 continue;
549 }
512 550
513 if (!strcmp(this_char, "new_hashed_relocation")) { 551 if (!strcmp(this_char, "new_hashed_relocation")) {
514 SET_OPTION(new_hashed_relocation); 552 SET_OPTION(new_hashed_relocation);
515 continue; 553 continue;
516 } 554 }
517 555
518 if (!strcmp(this_char, "dirid_groups")) { 556 if (!strcmp(this_char, "dirid_groups")) {
519 SET_OPTION(dirid_groups); 557 SET_OPTION(dirid_groups);
520 continue; 558 continue;
521 } 559 }
522 if (!strcmp(this_char, "oid_groups")) { 560 if (!strcmp(this_char, "oid_groups")) {
523 SET_OPTION(oid_groups); 561 SET_OPTION(oid_groups);
524 continue; 562 continue;
525 } 563 }
526 if (!strcmp(this_char, "packing_groups")) { 564 if (!strcmp(this_char, "packing_groups")) {
527 SET_OPTION(packing_groups); 565 SET_OPTION(packing_groups);
528 continue; 566 continue;
529 } 567 }
530 if (!strcmp(this_char, "hashed_formatted_nodes")) { 568 if (!strcmp(this_char, "hashed_formatted_nodes")) {
531 SET_OPTION(hashed_formatted_nodes); 569 SET_OPTION(hashed_formatted_nodes);
532 continue; 570 continue;
533 } 571 }
534 572
535 if (!strcmp(this_char, "skip_busy")) { 573 if (!strcmp(this_char, "skip_busy")) {
536 SET_OPTION(skip_busy); 574 SET_OPTION(skip_busy);
537 continue; 575 continue;
538 } 576 }
539 577
540 if (!strcmp(this_char, "hundredth_slices")) { 578 if (!strcmp(this_char, "hundredth_slices")) {
541 SET_OPTION(hundredth_slices); 579 SET_OPTION(hundredth_slices);
542 continue; 580 continue;
543 } 581 }
544 582
545 if (!strcmp(this_char, "old_way")) { 583 if (!strcmp(this_char, "old_way")) {
546 SET_OPTION(old_way); 584 SET_OPTION(old_way);
547 continue; 585 continue;
548 } 586 }
549 587
550 if (!strcmp(this_char, "displace_based_on_dirid")) { 588 if (!strcmp(this_char, "displace_based_on_dirid")) {
551 SET_OPTION(displace_based_on_dirid); 589 SET_OPTION(displace_based_on_dirid);
552 continue; 590 continue;
553 } 591 }
554 592
555 if (!strcmp(this_char, "preallocmin")) { 593 if (!strcmp(this_char, "preallocmin")) {
556 REISERFS_SB(s)->s_alloc_options.preallocmin = 594 REISERFS_SB(s)->s_alloc_options.preallocmin =
557 (value && *value) ? simple_strtoul (value, &value, 0) : 4; 595 (value
558 continue; 596 && *value) ? simple_strtoul(value, &value, 0) : 4;
559 } 597 continue;
598 }
599
600 if (!strcmp(this_char, "preallocsize")) {
601 REISERFS_SB(s)->s_alloc_options.preallocsize =
602 (value
603 && *value) ? simple_strtoul(value, &value,
604 0) :
605 PREALLOCATION_SIZE;
606 continue;
607 }
560 608
561 if (!strcmp(this_char, "preallocsize")) { 609 reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
562 REISERFS_SB(s)->s_alloc_options.preallocsize = 610 __FUNCTION__, this_char);
563 (value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE; 611 return 1;
564 continue;
565 } 612 }
566 613
567 reiserfs_warning (s, "zam-4001: %s : unknown option - %s", 614 reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
568 __FUNCTION__ , this_char); 615 return 0;
569 return 1;
570 }
571
572 reiserfs_warning (s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
573 return 0;
574} 616}
575 617
576static inline void new_hashed_relocation (reiserfs_blocknr_hint_t * hint) 618static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
577{ 619{
578 char * hash_in; 620 char *hash_in;
579 if (hint->formatted_node) { 621 if (hint->formatted_node) {
580 hash_in = (char*)&hint->key.k_dir_id; 622 hash_in = (char *)&hint->key.k_dir_id;
581 } else { 623 } else {
582 if (!hint->inode) { 624 if (!hint->inode) {
583 //hint->search_start = hint->beg; 625 //hint->search_start = hint->beg;
584 hash_in = (char*)&hint->key.k_dir_id; 626 hash_in = (char *)&hint->key.k_dir_id;
585 } else 627 } else
586 if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 628 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
587 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); 629 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
588 else 630 else
589 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); 631 hash_in =
590 } 632 (char *)(&INODE_PKEY(hint->inode)->k_objectid);
633 }
591 634
592 hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); 635 hint->search_start =
636 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
593} 637}
594 638
595/* 639/*
596 * Relocation based on dirid, hashing them into a given bitmap block 640 * Relocation based on dirid, hashing them into a given bitmap block
597 * files. Formatted nodes are unaffected, a seperate policy covers them 641 * files. Formatted nodes are unaffected, a seperate policy covers them
598 */ 642 */
599static void 643static void dirid_groups(reiserfs_blocknr_hint_t * hint)
600dirid_groups (reiserfs_blocknr_hint_t *hint)
601{ 644{
602 unsigned long hash; 645 unsigned long hash;
603 __u32 dirid = 0; 646 __u32 dirid = 0;
604 int bm = 0; 647 int bm = 0;
605 struct super_block *sb = hint->th->t_super; 648 struct super_block *sb = hint->th->t_super;
606 if (hint->inode)
607 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
608 else if (hint->formatted_node)
609 dirid = hint->key.k_dir_id;
610
611 if (dirid) {
612 bm = bmap_hash_id(sb, dirid);
613 hash = bm * (sb->s_blocksize << 3);
614 /* give a portion of the block group to metadata */
615 if (hint->inode) 649 if (hint->inode)
616 hash += sb->s_blocksize/2; 650 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
617 hint->search_start = hash; 651 else if (hint->formatted_node)
618 } 652 dirid = hint->key.k_dir_id;
653
654 if (dirid) {
655 bm = bmap_hash_id(sb, dirid);
656 hash = bm * (sb->s_blocksize << 3);
657 /* give a portion of the block group to metadata */
658 if (hint->inode)
659 hash += sb->s_blocksize / 2;
660 hint->search_start = hash;
661 }
619} 662}
620 663
621/* 664/*
622 * Relocation based on oid, hashing them into a given bitmap block 665 * Relocation based on oid, hashing them into a given bitmap block
623 * files. Formatted nodes are unaffected, a seperate policy covers them 666 * files. Formatted nodes are unaffected, a seperate policy covers them
624 */ 667 */
625static void 668static void oid_groups(reiserfs_blocknr_hint_t * hint)
626oid_groups (reiserfs_blocknr_hint_t *hint)
627{ 669{
628 if (hint->inode) { 670 if (hint->inode) {
629 unsigned long hash; 671 unsigned long hash;
630 __u32 oid; 672 __u32 oid;
631 __u32 dirid; 673 __u32 dirid;
632 int bm; 674 int bm;
633 675
634 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); 676 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
635 677
636 /* keep the root dir and it's first set of subdirs close to 678 /* keep the root dir and it's first set of subdirs close to
637 * the start of the disk 679 * the start of the disk
638 */ 680 */
639 if (dirid <= 2) 681 if (dirid <= 2)
640 hash = (hint->inode->i_sb->s_blocksize << 3); 682 hash = (hint->inode->i_sb->s_blocksize << 3);
641 else { 683 else {
642 oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); 684 oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
643 bm = bmap_hash_id(hint->inode->i_sb, oid); 685 bm = bmap_hash_id(hint->inode->i_sb, oid);
644 hash = bm * (hint->inode->i_sb->s_blocksize << 3); 686 hash = bm * (hint->inode->i_sb->s_blocksize << 3);
687 }
688 hint->search_start = hash;
645 } 689 }
646 hint->search_start = hash;
647 }
648} 690}
649 691
650/* returns 1 if it finds an indirect item and gets valid hint info 692/* returns 1 if it finds an indirect item and gets valid hint info
651 * from it, otherwise 0 693 * from it, otherwise 0
652 */ 694 */
653static int get_left_neighbor(reiserfs_blocknr_hint_t *hint) 695static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
654{ 696{
655 struct path * path; 697 struct path *path;
656 struct buffer_head * bh; 698 struct buffer_head *bh;
657 struct item_head * ih; 699 struct item_head *ih;
658 int pos_in_item; 700 int pos_in_item;
659 __le32 * item; 701 __le32 *item;
660 int ret = 0; 702 int ret = 0;
661 703
662 if (!hint->path) /* reiserfs code can call this function w/o pointer to path 704 if (!hint->path) /* reiserfs code can call this function w/o pointer to path
663 * structure supplied; then we rely on supplied search_start */ 705 * structure supplied; then we rely on supplied search_start */
664 return 0; 706 return 0;
665 707
666 path = hint->path; 708 path = hint->path;
667 bh = get_last_bh(path); 709 bh = get_last_bh(path);
668 RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor"); 710 RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
669 ih = get_ih(path); 711 ih = get_ih(path);
670 pos_in_item = path->pos_in_item; 712 pos_in_item = path->pos_in_item;
671 item = get_item (path); 713 item = get_item(path);
672 714
673 hint->search_start = bh->b_blocknr; 715 hint->search_start = bh->b_blocknr;
674 716
675 if (!hint->formatted_node && is_indirect_le_ih (ih)) { 717 if (!hint->formatted_node && is_indirect_le_ih(ih)) {
676 /* for indirect item: go to left and look for the first non-hole entry 718 /* for indirect item: go to left and look for the first non-hole entry
677 in the indirect item */ 719 in the indirect item */
678 if (pos_in_item == I_UNFM_NUM (ih)) 720 if (pos_in_item == I_UNFM_NUM(ih))
679 pos_in_item--; 721 pos_in_item--;
680// pos_in_item = I_UNFM_NUM (ih) - 1; 722// pos_in_item = I_UNFM_NUM (ih) - 1;
681 while (pos_in_item >= 0) { 723 while (pos_in_item >= 0) {
682 int t=get_block_num(item,pos_in_item); 724 int t = get_block_num(item, pos_in_item);
683 if (t) { 725 if (t) {
684 hint->search_start = t; 726 hint->search_start = t;
685 ret = 1; 727 ret = 1;
686 break; 728 break;
687 } 729 }
688 pos_in_item --; 730 pos_in_item--;
731 }
689 } 732 }
690 }
691 733
692 /* does result value fit into specified region? */ 734 /* does result value fit into specified region? */
693 return ret; 735 return ret;
694} 736}
695 737
696/* should be, if formatted node, then try to put on first part of the device 738/* should be, if formatted node, then try to put on first part of the device
697 specified as number of percent with mount option device, else try to put 739 specified as number of percent with mount option device, else try to put
698 on last of device. This is not to say it is good code to do so, 740 on last of device. This is not to say it is good code to do so,
699 but the effect should be measured. */ 741 but the effect should be measured. */
700static inline void set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint) 742static inline void set_border_in_hint(struct super_block *s,
743 reiserfs_blocknr_hint_t * hint)
701{ 744{
702 b_blocknr_t border = SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; 745 b_blocknr_t border =
746 SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
703 747
704 if (hint->formatted_node) 748 if (hint->formatted_node)
705 hint->end = border - 1; 749 hint->end = border - 1;
706 else 750 else
707 hint->beg = border; 751 hint->beg = border;
708} 752}
709 753
710static inline void displace_large_file(reiserfs_blocknr_hint_t *hint) 754static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
711{ 755{
712 if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 756 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
713 hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg); 757 hint->search_start =
714 else 758 hint->beg +
715 hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg); 759 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
760 4) % (hint->end - hint->beg);
761 else
762 hint->search_start =
763 hint->beg +
764 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
765 4) % (hint->end - hint->beg);
716} 766}
717 767
718static inline void hash_formatted_node(reiserfs_blocknr_hint_t *hint) 768static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
719{ 769{
720 char * hash_in; 770 char *hash_in;
721 771
722 if (!hint->inode) 772 if (!hint->inode)
723 hash_in = (char*)&hint->key.k_dir_id; 773 hash_in = (char *)&hint->key.k_dir_id;
724 else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 774 else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
725 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); 775 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
726 else 776 else
727 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); 777 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
728 778
729 hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); 779 hint->search_start =
780 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
730} 781}
731 782
732static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint) 783static inline int
784this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
785 hint)
733{ 786{
734 return hint->block == REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; 787 return hint->block ==
788 REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
735} 789}
736 790
737#ifdef DISPLACE_NEW_PACKING_LOCALITIES 791#ifdef DISPLACE_NEW_PACKING_LOCALITIES
738static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint) 792static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
739{ 793{
740 struct in_core_key * key = &hint->key; 794 struct in_core_key *key = &hint->key;
741 795
742 hint->th->displace_new_blocks = 0; 796 hint->th->displace_new_blocks = 0;
743 hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg); 797 hint->search_start =
798 hint->beg + keyed_hash((char *)(&key->k_objectid),
799 4) % (hint->end - hint->beg);
744} 800}
745 #endif 801#endif
746 802
747static inline int old_hashed_relocation (reiserfs_blocknr_hint_t * hint) 803static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
748{ 804{
749 b_blocknr_t border; 805 b_blocknr_t border;
750 u32 hash_in; 806 u32 hash_in;
751
752 if (hint->formatted_node || hint->inode == NULL) {
753 return 0;
754 }
755 807
756 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); 808 if (hint->formatted_node || hint->inode == NULL) {
757 border = hint->beg + (u32) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1); 809 return 0;
758 if (border > hint->search_start) 810 }
759 hint->search_start = border;
760 811
761 return 1; 812 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
762 } 813 border =
763 814 hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
764static inline int old_way (reiserfs_blocknr_hint_t * hint) 815 4) % (hint->end - hint->beg - 1);
765{ 816 if (border > hint->search_start)
766 b_blocknr_t border; 817 hint->search_start = border;
767
768 if (hint->formatted_node || hint->inode == NULL) {
769 return 0;
770 }
771
772 border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - hint->beg);
773 if (border > hint->search_start)
774 hint->search_start = border;
775 818
776 return 1; 819 return 1;
777} 820}
778 821
779static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint) 822static inline int old_way(reiserfs_blocknr_hint_t * hint)
780{ 823{
781 struct in_core_key * key = &hint->key; 824 b_blocknr_t border;
782 b_blocknr_t slice_start; 825
826 if (hint->formatted_node || hint->inode == NULL) {
827 return 0;
828 }
829
830 border =
831 hint->beg +
832 le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
833 hint->beg);
834 if (border > hint->search_start)
835 hint->search_start = border;
783 836
784 slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100); 837 return 1;
785 if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) { 838}
786 hint->search_start = slice_start; 839
787 } 840static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
841{
842 struct in_core_key *key = &hint->key;
843 b_blocknr_t slice_start;
844
845 slice_start =
846 (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
847 if (slice_start > hint->search_start
848 || slice_start + (hint->end / 100) <= hint->search_start) {
849 hint->search_start = slice_start;
850 }
788} 851}
789 852
790static void determine_search_start(reiserfs_blocknr_hint_t *hint, 853static void determine_search_start(reiserfs_blocknr_hint_t * hint,
791 int amount_needed) 854 int amount_needed)
792{ 855{
793 struct super_block *s = hint->th->t_super; 856 struct super_block *s = hint->th->t_super;
794 int unfm_hint; 857 int unfm_hint;
795 858
796 hint->beg = 0; 859 hint->beg = 0;
797 hint->end = SB_BLOCK_COUNT(s) - 1; 860 hint->end = SB_BLOCK_COUNT(s) - 1;
798 861
799 /* This is former border algorithm. Now with tunable border offset */ 862 /* This is former border algorithm. Now with tunable border offset */
800 if (concentrating_formatted_nodes(s)) 863 if (concentrating_formatted_nodes(s))
801 set_border_in_hint(s, hint); 864 set_border_in_hint(s, hint);
802 865
803#ifdef DISPLACE_NEW_PACKING_LOCALITIES 866#ifdef DISPLACE_NEW_PACKING_LOCALITIES
804 /* whenever we create a new directory, we displace it. At first we will 867 /* whenever we create a new directory, we displace it. At first we will
805 hash for location, later we might look for a moderately empty place for 868 hash for location, later we might look for a moderately empty place for
806 it */ 869 it */
807 if (displacing_new_packing_localities(s) 870 if (displacing_new_packing_localities(s)
808 && hint->th->displace_new_blocks) { 871 && hint->th->displace_new_blocks) {
809 displace_new_packing_locality(hint); 872 displace_new_packing_locality(hint);
810 873
811 /* we do not continue determine_search_start, 874 /* we do not continue determine_search_start,
812 * if new packing locality is being displaced */ 875 * if new packing locality is being displaced */
813 return; 876 return;
814 } 877 }
815#endif 878#endif
816
817 /* all persons should feel encouraged to add more special cases here and
818 * test them */
819 879
820 if (displacing_large_files(s) && !hint->formatted_node 880 /* all persons should feel encouraged to add more special cases here and
821 && this_blocknr_allocation_would_make_it_a_large_file(hint)) { 881 * test them */
822 displace_large_file(hint);
823 return;
824 }
825
826 /* if none of our special cases is relevant, use the left neighbor in the
827 tree order of the new node we are allocating for */
828 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
829 hash_formatted_node(hint);
830 return;
831 }
832 882
833 unfm_hint = get_left_neighbor(hint); 883 if (displacing_large_files(s) && !hint->formatted_node
884 && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
885 displace_large_file(hint);
886 return;
887 }
834 888
835 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, 889 /* if none of our special cases is relevant, use the left neighbor in the
836 new blocks are displaced based on directory ID. Also, if suggested search_start 890 tree order of the new node we are allocating for */
837 is less than last preallocated block, we start searching from it, assuming that 891 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
838 HDD dataflow is faster in forward direction */ 892 hash_formatted_node(hint);
839 if ( TEST_OPTION(old_way, s)) { 893 return;
840 if (!hint->formatted_node) { 894 }
841 if ( !reiserfs_hashed_relocation(s))
842 old_way(hint);
843 else if (!reiserfs_no_unhashed_relocation(s))
844 old_hashed_relocation(hint);
845 895
846 if ( hint->inode && hint->search_start < REISERFS_I(hint->inode)->i_prealloc_block) 896 unfm_hint = get_left_neighbor(hint);
847 hint->search_start = REISERFS_I(hint->inode)->i_prealloc_block; 897
898 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
899 new blocks are displaced based on directory ID. Also, if suggested search_start
900 is less than last preallocated block, we start searching from it, assuming that
901 HDD dataflow is faster in forward direction */
902 if (TEST_OPTION(old_way, s)) {
903 if (!hint->formatted_node) {
904 if (!reiserfs_hashed_relocation(s))
905 old_way(hint);
906 else if (!reiserfs_no_unhashed_relocation(s))
907 old_hashed_relocation(hint);
908
909 if (hint->inode
910 && hint->search_start <
911 REISERFS_I(hint->inode)->i_prealloc_block)
912 hint->search_start =
913 REISERFS_I(hint->inode)->i_prealloc_block;
914 }
915 return;
848 } 916 }
849 return;
850 }
851 917
852 /* This is an approach proposed by Hans */ 918 /* This is an approach proposed by Hans */
853 if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) { 919 if (TEST_OPTION(hundredth_slices, s)
854 hundredth_slices(hint); 920 && !(displacing_large_files(s) && !hint->formatted_node)) {
855 return; 921 hundredth_slices(hint);
856 } 922 return;
857 923 }
858 /* old_hashed_relocation only works on unformatted */
859 if (!unfm_hint && !hint->formatted_node &&
860 TEST_OPTION(old_hashed_relocation, s))
861 {
862 old_hashed_relocation(hint);
863 }
864 /* new_hashed_relocation works with both formatted/unformatted nodes */
865 if ((!unfm_hint || hint->formatted_node) &&
866 TEST_OPTION(new_hashed_relocation, s))
867 {
868 new_hashed_relocation(hint);
869 }
870 /* dirid grouping works only on unformatted nodes */
871 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups,s))
872 {
873 dirid_groups(hint);
874 }
875 924
925 /* old_hashed_relocation only works on unformatted */
926 if (!unfm_hint && !hint->formatted_node &&
927 TEST_OPTION(old_hashed_relocation, s)) {
928 old_hashed_relocation(hint);
929 }
930 /* new_hashed_relocation works with both formatted/unformatted nodes */
931 if ((!unfm_hint || hint->formatted_node) &&
932 TEST_OPTION(new_hashed_relocation, s)) {
933 new_hashed_relocation(hint);
934 }
935 /* dirid grouping works only on unformatted nodes */
936 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
937 dirid_groups(hint);
938 }
876#ifdef DISPLACE_NEW_PACKING_LOCALITIES 939#ifdef DISPLACE_NEW_PACKING_LOCALITIES
877 if (hint->formatted_node && TEST_OPTION(dirid_groups,s)) 940 if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
878 { 941 dirid_groups(hint);
879 dirid_groups(hint); 942 }
880 }
881#endif 943#endif
882 944
883 /* oid grouping works only on unformatted nodes */ 945 /* oid grouping works only on unformatted nodes */
884 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups,s)) 946 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
885 { 947 oid_groups(hint);
886 oid_groups(hint); 948 }
887 } 949 return;
888 return;
889} 950}
890 951
891static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) 952static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
892{ 953{
893 /* make minimum size a mount option and benchmark both ways */ 954 /* make minimum size a mount option and benchmark both ways */
894 /* we preallocate blocks only for regular files, specific size */ 955 /* we preallocate blocks only for regular files, specific size */
895 /* benchmark preallocating always and see what happens */ 956 /* benchmark preallocating always and see what happens */
896 957
897 hint->prealloc_size = 0; 958 hint->prealloc_size = 0;
898 959
899 if (!hint->formatted_node && hint->preallocate) { 960 if (!hint->formatted_node && hint->preallocate) {
900 if (S_ISREG(hint->inode->i_mode) 961 if (S_ISREG(hint->inode->i_mode)
901 && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize) 962 && hint->inode->i_size >=
902 hint->prealloc_size = REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocsize - 1; 963 REISERFS_SB(hint->th->t_super)->s_alloc_options.
903 } 964 preallocmin * hint->inode->i_sb->s_blocksize)
904 return CARRY_ON; 965 hint->prealloc_size =
966 REISERFS_SB(hint->th->t_super)->s_alloc_options.
967 preallocsize - 1;
968 }
969 return CARRY_ON;
905} 970}
906 971
907/* XXX I know it could be merged with upper-level function; 972/* XXX I know it could be merged with upper-level function;
908 but may be result function would be too complex. */ 973 but may be result function would be too complex. */
909static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint, 974static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
910 b_blocknr_t * new_blocknrs, 975 b_blocknr_t * new_blocknrs,
911 b_blocknr_t start, b_blocknr_t finish, 976 b_blocknr_t start,
912 int min, 977 b_blocknr_t finish, int min,
913 int amount_needed, int prealloc_size) 978 int amount_needed,
979 int prealloc_size)
914{ 980{
915 int rest = amount_needed; 981 int rest = amount_needed;
916 int nr_allocated; 982 int nr_allocated;
917 983
918 while (rest > 0 && start <= finish) { 984 while (rest > 0 && start <= finish) {
919 nr_allocated = scan_bitmap (hint->th, &start, finish, min, 985 nr_allocated = scan_bitmap(hint->th, &start, finish, min,
920 rest + prealloc_size, !hint->formatted_node, 986 rest + prealloc_size,
921 hint->block); 987 !hint->formatted_node, hint->block);
922 988
923 if (nr_allocated == 0) /* no new blocks allocated, return */ 989 if (nr_allocated == 0) /* no new blocks allocated, return */
924 break; 990 break;
925 991
926 /* fill free_blocknrs array first */ 992 /* fill free_blocknrs array first */
927 while (rest > 0 && nr_allocated > 0) { 993 while (rest > 0 && nr_allocated > 0) {
928 * new_blocknrs ++ = start ++; 994 *new_blocknrs++ = start++;
929 rest --; nr_allocated --; 995 rest--;
930 } 996 nr_allocated--;
997 }
931 998
932 /* do we have something to fill prealloc. array also ? */ 999 /* do we have something to fill prealloc. array also ? */
933 if (nr_allocated > 0) { 1000 if (nr_allocated > 0) {
934 /* it means prealloc_size was greater that 0 and we do preallocation */ 1001 /* it means prealloc_size was greater that 0 and we do preallocation */
935 list_add(&REISERFS_I(hint->inode)->i_prealloc_list, 1002 list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
936 &SB_JOURNAL(hint->th->t_super)->j_prealloc_list); 1003 &SB_JOURNAL(hint->th->t_super)->
937 REISERFS_I(hint->inode)->i_prealloc_block = start; 1004 j_prealloc_list);
938 REISERFS_I(hint->inode)->i_prealloc_count = nr_allocated; 1005 REISERFS_I(hint->inode)->i_prealloc_block = start;
939 break; 1006 REISERFS_I(hint->inode)->i_prealloc_count =
1007 nr_allocated;
1008 break;
1009 }
940 } 1010 }
941 }
942 1011
943 return (amount_needed - rest); 1012 return (amount_needed - rest);
944} 1013}
945 1014
946static inline int blocknrs_and_prealloc_arrays_from_search_start 1015static inline int blocknrs_and_prealloc_arrays_from_search_start
947 (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed) 1016 (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
948{ 1017 int amount_needed) {
949 struct super_block *s = hint->th->t_super; 1018 struct super_block *s = hint->th->t_super;
950 b_blocknr_t start = hint->search_start; 1019 b_blocknr_t start = hint->search_start;
951 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; 1020 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
952 int passno = 0; 1021 int passno = 0;
953 int nr_allocated = 0; 1022 int nr_allocated = 0;
954 int bigalloc = 0; 1023 int bigalloc = 0;
955 1024
956 determine_prealloc_size(hint); 1025 determine_prealloc_size(hint);
957 if (!hint->formatted_node) { 1026 if (!hint->formatted_node) {
958 int quota_ret; 1027 int quota_ret;
959#ifdef REISERQUOTA_DEBUG 1028#ifdef REISERQUOTA_DEBUG
960 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); 1029 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1030 "reiserquota: allocating %d blocks id=%u",
1031 amount_needed, hint->inode->i_uid);
961#endif 1032#endif
962 quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); 1033 quota_ret =
963 if (quota_ret) /* Quota exceeded? */ 1034 DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
964 return QUOTA_EXCEEDED; 1035 if (quota_ret) /* Quota exceeded? */
965 if (hint->preallocate && hint->prealloc_size ) { 1036 return QUOTA_EXCEEDED;
1037 if (hint->preallocate && hint->prealloc_size) {
966#ifdef REISERQUOTA_DEBUG 1038#ifdef REISERQUOTA_DEBUG
967 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); 1039 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1040 "reiserquota: allocating (prealloc) %d blocks id=%u",
1041 hint->prealloc_size, hint->inode->i_uid);
968#endif 1042#endif
969 quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, hint->prealloc_size); 1043 quota_ret =
970 if (quota_ret) 1044 DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
971 hint->preallocate=hint->prealloc_size=0; 1045 hint->prealloc_size);
1046 if (quota_ret)
1047 hint->preallocate = hint->prealloc_size = 0;
1048 }
1049 /* for unformatted nodes, force large allocations */
1050 bigalloc = amount_needed;
972 } 1051 }
973 /* for unformatted nodes, force large allocations */
974 bigalloc = amount_needed;
975 }
976 1052
977 do { 1053 do {
978 /* in bigalloc mode, nr_allocated should stay zero until 1054 /* in bigalloc mode, nr_allocated should stay zero until
979 * the entire allocation is filled 1055 * the entire allocation is filled
980 */ 1056 */
981 if (unlikely(bigalloc && nr_allocated)) { 1057 if (unlikely(bigalloc && nr_allocated)) {
982 reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n", 1058 reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n",
983 bigalloc, nr_allocated); 1059 bigalloc, nr_allocated);
984 /* reset things to a sane value */ 1060 /* reset things to a sane value */
985 bigalloc = amount_needed - nr_allocated; 1061 bigalloc = amount_needed - nr_allocated;
986 } 1062 }
987 /* 1063 /*
988 * try pass 0 and pass 1 looking for a nice big 1064 * try pass 0 and pass 1 looking for a nice big
989 * contiguous allocation. Then reset and look 1065 * contiguous allocation. Then reset and look
990 * for anything you can find. 1066 * for anything you can find.
991 */ 1067 */
992 if (passno == 2 && bigalloc) { 1068 if (passno == 2 && bigalloc) {
993 passno = 0; 1069 passno = 0;
994 bigalloc = 0; 1070 bigalloc = 0;
995 } 1071 }
996 switch (passno++) { 1072 switch (passno++) {
997 case 0: /* Search from hint->search_start to end of disk */ 1073 case 0: /* Search from hint->search_start to end of disk */
998 start = hint->search_start; 1074 start = hint->search_start;
999 finish = SB_BLOCK_COUNT(s) - 1; 1075 finish = SB_BLOCK_COUNT(s) - 1;
1000 break; 1076 break;
1001 case 1: /* Search from hint->beg to hint->search_start */ 1077 case 1: /* Search from hint->beg to hint->search_start */
1002 start = hint->beg; 1078 start = hint->beg;
1003 finish = hint->search_start; 1079 finish = hint->search_start;
1004 break; 1080 break;
1005 case 2: /* Last chance: Search from 0 to hint->beg */ 1081 case 2: /* Last chance: Search from 0 to hint->beg */
1006 start = 0; 1082 start = 0;
1007 finish = hint->beg; 1083 finish = hint->beg;
1008 break; 1084 break;
1009 default: /* We've tried searching everywhere, not enough space */ 1085 default: /* We've tried searching everywhere, not enough space */
1010 /* Free the blocks */ 1086 /* Free the blocks */
1011 if (!hint->formatted_node) { 1087 if (!hint->formatted_node) {
1012#ifdef REISERQUOTA_DEBUG 1088#ifdef REISERQUOTA_DEBUG
1013 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (nospace) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid); 1089 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1090 "reiserquota: freeing (nospace) %d blocks id=%u",
1091 amount_needed +
1092 hint->prealloc_size -
1093 nr_allocated,
1094 hint->inode->i_uid);
1014#endif 1095#endif
1015 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ 1096 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */
1016 } 1097 }
1017 while (nr_allocated --) 1098 while (nr_allocated--)
1018 reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node); 1099 reiserfs_free_block(hint->th, hint->inode,
1019 1100 new_blocknrs[nr_allocated],
1020 return NO_DISK_SPACE; 1101 !hint->formatted_node);
1021 } 1102
1022 } while ((nr_allocated += allocate_without_wrapping_disk (hint, 1103 return NO_DISK_SPACE;
1023 new_blocknrs + nr_allocated, start, finish, 1104 }
1024 bigalloc ? bigalloc : 1, 1105 } while ((nr_allocated += allocate_without_wrapping_disk(hint,
1025 amount_needed - nr_allocated, 1106 new_blocknrs +
1026 hint->prealloc_size)) 1107 nr_allocated,
1027 < amount_needed); 1108 start, finish,
1028 if ( !hint->formatted_node && 1109 bigalloc ?
1029 amount_needed + hint->prealloc_size > 1110 bigalloc : 1,
1030 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) { 1111 amount_needed -
1031 /* Some of preallocation blocks were not allocated */ 1112 nr_allocated,
1113 hint->
1114 prealloc_size))
1115 < amount_needed);
1116 if (!hint->formatted_node &&
1117 amount_needed + hint->prealloc_size >
1118 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1119 /* Some of preallocation blocks were not allocated */
1032#ifdef REISERQUOTA_DEBUG 1120#ifdef REISERQUOTA_DEBUG
1033 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (failed prealloc) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated - REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); 1121 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1122 "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1123 amount_needed + hint->prealloc_size -
1124 nr_allocated -
1125 REISERFS_I(hint->inode)->i_prealloc_count,
1126 hint->inode->i_uid);
1034#endif 1127#endif
1035 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + 1128 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
1036 hint->prealloc_size - nr_allocated - 1129 hint->prealloc_size - nr_allocated -
1037 REISERFS_I(hint->inode)->i_prealloc_count); 1130 REISERFS_I(hint->inode)->
1038 } 1131 i_prealloc_count);
1132 }
1039 1133
1040 return CARRY_ON; 1134 return CARRY_ON;
1041} 1135}
1042 1136
1043/* grab new blocknrs from preallocated list */ 1137/* grab new blocknrs from preallocated list */
1044/* return amount still needed after using them */ 1138/* return amount still needed after using them */
1045static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint, 1139static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1046 b_blocknr_t *new_blocknrs, int amount_needed) 1140 b_blocknr_t * new_blocknrs,
1141 int amount_needed)
1047{ 1142{
1048 struct inode * inode = hint->inode; 1143 struct inode *inode = hint->inode;
1049 1144
1050 if (REISERFS_I(inode)->i_prealloc_count > 0) { 1145 if (REISERFS_I(inode)->i_prealloc_count > 0) {
1051 while (amount_needed) { 1146 while (amount_needed) {
1052 1147
1053 *new_blocknrs ++ = REISERFS_I(inode)->i_prealloc_block ++; 1148 *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1054 REISERFS_I(inode)->i_prealloc_count --; 1149 REISERFS_I(inode)->i_prealloc_count--;
1055 1150
1056 amount_needed --; 1151 amount_needed--;
1057 1152
1058 if (REISERFS_I(inode)->i_prealloc_count <= 0) { 1153 if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1059 list_del(&REISERFS_I(inode)->i_prealloc_list); 1154 list_del(&REISERFS_I(inode)->i_prealloc_list);
1060 break; 1155 break;
1061 } 1156 }
1157 }
1062 } 1158 }
1063 } 1159 /* return amount still needed after using preallocated blocks */
1064 /* return amount still needed after using preallocated blocks */ 1160 return amount_needed;
1065 return amount_needed;
1066} 1161}
1067 1162
1068int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, 1163int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have
1069 b_blocknr_t * new_blocknrs, int amount_needed, 1164 already reserved */ )
1070 int reserved_by_us /* Amount of blocks we have
1071 already reserved */)
1072{ 1165{
1073 int initial_amount_needed = amount_needed; 1166 int initial_amount_needed = amount_needed;
1074 int ret; 1167 int ret;
1075 struct super_block *s = hint->th->t_super; 1168 struct super_block *s = hint->th->t_super;
1076 1169
1077 /* Check if there is enough space, taking into account reserved space */ 1170 /* Check if there is enough space, taking into account reserved space */
1078 if ( SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks < 1171 if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1079 amount_needed - reserved_by_us) 1172 amount_needed - reserved_by_us)
1080 return NO_DISK_SPACE; 1173 return NO_DISK_SPACE;
1081 /* should this be if !hint->inode && hint->preallocate? */ 1174 /* should this be if !hint->inode && hint->preallocate? */
1082 /* do you mean hint->formatted_node can be removed ? - Zam */ 1175 /* do you mean hint->formatted_node can be removed ? - Zam */
1083 /* hint->formatted_node cannot be removed because we try to access 1176 /* hint->formatted_node cannot be removed because we try to access
1084 inode information here, and there is often no inode assotiated with 1177 inode information here, and there is often no inode assotiated with
1085 metadata allocations - green */ 1178 metadata allocations - green */
1086 1179
1087 if (!hint->formatted_node && hint->preallocate) { 1180 if (!hint->formatted_node && hint->preallocate) {
1088 amount_needed = use_preallocated_list_if_available 1181 amount_needed = use_preallocated_list_if_available
1182 (hint, new_blocknrs, amount_needed);
1183 if (amount_needed == 0) /* all blocknrs we need we got from
1184 prealloc. list */
1185 return CARRY_ON;
1186 new_blocknrs += (initial_amount_needed - amount_needed);
1187 }
1188
1189 /* find search start and save it in hint structure */
1190 determine_search_start(hint, amount_needed);
1191 if (hint->search_start >= SB_BLOCK_COUNT(s))
1192 hint->search_start = SB_BLOCK_COUNT(s) - 1;
1193
1194 /* allocation itself; fill new_blocknrs and preallocation arrays */
1195 ret = blocknrs_and_prealloc_arrays_from_search_start
1089 (hint, new_blocknrs, amount_needed); 1196 (hint, new_blocknrs, amount_needed);
1090 if (amount_needed == 0) /* all blocknrs we need we got from 1197
1091 prealloc. list */ 1198 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1092 return CARRY_ON; 1199 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1093 new_blocknrs += (initial_amount_needed - amount_needed); 1200 * variant) */
1094 } 1201
1095 1202 if (ret != CARRY_ON) {
1096 /* find search start and save it in hint structure */ 1203 while (amount_needed++ < initial_amount_needed) {
1097 determine_search_start(hint, amount_needed); 1204 reiserfs_free_block(hint->th, hint->inode,
1098 if (hint->search_start >= SB_BLOCK_COUNT(s)) 1205 *(--new_blocknrs), 1);
1099 hint->search_start = SB_BLOCK_COUNT(s) - 1; 1206 }
1100
1101 /* allocation itself; fill new_blocknrs and preallocation arrays */
1102 ret = blocknrs_and_prealloc_arrays_from_search_start
1103 (hint, new_blocknrs, amount_needed);
1104
1105 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1106 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1107 * variant) */
1108
1109 if (ret != CARRY_ON) {
1110 while (amount_needed ++ < initial_amount_needed) {
1111 reiserfs_free_block(hint->th, hint->inode, *(--new_blocknrs), 1);
1112 } 1207 }
1113 } 1208 return ret;
1114 return ret;
1115} 1209}
1116 1210
1117/* These 2 functions are here to provide blocks reservation to the rest of kernel */ 1211/* These 2 functions are here to provide blocks reservation to the rest of kernel */
1118/* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure 1212/* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
1119 there are actually this much blocks on the FS available */ 1213 there are actually this much blocks on the FS available */
1120void reiserfs_claim_blocks_to_be_allocated( 1214void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, /* super block of
1121 struct super_block *sb, /* super block of 1215 filesystem where
1122 filesystem where 1216 blocks should be
1123 blocks should be 1217 reserved */
1124 reserved */ 1218 int blocks /* How much to reserve */
1125 int blocks /* How much to reserve */ 1219 )
1126 )
1127{ 1220{
1128 1221
1129 /* Fast case, if reservation is zero - exit immediately. */ 1222 /* Fast case, if reservation is zero - exit immediately. */
1130 if ( !blocks ) 1223 if (!blocks)
1131 return; 1224 return;
1132 1225
1133 spin_lock(&REISERFS_SB(sb)->bitmap_lock); 1226 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1134 REISERFS_SB(sb)->reserved_blocks += blocks; 1227 REISERFS_SB(sb)->reserved_blocks += blocks;
1135 spin_unlock(&REISERFS_SB(sb)->bitmap_lock); 1228 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1136} 1229}
1137 1230
1138/* Unreserve @blocks amount of blocks in fs pointed by @sb */ 1231/* Unreserve @blocks amount of blocks in fs pointed by @sb */
1139void reiserfs_release_claimed_blocks( 1232void reiserfs_release_claimed_blocks(struct super_block *sb, /* super block of
1140 struct super_block *sb, /* super block of 1233 filesystem where
1141 filesystem where 1234 blocks should be
1142 blocks should be 1235 reserved */
1143 reserved */ 1236 int blocks /* How much to unreserve */
1144 int blocks /* How much to unreserve */ 1237 )
1145 )
1146{ 1238{
1147 1239
1148 /* Fast case, if unreservation is zero - exit immediately. */ 1240 /* Fast case, if unreservation is zero - exit immediately. */
1149 if ( !blocks ) 1241 if (!blocks)
1150 return; 1242 return;
1151 1243
1152 spin_lock(&REISERFS_SB(sb)->bitmap_lock); 1244 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1153 REISERFS_SB(sb)->reserved_blocks -= blocks; 1245 REISERFS_SB(sb)->reserved_blocks -= blocks;
1154 spin_unlock(&REISERFS_SB(sb)->bitmap_lock); 1246 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1155 RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?"); 1247 RFALSE(REISERFS_SB(sb)->reserved_blocks < 0,
1248 "amount of blocks reserved became zero?");
1156} 1249}
1157 1250
1158/* This function estimates how much pages we will be able to write to FS 1251/* This function estimates how much pages we will be able to write to FS
1159 used for reiserfs_file_write() purposes for now. */ 1252 used for reiserfs_file_write() purposes for now. */
1160int reiserfs_can_fit_pages ( struct super_block *sb /* superblock of filesystem 1253int reiserfs_can_fit_pages(struct super_block *sb /* superblock of filesystem
1161 to estimate space */ ) 1254 to estimate space */ )
1162{ 1255{
1163 int space; 1256 int space;
1164 1257
1165 spin_lock(&REISERFS_SB(sb)->bitmap_lock); 1258 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1166 space = (SB_FREE_BLOCKS(sb) - REISERFS_SB(sb)->reserved_blocks) >> ( PAGE_CACHE_SHIFT - sb->s_blocksize_bits); 1259 space =
1260 (SB_FREE_BLOCKS(sb) -
1261 REISERFS_SB(sb)->reserved_blocks) >> (PAGE_CACHE_SHIFT -
1262 sb->s_blocksize_bits);
1167 spin_unlock(&REISERFS_SB(sb)->bitmap_lock); 1263 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1168 1264
1169 return space>0?space:0; 1265 return space > 0 ? space : 0;
1170} 1266}
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index fbde4b01a325..9dd71e807034 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -12,264 +12,286 @@
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14 14
15extern struct reiserfs_key MIN_KEY; 15extern struct reiserfs_key MIN_KEY;
16 16
17static int reiserfs_readdir (struct file *, void *, filldir_t); 17static int reiserfs_readdir(struct file *, void *, filldir_t);
18static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) ; 18static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
19 int datasync);
19 20
20struct file_operations reiserfs_dir_operations = { 21struct file_operations reiserfs_dir_operations = {
21 .read = generic_read_dir, 22 .read = generic_read_dir,
22 .readdir = reiserfs_readdir, 23 .readdir = reiserfs_readdir,
23 .fsync = reiserfs_dir_fsync, 24 .fsync = reiserfs_dir_fsync,
24 .ioctl = reiserfs_ioctl, 25 .ioctl = reiserfs_ioctl,
25}; 26};
26 27
27static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) { 28static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
28 struct inode *inode = dentry->d_inode; 29 int datasync)
29 int err; 30{
30 reiserfs_write_lock(inode->i_sb); 31 struct inode *inode = dentry->d_inode;
31 err = reiserfs_commit_for_inode(inode) ; 32 int err;
32 reiserfs_write_unlock(inode->i_sb) ; 33 reiserfs_write_lock(inode->i_sb);
33 if (err < 0) 34 err = reiserfs_commit_for_inode(inode);
34 return err; 35 reiserfs_write_unlock(inode->i_sb);
35 return 0; 36 if (err < 0)
37 return err;
38 return 0;
36} 39}
37 40
38
39#define store_ih(where,what) copy_item_head (where, what) 41#define store_ih(where,what) copy_item_head (where, what)
40 42
41// 43//
42static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldir) 44static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
43{ 45{
44 struct inode *inode = filp->f_dentry->d_inode; 46 struct inode *inode = filp->f_dentry->d_inode;
45 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 47 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
46 INITIALIZE_PATH (path_to_entry); 48 INITIALIZE_PATH(path_to_entry);
47 struct buffer_head * bh; 49 struct buffer_head *bh;
48 int item_num, entry_num; 50 int item_num, entry_num;
49 const struct reiserfs_key * rkey; 51 const struct reiserfs_key *rkey;
50 struct item_head * ih, tmp_ih; 52 struct item_head *ih, tmp_ih;
51 int search_res; 53 int search_res;
52 char * local_buf; 54 char *local_buf;
53 loff_t next_pos; 55 loff_t next_pos;
54 char small_buf[32] ; /* avoid kmalloc if we can */ 56 char small_buf[32]; /* avoid kmalloc if we can */
55 struct reiserfs_dir_entry de; 57 struct reiserfs_dir_entry de;
56 int ret = 0; 58 int ret = 0;
57 59
58 reiserfs_write_lock(inode->i_sb); 60 reiserfs_write_lock(inode->i_sb);
59 61
60 reiserfs_check_lock_depth(inode->i_sb, "readdir") ; 62 reiserfs_check_lock_depth(inode->i_sb, "readdir");
61 63
62 /* form key for search the next directory entry using f_pos field of 64 /* form key for search the next directory entry using f_pos field of
63 file structure */ 65 file structure */
64 make_cpu_key (&pos_key, inode, (filp->f_pos) ? (filp->f_pos) : DOT_OFFSET, 66 make_cpu_key(&pos_key, inode,
65 TYPE_DIRENTRY, 3); 67 (filp->f_pos) ? (filp->f_pos) : DOT_OFFSET, TYPE_DIRENTRY,
66 next_pos = cpu_key_k_offset (&pos_key); 68 3);
67 69 next_pos = cpu_key_k_offset(&pos_key);
68 /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos);*/ 70
69 71 /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos); */
70 path_to_entry.reada = PATH_READA; 72
71 while (1) { 73 path_to_entry.reada = PATH_READA;
72 research: 74 while (1) {
73 /* search the directory item, containing entry with specified key */ 75 research:
74 search_res = search_by_entry_key (inode->i_sb, &pos_key, &path_to_entry, &de); 76 /* search the directory item, containing entry with specified key */
75 if (search_res == IO_ERROR) { 77 search_res =
76 // FIXME: we could just skip part of directory which could 78 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
77 // not be read 79 &de);
78 ret = -EIO; 80 if (search_res == IO_ERROR) {
79 goto out; 81 // FIXME: we could just skip part of directory which could
80 } 82 // not be read
81 entry_num = de.de_entry_num; 83 ret = -EIO;
82 bh = de.de_bh;
83 item_num = de.de_item_num;
84 ih = de.de_ih;
85 store_ih (&tmp_ih, ih);
86
87 /* we must have found item, that is item of this directory, */
88 RFALSE( COMP_SHORT_KEYS (&(ih->ih_key), &pos_key),
89 "vs-9000: found item %h does not match to dir we readdir %K",
90 ih, &pos_key);
91 RFALSE( item_num > B_NR_ITEMS (bh) - 1,
92 "vs-9005 item_num == %d, item amount == %d",
93 item_num, B_NR_ITEMS (bh));
94
95 /* and entry must be not more than number of entries in the item */
96 RFALSE( I_ENTRY_COUNT (ih) < entry_num,
97 "vs-9010: entry number is too big %d (%d)",
98 entry_num, I_ENTRY_COUNT (ih));
99
100 if (search_res == POSITION_FOUND || entry_num < I_ENTRY_COUNT (ih)) {
101 /* go through all entries in the directory item beginning from the entry, that has been found */
102 struct reiserfs_de_head * deh = B_I_DEH (bh, ih) + entry_num;
103
104 for (; entry_num < I_ENTRY_COUNT (ih); entry_num ++, deh ++) {
105 int d_reclen;
106 char * d_name;
107 off_t d_off;
108 ino_t d_ino;
109
110 if (!de_visible (deh))
111 /* it is hidden entry */
112 continue;
113 d_reclen = entry_length (bh, ih, entry_num);
114 d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh);
115 if (!d_name[d_reclen - 1])
116 d_reclen = strlen (d_name);
117
118 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)){
119 /* too big to send back to VFS */
120 continue ;
121 }
122
123 /* Ignore the .reiserfs_priv entry */
124 if (reiserfs_xattrs (inode->i_sb) &&
125 !old_format_only(inode->i_sb) &&
126 filp->f_dentry == inode->i_sb->s_root &&
127 REISERFS_SB(inode->i_sb)->priv_root &&
128 REISERFS_SB(inode->i_sb)->priv_root->d_inode &&
129 deh_objectid(deh) == le32_to_cpu (INODE_PKEY(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->k_objectid)) {
130 continue;
131 }
132
133 d_off = deh_offset (deh);
134 filp->f_pos = d_off ;
135 d_ino = deh_objectid (deh);
136 if (d_reclen <= 32) {
137 local_buf = small_buf ;
138 } else {
139 local_buf = reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb) ;
140 if (!local_buf) {
141 pathrelse (&path_to_entry);
142 ret = -ENOMEM ;
143 goto out; 84 goto out;
144 }
145 if (item_moved (&tmp_ih, &path_to_entry)) {
146 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ;
147 goto research;
148 }
149 }
150 // Note, that we copy name to user space via temporary
151 // buffer (local_buf) because filldir will block if
152 // user space buffer is swapped out. At that time
153 // entry can move to somewhere else
154 memcpy (local_buf, d_name, d_reclen);
155 if (filldir (dirent, local_buf, d_reclen, d_off, d_ino,
156 DT_UNKNOWN) < 0) {
157 if (local_buf != small_buf) {
158 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ;
159 }
160 goto end;
161 } 85 }
162 if (local_buf != small_buf) { 86 entry_num = de.de_entry_num;
163 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 87 bh = de.de_bh;
88 item_num = de.de_item_num;
89 ih = de.de_ih;
90 store_ih(&tmp_ih, ih);
91
92 /* we must have found item, that is item of this directory, */
93 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
94 "vs-9000: found item %h does not match to dir we readdir %K",
95 ih, &pos_key);
96 RFALSE(item_num > B_NR_ITEMS(bh) - 1,
97 "vs-9005 item_num == %d, item amount == %d",
98 item_num, B_NR_ITEMS(bh));
99
100 /* and entry must be not more than number of entries in the item */
101 RFALSE(I_ENTRY_COUNT(ih) < entry_num,
102 "vs-9010: entry number is too big %d (%d)",
103 entry_num, I_ENTRY_COUNT(ih));
104
105 if (search_res == POSITION_FOUND
106 || entry_num < I_ENTRY_COUNT(ih)) {
107 /* go through all entries in the directory item beginning from the entry, that has been found */
108 struct reiserfs_de_head *deh =
109 B_I_DEH(bh, ih) + entry_num;
110
111 for (; entry_num < I_ENTRY_COUNT(ih);
112 entry_num++, deh++) {
113 int d_reclen;
114 char *d_name;
115 off_t d_off;
116 ino_t d_ino;
117
118 if (!de_visible(deh))
119 /* it is hidden entry */
120 continue;
121 d_reclen = entry_length(bh, ih, entry_num);
122 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
123 if (!d_name[d_reclen - 1])
124 d_reclen = strlen(d_name);
125
126 if (d_reclen >
127 REISERFS_MAX_NAME(inode->i_sb->
128 s_blocksize)) {
129 /* too big to send back to VFS */
130 continue;
131 }
132
133 /* Ignore the .reiserfs_priv entry */
134 if (reiserfs_xattrs(inode->i_sb) &&
135 !old_format_only(inode->i_sb) &&
136 filp->f_dentry == inode->i_sb->s_root &&
137 REISERFS_SB(inode->i_sb)->priv_root &&
138 REISERFS_SB(inode->i_sb)->priv_root->d_inode
139 && deh_objectid(deh) ==
140 le32_to_cpu(INODE_PKEY
141 (REISERFS_SB(inode->i_sb)->
142 priv_root->d_inode)->
143 k_objectid)) {
144 continue;
145 }
146
147 d_off = deh_offset(deh);
148 filp->f_pos = d_off;
149 d_ino = deh_objectid(deh);
150 if (d_reclen <= 32) {
151 local_buf = small_buf;
152 } else {
153 local_buf =
154 reiserfs_kmalloc(d_reclen, GFP_NOFS,
155 inode->i_sb);
156 if (!local_buf) {
157 pathrelse(&path_to_entry);
158 ret = -ENOMEM;
159 goto out;
160 }
161 if (item_moved(&tmp_ih, &path_to_entry)) {
162 reiserfs_kfree(local_buf,
163 d_reclen,
164 inode->i_sb);
165 goto research;
166 }
167 }
168 // Note, that we copy name to user space via temporary
169 // buffer (local_buf) because filldir will block if
170 // user space buffer is swapped out. At that time
171 // entry can move to somewhere else
172 memcpy(local_buf, d_name, d_reclen);
173 if (filldir
174 (dirent, local_buf, d_reclen, d_off, d_ino,
175 DT_UNKNOWN) < 0) {
176 if (local_buf != small_buf) {
177 reiserfs_kfree(local_buf,
178 d_reclen,
179 inode->i_sb);
180 }
181 goto end;
182 }
183 if (local_buf != small_buf) {
184 reiserfs_kfree(local_buf, d_reclen,
185 inode->i_sb);
186 }
187 // next entry should be looked for with such offset
188 next_pos = deh_offset(deh) + 1;
189
190 if (item_moved(&tmp_ih, &path_to_entry)) {
191 goto research;
192 }
193 } /* for */
164 } 194 }
165 195
166 // next entry should be looked for with such offset 196 if (item_num != B_NR_ITEMS(bh) - 1)
167 next_pos = deh_offset (deh) + 1; 197 // end of directory has been reached
198 goto end;
199
200 /* item we went through is last item of node. Using right
201 delimiting key check is it directory end */
202 rkey = get_rkey(&path_to_entry, inode->i_sb);
203 if (!comp_le_keys(rkey, &MIN_KEY)) {
204 /* set pos_key to key, that is the smallest and greater
205 that key of the last entry in the item */
206 set_cpu_key_k_offset(&pos_key, next_pos);
207 continue;
208 }
168 209
169 if (item_moved (&tmp_ih, &path_to_entry)) { 210 if (COMP_SHORT_KEYS(rkey, &pos_key)) {
170 goto research; 211 // end of directory has been reached
212 goto end;
171 } 213 }
172 } /* for */ 214
173 } 215 /* directory continues in the right neighboring block */
174 216 set_cpu_key_k_offset(&pos_key,
175 if (item_num != B_NR_ITEMS (bh) - 1) 217 le_key_k_offset(KEY_FORMAT_3_5, rkey));
176 // end of directory has been reached 218
177 goto end; 219 } /* while */
178 220
179 /* item we went through is last item of node. Using right 221 end:
180 delimiting key check is it directory end */ 222 filp->f_pos = next_pos;
181 rkey = get_rkey (&path_to_entry, inode->i_sb); 223 pathrelse(&path_to_entry);
182 if (! comp_le_keys (rkey, &MIN_KEY)) { 224 reiserfs_check_path(&path_to_entry);
183 /* set pos_key to key, that is the smallest and greater 225 out:
184 that key of the last entry in the item */ 226 reiserfs_write_unlock(inode->i_sb);
185 set_cpu_key_k_offset (&pos_key, next_pos); 227 return ret;
186 continue;
187 }
188
189 if ( COMP_SHORT_KEYS (rkey, &pos_key)) {
190 // end of directory has been reached
191 goto end;
192 }
193
194 /* directory continues in the right neighboring block */
195 set_cpu_key_k_offset (&pos_key, le_key_k_offset (KEY_FORMAT_3_5, rkey));
196
197 } /* while */
198
199
200 end:
201 filp->f_pos = next_pos;
202 pathrelse (&path_to_entry);
203 reiserfs_check_path(&path_to_entry) ;
204 out:
205 reiserfs_write_unlock(inode->i_sb);
206 return ret;
207} 228}
208 229
209/* compose directory item containing "." and ".." entries (entries are 230/* compose directory item containing "." and ".." entries (entries are
210 not aligned to 4 byte boundary) */ 231 not aligned to 4 byte boundary) */
211/* the last four params are LE */ 232/* the last four params are LE */
212void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid, 233void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
213 __le32 par_dirid, __le32 par_objid) 234 __le32 par_dirid, __le32 par_objid)
214{ 235{
215 struct reiserfs_de_head * deh; 236 struct reiserfs_de_head *deh;
216 237
217 memset (body, 0, EMPTY_DIR_SIZE_V1); 238 memset(body, 0, EMPTY_DIR_SIZE_V1);
218 deh = (struct reiserfs_de_head *)body; 239 deh = (struct reiserfs_de_head *)body;
219 240
220 /* direntry header of "." */ 241 /* direntry header of "." */
221 put_deh_offset( &(deh[0]), DOT_OFFSET ); 242 put_deh_offset(&(deh[0]), DOT_OFFSET);
222 /* these two are from make_le_item_head, and are are LE */ 243 /* these two are from make_le_item_head, and are are LE */
223 deh[0].deh_dir_id = dirid; 244 deh[0].deh_dir_id = dirid;
224 deh[0].deh_objectid = objid; 245 deh[0].deh_objectid = objid;
225 deh[0].deh_state = 0; /* Endian safe if 0 */ 246 deh[0].deh_state = 0; /* Endian safe if 0 */
226 put_deh_location( &(deh[0]), EMPTY_DIR_SIZE_V1 - strlen( "." )); 247 put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen("."));
227 mark_de_visible(&(deh[0])); 248 mark_de_visible(&(deh[0]));
228 249
229 /* direntry header of ".." */ 250 /* direntry header of ".." */
230 put_deh_offset( &(deh[1]), DOT_DOT_OFFSET); 251 put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
231 /* key of ".." for the root directory */ 252 /* key of ".." for the root directory */
232 /* these two are from the inode, and are are LE */ 253 /* these two are from the inode, and are are LE */
233 deh[1].deh_dir_id = par_dirid; 254 deh[1].deh_dir_id = par_dirid;
234 deh[1].deh_objectid = par_objid; 255 deh[1].deh_objectid = par_objid;
235 deh[1].deh_state = 0; /* Endian safe if 0 */ 256 deh[1].deh_state = 0; /* Endian safe if 0 */
236 put_deh_location( &(deh[1]), deh_location( &(deh[0]) ) - strlen( ".." ) ); 257 put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen(".."));
237 mark_de_visible(&(deh[1])); 258 mark_de_visible(&(deh[1]));
238 259
239 /* copy ".." and "." */ 260 /* copy ".." and "." */
240 memcpy (body + deh_location( &(deh[0]) ), ".", 1); 261 memcpy(body + deh_location(&(deh[0])), ".", 1);
241 memcpy (body + deh_location( &(deh[1]) ), "..", 2); 262 memcpy(body + deh_location(&(deh[1])), "..", 2);
242} 263}
243 264
244/* compose directory item containing "." and ".." entries */ 265/* compose directory item containing "." and ".." entries */
245void make_empty_dir_item (char * body, __le32 dirid, __le32 objid, 266void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
246 __le32 par_dirid, __le32 par_objid) 267 __le32 par_dirid, __le32 par_objid)
247{ 268{
248 struct reiserfs_de_head * deh; 269 struct reiserfs_de_head *deh;
249 270
250 memset (body, 0, EMPTY_DIR_SIZE); 271 memset(body, 0, EMPTY_DIR_SIZE);
251 deh = (struct reiserfs_de_head *)body; 272 deh = (struct reiserfs_de_head *)body;
252 273
253 /* direntry header of "." */ 274 /* direntry header of "." */
254 put_deh_offset( &(deh[0]), DOT_OFFSET ); 275 put_deh_offset(&(deh[0]), DOT_OFFSET);
255 /* these two are from make_le_item_head, and are are LE */ 276 /* these two are from make_le_item_head, and are are LE */
256 deh[0].deh_dir_id = dirid; 277 deh[0].deh_dir_id = dirid;
257 deh[0].deh_objectid = objid; 278 deh[0].deh_objectid = objid;
258 deh[0].deh_state = 0; /* Endian safe if 0 */ 279 deh[0].deh_state = 0; /* Endian safe if 0 */
259 put_deh_location( &(deh[0]), EMPTY_DIR_SIZE - ROUND_UP( strlen( "." ) ) ); 280 put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
260 mark_de_visible(&(deh[0])); 281 mark_de_visible(&(deh[0]));
261 282
262 /* direntry header of ".." */ 283 /* direntry header of ".." */
263 put_deh_offset( &(deh[1]), DOT_DOT_OFFSET ); 284 put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
264 /* key of ".." for the root directory */ 285 /* key of ".." for the root directory */
265 /* these two are from the inode, and are are LE */ 286 /* these two are from the inode, and are are LE */
266 deh[1].deh_dir_id = par_dirid; 287 deh[1].deh_dir_id = par_dirid;
267 deh[1].deh_objectid = par_objid; 288 deh[1].deh_objectid = par_objid;
268 deh[1].deh_state = 0; /* Endian safe if 0 */ 289 deh[1].deh_state = 0; /* Endian safe if 0 */
269 put_deh_location( &(deh[1]), deh_location( &(deh[0])) - ROUND_UP( strlen( ".." ) ) ); 290 put_deh_location(&(deh[1]),
270 mark_de_visible(&(deh[1])); 291 deh_location(&(deh[0])) - ROUND_UP(strlen("..")));
271 292 mark_de_visible(&(deh[1]));
272 /* copy ".." and "." */ 293
273 memcpy (body + deh_location( &(deh[0]) ), ".", 1); 294 /* copy ".." and "." */
274 memcpy (body + deh_location( &(deh[1]) ), "..", 2); 295 memcpy(body + deh_location(&(deh[0])), ".", 1);
296 memcpy(body + deh_location(&(deh[1])), "..", 2);
275} 297}
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 2118db2896c7..b2264ba3cc56 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -8,7 +8,6 @@
8/* balance the tree according to the analysis made before, */ 8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */ 9/* and using buffers obtained after all above. */
10 10
11
12/** 11/**
13 ** balance_leaf_when_delete 12 ** balance_leaf_when_delete
14 ** balance_leaf 13 ** balance_leaf
@@ -24,23 +23,22 @@
24 23
25#ifdef CONFIG_REISERFS_CHECK 24#ifdef CONFIG_REISERFS_CHECK
26 25
27struct tree_balance * cur_tb = NULL; /* detects whether more than one 26struct tree_balance *cur_tb = NULL; /* detects whether more than one
28 copy of tb exists as a means 27 copy of tb exists as a means
29 of checking whether schedule 28 of checking whether schedule
30 is interrupting do_balance */ 29 is interrupting do_balance */
31#endif 30#endif
32 31
33inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, 32inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
34 struct buffer_head * bh, int flag) 33 struct buffer_head *bh, int flag)
35{ 34{
36 journal_mark_dirty(tb->transaction_handle, 35 journal_mark_dirty(tb->transaction_handle,
37 tb->transaction_handle->t_super, bh) ; 36 tb->transaction_handle->t_super, bh);
38} 37}
39 38
40#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 39#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
41#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 40#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
42 41
43
44/* summary: 42/* summary:
45 if deleting something ( tb->insert_size[0] < 0 ) 43 if deleting something ( tb->insert_size[0] < 0 )
46 return(balance_leaf_when_delete()); (flag d handled here) 44 return(balance_leaf_when_delete()); (flag d handled here)
@@ -64,8 +62,6 @@ be performed by do_balance.
64 62
65-Hans */ 63-Hans */
66 64
67
68
69/* Balance leaf node in case of delete or cut: insert_size[0] < 0 65/* Balance leaf node in case of delete or cut: insert_size[0] < 0
70 * 66 *
71 * lnum, rnum can have values >= -1 67 * lnum, rnum can have values >= -1
@@ -73,1384 +69,1933 @@ be performed by do_balance.
73 * 0 means that nothing should be done with the neighbor 69 * 0 means that nothing should be done with the neighbor
74 * >0 means to shift entirely or partly the specified number of items to the neighbor 70 * >0 means to shift entirely or partly the specified number of items to the neighbor
75 */ 71 */
76static int balance_leaf_when_delete (struct tree_balance * tb, int flag) 72static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
77{ 73{
78 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); 74 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
79 int item_pos = PATH_LAST_POSITION (tb->tb_path); 75 int item_pos = PATH_LAST_POSITION(tb->tb_path);
80 int pos_in_item = tb->tb_path->pos_in_item; 76 int pos_in_item = tb->tb_path->pos_in_item;
81 struct buffer_info bi; 77 struct buffer_info bi;
82 int n; 78 int n;
83 struct item_head * ih; 79 struct item_head *ih;
84 80
85 RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, 81 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
86 "vs- 12000: level: wrong FR %z", tb->FR[0]); 82 "vs- 12000: level: wrong FR %z", tb->FR[0]);
87 RFALSE( tb->blknum[0] > 1, 83 RFALSE(tb->blknum[0] > 1,
88 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); 84 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
89 RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0), 85 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
90 "PAP-12010: tree can not be empty"); 86 "PAP-12010: tree can not be empty");
91 87
92 ih = B_N_PITEM_HEAD (tbS0, item_pos); 88 ih = B_N_PITEM_HEAD(tbS0, item_pos);
93 89
94 /* Delete or truncate the item */ 90 /* Delete or truncate the item */
95 91
96 switch (flag) { 92 switch (flag) {
97 case M_DELETE: /* delete item in S[0] */ 93 case M_DELETE: /* delete item in S[0] */
94
95 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
96 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
97 -tb->insert_size[0], ih);
98
99 bi.tb = tb;
100 bi.bi_bh = tbS0;
101 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
102 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
103 leaf_delete_items(&bi, 0, item_pos, 1, -1);
104
105 if (!item_pos && tb->CFL[0]) {
106 if (B_NR_ITEMS(tbS0)) {
107 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
108 0);
109 } else {
110 if (!PATH_H_POSITION(tb->tb_path, 1))
111 replace_key(tb, tb->CFL[0], tb->lkey[0],
112 PATH_H_PPARENT(tb->tb_path,
113 0), 0);
114 }
115 }
98 116
99 RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], 117 RFALSE(!item_pos && !tb->CFL[0],
100 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 118 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
101 -tb->insert_size [0], ih); 119 tb->L[0]);
102 120
103 bi.tb = tb; 121 break;
104 bi.bi_bh = tbS0; 122
105 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 123 case M_CUT:{ /* cut item in S[0] */
106 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); 124 bi.tb = tb;
107 leaf_delete_items (&bi, 0, item_pos, 1, -1); 125 bi.bi_bh = tbS0;
108 126 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
109 if ( ! item_pos && tb->CFL[0] ) { 127 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
110 if ( B_NR_ITEMS(tbS0) ) { 128 if (is_direntry_le_ih(ih)) {
111 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); 129
112 } 130 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
113 else { 131 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
114 if ( ! PATH_H_POSITION (tb->tb_path, 1) ) 132 tb->insert_size[0] = -1;
115 replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0); 133 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
116 } 134 -tb->insert_size[0]);
117 } 135
118 136 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
119 RFALSE( ! item_pos && !tb->CFL[0], 137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
120 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]); 138 tb->CFL[0]);
121 139
122 break; 140 if (!item_pos && !pos_in_item && tb->CFL[0]) {
123 141 replace_key(tb, tb->CFL[0], tb->lkey[0],
124 case M_CUT: { /* cut item in S[0] */ 142 tbS0, 0);
125 bi.tb = tb; 143 }
126 bi.bi_bh = tbS0; 144 } else {
127 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 145 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
128 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); 146 -tb->insert_size[0]);
129 if (is_direntry_le_ih (ih)) { 147
130 148 RFALSE(!ih_item_len(ih),
131 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ 149 "PAP-12035: cut must leave non-zero dynamic length of item");
132 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ 150 }
133 tb->insert_size[0] = -1; 151 break;
134 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
135
136 RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
138 tb->CFL[0]);
139
140 if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
141 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
142 }
143 } else {
144 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
145
146 RFALSE( ! ih_item_len(ih),
147 "PAP-12035: cut must leave non-zero dynamic length of item");
148 }
149 break;
150 }
151
152 default:
153 print_cur_tb ("12040");
154 reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
155 (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
156 }
157
158 /* the rule is that no shifting occurs unless by shifting a node can be freed */
159 n = B_NR_ITEMS(tbS0);
160 if ( tb->lnum[0] ) /* L[0] takes part in balancing */
161 {
162 if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */
163 {
164 if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */
165 {
166 if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
167 {
168 /* all contents of all the 3 buffers will be in L[0] */
169 if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
170 replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);
171
172 leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL);
173 leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL);
174
175 reiserfs_invalidate_buffer (tb, tbS0);
176 reiserfs_invalidate_buffer (tb, tb->R[0]);
177
178 return 0;
179 } 152 }
180 /* all contents of all the 3 buffers will be in R[0] */
181 leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL);
182 leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL);
183 153
184 /* right_delimiting_key is correct in R[0] */ 154 default:
185 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); 155 print_cur_tb("12040");
156 reiserfs_panic(tb->tb_sb,
157 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
158 (flag ==
159 M_PASTE) ? "PASTE" : ((flag ==
160 M_INSERT) ? "INSERT" :
161 "UNKNOWN"), flag);
162 }
186 163
187 reiserfs_invalidate_buffer (tb, tbS0); 164 /* the rule is that no shifting occurs unless by shifting a node can be freed */
188 reiserfs_invalidate_buffer (tb, tb->L[0]); 165 n = B_NR_ITEMS(tbS0);
166 if (tb->lnum[0]) { /* L[0] takes part in balancing */
167 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
168 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
169 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
170 /* all contents of all the 3 buffers will be in L[0] */
171 if (PATH_H_POSITION(tb->tb_path, 1) == 0
172 && 1 < B_NR_ITEMS(tb->FR[0]))
173 replace_key(tb, tb->CFL[0],
174 tb->lkey[0],
175 tb->FR[0], 1);
176
177 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
178 -1, NULL);
179 leaf_move_items(LEAF_FROM_R_TO_L, tb,
180 B_NR_ITEMS(tb->R[0]),
181 -1, NULL);
182
183 reiserfs_invalidate_buffer(tb, tbS0);
184 reiserfs_invalidate_buffer(tb,
185 tb->R[0]);
186
187 return 0;
188 }
189 /* all contents of all the 3 buffers will be in R[0] */
190 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
191 NULL);
192 leaf_move_items(LEAF_FROM_L_TO_R, tb,
193 B_NR_ITEMS(tb->L[0]), -1, NULL);
194
195 /* right_delimiting_key is correct in R[0] */
196 replace_key(tb, tb->CFR[0], tb->rkey[0],
197 tb->R[0], 0);
189 198
190 return -1; 199 reiserfs_invalidate_buffer(tb, tbS0);
191 } 200 reiserfs_invalidate_buffer(tb, tb->L[0]);
192 201
193 RFALSE( tb->rnum[0] != 0, 202 return -1;
194 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 203 }
195 /* all contents of L[0] and S[0] will be in L[0] */
196 leaf_shift_left(tb, n, -1);
197 204
198 reiserfs_invalidate_buffer (tb, tbS0); 205 RFALSE(tb->rnum[0] != 0,
206 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
207 /* all contents of L[0] and S[0] will be in L[0] */
208 leaf_shift_left(tb, n, -1);
199 209
200 return 0; 210 reiserfs_invalidate_buffer(tb, tbS0);
211
212 return 0;
213 }
214 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
215
216 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
217 (tb->lnum[0] + tb->rnum[0] > n + 1),
218 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
219 tb->rnum[0], tb->lnum[0], n);
220 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
221 (tb->lbytes != -1 || tb->rbytes != -1),
222 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
223 tb->rbytes, tb->lbytes);
224 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
225 (tb->lbytes < 1 || tb->rbytes != -1),
226 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
227 tb->rbytes, tb->lbytes);
228
229 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
230 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
231
232 reiserfs_invalidate_buffer(tb, tbS0);
233
234 return 0;
201 } 235 }
202 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
203
204 RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
205 ( tb->lnum[0] + tb->rnum[0] > n+1 ),
206 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
207 tb->rnum[0], tb->lnum[0], n);
208 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
209 (tb->lbytes != -1 || tb->rbytes != -1),
210 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
211 tb->rbytes, tb->lbytes);
212 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
213 (tb->lbytes < 1 || tb->rbytes != -1),
214 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
215 tb->rbytes, tb->lbytes);
216
217 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
218 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
219
220 reiserfs_invalidate_buffer (tb, tbS0);
221 236
222 return 0; 237 if (tb->rnum[0] == -1) {
223 } 238 /* all contents of R[0] and S[0] will be in R[0] */
239 leaf_shift_right(tb, n, -1);
240 reiserfs_invalidate_buffer(tb, tbS0);
241 return 0;
242 }
224 243
225 if ( tb->rnum[0] == -1 ) { 244 RFALSE(tb->rnum[0],
226 /* all contents of R[0] and S[0] will be in R[0] */ 245 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
227 leaf_shift_right(tb, n, -1);
228 reiserfs_invalidate_buffer (tb, tbS0);
229 return 0; 246 return 0;
230 }
231
232 RFALSE( tb->rnum[0],
233 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
234 return 0;
235} 247}
236 248
237 249static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
238static int balance_leaf (struct tree_balance * tb, 250 const char *body, /* body of inserted item or bytes to paste */
239 struct item_head * ih, /* item header of inserted item (this is on little endian) */ 251 int flag, /* i - insert, d - delete, c - cut, p - paste
240 const char * body, /* body of inserted item or bytes to paste */ 252 (see comment to do_balance) */
241 int flag, /* i - insert, d - delete, c - cut, p - paste 253 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
242 (see comment to do_balance) */ 254 must be inserted into the next higher level. This insertion
243 struct item_head * insert_key, /* in our processing of one level we sometimes determine what 255 consists of a key or two keys and their corresponding
244 must be inserted into the next higher level. This insertion 256 pointers */
245 consists of a key or two keys and their corresponding 257 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
246 pointers */
247 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
248 ) 258 )
249{ 259{
250 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); 260 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
251 int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0] 261 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
252 of the affected item */ 262 of the affected item */
253 struct buffer_info bi; 263 struct buffer_info bi;
254 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 264 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
255 int snum[2]; /* number of items that will be placed 265 int snum[2]; /* number of items that will be placed
256 into S_new (includes partially shifted 266 into S_new (includes partially shifted
257 items) */ 267 items) */
258 int sbytes[2]; /* if an item is partially shifted into S_new then 268 int sbytes[2]; /* if an item is partially shifted into S_new then
259 if it is a directory item 269 if it is a directory item
260 it is the number of entries from the item that are shifted into S_new 270 it is the number of entries from the item that are shifted into S_new
261 else 271 else
262 it is the number of bytes from the item that are shifted into S_new 272 it is the number of bytes from the item that are shifted into S_new
263 */ 273 */
264 int n, i; 274 int n, i;
265 int ret_val; 275 int ret_val;
266 int pos_in_item; 276 int pos_in_item;
267 int zeros_num; 277 int zeros_num;
268 278
269 PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] ); 279 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
270 280
271 /* Make balance in case insert_size[0] < 0 */ 281 /* Make balance in case insert_size[0] < 0 */
272 if ( tb->insert_size[0] < 0 ) 282 if (tb->insert_size[0] < 0)
273 return balance_leaf_when_delete (tb, flag); 283 return balance_leaf_when_delete(tb, flag);
274 284
275 zeros_num = 0; 285 zeros_num = 0;
276 if (flag == M_INSERT && body == 0) 286 if (flag == M_INSERT && body == 0)
277 zeros_num = ih_item_len( ih ); 287 zeros_num = ih_item_len(ih);
278 288
279 pos_in_item = tb->tb_path->pos_in_item; 289 pos_in_item = tb->tb_path->pos_in_item;
280 /* for indirect item pos_in_item is measured in unformatted node 290 /* for indirect item pos_in_item is measured in unformatted node
281 pointers. Recalculate to bytes */ 291 pointers. Recalculate to bytes */
282 if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) 292 if (flag != M_INSERT
283 pos_in_item *= UNFM_P_SIZE; 293 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
284 294 pos_in_item *= UNFM_P_SIZE;
285 if ( tb->lnum[0] > 0 ) { 295
286 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 296 if (tb->lnum[0] > 0) {
287 if ( item_pos < tb->lnum[0] ) { 297 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
288 /* new item or it part falls to L[0], shift it too */ 298 if (item_pos < tb->lnum[0]) {
289 n = B_NR_ITEMS(tb->L[0]); 299 /* new item or it part falls to L[0], shift it too */
290 300 n = B_NR_ITEMS(tb->L[0]);
291 switch (flag) { 301
292 case M_INSERT: /* insert item into L[0] */ 302 switch (flag) {
293 303 case M_INSERT: /* insert item into L[0] */
294 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { 304
295 /* part of new item falls into L[0] */ 305 if (item_pos == tb->lnum[0] - 1
296 int new_item_len; 306 && tb->lbytes != -1) {
297 int version; 307 /* part of new item falls into L[0] */
298 308 int new_item_len;
299 ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); 309 int version;
300 310
301 /* Calculate item length to insert to S[0] */ 311 ret_val =
302 new_item_len = ih_item_len(ih) - tb->lbytes; 312 leaf_shift_left(tb, tb->lnum[0] - 1,
303 /* Calculate and check item length to insert to L[0] */ 313 -1);
304 put_ih_item_len(ih, ih_item_len(ih) - new_item_len ); 314
305 315 /* Calculate item length to insert to S[0] */
306 RFALSE( ih_item_len(ih) <= 0, 316 new_item_len =
307 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 317 ih_item_len(ih) - tb->lbytes;
308 ih_item_len(ih)); 318 /* Calculate and check item length to insert to L[0] */
309 319 put_ih_item_len(ih,
310 /* Insert new item into L[0] */ 320 ih_item_len(ih) -
311 bi.tb = tb; 321 new_item_len);
312 bi.bi_bh = tb->L[0]; 322
313 bi.bi_parent = tb->FL[0]; 323 RFALSE(ih_item_len(ih) <= 0,
314 bi.bi_position = get_left_neighbor_position (tb, 0); 324 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
315 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, 325 ih_item_len(ih));
316 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num); 326
317 327 /* Insert new item into L[0] */
318 version = ih_version (ih); 328 bi.tb = tb;
319 329 bi.bi_bh = tb->L[0];
320 /* Calculate key component, item length and body to insert into S[0] */ 330 bi.bi_parent = tb->FL[0];
321 set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) ); 331 bi.bi_position =
322 332 get_left_neighbor_position(tb, 0);
323 put_ih_item_len( ih, new_item_len ); 333 leaf_insert_into_buf(&bi,
324 if ( tb->lbytes > zeros_num ) { 334 n + item_pos -
325 body += (tb->lbytes - zeros_num); 335 ret_val, ih, body,
326 zeros_num = 0; 336 zeros_num >
327 } 337 ih_item_len(ih) ?
328 else 338 ih_item_len(ih) :
329 zeros_num -= tb->lbytes; 339 zeros_num);
330 340
331 RFALSE( ih_item_len(ih) <= 0, 341 version = ih_version(ih);
332 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 342
333 ih_item_len(ih)); 343 /* Calculate key component, item length and body to insert into S[0] */
334 } else { 344 set_le_ih_k_offset(ih,
335 /* new item in whole falls into L[0] */ 345 le_ih_k_offset(ih) +
336 /* Shift lnum[0]-1 items to L[0] */ 346 (tb->
337 ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes); 347 lbytes <<
338 /* Insert new item into L[0] */ 348 (is_indirect_le_ih
339 bi.tb = tb; 349 (ih) ? tb->tb_sb->
340 bi.bi_bh = tb->L[0]; 350 s_blocksize_bits -
341 bi.bi_parent = tb->FL[0]; 351 UNFM_P_SHIFT :
342 bi.bi_position = get_left_neighbor_position (tb, 0); 352 0)));
343 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num); 353
344 tb->insert_size[0] = 0; 354 put_ih_item_len(ih, new_item_len);
345 zeros_num = 0; 355 if (tb->lbytes > zeros_num) {
346 } 356 body +=
347 break; 357 (tb->lbytes - zeros_num);
348 358 zeros_num = 0;
349 case M_PASTE: /* append item in L[0] */ 359 } else
350 360 zeros_num -= tb->lbytes;
351 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { 361
352 /* we must shift the part of the appended item */ 362 RFALSE(ih_item_len(ih) <= 0,
353 if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) { 363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
354 364 ih_item_len(ih));
355 RFALSE( zeros_num, 365 } else {
356 "PAP-12090: invalid parameter in case of a directory"); 366 /* new item in whole falls into L[0] */
357 /* directory item */ 367 /* Shift lnum[0]-1 items to L[0] */
358 if ( tb->lbytes > pos_in_item ) { 368 ret_val =
359 /* new directory entry falls into L[0] */ 369 leaf_shift_left(tb, tb->lnum[0] - 1,
360 struct item_head * pasted; 370 tb->lbytes);
361 int l_pos_in_item = pos_in_item; 371 /* Insert new item into L[0] */
362 372 bi.tb = tb;
363 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ 373 bi.bi_bh = tb->L[0];
364 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); 374 bi.bi_parent = tb->FL[0];
365 if ( ret_val && ! item_pos ) { 375 bi.bi_position =
366 pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1); 376 get_left_neighbor_position(tb, 0);
367 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1); 377 leaf_insert_into_buf(&bi,
368 } 378 n + item_pos -
369 379 ret_val, ih, body,
370 /* Append given directory entry to directory item */ 380 zeros_num);
371 bi.tb = tb; 381 tb->insert_size[0] = 0;
372 bi.bi_bh = tb->L[0]; 382 zeros_num = 0;
373 bi.bi_parent = tb->FL[0];
374 bi.bi_position = get_left_neighbor_position (tb, 0);
375 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
376 tb->insert_size[0], body, zeros_num);
377
378 /* previous string prepared space for pasting new entry, following string pastes this entry */
379
380 /* when we have merge directory item, pos_in_item has been changed too */
381
382 /* paste new directory entry. 1 is entry number */
383 leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
384 (struct reiserfs_de_head *)body,
385 body + DEH_SIZE, tb->insert_size[0]
386 );
387 tb->insert_size[0] = 0;
388 } else {
389 /* new directory item doesn't fall into L[0] */
390 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
391 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
392 }
393 /* Calculate new position to append in item body */
394 pos_in_item -= tb->lbytes;
395 }
396 else {
397 /* regular object */
398 RFALSE( tb->lbytes <= 0,
399 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
400 tb->lbytes);
401 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
402 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
403 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
404
405 if ( tb->lbytes >= pos_in_item ) {
406 /* appended item will be in L[0] in whole */
407 int l_n;
408
409 /* this bytes number must be appended to the last item of L[h] */
410 l_n = tb->lbytes - pos_in_item;
411
412 /* Calculate new insert_size[0] */
413 tb->insert_size[0] -= l_n;
414
415 RFALSE( tb->insert_size[0] <= 0,
416 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
417 tb->insert_size[0]);
418 ret_val = leaf_shift_left(tb,tb->lnum[0],
419 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
420 /* Append to body of item in L[0] */
421 bi.tb = tb;
422 bi.bi_bh = tb->L[0];
423 bi.bi_parent = tb->FL[0];
424 bi.bi_position = get_left_neighbor_position (tb, 0);
425 leaf_paste_in_buffer(
426 &bi,n + item_pos - ret_val,
427 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
428 l_n,body, zeros_num > l_n ? l_n : zeros_num
429 );
430 /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
431 {
432 int version;
433 int temp_l = l_n;
434
435 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
436 "PAP-12106: item length must be 0");
437 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
438 B_N_PKEY (tb->L[0],
439 n + item_pos - ret_val)),
440 "PAP-12107: items must be of the same file");
441 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
442 n + item_pos - ret_val))) {
443 temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
444 } 383 }
445 /* update key of first item in S0 */ 384 break;
446 version = ih_version (B_N_PITEM_HEAD (tbS0, 0)); 385
447 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), 386 case M_PASTE: /* append item in L[0] */
448 le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l); 387
449 /* update left delimiting key */ 388 if (item_pos == tb->lnum[0] - 1
450 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), 389 && tb->lbytes != -1) {
451 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l); 390 /* we must shift the part of the appended item */
452 } 391 if (is_direntry_le_ih
453 392 (B_N_PITEM_HEAD(tbS0, item_pos))) {
454 /* Calculate new body, position in item and insert_size[0] */ 393
455 if ( l_n > zeros_num ) { 394 RFALSE(zeros_num,
456 body += (l_n - zeros_num); 395 "PAP-12090: invalid parameter in case of a directory");
457 zeros_num = 0; 396 /* directory item */
458 } 397 if (tb->lbytes > pos_in_item) {
459 else 398 /* new directory entry falls into L[0] */
460 zeros_num -= l_n; 399 struct item_head
461 pos_in_item = 0; 400 *pasted;
462 401 int l_pos_in_item =
463 RFALSE( comp_short_le_keys 402 pos_in_item;
464 (B_N_PKEY(tbS0,0), 403
465 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) || 404 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
466 405 ret_val =
467 !op_is_left_mergeable 406 leaf_shift_left(tb,
468 (B_N_PKEY (tbS0, 0), tbS0->b_size) || 407 tb->
469 !op_is_left_mergeable 408 lnum
470 (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), 409 [0],
471 tbS0->b_size), 410 tb->
472 "PAP-12120: item must be merge-able with left neighboring item"); 411 lbytes
473 } 412 -
474 else /* only part of the appended item will be in L[0] */ 413 1);
475 { 414 if (ret_val
476 /* Calculate position in item for append in S[0] */ 415 && !item_pos) {
477 pos_in_item -= tb->lbytes; 416 pasted =
478 417 B_N_PITEM_HEAD
479 RFALSE( pos_in_item <= 0, 418 (tb->L[0],
480 "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); 419 B_NR_ITEMS
481 420 (tb->
482 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 421 L[0]) -
483 leaf_shift_left(tb,tb->lnum[0],tb->lbytes); 422 1);
484 } 423 l_pos_in_item +=
485 } 424 I_ENTRY_COUNT
486 } 425 (pasted) -
487 else /* appended item will be in L[0] in whole */ 426 (tb->
488 { 427 lbytes -
489 struct item_head * pasted; 428 1);
490 429 }
491 if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) ) 430
492 { /* if we paste into first item of S[0] and it is left mergable */ 431 /* Append given directory entry to directory item */
493 /* then increment pos_in_item by the size of the last item in L[0] */ 432 bi.tb = tb;
494 pasted = B_N_PITEM_HEAD(tb->L[0],n-1); 433 bi.bi_bh = tb->L[0];
495 if ( is_direntry_le_ih (pasted) ) 434 bi.bi_parent =
496 pos_in_item += ih_entry_count(pasted); 435 tb->FL[0];
497 else 436 bi.bi_position =
498 pos_in_item += ih_item_len(pasted); 437 get_left_neighbor_position
438 (tb, 0);
439 leaf_paste_in_buffer
440 (&bi,
441 n + item_pos -
442 ret_val,
443 l_pos_in_item,
444 tb->insert_size[0],
445 body, zeros_num);
446
447 /* previous string prepared space for pasting new entry, following string pastes this entry */
448
449 /* when we have merge directory item, pos_in_item has been changed too */
450
451 /* paste new directory entry. 1 is entry number */
452 leaf_paste_entries(bi.
453 bi_bh,
454 n +
455 item_pos
456 -
457 ret_val,
458 l_pos_in_item,
459 1,
460 (struct
461 reiserfs_de_head
462 *)
463 body,
464 body
465 +
466 DEH_SIZE,
467 tb->
468 insert_size
469 [0]
470 );
471 tb->insert_size[0] = 0;
472 } else {
473 /* new directory item doesn't fall into L[0] */
474 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
475 leaf_shift_left(tb,
476 tb->
477 lnum[0],
478 tb->
479 lbytes);
480 }
481 /* Calculate new position to append in item body */
482 pos_in_item -= tb->lbytes;
483 } else {
484 /* regular object */
485 RFALSE(tb->lbytes <= 0,
486 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
487 tb->lbytes);
488 RFALSE(pos_in_item !=
489 ih_item_len
490 (B_N_PITEM_HEAD
491 (tbS0, item_pos)),
492 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
493 ih_item_len
494 (B_N_PITEM_HEAD
495 (tbS0, item_pos)),
496 pos_in_item);
497
498 if (tb->lbytes >= pos_in_item) {
499 /* appended item will be in L[0] in whole */
500 int l_n;
501
502 /* this bytes number must be appended to the last item of L[h] */
503 l_n =
504 tb->lbytes -
505 pos_in_item;
506
507 /* Calculate new insert_size[0] */
508 tb->insert_size[0] -=
509 l_n;
510
511 RFALSE(tb->
512 insert_size[0] <=
513 0,
514 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
515 tb->
516 insert_size[0]);
517 ret_val =
518 leaf_shift_left(tb,
519 tb->
520 lnum
521 [0],
522 ih_item_len
523 (B_N_PITEM_HEAD
524 (tbS0,
525 item_pos)));
526 /* Append to body of item in L[0] */
527 bi.tb = tb;
528 bi.bi_bh = tb->L[0];
529 bi.bi_parent =
530 tb->FL[0];
531 bi.bi_position =
532 get_left_neighbor_position
533 (tb, 0);
534 leaf_paste_in_buffer
535 (&bi,
536 n + item_pos -
537 ret_val,
538 ih_item_len
539 (B_N_PITEM_HEAD
540 (tb->L[0],
541 n + item_pos -
542 ret_val)), l_n,
543 body,
544 zeros_num >
545 l_n ? l_n :
546 zeros_num);
547 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
548 {
549 int version;
550 int temp_l =
551 l_n;
552
553 RFALSE
554 (ih_item_len
555 (B_N_PITEM_HEAD
556 (tbS0,
557 0)),
558 "PAP-12106: item length must be 0");
559 RFALSE
560 (comp_short_le_keys
561 (B_N_PKEY
562 (tbS0, 0),
563 B_N_PKEY
564 (tb->L[0],
565 n +
566 item_pos
567 -
568 ret_val)),
569 "PAP-12107: items must be of the same file");
570 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
571 temp_l =
572 l_n
573 <<
574 (tb->
575 tb_sb->
576 s_blocksize_bits
577 -
578 UNFM_P_SHIFT);
579 }
580 /* update key of first item in S0 */
581 version =
582 ih_version
583 (B_N_PITEM_HEAD
584 (tbS0, 0));
585 set_le_key_k_offset
586 (version,
587 B_N_PKEY
588 (tbS0, 0),
589 le_key_k_offset
590 (version,
591 B_N_PKEY
592 (tbS0,
593 0)) +
594 temp_l);
595 /* update left delimiting key */
596 set_le_key_k_offset
597 (version,
598 B_N_PDELIM_KEY
599 (tb->
600 CFL[0],
601 tb->
602 lkey[0]),
603 le_key_k_offset
604 (version,
605 B_N_PDELIM_KEY
606 (tb->
607 CFL[0],
608 tb->
609 lkey[0]))
610 + temp_l);
611 }
612
613 /* Calculate new body, position in item and insert_size[0] */
614 if (l_n > zeros_num) {
615 body +=
616 (l_n -
617 zeros_num);
618 zeros_num = 0;
619 } else
620 zeros_num -=
621 l_n;
622 pos_in_item = 0;
623
624 RFALSE
625 (comp_short_le_keys
626 (B_N_PKEY(tbS0, 0),
627 B_N_PKEY(tb->L[0],
628 B_NR_ITEMS
629 (tb->
630 L[0]) -
631 1))
632 ||
633 !op_is_left_mergeable
634 (B_N_PKEY(tbS0, 0),
635 tbS0->b_size)
636 ||
637 !op_is_left_mergeable
638 (B_N_PDELIM_KEY
639 (tb->CFL[0],
640 tb->lkey[0]),
641 tbS0->b_size),
642 "PAP-12120: item must be merge-able with left neighboring item");
643 } else { /* only part of the appended item will be in L[0] */
644
645 /* Calculate position in item for append in S[0] */
646 pos_in_item -=
647 tb->lbytes;
648
649 RFALSE(pos_in_item <= 0,
650 "PAP-12125: no place for paste. pos_in_item=%d",
651 pos_in_item);
652
653 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
654 leaf_shift_left(tb,
655 tb->
656 lnum[0],
657 tb->
658 lbytes);
659 }
660 }
661 } else { /* appended item will be in L[0] in whole */
662
663 struct item_head *pasted;
664
665 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
666 /* then increment pos_in_item by the size of the last item in L[0] */
667 pasted =
668 B_N_PITEM_HEAD(tb->L[0],
669 n - 1);
670 if (is_direntry_le_ih(pasted))
671 pos_in_item +=
672 ih_entry_count
673 (pasted);
674 else
675 pos_in_item +=
676 ih_item_len(pasted);
677 }
678
679 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
680 ret_val =
681 leaf_shift_left(tb, tb->lnum[0],
682 tb->lbytes);
683 /* Append to body of item in L[0] */
684 bi.tb = tb;
685 bi.bi_bh = tb->L[0];
686 bi.bi_parent = tb->FL[0];
687 bi.bi_position =
688 get_left_neighbor_position(tb, 0);
689 leaf_paste_in_buffer(&bi,
690 n + item_pos -
691 ret_val,
692 pos_in_item,
693 tb->insert_size[0],
694 body, zeros_num);
695
696 /* if appended item is directory, paste entry */
697 pasted =
698 B_N_PITEM_HEAD(tb->L[0],
699 n + item_pos -
700 ret_val);
701 if (is_direntry_le_ih(pasted))
702 leaf_paste_entries(bi.bi_bh,
703 n +
704 item_pos -
705 ret_val,
706 pos_in_item,
707 1,
708 (struct
709 reiserfs_de_head
710 *)body,
711 body +
712 DEH_SIZE,
713 tb->
714 insert_size
715 [0]
716 );
717 /* if appended item is indirect item, put unformatted node into un list */
718 if (is_indirect_le_ih(pasted))
719 set_ih_free_space(pasted, 0);
720 tb->insert_size[0] = 0;
721 zeros_num = 0;
722 }
723 break;
724 default: /* cases d and t */
725 reiserfs_panic(tb->tb_sb,
726 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
727 (flag ==
728 M_DELETE) ? "DELETE" : ((flag ==
729 M_CUT)
730 ? "CUT"
731 :
732 "UNKNOWN"),
733 flag);
499 } 734 }
500 735 } else {
501 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 736 /* new item doesn't fall into L[0] */
502 ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes); 737 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
503 /* Append to body of item in L[0] */
504 bi.tb = tb;
505 bi.bi_bh = tb->L[0];
506 bi.bi_parent = tb->FL[0];
507 bi.bi_position = get_left_neighbor_position (tb, 0);
508 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
509 body, zeros_num);
510
511 /* if appended item is directory, paste entry */
512 pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
513 if (is_direntry_le_ih (pasted))
514 leaf_paste_entries (
515 bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
516 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
517 );
518 /* if appended item is indirect item, put unformatted node into un list */
519 if (is_indirect_le_ih (pasted))
520 set_ih_free_space (pasted, 0);
521 tb->insert_size[0] = 0;
522 zeros_num = 0;
523 } 738 }
524 break;
525 default: /* cases d and t */
526 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
527 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
528 }
529 } else {
530 /* new item doesn't fall into L[0] */
531 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
532 } 739 }
533 } /* tb->lnum[0] > 0 */
534 740
535 /* Calculate new item position */ 741 /* tb->lnum[0] > 0 */
536 item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0)); 742 /* Calculate new item position */
537 743 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
538 if ( tb->rnum[0] > 0 ) { 744
539 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 745 if (tb->rnum[0] > 0) {
540 n = B_NR_ITEMS(tbS0); 746 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
541 switch ( flag ) { 747 n = B_NR_ITEMS(tbS0);
542 748 switch (flag) {
543 case M_INSERT: /* insert item */ 749
544 if ( n - tb->rnum[0] < item_pos ) 750 case M_INSERT: /* insert item */
545 { /* new item or its part falls to R[0] */ 751 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
546 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) 752 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
547 { /* part of new item falls into R[0] */ 753 loff_t old_key_comp, old_len,
548 loff_t old_key_comp, old_len, r_zeros_number; 754 r_zeros_number;
549 const char * r_body; 755 const char *r_body;
550 int version; 756 int version;
551 loff_t offset; 757 loff_t offset;
552 758
553 leaf_shift_right(tb,tb->rnum[0]-1,-1); 759 leaf_shift_right(tb, tb->rnum[0] - 1,
554 760 -1);
555 version = ih_version(ih); 761
556 /* Remember key component and item length */ 762 version = ih_version(ih);
557 old_key_comp = le_ih_k_offset( ih ); 763 /* Remember key component and item length */
558 old_len = ih_item_len(ih); 764 old_key_comp = le_ih_k_offset(ih);
559 765 old_len = ih_item_len(ih);
560 /* Calculate key component and item length to insert into R[0] */ 766
561 offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)); 767 /* Calculate key component and item length to insert into R[0] */
562 set_le_ih_k_offset( ih, offset ); 768 offset =
563 put_ih_item_len( ih, tb->rbytes); 769 le_ih_k_offset(ih) +
564 /* Insert part of the item into R[0] */ 770 ((old_len -
565 bi.tb = tb; 771 tb->
566 bi.bi_bh = tb->R[0]; 772 rbytes) << (is_indirect_le_ih(ih)
567 bi.bi_parent = tb->FR[0]; 773 ? tb->tb_sb->
568 bi.bi_position = get_right_neighbor_position (tb, 0); 774 s_blocksize_bits -
569 if ( (old_len - tb->rbytes) > zeros_num ) { 775 UNFM_P_SHIFT : 0));
570 r_zeros_number = 0; 776 set_le_ih_k_offset(ih, offset);
571 r_body = body + (old_len - tb->rbytes) - zeros_num; 777 put_ih_item_len(ih, tb->rbytes);
572 } 778 /* Insert part of the item into R[0] */
573 else { 779 bi.tb = tb;
574 r_body = body; 780 bi.bi_bh = tb->R[0];
575 r_zeros_number = zeros_num - (old_len - tb->rbytes); 781 bi.bi_parent = tb->FR[0];
576 zeros_num -= r_zeros_number; 782 bi.bi_position =
577 } 783 get_right_neighbor_position(tb, 0);
578 784 if ((old_len - tb->rbytes) > zeros_num) {
579 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); 785 r_zeros_number = 0;
580 786 r_body =
581 /* Replace right delimiting key by first key in R[0] */ 787 body + (old_len -
582 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); 788 tb->rbytes) -
583 789 zeros_num;
584 /* Calculate key component and item length to insert into S[0] */ 790 } else {
585 set_le_ih_k_offset( ih, old_key_comp ); 791 r_body = body;
586 put_ih_item_len( ih, old_len - tb->rbytes ); 792 r_zeros_number =
587 793 zeros_num - (old_len -
588 tb->insert_size[0] -= tb->rbytes; 794 tb->rbytes);
795 zeros_num -= r_zeros_number;
796 }
797
798 leaf_insert_into_buf(&bi, 0, ih, r_body,
799 r_zeros_number);
800
801 /* Replace right delimiting key by first key in R[0] */
802 replace_key(tb, tb->CFR[0], tb->rkey[0],
803 tb->R[0], 0);
804
805 /* Calculate key component and item length to insert into S[0] */
806 set_le_ih_k_offset(ih, old_key_comp);
807 put_ih_item_len(ih,
808 old_len - tb->rbytes);
809
810 tb->insert_size[0] -= tb->rbytes;
811
812 } else { /* whole new item falls into R[0] */
813
814 /* Shift rnum[0]-1 items to R[0] */
815 ret_val =
816 leaf_shift_right(tb,
817 tb->rnum[0] - 1,
818 tb->rbytes);
819 /* Insert new item into R[0] */
820 bi.tb = tb;
821 bi.bi_bh = tb->R[0];
822 bi.bi_parent = tb->FR[0];
823 bi.bi_position =
824 get_right_neighbor_position(tb, 0);
825 leaf_insert_into_buf(&bi,
826 item_pos - n +
827 tb->rnum[0] - 1,
828 ih, body,
829 zeros_num);
830
831 if (item_pos - n + tb->rnum[0] - 1 == 0) {
832 replace_key(tb, tb->CFR[0],
833 tb->rkey[0],
834 tb->R[0], 0);
835
836 }
837 zeros_num = tb->insert_size[0] = 0;
838 }
839 } else { /* new item or part of it doesn't fall into R[0] */
589 840
590 } 841 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
591 else /* whole new item falls into R[0] */
592 {
593 /* Shift rnum[0]-1 items to R[0] */
594 ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
595 /* Insert new item into R[0] */
596 bi.tb = tb;
597 bi.bi_bh = tb->R[0];
598 bi.bi_parent = tb->FR[0];
599 bi.bi_position = get_right_neighbor_position (tb, 0);
600 leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
601
602 if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
603 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
604
605 }
606 zeros_num = tb->insert_size[0] = 0;
607 }
608 }
609 else /* new item or part of it doesn't fall into R[0] */
610 {
611 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
612 }
613 break;
614
615 case M_PASTE: /* append item */
616
617 if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */
618 {
619 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
620 { /* we must shift the part of the appended item */
621 if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
622 { /* we append to directory item */
623 int entry_count;
624
625 RFALSE( zeros_num,
626 "PAP-12145: invalid parameter in case of a directory");
627 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
628 if ( entry_count - tb->rbytes < pos_in_item )
629 /* new directory entry falls into R[0] */
630 {
631 int paste_entry_position;
632
633 RFALSE( tb->rbytes - 1 >= entry_count ||
634 ! tb->insert_size[0],
635 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
636 tb->rbytes, entry_count);
637 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
638 leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
639 /* Paste given directory entry to directory item */
640 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
641 bi.tb = tb;
642 bi.bi_bh = tb->R[0];
643 bi.bi_parent = tb->FR[0];
644 bi.bi_position = get_right_neighbor_position (tb, 0);
645 leaf_paste_in_buffer (&bi, 0, paste_entry_position,
646 tb->insert_size[0],body,zeros_num);
647 /* paste entry */
648 leaf_paste_entries (
649 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
650 body + DEH_SIZE, tb->insert_size[0]
651 );
652
653 if ( paste_entry_position == 0 ) {
654 /* change delimiting keys */
655 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
656 }
657
658 tb->insert_size[0] = 0;
659 pos_in_item++;
660 }
661 else /* new directory entry doesn't fall into R[0] */
662 {
663 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
664 }
665 }
666 else /* regular object */
667 {
668 int n_shift, n_rem, r_zeros_number;
669 const char * r_body;
670
671 /* Calculate number of bytes which must be shifted from appended item */
672 if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
673 n_shift = 0;
674
675 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
676 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
677 pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
678
679 leaf_shift_right(tb,tb->rnum[0],n_shift);
680 /* Calculate number of bytes which must remain in body after appending to R[0] */
681 if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
682 n_rem = 0;
683
684 {
685 int version;
686 unsigned long temp_rem = n_rem;
687
688 version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
689 if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
690 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
691 UNFM_P_SHIFT);
692 }
693 set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
694 le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
695 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
696 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
697 } 842 }
843 break;
844
845 case M_PASTE: /* append item */
846
847 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
848 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
849 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
850 int entry_count;
851
852 RFALSE(zeros_num,
853 "PAP-12145: invalid parameter in case of a directory");
854 entry_count =
855 I_ENTRY_COUNT(B_N_PITEM_HEAD
856 (tbS0,
857 item_pos));
858 if (entry_count - tb->rbytes <
859 pos_in_item)
860 /* new directory entry falls into R[0] */
861 {
862 int paste_entry_position;
863
864 RFALSE(tb->rbytes - 1 >=
865 entry_count
866 || !tb->
867 insert_size[0],
868 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
869 tb->rbytes,
870 entry_count);
871 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
872 leaf_shift_right(tb,
873 tb->
874 rnum
875 [0],
876 tb->
877 rbytes
878 - 1);
879 /* Paste given directory entry to directory item */
880 paste_entry_position =
881 pos_in_item -
882 entry_count +
883 tb->rbytes - 1;
884 bi.tb = tb;
885 bi.bi_bh = tb->R[0];
886 bi.bi_parent =
887 tb->FR[0];
888 bi.bi_position =
889 get_right_neighbor_position
890 (tb, 0);
891 leaf_paste_in_buffer
892 (&bi, 0,
893 paste_entry_position,
894 tb->insert_size[0],
895 body, zeros_num);
896 /* paste entry */
897 leaf_paste_entries(bi.
898 bi_bh,
899 0,
900 paste_entry_position,
901 1,
902 (struct
903 reiserfs_de_head
904 *)
905 body,
906 body
907 +
908 DEH_SIZE,
909 tb->
910 insert_size
911 [0]
912 );
913
914 if (paste_entry_position
915 == 0) {
916 /* change delimiting keys */
917 replace_key(tb,
918 tb->
919 CFR
920 [0],
921 tb->
922 rkey
923 [0],
924 tb->
925 R
926 [0],
927 0);
928 }
929
930 tb->insert_size[0] = 0;
931 pos_in_item++;
932 } else { /* new directory entry doesn't fall into R[0] */
933
934 leaf_shift_right(tb,
935 tb->
936 rnum
937 [0],
938 tb->
939 rbytes);
940 }
941 } else { /* regular object */
942
943 int n_shift, n_rem,
944 r_zeros_number;
945 const char *r_body;
946
947 /* Calculate number of bytes which must be shifted from appended item */
948 if ((n_shift =
949 tb->rbytes -
950 tb->insert_size[0]) < 0)
951 n_shift = 0;
952
953 RFALSE(pos_in_item !=
954 ih_item_len
955 (B_N_PITEM_HEAD
956 (tbS0, item_pos)),
957 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
958 pos_in_item,
959 ih_item_len
960 (B_N_PITEM_HEAD
961 (tbS0, item_pos)));
962
963 leaf_shift_right(tb,
964 tb->rnum[0],
965 n_shift);
966 /* Calculate number of bytes which must remain in body after appending to R[0] */
967 if ((n_rem =
968 tb->insert_size[0] -
969 tb->rbytes) < 0)
970 n_rem = 0;
971
972 {
973 int version;
974 unsigned long temp_rem =
975 n_rem;
976
977 version =
978 ih_version
979 (B_N_PITEM_HEAD
980 (tb->R[0], 0));
981 if (is_indirect_le_key
982 (version,
983 B_N_PKEY(tb->R[0],
984 0))) {
985 temp_rem =
986 n_rem <<
987 (tb->tb_sb->
988 s_blocksize_bits
989 -
990 UNFM_P_SHIFT);
991 }
992 set_le_key_k_offset
993 (version,
994 B_N_PKEY(tb->R[0],
995 0),
996 le_key_k_offset
997 (version,
998 B_N_PKEY(tb->R[0],
999 0)) +
1000 temp_rem);
1001 set_le_key_k_offset
1002 (version,
1003 B_N_PDELIM_KEY(tb->
1004 CFR
1005 [0],
1006 tb->
1007 rkey
1008 [0]),
1009 le_key_k_offset
1010 (version,
1011 B_N_PDELIM_KEY
1012 (tb->CFR[0],
1013 tb->rkey[0])) +
1014 temp_rem);
1015 }
698/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; 1016/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
699 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 1017 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
700 do_balance_mark_internal_dirty (tb, tb->CFR[0], 0); 1018 do_balance_mark_internal_dirty
701 1019 (tb, tb->CFR[0], 0);
702 /* Append part of body into R[0] */ 1020
703 bi.tb = tb; 1021 /* Append part of body into R[0] */
704 bi.bi_bh = tb->R[0]; 1022 bi.tb = tb;
705 bi.bi_parent = tb->FR[0]; 1023 bi.bi_bh = tb->R[0];
706 bi.bi_position = get_right_neighbor_position (tb, 0); 1024 bi.bi_parent = tb->FR[0];
707 if ( n_rem > zeros_num ) { 1025 bi.bi_position =
708 r_zeros_number = 0; 1026 get_right_neighbor_position
709 r_body = body + n_rem - zeros_num; 1027 (tb, 0);
710 } 1028 if (n_rem > zeros_num) {
711 else { 1029 r_zeros_number = 0;
712 r_body = body; 1030 r_body =
713 r_zeros_number = zeros_num - n_rem; 1031 body + n_rem -
714 zeros_num -= r_zeros_number; 1032 zeros_num;
715 } 1033 } else {
716 1034 r_body = body;
717 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); 1035 r_zeros_number =
718 1036 zeros_num - n_rem;
719 if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { 1037 zeros_num -=
1038 r_zeros_number;
1039 }
1040
1041 leaf_paste_in_buffer(&bi, 0,
1042 n_shift,
1043 tb->
1044 insert_size
1045 [0] -
1046 n_rem,
1047 r_body,
1048 r_zeros_number);
1049
1050 if (is_indirect_le_ih
1051 (B_N_PITEM_HEAD
1052 (tb->R[0], 0))) {
720#if 0 1053#if 0
721 RFALSE( n_rem, 1054 RFALSE(n_rem,
722 "PAP-12160: paste more than one unformatted node pointer"); 1055 "PAP-12160: paste more than one unformatted node pointer");
723#endif 1056#endif
724 set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0); 1057 set_ih_free_space
725 } 1058 (B_N_PITEM_HEAD
726 tb->insert_size[0] = n_rem; 1059 (tb->R[0], 0), 0);
727 if ( ! n_rem ) 1060 }
728 pos_in_item ++; 1061 tb->insert_size[0] = n_rem;
729 } 1062 if (!n_rem)
730 } 1063 pos_in_item++;
731 else /* pasted item in whole falls into R[0] */ 1064 }
732 { 1065 } else { /* pasted item in whole falls into R[0] */
733 struct item_head * pasted; 1066
1067 struct item_head *pasted;
1068
1069 ret_val =
1070 leaf_shift_right(tb, tb->rnum[0],
1071 tb->rbytes);
1072 /* append item in R[0] */
1073 if (pos_in_item >= 0) {
1074 bi.tb = tb;
1075 bi.bi_bh = tb->R[0];
1076 bi.bi_parent = tb->FR[0];
1077 bi.bi_position =
1078 get_right_neighbor_position
1079 (tb, 0);
1080 leaf_paste_in_buffer(&bi,
1081 item_pos -
1082 n +
1083 tb->
1084 rnum[0],
1085 pos_in_item,
1086 tb->
1087 insert_size
1088 [0], body,
1089 zeros_num);
1090 }
1091
1092 /* paste new entry, if item is directory item */
1093 pasted =
1094 B_N_PITEM_HEAD(tb->R[0],
1095 item_pos - n +
1096 tb->rnum[0]);
1097 if (is_direntry_le_ih(pasted)
1098 && pos_in_item >= 0) {
1099 leaf_paste_entries(bi.bi_bh,
1100 item_pos -
1101 n +
1102 tb->rnum[0],
1103 pos_in_item,
1104 1,
1105 (struct
1106 reiserfs_de_head
1107 *)body,
1108 body +
1109 DEH_SIZE,
1110 tb->
1111 insert_size
1112 [0]
1113 );
1114 if (!pos_in_item) {
1115
1116 RFALSE(item_pos - n +
1117 tb->rnum[0],
1118 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1119
1120 /* update delimiting keys */
1121 replace_key(tb,
1122 tb->CFR[0],
1123 tb->rkey[0],
1124 tb->R[0],
1125 0);
1126 }
1127 }
1128
1129 if (is_indirect_le_ih(pasted))
1130 set_ih_free_space(pasted, 0);
1131 zeros_num = tb->insert_size[0] = 0;
1132 }
1133 } else { /* new item doesn't fall into R[0] */
734 1134
735 ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes); 1135 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
736 /* append item in R[0] */
737 if ( pos_in_item >= 0 ) {
738 bi.tb = tb;
739 bi.bi_bh = tb->R[0];
740 bi.bi_parent = tb->FR[0];
741 bi.bi_position = get_right_neighbor_position (tb, 0);
742 leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
743 tb->insert_size[0],body, zeros_num);
744 }
745
746 /* paste new entry, if item is directory item */
747 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
748 if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
749 leaf_paste_entries (
750 bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
751 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
752 );
753 if ( ! pos_in_item ) {
754
755 RFALSE( item_pos - n + tb->rnum[0],
756 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
757
758 /* update delimiting keys */
759 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
760 } 1136 }
761 } 1137 break;
762 1138 default: /* cases d and t */
763 if (is_indirect_le_ih (pasted)) 1139 reiserfs_panic(tb->tb_sb,
764 set_ih_free_space (pasted, 0); 1140 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
765 zeros_num = tb->insert_size[0] = 0; 1141 (flag ==
1142 M_DELETE) ? "DELETE" : ((flag ==
1143 M_CUT) ? "CUT"
1144 : "UNKNOWN"),
1145 flag);
766 } 1146 }
767 }
768 else /* new item doesn't fall into R[0] */
769 {
770 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
771 }
772 break;
773 default: /* cases d and t */
774 reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
775 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
776 }
777
778 } /* tb->rnum[0] > 0 */
779
780
781 RFALSE( tb->blknum[0] > 3,
782 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
783 RFALSE( tb->blknum[0] < 0,
784 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
785
786 /* if while adding to a node we discover that it is possible to split
787 it in two, and merge the left part into the left neighbor and the
788 right part into the right neighbor, eliminating the node */
789 if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
790
791 RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
792 "PAP-12190: lnum and rnum must not be zero");
793 /* if insertion was done before 0-th position in R[0], right
794 delimiting key of the tb->L[0]'s and left delimiting key are
795 not set correctly */
796 if (tb->CFL[0]) {
797 if (!tb->CFR[0])
798 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
799 copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
800 do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
801 }
802
803 reiserfs_invalidate_buffer(tb,tbS0);
804 return 0;
805 }
806
807
808 /* Fill new nodes that appear in place of S[0] */
809 1147
810 /* I am told that this copying is because we need an array to enable 1148 }
811 the looping code. -Hans */
812 snum[0] = tb->s1num,
813 snum[1] = tb->s2num;
814 sbytes[0] = tb->s1bytes;
815 sbytes[1] = tb->s2bytes;
816 for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
817
818 RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
819 1149
820 /* here we shift from S to S_new nodes */ 1150 /* tb->rnum[0] > 0 */
1151 RFALSE(tb->blknum[0] > 3,
1152 "PAP-12180: blknum can not be %d. It must be <= 3",
1153 tb->blknum[0]);
1154 RFALSE(tb->blknum[0] < 0,
1155 "PAP-12185: blknum can not be %d. It must be >= 0",
1156 tb->blknum[0]);
1157
1158 /* if while adding to a node we discover that it is possible to split
1159 it in two, and merge the left part into the left neighbor and the
1160 right part into the right neighbor, eliminating the node */
1161 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1162
1163 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1164 "PAP-12190: lnum and rnum must not be zero");
1165 /* if insertion was done before 0-th position in R[0], right
1166 delimiting key of the tb->L[0]'s and left delimiting key are
1167 not set correctly */
1168 if (tb->CFL[0]) {
1169 if (!tb->CFR[0])
1170 reiserfs_panic(tb->tb_sb,
1171 "vs-12195: balance_leaf: CFR not initialized");
1172 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1173 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1174 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1175 }
821 1176
822 S_new[i] = get_FEB(tb); 1177 reiserfs_invalidate_buffer(tb, tbS0);
1178 return 0;
1179 }
823 1180
824 /* initialized block type and tree level */ 1181 /* Fill new nodes that appear in place of S[0] */
825 set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL ); 1182
1183 /* I am told that this copying is because we need an array to enable
1184 the looping code. -Hans */
1185 snum[0] = tb->s1num, snum[1] = tb->s2num;
1186 sbytes[0] = tb->s1bytes;
1187 sbytes[1] = tb->s2bytes;
1188 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1189
1190 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1191 snum[i]);
1192
1193 /* here we shift from S to S_new nodes */
1194
1195 S_new[i] = get_FEB(tb);
1196
1197 /* initialized block type and tree level */
1198 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
1199
1200 n = B_NR_ITEMS(tbS0);
1201
1202 switch (flag) {
1203 case M_INSERT: /* insert item */
1204
1205 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1206 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1207 int old_key_comp, old_len,
1208 r_zeros_number;
1209 const char *r_body;
1210 int version;
1211
1212 /* Move snum[i]-1 items from S[0] to S_new[i] */
1213 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1214 snum[i] - 1, -1,
1215 S_new[i]);
1216 /* Remember key component and item length */
1217 version = ih_version(ih);
1218 old_key_comp = le_ih_k_offset(ih);
1219 old_len = ih_item_len(ih);
1220
1221 /* Calculate key component and item length to insert into S_new[i] */
1222 set_le_ih_k_offset(ih,
1223 le_ih_k_offset(ih) +
1224 ((old_len -
1225 sbytes[i]) <<
1226 (is_indirect_le_ih
1227 (ih) ? tb->tb_sb->
1228 s_blocksize_bits -
1229 UNFM_P_SHIFT :
1230 0)));
1231
1232 put_ih_item_len(ih, sbytes[i]);
1233
1234 /* Insert part of the item into S_new[i] before 0-th item */
1235 bi.tb = tb;
1236 bi.bi_bh = S_new[i];
1237 bi.bi_parent = NULL;
1238 bi.bi_position = 0;
1239
1240 if ((old_len - sbytes[i]) > zeros_num) {
1241 r_zeros_number = 0;
1242 r_body =
1243 body + (old_len -
1244 sbytes[i]) -
1245 zeros_num;
1246 } else {
1247 r_body = body;
1248 r_zeros_number =
1249 zeros_num - (old_len -
1250 sbytes[i]);
1251 zeros_num -= r_zeros_number;
1252 }
1253
1254 leaf_insert_into_buf(&bi, 0, ih, r_body,
1255 r_zeros_number);
1256
1257 /* Calculate key component and item length to insert into S[i] */
1258 set_le_ih_k_offset(ih, old_key_comp);
1259 put_ih_item_len(ih,
1260 old_len - sbytes[i]);
1261 tb->insert_size[0] -= sbytes[i];
1262 } else { /* whole new item falls into S_new[i] */
1263
1264 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1265 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1266 snum[i] - 1, sbytes[i],
1267 S_new[i]);
1268
1269 /* Insert new item into S_new[i] */
1270 bi.tb = tb;
1271 bi.bi_bh = S_new[i];
1272 bi.bi_parent = NULL;
1273 bi.bi_position = 0;
1274 leaf_insert_into_buf(&bi,
1275 item_pos - n +
1276 snum[i] - 1, ih,
1277 body, zeros_num);
1278
1279 zeros_num = tb->insert_size[0] = 0;
1280 }
1281 }
826 1282
1283 else { /* new item or it part don't falls into S_new[i] */
827 1284
828 n = B_NR_ITEMS(tbS0); 1285 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
829 1286 snum[i], sbytes[i], S_new[i]);
830 switch (flag) {
831 case M_INSERT: /* insert item */
832
833 if ( n - snum[i] < item_pos )
834 { /* new item or it's part falls to first new node S_new[i]*/
835 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
836 { /* part of new item falls into S_new[i] */
837 int old_key_comp, old_len, r_zeros_number;
838 const char * r_body;
839 int version;
840
841 /* Move snum[i]-1 items from S[0] to S_new[i] */
842 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
843 /* Remember key component and item length */
844 version = ih_version (ih);
845 old_key_comp = le_ih_k_offset( ih );
846 old_len = ih_item_len(ih);
847
848 /* Calculate key component and item length to insert into S_new[i] */
849 set_le_ih_k_offset( ih,
850 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
851
852 put_ih_item_len( ih, sbytes[i] );
853
854 /* Insert part of the item into S_new[i] before 0-th item */
855 bi.tb = tb;
856 bi.bi_bh = S_new[i];
857 bi.bi_parent = NULL;
858 bi.bi_position = 0;
859
860 if ( (old_len - sbytes[i]) > zeros_num ) {
861 r_zeros_number = 0;
862 r_body = body + (old_len - sbytes[i]) - zeros_num;
863 }
864 else {
865 r_body = body;
866 r_zeros_number = zeros_num - (old_len - sbytes[i]);
867 zeros_num -= r_zeros_number;
868 }
869
870 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
871
872 /* Calculate key component and item length to insert into S[i] */
873 set_le_ih_k_offset( ih, old_key_comp );
874 put_ih_item_len( ih, old_len - sbytes[i] );
875 tb->insert_size[0] -= sbytes[i];
876 }
877 else /* whole new item falls into S_new[i] */
878 {
879 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
880 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
881
882 /* Insert new item into S_new[i] */
883 bi.tb = tb;
884 bi.bi_bh = S_new[i];
885 bi.bi_parent = NULL;
886 bi.bi_position = 0;
887 leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
888
889 zeros_num = tb->insert_size[0] = 0;
890 }
891 }
892
893 else /* new item or it part don't falls into S_new[i] */
894 {
895 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
896 }
897 break;
898
899 case M_PASTE: /* append item */
900
901 if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */
902 {
903 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
904 { /* we must shift part of the appended item */
905 struct item_head * aux_ih;
906
907 RFALSE( ih, "PAP-12210: ih must be 0");
908
909 if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
910 /* we append to directory item */
911
912 int entry_count;
913
914 entry_count = ih_entry_count(aux_ih);
915
916 if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
917 /* new directory entry falls into S_new[i] */
918
919 RFALSE( ! tb->insert_size[0],
920 "PAP-12215: insert_size is already 0");
921 RFALSE( sbytes[i] - 1 >= entry_count,
922 "PAP-12220: there are no so much entries (%d), only %d",
923 sbytes[i] - 1, entry_count);
924
925 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
926 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
927 /* Paste given directory entry to directory item */
928 bi.tb = tb;
929 bi.bi_bh = S_new[i];
930 bi.bi_parent = NULL;
931 bi.bi_position = 0;
932 leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933 tb->insert_size[0], body,zeros_num);
934 /* paste new directory entry */
935 leaf_paste_entries (
936 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
937 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
938 tb->insert_size[0]
939 );
940 tb->insert_size[0] = 0;
941 pos_in_item++;
942 } else { /* new directory entry doesn't fall into S_new[i] */
943 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
944 } 1287 }
945 } 1288 break;
946 else /* regular object */ 1289
947 { 1290 case M_PASTE: /* append item */
948 int n_shift, n_rem, r_zeros_number; 1291
949 const char * r_body; 1292 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
950 1293 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
951 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) || 1294 struct item_head *aux_ih;
952 tb->insert_size[0] <= 0, 1295
953 "PAP-12225: item too short or insert_size <= 0"); 1296 RFALSE(ih, "PAP-12210: ih must be 0");
954 1297
955 /* Calculate number of bytes which must be shifted from appended item */ 1298 if (is_direntry_le_ih
956 n_shift = sbytes[i] - tb->insert_size[0]; 1299 (aux_ih =
957 if ( n_shift < 0 ) 1300 B_N_PITEM_HEAD(tbS0, item_pos))) {
958 n_shift = 0; 1301 /* we append to directory item */
959 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); 1302
960 1303 int entry_count;
961 /* Calculate number of bytes which must remain in body after append to S_new[i] */ 1304
962 n_rem = tb->insert_size[0] - sbytes[i]; 1305 entry_count =
963 if ( n_rem < 0 ) 1306 ih_entry_count(aux_ih);
964 n_rem = 0; 1307
965 /* Append part of body into S_new[0] */ 1308 if (entry_count - sbytes[i] <
966 bi.tb = tb; 1309 pos_in_item
967 bi.bi_bh = S_new[i]; 1310 && pos_in_item <=
968 bi.bi_parent = NULL; 1311 entry_count) {
969 bi.bi_position = 0; 1312 /* new directory entry falls into S_new[i] */
1313
1314 RFALSE(!tb->
1315 insert_size[0],
1316 "PAP-12215: insert_size is already 0");
1317 RFALSE(sbytes[i] - 1 >=
1318 entry_count,
1319 "PAP-12220: there are no so much entries (%d), only %d",
1320 sbytes[i] - 1,
1321 entry_count);
1322
1323 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1324 leaf_move_items
1325 (LEAF_FROM_S_TO_SNEW,
1326 tb, snum[i],
1327 sbytes[i] - 1,
1328 S_new[i]);
1329 /* Paste given directory entry to directory item */
1330 bi.tb = tb;
1331 bi.bi_bh = S_new[i];
1332 bi.bi_parent = NULL;
1333 bi.bi_position = 0;
1334 leaf_paste_in_buffer
1335 (&bi, 0,
1336 pos_in_item -
1337 entry_count +
1338 sbytes[i] - 1,
1339 tb->insert_size[0],
1340 body, zeros_num);
1341 /* paste new directory entry */
1342 leaf_paste_entries(bi.
1343 bi_bh,
1344 0,
1345 pos_in_item
1346 -
1347 entry_count
1348 +
1349 sbytes
1350 [i] -
1351 1, 1,
1352 (struct
1353 reiserfs_de_head
1354 *)
1355 body,
1356 body
1357 +
1358 DEH_SIZE,
1359 tb->
1360 insert_size
1361 [0]
1362 );
1363 tb->insert_size[0] = 0;
1364 pos_in_item++;
1365 } else { /* new directory entry doesn't fall into S_new[i] */
1366 leaf_move_items
1367 (LEAF_FROM_S_TO_SNEW,
1368 tb, snum[i],
1369 sbytes[i],
1370 S_new[i]);
1371 }
1372 } else { /* regular object */
1373
1374 int n_shift, n_rem,
1375 r_zeros_number;
1376 const char *r_body;
1377
1378 RFALSE(pos_in_item !=
1379 ih_item_len
1380 (B_N_PITEM_HEAD
1381 (tbS0, item_pos))
1382 || tb->insert_size[0] <=
1383 0,
1384 "PAP-12225: item too short or insert_size <= 0");
1385
1386 /* Calculate number of bytes which must be shifted from appended item */
1387 n_shift =
1388 sbytes[i] -
1389 tb->insert_size[0];
1390 if (n_shift < 0)
1391 n_shift = 0;
1392 leaf_move_items
1393 (LEAF_FROM_S_TO_SNEW, tb,
1394 snum[i], n_shift,
1395 S_new[i]);
1396
1397 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1398 n_rem =
1399 tb->insert_size[0] -
1400 sbytes[i];
1401 if (n_rem < 0)
1402 n_rem = 0;
1403 /* Append part of body into S_new[0] */
1404 bi.tb = tb;
1405 bi.bi_bh = S_new[i];
1406 bi.bi_parent = NULL;
1407 bi.bi_position = 0;
1408
1409 if (n_rem > zeros_num) {
1410 r_zeros_number = 0;
1411 r_body =
1412 body + n_rem -
1413 zeros_num;
1414 } else {
1415 r_body = body;
1416 r_zeros_number =
1417 zeros_num - n_rem;
1418 zeros_num -=
1419 r_zeros_number;
1420 }
1421
1422 leaf_paste_in_buffer(&bi, 0,
1423 n_shift,
1424 tb->
1425 insert_size
1426 [0] -
1427 n_rem,
1428 r_body,
1429 r_zeros_number);
1430 {
1431 struct item_head *tmp;
1432
1433 tmp =
1434 B_N_PITEM_HEAD(S_new
1435 [i],
1436 0);
1437 if (is_indirect_le_ih
1438 (tmp)) {
1439 set_ih_free_space
1440 (tmp, 0);
1441 set_le_ih_k_offset
1442 (tmp,
1443 le_ih_k_offset
1444 (tmp) +
1445 (n_rem <<
1446 (tb->
1447 tb_sb->
1448 s_blocksize_bits
1449 -
1450 UNFM_P_SHIFT)));
1451 } else {
1452 set_le_ih_k_offset
1453 (tmp,
1454 le_ih_k_offset
1455 (tmp) +
1456 n_rem);
1457 }
1458 }
1459
1460 tb->insert_size[0] = n_rem;
1461 if (!n_rem)
1462 pos_in_item++;
1463 }
1464 } else
1465 /* item falls wholly into S_new[i] */
1466 {
1467 int ret_val;
1468 struct item_head *pasted;
970 1469
971 if ( n_rem > zeros_num ) { 1470#ifdef CONFIG_REISERFS_CHECK
972 r_zeros_number = 0; 1471 struct item_head *ih =
973 r_body = body + n_rem - zeros_num; 1472 B_N_PITEM_HEAD(tbS0, item_pos);
974 } 1473
975 else { 1474 if (!is_direntry_le_ih(ih)
976 r_body = body; 1475 && (pos_in_item != ih_item_len(ih)
977 r_zeros_number = zeros_num - n_rem; 1476 || tb->insert_size[0] <= 0))
978 zeros_num -= r_zeros_number; 1477 reiserfs_panic(tb->tb_sb,
1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1479#endif /* CONFIG_REISERFS_CHECK */
1480
1481 ret_val =
1482 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1483 tb, snum[i],
1484 sbytes[i],
1485 S_new[i]);
1486
1487 RFALSE(ret_val,
1488 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1489 ret_val);
1490
1491 /* paste into item */
1492 bi.tb = tb;
1493 bi.bi_bh = S_new[i];
1494 bi.bi_parent = NULL;
1495 bi.bi_position = 0;
1496 leaf_paste_in_buffer(&bi,
1497 item_pos - n +
1498 snum[i],
1499 pos_in_item,
1500 tb->insert_size[0],
1501 body, zeros_num);
1502
1503 pasted =
1504 B_N_PITEM_HEAD(S_new[i],
1505 item_pos - n +
1506 snum[i]);
1507 if (is_direntry_le_ih(pasted)) {
1508 leaf_paste_entries(bi.bi_bh,
1509 item_pos -
1510 n + snum[i],
1511 pos_in_item,
1512 1,
1513 (struct
1514 reiserfs_de_head
1515 *)body,
1516 body +
1517 DEH_SIZE,
1518 tb->
1519 insert_size
1520 [0]
1521 );
1522 }
1523
1524 /* if we paste to indirect item update ih_free_space */
1525 if (is_indirect_le_ih(pasted))
1526 set_ih_free_space(pasted, 0);
1527 zeros_num = tb->insert_size[0] = 0;
1528 }
979 } 1529 }
980 1530
981 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number); 1531 else { /* pasted item doesn't fall into S_new[i] */
982 {
983 struct item_head * tmp;
984
985 tmp = B_N_PITEM_HEAD(S_new[i],0);
986 if (is_indirect_le_ih (tmp)) {
987 set_ih_free_space (tmp, 0);
988 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
989 (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
990 } else {
991 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
992 n_rem );
993 }
994 }
995 1532
996 tb->insert_size[0] = n_rem; 1533 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
997 if ( ! n_rem ) 1534 snum[i], sbytes[i], S_new[i]);
998 pos_in_item++; 1535 }
999 } 1536 break;
1537 default: /* cases d and t */
1538 reiserfs_panic(tb->tb_sb,
1539 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1540 (flag ==
1541 M_DELETE) ? "DELETE" : ((flag ==
1542 M_CUT) ? "CUT"
1543 : "UNKNOWN"),
1544 flag);
1000 } 1545 }
1001 else
1002 /* item falls wholly into S_new[i] */
1003 {
1004 int ret_val;
1005 struct item_head * pasted;
1006 1546
1007#ifdef CONFIG_REISERFS_CHECK 1547 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1008 struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos); 1548 insert_ptr[i] = S_new[i];
1009 1549
1010 if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) || 1550 RFALSE(!buffer_journaled(S_new[i])
1011 tb->insert_size[0] <= 0) ) 1551 || buffer_journal_dirty(S_new[i])
1012 reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); 1552 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1013#endif /* CONFIG_REISERFS_CHECK */ 1553 i, S_new[i]);
1014
1015 ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1016
1017 RFALSE( ret_val,
1018 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1019 ret_val);
1020
1021 /* paste into item */
1022 bi.tb = tb;
1023 bi.bi_bh = S_new[i];
1024 bi.bi_parent = NULL;
1025 bi.bi_position = 0;
1026 leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1027
1028 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1029 if (is_direntry_le_ih (pasted))
1030 {
1031 leaf_paste_entries (
1032 bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1033 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1034 );
1035 }
1036
1037 /* if we paste to indirect item update ih_free_space */
1038 if (is_indirect_le_ih (pasted))
1039 set_ih_free_space (pasted, 0);
1040 zeros_num = tb->insert_size[0] = 0;
1041 }
1042 }
1043
1044 else /* pasted item doesn't fall into S_new[i] */
1045 {
1046 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1047 }
1048 break;
1049 default: /* cases d and t */
1050 reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1051 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1052 } 1554 }
1053 1555
1054 memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE); 1556 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1055 insert_ptr[i] = S_new[i]; 1557 affected item which remains in S */
1056 1558 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1057 RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) || 1559
1058 buffer_dirty (S_new [i]), 1560 switch (flag) {
1059 "PAP-12247: S_new[%d] : (%b)", i, S_new[i]); 1561 case M_INSERT: /* insert item into S[0] */
1060 } 1562 bi.tb = tb;
1061 1563 bi.bi_bh = tbS0;
1062 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1564 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
1063 affected item which remains in S */ 1565 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
1064 if ( 0 <= item_pos && item_pos < tb->s0num ) 1566 leaf_insert_into_buf(&bi, item_pos, ih, body,
1065 { /* if we must insert or append into buffer S[0] */ 1567 zeros_num);
1066 1568
1067 switch (flag) 1569 /* If we insert the first key change the delimiting key */
1068 { 1570 if (item_pos == 0) {
1069 case M_INSERT: /* insert item into S[0] */ 1571 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1070 bi.tb = tb; 1572 replace_key(tb, tb->CFL[0], tb->lkey[0],
1071 bi.bi_bh = tbS0; 1573 tbS0, 0);
1072 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1073 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1074 leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1075
1076 /* If we insert the first key change the delimiting key */
1077 if( item_pos == 0 ) {
1078 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1079 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1080
1081 }
1082 break;
1083
1084 case M_PASTE: { /* append item in S[0] */
1085 struct item_head * pasted;
1086
1087 pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1088 /* when directory, may be new entry already pasted */
1089 if (is_direntry_le_ih (pasted)) {
1090 if ( pos_in_item >= 0 &&
1091 pos_in_item <= ih_entry_count(pasted) ) {
1092
1093 RFALSE( ! tb->insert_size[0],
1094 "PAP-12260: insert_size is 0 already");
1095
1096 /* prepare space */
1097 bi.tb = tb;
1098 bi.bi_bh = tbS0;
1099 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1100 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1101 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1102
1103 /* paste entry */
1104 leaf_paste_entries (
1105 bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1106 body + DEH_SIZE, tb->insert_size[0]
1107 );
1108 if ( ! item_pos && ! pos_in_item ) {
1109 RFALSE( !tb->CFL[0] || !tb->L[0],
1110 "PAP-12270: CFL[0]/L[0] must be specified");
1111 if (tb->CFL[0]) {
1112 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1113 1574
1114 } 1575 }
1115 } 1576 break;
1116 tb->insert_size[0] = 0; 1577
1117 } 1578 case M_PASTE:{ /* append item in S[0] */
1118 } else { /* regular object */ 1579 struct item_head *pasted;
1119 if ( pos_in_item == ih_item_len(pasted) ) { 1580
1120 1581 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1121 RFALSE( tb->insert_size[0] <= 0, 1582 /* when directory, may be new entry already pasted */
1122 "PAP-12275: insert size must not be %d", 1583 if (is_direntry_le_ih(pasted)) {
1123 tb->insert_size[0]); 1584 if (pos_in_item >= 0 &&
1124 bi.tb = tb; 1585 pos_in_item <=
1125 bi.bi_bh = tbS0; 1586 ih_entry_count(pasted)) {
1126 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 1587
1127 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); 1588 RFALSE(!tb->insert_size[0],
1128 leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); 1589 "PAP-12260: insert_size is 0 already");
1129 1590
1130 if (is_indirect_le_ih (pasted)) { 1591 /* prepare space */
1592 bi.tb = tb;
1593 bi.bi_bh = tbS0;
1594 bi.bi_parent =
1595 PATH_H_PPARENT(tb->tb_path,
1596 0);
1597 bi.bi_position =
1598 PATH_H_POSITION(tb->tb_path,
1599 1);
1600 leaf_paste_in_buffer(&bi,
1601 item_pos,
1602 pos_in_item,
1603 tb->
1604 insert_size
1605 [0], body,
1606 zeros_num);
1607
1608 /* paste entry */
1609 leaf_paste_entries(bi.bi_bh,
1610 item_pos,
1611 pos_in_item,
1612 1,
1613 (struct
1614 reiserfs_de_head
1615 *)body,
1616 body +
1617 DEH_SIZE,
1618 tb->
1619 insert_size
1620 [0]
1621 );
1622 if (!item_pos && !pos_in_item) {
1623 RFALSE(!tb->CFL[0]
1624 || !tb->L[0],
1625 "PAP-12270: CFL[0]/L[0] must be specified");
1626 if (tb->CFL[0]) {
1627 replace_key(tb,
1628 tb->
1629 CFL
1630 [0],
1631 tb->
1632 lkey
1633 [0],
1634 tbS0,
1635 0);
1636
1637 }
1638 }
1639 tb->insert_size[0] = 0;
1640 }
1641 } else { /* regular object */
1642 if (pos_in_item == ih_item_len(pasted)) {
1643
1644 RFALSE(tb->insert_size[0] <= 0,
1645 "PAP-12275: insert size must not be %d",
1646 tb->insert_size[0]);
1647 bi.tb = tb;
1648 bi.bi_bh = tbS0;
1649 bi.bi_parent =
1650 PATH_H_PPARENT(tb->tb_path,
1651 0);
1652 bi.bi_position =
1653 PATH_H_POSITION(tb->tb_path,
1654 1);
1655 leaf_paste_in_buffer(&bi,
1656 item_pos,
1657 pos_in_item,
1658 tb->
1659 insert_size
1660 [0], body,
1661 zeros_num);
1662
1663 if (is_indirect_le_ih(pasted)) {
1131#if 0 1664#if 0
1132 RFALSE( tb->insert_size[0] != UNFM_P_SIZE, 1665 RFALSE(tb->
1133 "PAP-12280: insert_size for indirect item must be %d, not %d", 1666 insert_size[0] !=
1134 UNFM_P_SIZE, tb->insert_size[0]); 1667 UNFM_P_SIZE,
1668 "PAP-12280: insert_size for indirect item must be %d, not %d",
1669 UNFM_P_SIZE,
1670 tb->
1671 insert_size[0]);
1135#endif 1672#endif
1136 set_ih_free_space (pasted, 0); 1673 set_ih_free_space
1137 } 1674 (pasted, 0);
1138 tb->insert_size[0] = 0; 1675 }
1139 } 1676 tb->insert_size[0] = 0;
1140 1677 }
1141#ifdef CONFIG_REISERFS_CHECK 1678#ifdef CONFIG_REISERFS_CHECK
1142 else { 1679 else {
1143 if ( tb->insert_size[0] ) { 1680 if (tb->insert_size[0]) {
1144 print_cur_tb ("12285"); 1681 print_cur_tb("12285");
1145 reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]); 1682 reiserfs_panic(tb->
1146 } 1683 tb_sb,
1684 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1685 tb->
1686 insert_size
1687 [0]);
1688 }
1689 }
1690#endif /* CONFIG_REISERFS_CHECK */
1691
1692 }
1693 } /* case M_PASTE: */
1147 } 1694 }
1148#endif /* CONFIG_REISERFS_CHECK */
1149
1150 }
1151 } /* case M_PASTE: */
1152 } 1695 }
1153 }
1154
1155#ifdef CONFIG_REISERFS_CHECK 1696#ifdef CONFIG_REISERFS_CHECK
1156 if ( flag == M_PASTE && tb->insert_size[0] ) { 1697 if (flag == M_PASTE && tb->insert_size[0]) {
1157 print_cur_tb ("12290"); 1698 print_cur_tb("12290");
1158 reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]); 1699 reiserfs_panic(tb->tb_sb,
1159 } 1700 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1160#endif /* CONFIG_REISERFS_CHECK */ 1701 tb->insert_size[0]);
1161 1702 }
1162 return 0; 1703#endif /* CONFIG_REISERFS_CHECK */
1163} /* Leaf level of the tree is balanced (end of balance_leaf) */
1164
1165 1704
1705 return 0;
1706} /* Leaf level of the tree is balanced (end of balance_leaf) */
1166 1707
1167/* Make empty node */ 1708/* Make empty node */
1168void make_empty_node (struct buffer_info * bi) 1709void make_empty_node(struct buffer_info *bi)
1169{ 1710{
1170 struct block_head * blkh; 1711 struct block_head *blkh;
1171 1712
1172 RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); 1713 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1173 1714
1174 blkh = B_BLK_HEAD(bi->bi_bh); 1715 blkh = B_BLK_HEAD(bi->bi_bh);
1175 set_blkh_nr_item( blkh, 0 ); 1716 set_blkh_nr_item(blkh, 0);
1176 set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) ); 1717 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1177 1718
1178 if (bi->bi_parent) 1719 if (bi->bi_parent)
1179 B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ 1720 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1180} 1721}
1181 1722
1182
1183/* Get first empty buffer */ 1723/* Get first empty buffer */
1184struct buffer_head * get_FEB (struct tree_balance * tb) 1724struct buffer_head *get_FEB(struct tree_balance *tb)
1185{ 1725{
1186 int i; 1726 int i;
1187 struct buffer_head * first_b; 1727 struct buffer_head *first_b;
1188 struct buffer_info bi; 1728 struct buffer_info bi;
1189
1190 for (i = 0; i < MAX_FEB_SIZE; i ++)
1191 if (tb->FEB[i] != 0)
1192 break;
1193
1194 if (i == MAX_FEB_SIZE)
1195 reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1196
1197 bi.tb = tb;
1198 bi.bi_bh = first_b = tb->FEB[i];
1199 bi.bi_parent = NULL;
1200 bi.bi_position = 0;
1201 make_empty_node (&bi);
1202 set_buffer_uptodate(first_b);
1203 tb->FEB[i] = NULL;
1204 tb->used[i] = first_b;
1205
1206 return(first_b);
1207}
1208 1729
1730 for (i = 0; i < MAX_FEB_SIZE; i++)
1731 if (tb->FEB[i] != 0)
1732 break;
1733
1734 if (i == MAX_FEB_SIZE)
1735 reiserfs_panic(tb->tb_sb,
1736 "vs-12300: get_FEB: FEB list is empty");
1737
1738 bi.tb = tb;
1739 bi.bi_bh = first_b = tb->FEB[i];
1740 bi.bi_parent = NULL;
1741 bi.bi_position = 0;
1742 make_empty_node(&bi);
1743 set_buffer_uptodate(first_b);
1744 tb->FEB[i] = NULL;
1745 tb->used[i] = first_b;
1746
1747 return (first_b);
1748}
1209 1749
1210/* This is now used because reiserfs_free_block has to be able to 1750/* This is now used because reiserfs_free_block has to be able to
1211** schedule. 1751** schedule.
1212*/ 1752*/
1213static void store_thrown (struct tree_balance * tb, struct buffer_head * bh) 1753static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1214{ 1754{
1215 int i; 1755 int i;
1216 1756
1217 if (buffer_dirty (bh)) 1757 if (buffer_dirty(bh))
1218 reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer"); 1758 reiserfs_warning(tb->tb_sb,
1219 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++) 1759 "store_thrown deals with dirty buffer");
1220 if (!tb->thrown[i]) { 1760 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++)
1221 tb->thrown[i] = bh; 1761 if (!tb->thrown[i]) {
1222 get_bh(bh) ; /* free_thrown puts this */ 1762 tb->thrown[i] = bh;
1223 return; 1763 get_bh(bh); /* free_thrown puts this */
1224 } 1764 return;
1225 reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers"); 1765 }
1766 reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
1226} 1767}
1227 1768
1228static void free_thrown(struct tree_balance *tb) { 1769static void free_thrown(struct tree_balance *tb)
1229 int i ; 1770{
1230 b_blocknr_t blocknr ; 1771 int i;
1231 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) { 1772 b_blocknr_t blocknr;
1232 if (tb->thrown[i]) { 1773 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) {
1233 blocknr = tb->thrown[i]->b_blocknr ; 1774 if (tb->thrown[i]) {
1234 if (buffer_dirty (tb->thrown[i])) 1775 blocknr = tb->thrown[i]->b_blocknr;
1235 reiserfs_warning (tb->tb_sb, 1776 if (buffer_dirty(tb->thrown[i]))
1236 "free_thrown deals with dirty buffer %d", 1777 reiserfs_warning(tb->tb_sb,
1237 blocknr); 1778 "free_thrown deals with dirty buffer %d",
1238 brelse(tb->thrown[i]) ; /* incremented in store_thrown */ 1779 blocknr);
1239 reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0); 1780 brelse(tb->thrown[i]); /* incremented in store_thrown */
1781 reiserfs_free_block(tb->transaction_handle, NULL,
1782 blocknr, 0);
1783 }
1240 } 1784 }
1241 }
1242} 1785}
1243 1786
1244void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh) 1787void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1245{ 1788{
1246 struct block_head *blkh; 1789 struct block_head *blkh;
1247 blkh = B_BLK_HEAD(bh); 1790 blkh = B_BLK_HEAD(bh);
1248 set_blkh_level( blkh, FREE_LEVEL ); 1791 set_blkh_level(blkh, FREE_LEVEL);
1249 set_blkh_nr_item( blkh, 0 ); 1792 set_blkh_nr_item(blkh, 0);
1250 1793
1251 clear_buffer_dirty(bh); 1794 clear_buffer_dirty(bh);
1252 store_thrown (tb, bh); 1795 store_thrown(tb, bh);
1253} 1796}
1254 1797
1255/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ 1798/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1256void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest, 1799void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1257 struct buffer_head * src, int n_src) 1800 struct buffer_head *src, int n_src)
1258{ 1801{
1259 1802
1260 RFALSE( dest == NULL || src == NULL, 1803 RFALSE(dest == NULL || src == NULL,
1261 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", 1804 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1262 src, dest); 1805 src, dest);
1263 RFALSE( ! B_IS_KEYS_LEVEL (dest), 1806 RFALSE(!B_IS_KEYS_LEVEL(dest),
1264 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf", 1807 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1265 dest); 1808 dest);
1266 RFALSE( n_dest < 0 || n_src < 0, 1809 RFALSE(n_dest < 0 || n_src < 0,
1267 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); 1810 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1268 RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), 1811 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1269 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", 1812 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1270 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); 1813 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1271 1814
1272 if (B_IS_ITEMS_LEVEL (src)) 1815 if (B_IS_ITEMS_LEVEL(src))
1273 /* source buffer contains leaf node */ 1816 /* source buffer contains leaf node */
1274 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE); 1817 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1275 else 1818 KEY_SIZE);
1276 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE); 1819 else
1277 1820 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1278 do_balance_mark_internal_dirty (tb, dest, 0); 1821 KEY_SIZE);
1822
1823 do_balance_mark_internal_dirty(tb, dest, 0);
1279} 1824}
1280 1825
1281 1826int get_left_neighbor_position(struct tree_balance *tb, int h)
1282int get_left_neighbor_position (
1283 struct tree_balance * tb,
1284 int h
1285 )
1286{ 1827{
1287 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); 1828 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1288 1829
1289 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0, 1830 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0,
1290 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", 1831 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1291 h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h)); 1832 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1292 1833
1293 if (Sh_position == 0) 1834 if (Sh_position == 0)
1294 return B_NR_ITEMS (tb->FL[h]); 1835 return B_NR_ITEMS(tb->FL[h]);
1295 else 1836 else
1296 return Sh_position - 1; 1837 return Sh_position - 1;
1297} 1838}
1298 1839
1299 1840int get_right_neighbor_position(struct tree_balance *tb, int h)
1300int get_right_neighbor_position (struct tree_balance * tb, int h)
1301{ 1841{
1302 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); 1842 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1303 1843
1304 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0, 1844 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0,
1305 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", 1845 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1306 h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]); 1846 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1307 1847
1308 if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h))) 1848 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1309 return 0; 1849 return 0;
1310 else 1850 else
1311 return Sh_position + 1; 1851 return Sh_position + 1;
1312} 1852}
1313 1853
1314
1315#ifdef CONFIG_REISERFS_CHECK 1854#ifdef CONFIG_REISERFS_CHECK
1316 1855
1317int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value); 1856int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1318static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes) 1857static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1858 char *mes)
1319{ 1859{
1320 struct disk_child * dc; 1860 struct disk_child *dc;
1321 int i; 1861 int i;
1322
1323 RFALSE( !bh, "PAP-12336: bh == 0");
1324
1325 if (!bh || !B_IS_IN_TREE (bh))
1326 return;
1327
1328 RFALSE( !buffer_dirty (bh) &&
1329 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1330 "PAP-12337: buffer (%b) must be dirty", bh);
1331 dc = B_N_CHILD (bh, 0);
1332
1333 for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1334 if (!is_reusable (s, dc_block_number(dc), 1) ) {
1335 print_cur_tb (mes);
1336 reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1337 }
1338 }
1339}
1340 1862
1863 RFALSE(!bh, "PAP-12336: bh == 0");
1341 1864
1342static int locked_or_not_in_tree (struct buffer_head * bh, char * which) 1865 if (!bh || !B_IS_IN_TREE(bh))
1343{ 1866 return;
1344 if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) ||
1345 !B_IS_IN_TREE (bh) ) {
1346 reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)",
1347 which, bh);
1348 return 1;
1349 }
1350 return 0;
1351}
1352 1867
1868 RFALSE(!buffer_dirty(bh) &&
1869 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1870 "PAP-12337: buffer (%b) must be dirty", bh);
1871 dc = B_N_CHILD(bh, 0);
1353 1872
1354static int check_before_balancing (struct tree_balance * tb) 1873 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1355{ 1874 if (!is_reusable(s, dc_block_number(dc), 1)) {
1356 int retval = 0; 1875 print_cur_tb(mes);
1357 1876 reiserfs_panic(s,
1358 if ( cur_tb ) { 1877 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1359 reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: " 1878 dc, bh);
1360 "suspect that schedule occurred based on cur_tb not being null at this point in code. " 1879 }
1361 "do_balance cannot properly handle schedule occurring while it runs."); 1880 }
1362 }
1363
1364 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1365 prepped all of these for us). */
1366 if ( tb->lnum[0] ) {
1367 retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1368 retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1369 retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1370 check_leaf (tb->L[0]);
1371 }
1372 if ( tb->rnum[0] ) {
1373 retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1374 retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1375 retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1376 check_leaf (tb->R[0]);
1377 }
1378 retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1379 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1380
1381 return retval;
1382} 1881}
1383 1882
1883static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
1884{
1885 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1886 !B_IS_IN_TREE(bh)) {
1887 reiserfs_warning(NULL,
1888 "vs-12339: locked_or_not_in_tree: %s (%b)",
1889 which, bh);
1890 return 1;
1891 }
1892 return 0;
1893}
1384 1894
1385static void check_after_balance_leaf (struct tree_balance * tb) 1895static int check_before_balancing(struct tree_balance *tb)
1386{ 1896{
1387 if (tb->lnum[0]) { 1897 int retval = 0;
1388 if (B_FREE_SPACE (tb->L[0]) != 1898
1389 MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) { 1899 if (cur_tb) {
1390 print_cur_tb ("12221"); 1900 reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
1391 reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect"); 1901 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1902 "do_balance cannot properly handle schedule occurring while it runs.");
1392 } 1903 }
1393 } 1904
1394 if (tb->rnum[0]) { 1905 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1395 if (B_FREE_SPACE (tb->R[0]) != 1906 prepped all of these for us). */
1396 MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) { 1907 if (tb->lnum[0]) {
1397 print_cur_tb ("12222"); 1908 retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
1398 reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); 1909 retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
1910 retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
1911 check_leaf(tb->L[0]);
1399 } 1912 }
1400 } 1913 if (tb->rnum[0]) {
1401 if (PATH_H_PBUFFER(tb->tb_path,1) && 1914 retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
1402 (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) != 1915 retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
1403 (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - 1916 retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
1404 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), 1917 check_leaf(tb->R[0]);
1405 PATH_H_POSITION (tb->tb_path, 1)))) )) { 1918 }
1406 int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)); 1919 retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
1407 int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - 1920 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1408 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1409 PATH_H_POSITION (tb->tb_path, 1))));
1410 print_cur_tb ("12223");
1411 reiserfs_warning (tb->tb_sb,
1412 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1413 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1414 left,
1415 MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
1416 PATH_H_PBUFFER(tb->tb_path,1),
1417 PATH_H_POSITION (tb->tb_path, 1),
1418 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
1419 right );
1420 reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
1421 }
1422}
1423 1921
1922 return retval;
1923}
1424 1924
1425static void check_leaf_level (struct tree_balance * tb) 1925static void check_after_balance_leaf(struct tree_balance *tb)
1426{ 1926{
1427 check_leaf (tb->L[0]); 1927 if (tb->lnum[0]) {
1428 check_leaf (tb->R[0]); 1928 if (B_FREE_SPACE(tb->L[0]) !=
1429 check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); 1929 MAX_CHILD_SIZE(tb->L[0]) -
1930 dc_size(B_N_CHILD
1931 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1932 print_cur_tb("12221");
1933 reiserfs_panic(tb->tb_sb,
1934 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1935 }
1936 }
1937 if (tb->rnum[0]) {
1938 if (B_FREE_SPACE(tb->R[0]) !=
1939 MAX_CHILD_SIZE(tb->R[0]) -
1940 dc_size(B_N_CHILD
1941 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1942 print_cur_tb("12222");
1943 reiserfs_panic(tb->tb_sb,
1944 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1945 }
1946 }
1947 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1948 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1949 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1950 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1951 PATH_H_POSITION(tb->tb_path, 1)))))) {
1952 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1953 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1954 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1955 PATH_H_POSITION(tb->tb_path,
1956 1))));
1957 print_cur_tb("12223");
1958 reiserfs_warning(tb->tb_sb,
1959 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1960 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1961 left,
1962 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1963 PATH_H_PBUFFER(tb->tb_path, 1),
1964 PATH_H_POSITION(tb->tb_path, 1),
1965 dc_size(B_N_CHILD
1966 (PATH_H_PBUFFER(tb->tb_path, 1),
1967 PATH_H_POSITION(tb->tb_path, 1))),
1968 right);
1969 reiserfs_panic(tb->tb_sb,
1970 "PAP-12365: check_after_balance_leaf: S is incorrect");
1971 }
1430} 1972}
1431 1973
1432static void check_internal_levels (struct tree_balance * tb) 1974static void check_leaf_level(struct tree_balance *tb)
1433{ 1975{
1434 int h; 1976 check_leaf(tb->L[0]);
1977 check_leaf(tb->R[0]);
1978 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1979}
1435 1980
1436 /* check all internal nodes */ 1981static void check_internal_levels(struct tree_balance *tb)
1437 for (h = 1; tb->insert_size[h]; h ++) { 1982{
1438 check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH"); 1983 int h;
1439 if (tb->lnum[h]) 1984
1440 check_internal_node (tb->tb_sb, tb->L[h], "BAD L"); 1985 /* check all internal nodes */
1441 if (tb->rnum[h]) 1986 for (h = 1; tb->insert_size[h]; h++) {
1442 check_internal_node (tb->tb_sb, tb->R[h], "BAD R"); 1987 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1443 } 1988 "BAD BUFFER ON PATH");
1989 if (tb->lnum[h])
1990 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1991 if (tb->rnum[h])
1992 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1993 }
1444 1994
1445} 1995}
1446 1996
1447#endif 1997#endif
1448 1998
1449
1450
1451
1452
1453
1454/* Now we have all of the buffers that must be used in balancing of 1999/* Now we have all of the buffers that must be used in balancing of
1455 the tree. We rely on the assumption that schedule() will not occur 2000 the tree. We rely on the assumption that schedule() will not occur
1456 while do_balance works. ( Only interrupt handlers are acceptable.) 2001 while do_balance works. ( Only interrupt handlers are acceptable.)
@@ -1484,114 +2029,109 @@ static void check_internal_levels (struct tree_balance * tb)
1484 2029
1485*/ 2030*/
1486 2031
1487static inline void do_balance_starts (struct tree_balance *tb) 2032static inline void do_balance_starts(struct tree_balance *tb)
1488{ 2033{
1489 /* use print_cur_tb() to see initial state of struct 2034 /* use print_cur_tb() to see initial state of struct
1490 tree_balance */ 2035 tree_balance */
1491 2036
1492 /* store_print_tb (tb); */ 2037 /* store_print_tb (tb); */
1493 2038
1494 /* do not delete, just comment it out */ 2039 /* do not delete, just comment it out */
1495/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, 2040/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1496 "check");*/ 2041 "check");*/
1497 RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB"); 2042 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1498#ifdef CONFIG_REISERFS_CHECK 2043#ifdef CONFIG_REISERFS_CHECK
1499 cur_tb = tb; 2044 cur_tb = tb;
1500#endif 2045#endif
1501} 2046}
1502 2047
1503 2048static inline void do_balance_completed(struct tree_balance *tb)
1504static inline void do_balance_completed (struct tree_balance * tb)
1505{ 2049{
1506 2050
1507#ifdef CONFIG_REISERFS_CHECK 2051#ifdef CONFIG_REISERFS_CHECK
1508 check_leaf_level (tb); 2052 check_leaf_level(tb);
1509 check_internal_levels (tb); 2053 check_internal_levels(tb);
1510 cur_tb = NULL; 2054 cur_tb = NULL;
1511#endif 2055#endif
1512 2056
1513 /* reiserfs_free_block is no longer schedule safe. So, we need to 2057 /* reiserfs_free_block is no longer schedule safe. So, we need to
1514 ** put the buffers we want freed on the thrown list during do_balance, 2058 ** put the buffers we want freed on the thrown list during do_balance,
1515 ** and then free them now 2059 ** and then free them now
1516 */ 2060 */
1517
1518 REISERFS_SB(tb->tb_sb)->s_do_balance ++;
1519 2061
2062 REISERFS_SB(tb->tb_sb)->s_do_balance++;
1520 2063
1521 /* release all nodes hold to perform the balancing */ 2064 /* release all nodes hold to perform the balancing */
1522 unfix_nodes(tb); 2065 unfix_nodes(tb);
1523 2066
1524 free_thrown(tb) ; 2067 free_thrown(tb);
1525} 2068}
1526 2069
2070void do_balance(struct tree_balance *tb, /* tree_balance structure */
2071 struct item_head *ih, /* item header of inserted item */
2072 const char *body, /* body of inserted item or bytes to paste */
2073 int flag)
2074{ /* i - insert, d - delete
2075 c - cut, p - paste
2076
2077 Cut means delete part of an item
2078 (includes removing an entry from a
2079 directory).
2080
2081 Delete means delete whole item.
2082
2083 Insert means add a new item into the
2084 tree.
2085
2086 Paste means to append to the end of an
2087 existing file or to insert a directory
2088 entry. */
2089 int child_pos, /* position of a child node in its parent */
2090 h; /* level of the tree being processed */
2091 struct item_head insert_key[2]; /* in our processing of one level
2092 we sometimes determine what
2093 must be inserted into the next
2094 higher level. This insertion
2095 consists of a key or two keys
2096 and their corresponding
2097 pointers */
2098 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2099 level */
2100
2101 tb->tb_mode = flag;
2102 tb->need_balance_dirty = 0;
2103
2104 if (FILESYSTEM_CHANGED_TB(tb)) {
2105 reiserfs_panic(tb->tb_sb,
2106 "clm-6000: do_balance, fs generation has changed\n");
2107 }
2108 /* if we have no real work to do */
2109 if (!tb->insert_size[0]) {
2110 reiserfs_warning(tb->tb_sb,
2111 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2112 flag);
2113 unfix_nodes(tb);
2114 return;
2115 }
1527 2116
2117 atomic_inc(&(fs_generation(tb->tb_sb)));
2118 do_balance_starts(tb);
1528 2119
1529
1530
1531void do_balance (struct tree_balance * tb, /* tree_balance structure */
1532 struct item_head * ih, /* item header of inserted item */
1533 const char * body, /* body of inserted item or bytes to paste */
1534 int flag) /* i - insert, d - delete
1535 c - cut, p - paste
1536
1537 Cut means delete part of an item
1538 (includes removing an entry from a
1539 directory).
1540
1541 Delete means delete whole item.
1542
1543 Insert means add a new item into the
1544 tree.
1545
1546 Paste means to append to the end of an
1547 existing file or to insert a directory
1548 entry. */
1549{
1550 int child_pos, /* position of a child node in its parent */
1551 h; /* level of the tree being processed */
1552 struct item_head insert_key[2]; /* in our processing of one level
1553 we sometimes determine what
1554 must be inserted into the next
1555 higher level. This insertion
1556 consists of a key or two keys
1557 and their corresponding
1558 pointers */
1559 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1560 level */
1561
1562 tb->tb_mode = flag;
1563 tb->need_balance_dirty = 0;
1564
1565 if (FILESYSTEM_CHANGED_TB(tb)) {
1566 reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
1567 }
1568 /* if we have no real work to do */
1569 if ( ! tb->insert_size[0] ) {
1570 reiserfs_warning (tb->tb_sb,
1571 "PAP-12350: do_balance: insert_size == 0, mode == %c",
1572 flag);
1573 unfix_nodes(tb);
1574 return;
1575 }
1576
1577 atomic_inc (&(fs_generation (tb->tb_sb)));
1578 do_balance_starts (tb);
1579
1580 /* balance leaf returns 0 except if combining L R and S into 2120 /* balance leaf returns 0 except if combining L R and S into
1581 one node. see balance_internal() for explanation of this 2121 one node. see balance_internal() for explanation of this
1582 line of code.*/ 2122 line of code. */
1583 child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) + 2123 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1584 balance_leaf (tb, ih, body, flag, insert_key, insert_ptr); 2124 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1585 2125
1586#ifdef CONFIG_REISERFS_CHECK 2126#ifdef CONFIG_REISERFS_CHECK
1587 check_after_balance_leaf (tb); 2127 check_after_balance_leaf(tb);
1588#endif 2128#endif
1589 2129
1590 /* Balance internal level of the tree. */ 2130 /* Balance internal level of the tree. */
1591 for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ ) 2131 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1592 child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr); 2132 child_pos =
1593 2133 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1594 2134
1595 do_balance_completed (tb); 2135 do_balance_completed(tb);
1596 2136
1597} 2137}
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2230afff1870..c9f178fb494f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5
6#include <linux/time.h> 5#include <linux/time.h>
7#include <linux/reiserfs_fs.h> 6#include <linux/reiserfs_fs.h>
8#include <linux/reiserfs_acl.h> 7#include <linux/reiserfs_acl.h>
@@ -31,82 +30,84 @@
31** We use reiserfs_truncate_file to pack the tail, since it already has 30** We use reiserfs_truncate_file to pack the tail, since it already has
32** all the conditions coded. 31** all the conditions coded.
33*/ 32*/
34static int reiserfs_file_release (struct inode * inode, struct file * filp) 33static int reiserfs_file_release(struct inode *inode, struct file *filp)
35{ 34{
36 35
37 struct reiserfs_transaction_handle th ; 36 struct reiserfs_transaction_handle th;
38 int err; 37 int err;
39 int jbegin_failure = 0; 38 int jbegin_failure = 0;
40 39
41 if (!S_ISREG (inode->i_mode)) 40 if (!S_ISREG(inode->i_mode))
42 BUG (); 41 BUG();
43 42
44 /* fast out for when nothing needs to be done */ 43 /* fast out for when nothing needs to be done */
45 if ((atomic_read(&inode->i_count) > 1 || 44 if ((atomic_read(&inode->i_count) > 1 ||
46 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || 45 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
47 !tail_has_to_be_packed(inode)) && 46 !tail_has_to_be_packed(inode)) &&
48 REISERFS_I(inode)->i_prealloc_count <= 0) { 47 REISERFS_I(inode)->i_prealloc_count <= 0) {
49 return 0; 48 return 0;
50 } 49 }
51
52 reiserfs_write_lock(inode->i_sb);
53 down (&inode->i_sem);
54 /* freeing preallocation only involves relogging blocks that
55 * are already in the current transaction. preallocation gets
56 * freed at the end of each transaction, so it is impossible for
57 * us to log any additional blocks (including quota blocks)
58 */
59 err = journal_begin(&th, inode->i_sb, 1);
60 if (err) {
61 /* uh oh, we can't allow the inode to go away while there
62 * is still preallocation blocks pending. Try to join the
63 * aborted transaction
64 */
65 jbegin_failure = err;
66 err = journal_join_abort(&th, inode->i_sb, 1);
67 50
51 reiserfs_write_lock(inode->i_sb);
52 down(&inode->i_sem);
53 /* freeing preallocation only involves relogging blocks that
54 * are already in the current transaction. preallocation gets
55 * freed at the end of each transaction, so it is impossible for
56 * us to log any additional blocks (including quota blocks)
57 */
58 err = journal_begin(&th, inode->i_sb, 1);
68 if (err) { 59 if (err) {
69 /* hmpf, our choices here aren't good. We can pin the inode 60 /* uh oh, we can't allow the inode to go away while there
70 * which will disallow unmount from every happening, we can 61 * is still preallocation blocks pending. Try to join the
71 * do nothing, which will corrupt random memory on unmount, 62 * aborted transaction
72 * or we can forcibly remove the file from the preallocation 63 */
73 * list, which will leak blocks on disk. Lets pin the inode 64 jbegin_failure = err;
74 * and let the admin know what is going on. 65 err = journal_join_abort(&th, inode->i_sb, 1);
75 */ 66
76 igrab(inode); 67 if (err) {
77 reiserfs_warning(inode->i_sb, "pinning inode %lu because the " 68 /* hmpf, our choices here aren't good. We can pin the inode
78 "preallocation can't be freed"); 69 * which will disallow unmount from every happening, we can
79 goto out; 70 * do nothing, which will corrupt random memory on unmount,
71 * or we can forcibly remove the file from the preallocation
72 * list, which will leak blocks on disk. Lets pin the inode
73 * and let the admin know what is going on.
74 */
75 igrab(inode);
76 reiserfs_warning(inode->i_sb,
77 "pinning inode %lu because the "
78 "preallocation can't be freed");
79 goto out;
80 }
80 } 81 }
81 } 82 reiserfs_update_inode_transaction(inode);
82 reiserfs_update_inode_transaction(inode) ;
83 83
84#ifdef REISERFS_PREALLOCATE 84#ifdef REISERFS_PREALLOCATE
85 reiserfs_discard_prealloc (&th, inode); 85 reiserfs_discard_prealloc(&th, inode);
86#endif 86#endif
87 err = journal_end(&th, inode->i_sb, 1); 87 err = journal_end(&th, inode->i_sb, 1);
88 88
89 /* copy back the error code from journal_begin */ 89 /* copy back the error code from journal_begin */
90 if (!err) 90 if (!err)
91 err = jbegin_failure; 91 err = jbegin_failure;
92 92
93 if (!err && atomic_read(&inode->i_count) <= 1 && 93 if (!err && atomic_read(&inode->i_count) <= 1 &&
94 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 94 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
95 tail_has_to_be_packed (inode)) { 95 tail_has_to_be_packed(inode)) {
96 /* if regular file is released by last holder and it has been 96 /* if regular file is released by last holder and it has been
97 appended (we append by unformatted node only) or its direct 97 appended (we append by unformatted node only) or its direct
98 item(s) had to be converted, then it may have to be 98 item(s) had to be converted, then it may have to be
99 indirect2direct converted */ 99 indirect2direct converted */
100 err = reiserfs_truncate_file(inode, 0) ; 100 err = reiserfs_truncate_file(inode, 0);
101 } 101 }
102out: 102 out:
103 up (&inode->i_sem); 103 up(&inode->i_sem);
104 reiserfs_write_unlock(inode->i_sb); 104 reiserfs_write_unlock(inode->i_sb);
105 return err; 105 return err;
106} 106}
107 107
108static void reiserfs_vfs_truncate_file(struct inode *inode) { 108static void reiserfs_vfs_truncate_file(struct inode *inode)
109 reiserfs_truncate_file(inode, 1) ; 109{
110 reiserfs_truncate_file(inode, 1);
110} 111}
111 112
112/* Sync a reiserfs file. */ 113/* Sync a reiserfs file. */
@@ -116,26 +117,24 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) {
116 * be removed... 117 * be removed...
117 */ 118 */
118 119
119static int reiserfs_sync_file( 120static int reiserfs_sync_file(struct file *p_s_filp,
120 struct file * p_s_filp, 121 struct dentry *p_s_dentry, int datasync)
121 struct dentry * p_s_dentry, 122{
122 int datasync 123 struct inode *p_s_inode = p_s_dentry->d_inode;
123 ) { 124 int n_err;
124 struct inode * p_s_inode = p_s_dentry->d_inode; 125 int barrier_done;
125 int n_err; 126
126 int barrier_done; 127 if (!S_ISREG(p_s_inode->i_mode))
127 128 BUG();
128 if (!S_ISREG(p_s_inode->i_mode)) 129 n_err = sync_mapping_buffers(p_s_inode->i_mapping);
129 BUG (); 130 reiserfs_write_lock(p_s_inode->i_sb);
130 n_err = sync_mapping_buffers(p_s_inode->i_mapping) ; 131 barrier_done = reiserfs_commit_for_inode(p_s_inode);
131 reiserfs_write_lock(p_s_inode->i_sb); 132 reiserfs_write_unlock(p_s_inode->i_sb);
132 barrier_done = reiserfs_commit_for_inode(p_s_inode); 133 if (barrier_done != 1)
133 reiserfs_write_unlock(p_s_inode->i_sb); 134 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
134 if (barrier_done != 1) 135 if (barrier_done < 0)
135 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); 136 return barrier_done;
136 if (barrier_done < 0) 137 return (n_err < 0) ? -EIO : 0;
137 return barrier_done;
138 return ( n_err < 0 ) ? -EIO : 0;
139} 138}
140 139
141/* I really do not want to play with memory shortage right now, so 140/* I really do not want to play with memory shortage right now, so
@@ -147,700 +146,797 @@ static int reiserfs_sync_file(
147/* Allocates blocks for a file to fulfil write request. 146/* Allocates blocks for a file to fulfil write request.
148 Maps all unmapped but prepared pages from the list. 147 Maps all unmapped but prepared pages from the list.
149 Updates metadata with newly allocated blocknumbers as needed */ 148 Updates metadata with newly allocated blocknumbers as needed */
150static int reiserfs_allocate_blocks_for_region( 149static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */
151 struct reiserfs_transaction_handle *th, 150 loff_t pos, /* Writing position */
152 struct inode *inode, /* Inode we work with */ 151 int num_pages, /* number of pages write going
153 loff_t pos, /* Writing position */ 152 to touch */
154 int num_pages, /* number of pages write going 153 int write_bytes, /* amount of bytes to write */
155 to touch */ 154 struct page **prepared_pages, /* array of
156 int write_bytes, /* amount of bytes to write */ 155 prepared pages
157 struct page **prepared_pages, /* array of 156 */
158 prepared pages 157 int blocks_to_allocate /* Amount of blocks we
159 */ 158 need to allocate to
160 int blocks_to_allocate /* Amount of blocks we 159 fit the data into file
161 need to allocate to 160 */
162 fit the data into file 161 )
163 */
164 )
165{ 162{
166 struct cpu_key key; // cpu key of item that we are going to deal with 163 struct cpu_key key; // cpu key of item that we are going to deal with
167 struct item_head *ih; // pointer to item head that we are going to deal with 164 struct item_head *ih; // pointer to item head that we are going to deal with
168 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with 165 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
169 __le32 * item; // pointer to item we are going to deal with 166 __le32 *item; // pointer to item we are going to deal with
170 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 167 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
171 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. 168 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
172 reiserfs_blocknr_hint_t hint; // hint structure for block allocator. 169 reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
173 size_t res; // return value of various functions that we call. 170 size_t res; // return value of various functions that we call.
174 int curr_block; // current block used to keep track of unmapped blocks. 171 int curr_block; // current block used to keep track of unmapped blocks.
175 int i; // loop counter 172 int i; // loop counter
176 int itempos; // position in item 173 int itempos; // position in item
177 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in 174 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in
178 // first page 175 // first page
179 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ 176 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */
180 __u64 hole_size ; // amount of blocks for a file hole, if it needed to be created. 177 __u64 hole_size; // amount of blocks for a file hole, if it needed to be created.
181 int modifying_this_item = 0; // Flag for items traversal code to keep track 178 int modifying_this_item = 0; // Flag for items traversal code to keep track
182 // of the fact that we already prepared 179 // of the fact that we already prepared
183 // current block for journal 180 // current block for journal
184 int will_prealloc = 0; 181 int will_prealloc = 0;
185 RFALSE(!blocks_to_allocate, "green-9004: tried to allocate zero blocks?"); 182 RFALSE(!blocks_to_allocate,
186 183 "green-9004: tried to allocate zero blocks?");
187 /* only preallocate if this is a small write */ 184
188 if (REISERFS_I(inode)->i_prealloc_count || 185 /* only preallocate if this is a small write */
189 (!(write_bytes & (inode->i_sb->s_blocksize -1)) && 186 if (REISERFS_I(inode)->i_prealloc_count ||
190 blocks_to_allocate < 187 (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
191 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) 188 blocks_to_allocate <
192 will_prealloc = REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; 189 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
193 190 will_prealloc =
194 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * 191 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
195 sizeof(b_blocknr_t), GFP_NOFS); 192
196 193 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
197 /* First we compose a key to point at the writing position, we want to do 194 sizeof(b_blocknr_t), GFP_NOFS);
198 that outside of any locking region. */ 195
199 make_cpu_key (&key, inode, pos+1, TYPE_ANY, 3/*key length*/); 196 /* First we compose a key to point at the writing position, we want to do
200 197 that outside of any locking region. */
201 /* If we came here, it means we absolutely need to open a transaction, 198 make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );
202 since we need to allocate some blocks */ 199
203 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. 200 /* If we came here, it means we absolutely need to open a transaction,
204 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough 201 since we need to allocate some blocks */
205 if (res) 202 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
206 goto error_exit; 203 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
207 reiserfs_update_inode_transaction(inode) ; 204 if (res)
208
209 /* Look for the in-tree position of our write, need path for block allocator */
210 res = search_for_position_by_key(inode->i_sb, &key, &path);
211 if ( res == IO_ERROR ) {
212 res = -EIO;
213 goto error_exit;
214 }
215
216 /* Allocate blocks */
217 /* First fill in "hint" structure for block allocator */
218 hint.th = th; // transaction handle.
219 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
220 hint.inode = inode; // Inode is needed by block allocator too.
221 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
222 hint.key = key.on_disk_key; // on disk key of file.
223 hint.block = inode->i_blocks>>(inode->i_sb->s_blocksize_bits-9); // Number of disk blocks this file occupies already.
224 hint.formatted_node = 0; // We are allocating blocks for unformatted node.
225 hint.preallocate = will_prealloc;
226
227 /* Call block allocator to allocate blocks */
228 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate);
229 if ( res != CARRY_ON ) {
230 if ( res == NO_DISK_SPACE ) {
231 /* We flush the transaction in case of no space. This way some
232 blocks might become free */
233 SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
234 res = restart_transaction(th, inode, &path);
235 if (res)
236 goto error_exit;
237
238 /* We might have scheduled, so search again */
239 res = search_for_position_by_key(inode->i_sb, &key, &path);
240 if ( res == IO_ERROR ) {
241 res = -EIO;
242 goto error_exit; 205 goto error_exit;
243 } 206 reiserfs_update_inode_transaction(inode);
244 207
245 /* update changed info for hint structure. */ 208 /* Look for the in-tree position of our write, need path for block allocator */
246 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate); 209 res = search_for_position_by_key(inode->i_sb, &key, &path);
247 if ( res != CARRY_ON ) { 210 if (res == IO_ERROR) {
248 res = -ENOSPC; 211 res = -EIO;
249 pathrelse(&path);
250 goto error_exit; 212 goto error_exit;
251 }
252 } else {
253 res = -ENOSPC;
254 pathrelse(&path);
255 goto error_exit;
256 } 213 }
257 }
258 214
259#ifdef __BIG_ENDIAN 215 /* Allocate blocks */
260 // Too bad, I have not found any way to convert a given region from 216 /* First fill in "hint" structure for block allocator */
261 // cpu format to little endian format 217 hint.th = th; // transaction handle.
262 { 218 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
263 int i; 219 hint.inode = inode; // Inode is needed by block allocator too.
264 for ( i = 0; i < blocks_to_allocate ; i++) 220 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
265 allocated_blocks[i]=cpu_to_le32(allocated_blocks[i]); 221 hint.key = key.on_disk_key; // on disk key of file.
266 } 222 hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already.
267#endif 223 hint.formatted_node = 0; // We are allocating blocks for unformatted node.
268 224 hint.preallocate = will_prealloc;
269 /* Blocks allocating well might have scheduled and tree might have changed, 225
270 let's search the tree again */ 226 /* Call block allocator to allocate blocks */
271 /* find where in the tree our write should go */ 227 res =
272 res = search_for_position_by_key(inode->i_sb, &key, &path); 228 reiserfs_allocate_blocknrs(&hint, allocated_blocks,
273 if ( res == IO_ERROR ) { 229 blocks_to_allocate, blocks_to_allocate);
274 res = -EIO; 230 if (res != CARRY_ON) {
275 goto error_exit_free_blocks; 231 if (res == NO_DISK_SPACE) {
276 } 232 /* We flush the transaction in case of no space. This way some
277 233 blocks might become free */
278 bh = get_last_bh( &path ); // Get a bufferhead for last element in path. 234 SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
279 ih = get_ih( &path ); // Get a pointer to last item head in path. 235 res = restart_transaction(th, inode, &path);
280 item = get_item( &path ); // Get a pointer to last item in path 236 if (res)
281 237 goto error_exit;
282 /* Let's see what we have found */ 238
283 if ( res != POSITION_FOUND ) { /* position not found, this means that we 239 /* We might have scheduled, so search again */
284 might need to append file with holes 240 res =
285 first */ 241 search_for_position_by_key(inode->i_sb, &key,
286 // Since we are writing past the file's end, we need to find out if 242 &path);
287 // there is a hole that needs to be inserted before our writing 243 if (res == IO_ERROR) {
288 // position, and how many blocks it is going to cover (we need to 244 res = -EIO;
289 // populate pointers to file blocks representing the hole with zeros) 245 goto error_exit;
246 }
290 247
248 /* update changed info for hint structure. */
249 res =
250 reiserfs_allocate_blocknrs(&hint, allocated_blocks,
251 blocks_to_allocate,
252 blocks_to_allocate);
253 if (res != CARRY_ON) {
254 res = -ENOSPC;
255 pathrelse(&path);
256 goto error_exit;
257 }
258 } else {
259 res = -ENOSPC;
260 pathrelse(&path);
261 goto error_exit;
262 }
263 }
264#ifdef __BIG_ENDIAN
265 // Too bad, I have not found any way to convert a given region from
266 // cpu format to little endian format
291 { 267 {
292 int item_offset = 1; 268 int i;
293 /* 269 for (i = 0; i < blocks_to_allocate; i++)
294 * if ih is stat data, its offset is 0 and we don't want to 270 allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
295 * add 1 to pos in the hole_size calculation
296 */
297 if (is_statdata_le_ih(ih))
298 item_offset = 0;
299 hole_size = (pos + item_offset -
300 (le_key_k_offset( get_inode_item_key_version(inode),
301 &(ih->ih_key)) +
302 op_bytes_number(ih, inode->i_sb->s_blocksize))) >>
303 inode->i_sb->s_blocksize_bits;
304 } 271 }
272#endif
305 273
306 if ( hole_size > 0 ) { 274 /* Blocks allocating well might have scheduled and tree might have changed,
307 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); // How much data to insert first time. 275 let's search the tree again */
308 /* area filled with zeroes, to supply as list of zero blocknumbers 276 /* find where in the tree our write should go */
309 We allocate it outside of loop just in case loop would spin for 277 res = search_for_position_by_key(inode->i_sb, &key, &path);
310 several iterations. */ 278 if (res == IO_ERROR) {
311 char *zeros = kmalloc(to_paste*UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. 279 res = -EIO;
312 if ( !zeros ) {
313 res = -ENOMEM;
314 goto error_exit_free_blocks; 280 goto error_exit_free_blocks;
315 } 281 }
316 memset ( zeros, 0, to_paste*UNFM_P_SIZE); 282
317 do { 283 bh = get_last_bh(&path); // Get a bufferhead for last element in path.
318 to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); 284 ih = get_ih(&path); // Get a pointer to last item head in path.
319 if ( is_indirect_le_ih(ih) ) { 285 item = get_item(&path); // Get a pointer to last item in path
320 /* Ok, there is existing indirect item already. Need to append it */ 286
321 /* Calculate position past inserted item */ 287 /* Let's see what we have found */
322 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); 288 if (res != POSITION_FOUND) { /* position not found, this means that we
323 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)zeros, UNFM_P_SIZE*to_paste); 289 might need to append file with holes
324 if ( res ) { 290 first */
325 kfree(zeros); 291 // Since we are writing past the file's end, we need to find out if
326 goto error_exit_free_blocks; 292 // there is a hole that needs to be inserted before our writing
327 } 293 // position, and how many blocks it is going to cover (we need to
328 } else if ( is_statdata_le_ih(ih) ) { 294 // populate pointers to file blocks representing the hole with zeros)
329 /* No existing item, create it */ 295
330 /* item head for new item */ 296 {
331 struct item_head ins_ih; 297 int item_offset = 1;
332 298 /*
333 /* create a key for our new item */ 299 * if ih is stat data, its offset is 0 and we don't want to
334 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); 300 * add 1 to pos in the hole_size calculation
335 301 */
336 /* Create new item head for our new item */ 302 if (is_statdata_le_ih(ih))
337 make_le_item_head (&ins_ih, &key, key.version, 1, 303 item_offset = 0;
338 TYPE_INDIRECT, to_paste*UNFM_P_SIZE, 304 hole_size = (pos + item_offset -
339 0 /* free space */); 305 (le_key_k_offset
340 306 (get_inode_item_key_version(inode),
341 /* Find where such item should live in the tree */ 307 &(ih->ih_key)) + op_bytes_number(ih,
342 res = search_item (inode->i_sb, &key, &path); 308 inode->
343 if ( res != ITEM_NOT_FOUND ) { 309 i_sb->
344 /* item should not exist, otherwise we have error */ 310 s_blocksize)))
345 if ( res != -ENOSPC ) { 311 >> inode->i_sb->s_blocksize_bits;
346 reiserfs_warning (inode->i_sb, 312 }
347 "green-9008: search_by_key (%K) returned %d", 313
348 &key, res); 314 if (hole_size > 0) {
315 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time.
316 /* area filled with zeroes, to supply as list of zero blocknumbers
317 We allocate it outside of loop just in case loop would spin for
318 several iterations. */
319 char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway.
320 if (!zeros) {
321 res = -ENOMEM;
322 goto error_exit_free_blocks;
349 } 323 }
350 res = -EIO; 324 memset(zeros, 0, to_paste * UNFM_P_SIZE);
351 kfree(zeros); 325 do {
352 goto error_exit_free_blocks; 326 to_paste =
353 } 327 min_t(__u64, hole_size,
354 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)zeros); 328 MAX_ITEM_LEN(inode->i_sb->
355 } else { 329 s_blocksize) /
356 reiserfs_panic(inode->i_sb, "green-9011: Unexpected key type %K\n", &key); 330 UNFM_P_SIZE);
331 if (is_indirect_le_ih(ih)) {
332 /* Ok, there is existing indirect item already. Need to append it */
333 /* Calculate position past inserted item */
334 make_cpu_key(&key, inode,
335 le_key_k_offset
336 (get_inode_item_key_version
337 (inode),
338 &(ih->ih_key)) +
339 op_bytes_number(ih,
340 inode->
341 i_sb->
342 s_blocksize),
343 TYPE_INDIRECT, 3);
344 res =
345 reiserfs_paste_into_item(th, &path,
346 &key,
347 inode,
348 (char *)
349 zeros,
350 UNFM_P_SIZE
351 *
352 to_paste);
353 if (res) {
354 kfree(zeros);
355 goto error_exit_free_blocks;
356 }
357 } else if (is_statdata_le_ih(ih)) {
358 /* No existing item, create it */
359 /* item head for new item */
360 struct item_head ins_ih;
361
362 /* create a key for our new item */
363 make_cpu_key(&key, inode, 1,
364 TYPE_INDIRECT, 3);
365
366 /* Create new item head for our new item */
367 make_le_item_head(&ins_ih, &key,
368 key.version, 1,
369 TYPE_INDIRECT,
370 to_paste *
371 UNFM_P_SIZE,
372 0 /* free space */ );
373
374 /* Find where such item should live in the tree */
375 res =
376 search_item(inode->i_sb, &key,
377 &path);
378 if (res != ITEM_NOT_FOUND) {
379 /* item should not exist, otherwise we have error */
380 if (res != -ENOSPC) {
381 reiserfs_warning(inode->
382 i_sb,
383 "green-9008: search_by_key (%K) returned %d",
384 &key,
385 res);
386 }
387 res = -EIO;
388 kfree(zeros);
389 goto error_exit_free_blocks;
390 }
391 res =
392 reiserfs_insert_item(th, &path,
393 &key, &ins_ih,
394 inode,
395 (char *)zeros);
396 } else {
397 reiserfs_panic(inode->i_sb,
398 "green-9011: Unexpected key type %K\n",
399 &key);
400 }
401 if (res) {
402 kfree(zeros);
403 goto error_exit_free_blocks;
404 }
405 /* Now we want to check if transaction is too full, and if it is
406 we restart it. This will also free the path. */
407 if (journal_transaction_should_end
408 (th, th->t_blocks_allocated)) {
409 res =
410 restart_transaction(th, inode,
411 &path);
412 if (res) {
413 pathrelse(&path);
414 kfree(zeros);
415 goto error_exit;
416 }
417 }
418
419 /* Well, need to recalculate path and stuff */
420 set_cpu_key_k_offset(&key,
421 cpu_key_k_offset(&key) +
422 (to_paste << inode->
423 i_blkbits));
424 res =
425 search_for_position_by_key(inode->i_sb,
426 &key, &path);
427 if (res == IO_ERROR) {
428 res = -EIO;
429 kfree(zeros);
430 goto error_exit_free_blocks;
431 }
432 bh = get_last_bh(&path);
433 ih = get_ih(&path);
434 item = get_item(&path);
435 hole_size -= to_paste;
436 } while (hole_size);
437 kfree(zeros);
357 } 438 }
358 if ( res ) { 439 }
359 kfree(zeros); 440 // Go through existing indirect items first
360 goto error_exit_free_blocks; 441 // replace all zeroes with blocknumbers from list
442 // Note that if no corresponding item was found, by previous search,
443 // it means there are no existing in-tree representation for file area
444 // we are going to overwrite, so there is nothing to scan through for holes.
445 for (curr_block = 0, itempos = path.pos_in_item;
446 curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
447 retry:
448
449 if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
450 /* We run out of data in this indirect item, let's look for another
451 one. */
452 /* First if we are already modifying current item, log it */
453 if (modifying_this_item) {
454 journal_mark_dirty(th, inode->i_sb, bh);
455 modifying_this_item = 0;
456 }
457 /* Then set the key to look for a new indirect item (offset of old
458 item is added to old item length */
459 set_cpu_key_k_offset(&key,
460 le_key_k_offset
461 (get_inode_item_key_version(inode),
462 &(ih->ih_key)) +
463 op_bytes_number(ih,
464 inode->i_sb->
465 s_blocksize));
466 /* Search ofor position of new key in the tree. */
467 res =
468 search_for_position_by_key(inode->i_sb, &key,
469 &path);
470 if (res == IO_ERROR) {
471 res = -EIO;
472 goto error_exit_free_blocks;
473 }
474 bh = get_last_bh(&path);
475 ih = get_ih(&path);
476 item = get_item(&path);
477 itempos = path.pos_in_item;
478 continue; // loop to check all kinds of conditions and so on.
361 } 479 }
362 /* Now we want to check if transaction is too full, and if it is 480 /* Ok, we have correct position in item now, so let's see if it is
363 we restart it. This will also free the path. */ 481 representing file hole (blocknumber is zero) and fill it if needed */
364 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 482 if (!item[itempos]) {
365 res = restart_transaction(th, inode, &path); 483 /* Ok, a hole. Now we need to check if we already prepared this
366 if (res) { 484 block to be journaled */
367 pathrelse (&path); 485 while (!modifying_this_item) { // loop until succeed
368 kfree(zeros); 486 /* Well, this item is not journaled yet, so we must prepare
369 goto error_exit; 487 it for journal first, before we can change it */
370 } 488 struct item_head tmp_ih; // We copy item head of found item,
371 } 489 // here to detect if fs changed under
372 490 // us while we were preparing for
373 /* Well, need to recalculate path and stuff */ 491 // journal.
374 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + (to_paste << inode->i_blkbits)); 492 int fs_gen; // We store fs generation here to find if someone
375 res = search_for_position_by_key(inode->i_sb, &key, &path); 493 // changes fs under our feet
376 if ( res == IO_ERROR ) { 494
377 res = -EIO; 495 copy_item_head(&tmp_ih, ih); // Remember itemhead
378 kfree(zeros); 496 fs_gen = get_generation(inode->i_sb); // remember fs generation
379 goto error_exit_free_blocks; 497 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing.
498 if (fs_changed(fs_gen, inode->i_sb)
499 && item_moved(&tmp_ih, &path)) {
500 // Sigh, fs was changed under us, we need to look for new
501 // location of item we are working with
502
503 /* unmark prepaerd area as journaled and search for it's
504 new position */
505 reiserfs_restore_prepared_buffer(inode->
506 i_sb,
507 bh);
508 res =
509 search_for_position_by_key(inode->
510 i_sb,
511 &key,
512 &path);
513 if (res == IO_ERROR) {
514 res = -EIO;
515 goto error_exit_free_blocks;
516 }
517 bh = get_last_bh(&path);
518 ih = get_ih(&path);
519 item = get_item(&path);
520 itempos = path.pos_in_item;
521 goto retry;
522 }
523 modifying_this_item = 1;
524 }
525 item[itempos] = allocated_blocks[curr_block]; // Assign new block
526 curr_block++;
380 } 527 }
381 bh=get_last_bh(&path); 528 itempos++;
382 ih=get_ih(&path);
383 item = get_item(&path);
384 hole_size -= to_paste;
385 } while ( hole_size );
386 kfree(zeros);
387 } 529 }
388 } 530
389 531 if (modifying_this_item) { // We need to log last-accessed block, if it
390 // Go through existing indirect items first 532 // was modified, but not logged yet.
391 // replace all zeroes with blocknumbers from list 533 journal_mark_dirty(th, inode->i_sb, bh);
392 // Note that if no corresponding item was found, by previous search,
393 // it means there are no existing in-tree representation for file area
394 // we are going to overwrite, so there is nothing to scan through for holes.
395 for ( curr_block = 0, itempos = path.pos_in_item ; curr_block < blocks_to_allocate && res == POSITION_FOUND ; ) {
396retry:
397
398 if ( itempos >= ih_item_len(ih)/UNFM_P_SIZE ) {
399 /* We run out of data in this indirect item, let's look for another
400 one. */
401 /* First if we are already modifying current item, log it */
402 if ( modifying_this_item ) {
403 journal_mark_dirty (th, inode->i_sb, bh);
404 modifying_this_item = 0;
405 }
406 /* Then set the key to look for a new indirect item (offset of old
407 item is added to old item length */
408 set_cpu_key_k_offset( &key, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize));
409 /* Search ofor position of new key in the tree. */
410 res = search_for_position_by_key(inode->i_sb, &key, &path);
411 if ( res == IO_ERROR) {
412 res = -EIO;
413 goto error_exit_free_blocks;
414 }
415 bh=get_last_bh(&path);
416 ih=get_ih(&path);
417 item = get_item(&path);
418 itempos = path.pos_in_item;
419 continue; // loop to check all kinds of conditions and so on.
420 } 534 }
421 /* Ok, we have correct position in item now, so let's see if it is 535
422 representing file hole (blocknumber is zero) and fill it if needed */ 536 if (curr_block < blocks_to_allocate) {
423 if ( !item[itempos] ) { 537 // Oh, well need to append to indirect item, or to create indirect item
424 /* Ok, a hole. Now we need to check if we already prepared this 538 // if there weren't any
425 block to be journaled */ 539 if (is_indirect_le_ih(ih)) {
426 while ( !modifying_this_item ) { // loop until succeed 540 // Existing indirect item - append. First calculate key for append
427 /* Well, this item is not journaled yet, so we must prepare 541 // position. We do not need to recalculate path as it should
428 it for journal first, before we can change it */ 542 // already point to correct place.
429 struct item_head tmp_ih; // We copy item head of found item, 543 make_cpu_key(&key, inode,
430 // here to detect if fs changed under 544 le_key_k_offset(get_inode_item_key_version
431 // us while we were preparing for 545 (inode),
432 // journal. 546 &(ih->ih_key)) +
433 int fs_gen; // We store fs generation here to find if someone 547 op_bytes_number(ih,
434 // changes fs under our feet 548 inode->i_sb->s_blocksize),
435 549 TYPE_INDIRECT, 3);
436 copy_item_head (&tmp_ih, ih); // Remember itemhead 550 res =
437 fs_gen = get_generation (inode->i_sb); // remember fs generation 551 reiserfs_paste_into_item(th, &path, &key, inode,
438 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. 552 (char *)(allocated_blocks +
439 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { 553 curr_block),
440 // Sigh, fs was changed under us, we need to look for new 554 UNFM_P_SIZE *
441 // location of item we are working with 555 (blocks_to_allocate -
442 556 curr_block));
443 /* unmark prepaerd area as journaled and search for it's 557 if (res) {
444 new position */ 558 goto error_exit_free_blocks;
445 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 559 }
446 res = search_for_position_by_key(inode->i_sb, &key, &path); 560 } else if (is_statdata_le_ih(ih)) {
447 if ( res == IO_ERROR) { 561 // Last found item was statdata. That means we need to create indirect item.
448 res = -EIO; 562 struct item_head ins_ih; /* itemhead for new item */
449 goto error_exit_free_blocks; 563
450 } 564 /* create a key for our new item */
451 bh=get_last_bh(&path); 565 make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one,
452 ih=get_ih(&path); 566 // because that's
453 item = get_item(&path); 567 // where first
454 itempos = path.pos_in_item; 568 // indirect item
455 goto retry; 569 // begins
570 /* Create new item head for our new item */
571 make_le_item_head(&ins_ih, &key, key.version, 1,
572 TYPE_INDIRECT,
573 (blocks_to_allocate -
574 curr_block) * UNFM_P_SIZE,
575 0 /* free space */ );
576 /* Find where such item should live in the tree */
577 res = search_item(inode->i_sb, &key, &path);
578 if (res != ITEM_NOT_FOUND) {
579 /* Well, if we have found such item already, or some error
580 occured, we need to warn user and return error */
581 if (res != -ENOSPC) {
582 reiserfs_warning(inode->i_sb,
583 "green-9009: search_by_key (%K) "
584 "returned %d", &key,
585 res);
586 }
587 res = -EIO;
588 goto error_exit_free_blocks;
589 }
590 /* Insert item into the tree with the data as its body */
591 res =
592 reiserfs_insert_item(th, &path, &key, &ins_ih,
593 inode,
594 (char *)(allocated_blocks +
595 curr_block));
596 } else {
597 reiserfs_panic(inode->i_sb,
598 "green-9010: unexpected item type for key %K\n",
599 &key);
456 } 600 }
457 modifying_this_item = 1;
458 }
459 item[itempos] = allocated_blocks[curr_block]; // Assign new block
460 curr_block++;
461 } 601 }
462 itempos++; 602 // the caller is responsible for closing the transaction
463 } 603 // unless we return an error, they are also responsible for logging
464 604 // the inode.
465 if ( modifying_this_item ) { // We need to log last-accessed block, if it 605 //
466 // was modified, but not logged yet. 606 pathrelse(&path);
467 journal_mark_dirty (th, inode->i_sb, bh); 607 /*
468 } 608 * cleanup prellocation from previous writes
469 609 * if this is a partial block write
470 if ( curr_block < blocks_to_allocate ) { 610 */
471 // Oh, well need to append to indirect item, or to create indirect item 611 if (write_bytes & (inode->i_sb->s_blocksize - 1))
472 // if there weren't any 612 reiserfs_discard_prealloc(th, inode);
473 if ( is_indirect_le_ih(ih) ) { 613 reiserfs_write_unlock(inode->i_sb);
474 // Existing indirect item - append. First calculate key for append 614
475 // position. We do not need to recalculate path as it should 615 // go through all the pages/buffers and map the buffers to newly allocated
476 // already point to correct place. 616 // blocks (so that system knows where to write these pages later).
477 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); 617 curr_block = 0;
478 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)(allocated_blocks+curr_block), UNFM_P_SIZE*(blocks_to_allocate-curr_block)); 618 for (i = 0; i < num_pages; i++) {
479 if ( res ) { 619 struct page *page = prepared_pages[i]; //current page
480 goto error_exit_free_blocks; 620 struct buffer_head *head = page_buffers(page); // first buffer for a page
481 } 621 int block_start, block_end; // in-page offsets for buffers.
482 } else if (is_statdata_le_ih(ih) ) { 622
483 // Last found item was statdata. That means we need to create indirect item. 623 if (!page_buffers(page))
484 struct item_head ins_ih; /* itemhead for new item */ 624 reiserfs_panic(inode->i_sb,
485 625 "green-9005: No buffers for prepared page???");
486 /* create a key for our new item */ 626
487 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); // Position one, 627 /* For each buffer in page */
488 // because that's 628 for (bh = head, block_start = 0; bh != head || !block_start;
489 // where first 629 block_start = block_end, bh = bh->b_this_page) {
490 // indirect item 630 if (!bh)
491 // begins 631 reiserfs_panic(inode->i_sb,
492 /* Create new item head for our new item */ 632 "green-9006: Allocated but absent buffer for a page?");
493 make_le_item_head (&ins_ih, &key, key.version, 1, TYPE_INDIRECT, 633 block_end = block_start + inode->i_sb->s_blocksize;
494 (blocks_to_allocate-curr_block)*UNFM_P_SIZE, 634 if (i == 0 && block_end <= from)
495 0 /* free space */); 635 /* if this buffer is before requested data to map, skip it */
496 /* Find where such item should live in the tree */ 636 continue;
497 res = search_item (inode->i_sb, &key, &path); 637 if (i == num_pages - 1 && block_start >= to)
498 if ( res != ITEM_NOT_FOUND ) { 638 /* If this buffer is after requested data to map, abort
499 /* Well, if we have found such item already, or some error 639 processing of current page */
500 occured, we need to warn user and return error */ 640 break;
501 if ( res != -ENOSPC ) { 641
502 reiserfs_warning (inode->i_sb, 642 if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it
503 "green-9009: search_by_key (%K) " 643 map_bh(bh, inode->i_sb,
504 "returned %d", &key, res); 644 le32_to_cpu(allocated_blocks
645 [curr_block]));
646 curr_block++;
647 set_buffer_new(bh);
648 }
505 } 649 }
506 res = -EIO;
507 goto error_exit_free_blocks;
508 }
509 /* Insert item into the tree with the data as its body */
510 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)(allocated_blocks+curr_block));
511 } else {
512 reiserfs_panic(inode->i_sb, "green-9010: unexpected item type for key %K\n",&key);
513 }
514 }
515
516 // the caller is responsible for closing the transaction
517 // unless we return an error, they are also responsible for logging
518 // the inode.
519 //
520 pathrelse(&path);
521 /*
522 * cleanup prellocation from previous writes
523 * if this is a partial block write
524 */
525 if (write_bytes & (inode->i_sb->s_blocksize -1))
526 reiserfs_discard_prealloc(th, inode);
527 reiserfs_write_unlock(inode->i_sb);
528
529 // go through all the pages/buffers and map the buffers to newly allocated
530 // blocks (so that system knows where to write these pages later).
531 curr_block = 0;
532 for ( i = 0; i < num_pages ; i++ ) {
533 struct page *page=prepared_pages[i]; //current page
534 struct buffer_head *head = page_buffers(page);// first buffer for a page
535 int block_start, block_end; // in-page offsets for buffers.
536
537 if (!page_buffers(page))
538 reiserfs_panic(inode->i_sb, "green-9005: No buffers for prepared page???");
539
540 /* For each buffer in page */
541 for(bh = head, block_start = 0; bh != head || !block_start;
542 block_start=block_end, bh = bh->b_this_page) {
543 if (!bh)
544 reiserfs_panic(inode->i_sb, "green-9006: Allocated but absent buffer for a page?");
545 block_end = block_start+inode->i_sb->s_blocksize;
546 if (i == 0 && block_end <= from )
547 /* if this buffer is before requested data to map, skip it */
548 continue;
549 if (i == num_pages - 1 && block_start >= to)
550 /* If this buffer is after requested data to map, abort
551 processing of current page */
552 break;
553
554 if ( !buffer_mapped(bh) ) { // Ok, unmapped buffer, need to map it
555 map_bh( bh, inode->i_sb, le32_to_cpu(allocated_blocks[curr_block]));
556 curr_block++;
557 set_buffer_new(bh);
558 }
559 } 650 }
560 }
561 651
562 RFALSE( curr_block > blocks_to_allocate, "green-9007: Used too many blocks? weird"); 652 RFALSE(curr_block > blocks_to_allocate,
653 "green-9007: Used too many blocks? weird");
563 654
564 kfree(allocated_blocks); 655 kfree(allocated_blocks);
565 return 0; 656 return 0;
566 657
567// Need to deal with transaction here. 658// Need to deal with transaction here.
568error_exit_free_blocks: 659 error_exit_free_blocks:
569 pathrelse(&path); 660 pathrelse(&path);
570 // free blocks 661 // free blocks
571 for( i = 0; i < blocks_to_allocate; i++ ) 662 for (i = 0; i < blocks_to_allocate; i++)
572 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), 1); 663 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
573 664 1);
574error_exit: 665
575 if (th->t_trans_id) { 666 error_exit:
576 int err; 667 if (th->t_trans_id) {
577 // update any changes we made to blk count 668 int err;
578 reiserfs_update_sd(th, inode); 669 // update any changes we made to blk count
579 err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); 670 reiserfs_update_sd(th, inode);
580 if (err) 671 err =
581 res = err; 672 journal_end(th, inode->i_sb,
582 } 673 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
583 reiserfs_write_unlock(inode->i_sb); 674 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
584 kfree(allocated_blocks); 675 if (err)
585 676 res = err;
586 return res; 677 }
678 reiserfs_write_unlock(inode->i_sb);
679 kfree(allocated_blocks);
680
681 return res;
587} 682}
588 683
589/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ 684/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
590static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ 685static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
591 size_t num_pages /* amount of pages */) { 686 size_t num_pages /* amount of pages */ )
592 int i; // loop counter 687{
688 int i; // loop counter
593 689
594 for (i=0; i < num_pages ; i++) { 690 for (i = 0; i < num_pages; i++) {
595 struct page *page = prepared_pages[i]; 691 struct page *page = prepared_pages[i];
596 692
597 try_to_free_buffers(page); 693 try_to_free_buffers(page);
598 unlock_page(page); 694 unlock_page(page);
599 page_cache_release(page); 695 page_cache_release(page);
600 } 696 }
601} 697}
602 698
603/* This function will copy data from userspace to specified pages within 699/* This function will copy data from userspace to specified pages within
604 supplied byte range */ 700 supplied byte range */
605static int reiserfs_copy_from_user_to_file_region( 701static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */
606 loff_t pos, /* In-file position */ 702 int num_pages, /* Number of pages affected */
607 int num_pages, /* Number of pages affected */ 703 int write_bytes, /* Amount of bytes to write */
608 int write_bytes, /* Amount of bytes to write */ 704 struct page **prepared_pages, /* pointer to
609 struct page **prepared_pages, /* pointer to 705 array to
610 array to 706 prepared pages
611 prepared pages 707 */
612 */ 708 const char __user * buf /* Pointer to user-supplied
613 const char __user *buf /* Pointer to user-supplied 709 data */
614 data*/ 710 )
615 )
616{ 711{
617 long page_fault=0; // status of copy_from_user. 712 long page_fault = 0; // status of copy_from_user.
618 int i; // loop counter. 713 int i; // loop counter.
619 int offset; // offset in page 714 int offset; // offset in page
620 715
621 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { 716 for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
622 size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page 717 i++, offset = 0) {
623 struct page *page=prepared_pages[i]; // Current page we process. 718 size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
624 719 struct page *page = prepared_pages[i]; // Current page we process.
625 fault_in_pages_readable( buf, count); 720
626 721 fault_in_pages_readable(buf, count);
627 /* Copy data from userspace to the current page */ 722
628 kmap(page); 723 /* Copy data from userspace to the current page */
629 page_fault = __copy_from_user(page_address(page)+offset, buf, count); // Copy the data. 724 kmap(page);
630 /* Flush processor's dcache for this page */ 725 page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data.
631 flush_dcache_page(page); 726 /* Flush processor's dcache for this page */
632 kunmap(page); 727 flush_dcache_page(page);
633 buf+=count; 728 kunmap(page);
634 write_bytes-=count; 729 buf += count;
635 730 write_bytes -= count;
636 if (page_fault) 731
637 break; // Was there a fault? abort. 732 if (page_fault)
638 } 733 break; // Was there a fault? abort.
639 734 }
640 return page_fault?-EFAULT:0; 735
736 return page_fault ? -EFAULT : 0;
641} 737}
642 738
643/* taken fs/buffer.c:__block_commit_write */ 739/* taken fs/buffer.c:__block_commit_write */
644int reiserfs_commit_page(struct inode *inode, struct page *page, 740int reiserfs_commit_page(struct inode *inode, struct page *page,
645 unsigned from, unsigned to) 741 unsigned from, unsigned to)
646{ 742{
647 unsigned block_start, block_end; 743 unsigned block_start, block_end;
648 int partial = 0; 744 int partial = 0;
649 unsigned blocksize; 745 unsigned blocksize;
650 struct buffer_head *bh, *head; 746 struct buffer_head *bh, *head;
651 unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; 747 unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
652 int new; 748 int new;
653 int logit = reiserfs_file_data_log(inode); 749 int logit = reiserfs_file_data_log(inode);
654 struct super_block *s = inode->i_sb; 750 struct super_block *s = inode->i_sb;
655 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 751 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
656 struct reiserfs_transaction_handle th; 752 struct reiserfs_transaction_handle th;
657 int ret = 0; 753 int ret = 0;
658 754
659 th.t_trans_id = 0; 755 th.t_trans_id = 0;
660 blocksize = 1 << inode->i_blkbits; 756 blocksize = 1 << inode->i_blkbits;
661 757
662 if (logit) { 758 if (logit) {
663 reiserfs_write_lock(s); 759 reiserfs_write_lock(s);
664 ret = journal_begin(&th, s, bh_per_page + 1); 760 ret = journal_begin(&th, s, bh_per_page + 1);
665 if (ret) 761 if (ret)
666 goto drop_write_lock; 762 goto drop_write_lock;
667 reiserfs_update_inode_transaction(inode); 763 reiserfs_update_inode_transaction(inode);
668 } 764 }
669 for(bh = head = page_buffers(page), block_start = 0; 765 for (bh = head = page_buffers(page), block_start = 0;
670 bh != head || !block_start; 766 bh != head || !block_start;
671 block_start=block_end, bh = bh->b_this_page) 767 block_start = block_end, bh = bh->b_this_page) {
672 { 768
673 769 new = buffer_new(bh);
674 new = buffer_new(bh); 770 clear_buffer_new(bh);
675 clear_buffer_new(bh); 771 block_end = block_start + blocksize;
676 block_end = block_start + blocksize; 772 if (block_end <= from || block_start >= to) {
677 if (block_end <= from || block_start >= to) { 773 if (!buffer_uptodate(bh))
678 if (!buffer_uptodate(bh)) 774 partial = 1;
679 partial = 1; 775 } else {
680 } else { 776 set_buffer_uptodate(bh);
681 set_buffer_uptodate(bh); 777 if (logit) {
682 if (logit) { 778 reiserfs_prepare_for_journal(s, bh, 1);
683 reiserfs_prepare_for_journal(s, bh, 1); 779 journal_mark_dirty(&th, s, bh);
684 journal_mark_dirty(&th, s, bh); 780 } else if (!buffer_dirty(bh)) {
685 } else if (!buffer_dirty(bh)) { 781 mark_buffer_dirty(bh);
686 mark_buffer_dirty(bh); 782 /* do data=ordered on any page past the end
687 /* do data=ordered on any page past the end 783 * of file and any buffer marked BH_New.
688 * of file and any buffer marked BH_New. 784 */
689 */ 785 if (reiserfs_data_ordered(inode->i_sb) &&
690 if (reiserfs_data_ordered(inode->i_sb) && 786 (new || page->index >= i_size_index)) {
691 (new || page->index >= i_size_index)) { 787 reiserfs_add_ordered_list(inode, bh);
692 reiserfs_add_ordered_list(inode, bh); 788 }
693 } 789 }
694 } 790 }
695 } 791 }
696 } 792 if (logit) {
697 if (logit) { 793 ret = journal_end(&th, s, bh_per_page + 1);
698 ret = journal_end(&th, s, bh_per_page + 1); 794 drop_write_lock:
699drop_write_lock: 795 reiserfs_write_unlock(s);
700 reiserfs_write_unlock(s); 796 }
701 } 797 /*
702 /* 798 * If this is a partial write which happened to make all buffers
703 * If this is a partial write which happened to make all buffers 799 * uptodate then we can optimize away a bogus readpage() for
704 * uptodate then we can optimize away a bogus readpage() for 800 * the next read(). Here we 'discover' whether the page went
705 * the next read(). Here we 'discover' whether the page went 801 * uptodate as a result of this (potentially partial) write.
706 * uptodate as a result of this (potentially partial) write. 802 */
707 */ 803 if (!partial)
708 if (!partial) 804 SetPageUptodate(page);
709 SetPageUptodate(page); 805 return ret;
710 return ret;
711} 806}
712 807
713
714/* Submit pages for write. This was separated from actual file copying 808/* Submit pages for write. This was separated from actual file copying
715 because we might want to allocate block numbers in-between. 809 because we might want to allocate block numbers in-between.
716 This function assumes that caller will adjust file size to correct value. */ 810 This function assumes that caller will adjust file size to correct value. */
717static int reiserfs_submit_file_region_for_write( 811static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */
718 struct reiserfs_transaction_handle *th, 812 size_t num_pages, /* Number of pages to write */
719 struct inode *inode, 813 size_t write_bytes, /* number of bytes to write */
720 loff_t pos, /* Writing position offset */ 814 struct page **prepared_pages /* list of pages */
721 size_t num_pages, /* Number of pages to write */ 815 )
722 size_t write_bytes, /* number of bytes to write */
723 struct page **prepared_pages /* list of pages */
724 )
725{ 816{
726 int status; // return status of block_commit_write. 817 int status; // return status of block_commit_write.
727 int retval = 0; // Return value we are going to return. 818 int retval = 0; // Return value we are going to return.
728 int i; // loop counter 819 int i; // loop counter
729 int offset; // Writing offset in page. 820 int offset; // Writing offset in page.
730 int orig_write_bytes = write_bytes; 821 int orig_write_bytes = write_bytes;
731 int sd_update = 0; 822 int sd_update = 0;
732 823
733 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { 824 for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
734 int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page 825 i++, offset = 0) {
735 struct page *page=prepared_pages[i]; // Current page we process. 826 int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
736 827 struct page *page = prepared_pages[i]; // Current page we process.
737 status = reiserfs_commit_page(inode, page, offset, offset+count); 828
738 if ( status ) 829 status =
739 retval = status; // To not overcomplicate matters We are going to 830 reiserfs_commit_page(inode, page, offset, offset + count);
740 // submit all the pages even if there was error. 831 if (status)
741 // we only remember error status to report it on 832 retval = status; // To not overcomplicate matters We are going to
742 // exit. 833 // submit all the pages even if there was error.
743 write_bytes-=count; 834 // we only remember error status to report it on
744 } 835 // exit.
745 /* now that we've gotten all the ordered buffers marked dirty, 836 write_bytes -= count;
746 * we can safely update i_size and close any running transaction 837 }
747 */ 838 /* now that we've gotten all the ordered buffers marked dirty,
748 if ( pos + orig_write_bytes > inode->i_size) { 839 * we can safely update i_size and close any running transaction
749 inode->i_size = pos + orig_write_bytes; // Set new size 840 */
750 /* If the file have grown so much that tail packing is no 841 if (pos + orig_write_bytes > inode->i_size) {
751 * longer possible, reset "need to pack" flag */ 842 inode->i_size = pos + orig_write_bytes; // Set new size
752 if ( (have_large_tails (inode->i_sb) && 843 /* If the file have grown so much that tail packing is no
753 inode->i_size > i_block_size (inode)*4) || 844 * longer possible, reset "need to pack" flag */
754 (have_small_tails (inode->i_sb) && 845 if ((have_large_tails(inode->i_sb) &&
755 inode->i_size > i_block_size(inode)) ) 846 inode->i_size > i_block_size(inode) * 4) ||
756 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ; 847 (have_small_tails(inode->i_sb) &&
757 else if ( (have_large_tails (inode->i_sb) && 848 inode->i_size > i_block_size(inode)))
758 inode->i_size < i_block_size (inode)*4) || 849 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
759 (have_small_tails (inode->i_sb) && 850 else if ((have_large_tails(inode->i_sb) &&
760 inode->i_size < i_block_size(inode)) ) 851 inode->i_size < i_block_size(inode) * 4) ||
761 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; 852 (have_small_tails(inode->i_sb) &&
762 853 inode->i_size < i_block_size(inode)))
854 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
855
856 if (th->t_trans_id) {
857 reiserfs_write_lock(inode->i_sb);
858 reiserfs_update_sd(th, inode); // And update on-disk metadata
859 reiserfs_write_unlock(inode->i_sb);
860 } else
861 inode->i_sb->s_op->dirty_inode(inode);
862
863 sd_update = 1;
864 }
763 if (th->t_trans_id) { 865 if (th->t_trans_id) {
764 reiserfs_write_lock(inode->i_sb); 866 reiserfs_write_lock(inode->i_sb);
765 reiserfs_update_sd(th, inode); // And update on-disk metadata 867 if (!sd_update)
766 reiserfs_write_unlock(inode->i_sb); 868 reiserfs_update_sd(th, inode);
767 } else 869 status = journal_end(th, th->t_super, th->t_blocks_allocated);
768 inode->i_sb->s_op->dirty_inode(inode); 870 if (status)
871 retval = status;
872 reiserfs_write_unlock(inode->i_sb);
873 }
874 th->t_trans_id = 0;
769 875
770 sd_update = 1; 876 /*
771 } 877 * we have to unlock the pages after updating i_size, otherwise
772 if (th->t_trans_id) { 878 * we race with writepage
773 reiserfs_write_lock(inode->i_sb); 879 */
774 if (!sd_update) 880 for (i = 0; i < num_pages; i++) {
775 reiserfs_update_sd(th, inode); 881 struct page *page = prepared_pages[i];
776 status = journal_end(th, th->t_super, th->t_blocks_allocated); 882 unlock_page(page);
777 if (status) 883 mark_page_accessed(page);
778 retval = status; 884 page_cache_release(page);
779 reiserfs_write_unlock(inode->i_sb); 885 }
780 } 886 return retval;
781 th->t_trans_id = 0;
782
783 /*
784 * we have to unlock the pages after updating i_size, otherwise
785 * we race with writepage
786 */
787 for ( i = 0; i < num_pages ; i++) {
788 struct page *page=prepared_pages[i];
789 unlock_page(page);
790 mark_page_accessed(page);
791 page_cache_release(page);
792 }
793 return retval;
794} 887}
795 888
796/* Look if passed writing region is going to touch file's tail 889/* Look if passed writing region is going to touch file's tail
797 (if it is present). And if it is, convert the tail to unformatted node */ 890 (if it is present). And if it is, convert the tail to unformatted node */
798static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to deal with */ 891static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */
799 loff_t pos, /* Writing position */ 892 loff_t pos, /* Writing position */
800 int write_bytes /* amount of bytes to write */ 893 int write_bytes /* amount of bytes to write */
801 ) 894 )
802{ 895{
803 INITIALIZE_PATH(path); // needed for search_for_position 896 INITIALIZE_PATH(path); // needed for search_for_position
804 struct cpu_key key; // Key that would represent last touched writing byte. 897 struct cpu_key key; // Key that would represent last touched writing byte.
805 struct item_head *ih; // item header of found block; 898 struct item_head *ih; // item header of found block;
806 int res; // Return value of various functions we call. 899 int res; // Return value of various functions we call.
807 int cont_expand_offset; // We will put offset for generic_cont_expand here 900 int cont_expand_offset; // We will put offset for generic_cont_expand here
808 // This can be int just because tails are created 901 // This can be int just because tails are created
809 // only for small files. 902 // only for small files.
810 903
811/* this embodies a dependency on a particular tail policy */ 904/* this embodies a dependency on a particular tail policy */
812 if ( inode->i_size >= inode->i_sb->s_blocksize*4 ) { 905 if (inode->i_size >= inode->i_sb->s_blocksize * 4) {
813 /* such a big files do not have tails, so we won't bother ourselves 906 /* such a big files do not have tails, so we won't bother ourselves
814 to look for tails, simply return */ 907 to look for tails, simply return */
815 return 0; 908 return 0;
816 } 909 }
817
818 reiserfs_write_lock(inode->i_sb);
819 /* find the item containing the last byte to be written, or if
820 * writing past the end of the file then the last item of the
821 * file (and then we check its type). */
822 make_cpu_key (&key, inode, pos+write_bytes+1, TYPE_ANY, 3/*key length*/);
823 res = search_for_position_by_key(inode->i_sb, &key, &path);
824 if ( res == IO_ERROR ) {
825 reiserfs_write_unlock(inode->i_sb);
826 return -EIO;
827 }
828 ih = get_ih(&path);
829 res = 0;
830 if ( is_direct_le_ih(ih) ) {
831 /* Ok, closest item is file tail (tails are stored in "direct"
832 * items), so we need to unpack it. */
833 /* To not overcomplicate matters, we just call generic_cont_expand
834 which will in turn call other stuff and finally will boil down to
835 reiserfs_get_block() that would do necessary conversion. */
836 cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key));
837 pathrelse(&path);
838 res = generic_cont_expand( inode, cont_expand_offset);
839 } else
840 pathrelse(&path);
841 910
842 reiserfs_write_unlock(inode->i_sb); 911 reiserfs_write_lock(inode->i_sb);
843 return res; 912 /* find the item containing the last byte to be written, or if
913 * writing past the end of the file then the last item of the
914 * file (and then we check its type). */
915 make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY,
916 3 /*key length */ );
917 res = search_for_position_by_key(inode->i_sb, &key, &path);
918 if (res == IO_ERROR) {
919 reiserfs_write_unlock(inode->i_sb);
920 return -EIO;
921 }
922 ih = get_ih(&path);
923 res = 0;
924 if (is_direct_le_ih(ih)) {
925 /* Ok, closest item is file tail (tails are stored in "direct"
926 * items), so we need to unpack it. */
927 /* To not overcomplicate matters, we just call generic_cont_expand
928 which will in turn call other stuff and finally will boil down to
929 reiserfs_get_block() that would do necessary conversion. */
930 cont_expand_offset =
931 le_key_k_offset(get_inode_item_key_version(inode),
932 &(ih->ih_key));
933 pathrelse(&path);
934 res = generic_cont_expand(inode, cont_expand_offset);
935 } else
936 pathrelse(&path);
937
938 reiserfs_write_unlock(inode->i_sb);
939 return res;
844} 940}
845 941
846/* This function locks pages starting from @pos for @inode. 942/* This function locks pages starting from @pos for @inode.
@@ -851,275 +947,296 @@ static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to
851 append), it is zeroed, then. 947 append), it is zeroed, then.
852 Returns number of unallocated blocks that should be allocated to cover 948 Returns number of unallocated blocks that should be allocated to cover
853 new file data.*/ 949 new file data.*/
854static int reiserfs_prepare_file_region_for_write( 950static int reiserfs_prepare_file_region_for_write(struct inode *inode
855 struct inode *inode /* Inode of the file */, 951 /* Inode of the file */ ,
856 loff_t pos, /* position in the file */ 952 loff_t pos, /* position in the file */
857 size_t num_pages, /* number of pages to 953 size_t num_pages, /* number of pages to
858 prepare */ 954 prepare */
859 size_t write_bytes, /* Amount of bytes to be 955 size_t write_bytes, /* Amount of bytes to be
860 overwritten from 956 overwritten from
861 @pos */ 957 @pos */
862 struct page **prepared_pages /* pointer to array 958 struct page **prepared_pages /* pointer to array
863 where to store 959 where to store
864 prepared pages */ 960 prepared pages */
865 ) 961 )
866{ 962{
867 int res=0; // Return values of different functions we call. 963 int res = 0; // Return values of different functions we call.
868 unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages. 964 unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
869 int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page 965 int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
870 int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; 966 int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
871 /* offset of last modified byte in last 967 /* offset of last modified byte in last
872 page */ 968 page */
873 struct address_space *mapping = inode->i_mapping; // Pages are mapped here. 969 struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
874 int i; // Simple counter 970 int i; // Simple counter
875 int blocks = 0; /* Return value (blocks that should be allocated) */ 971 int blocks = 0; /* Return value (blocks that should be allocated) */
876 struct buffer_head *bh, *head; // Current bufferhead and first bufferhead 972 struct buffer_head *bh, *head; // Current bufferhead and first bufferhead
877 // of a page. 973 // of a page.
878 unsigned block_start, block_end; // Starting and ending offsets of current 974 unsigned block_start, block_end; // Starting and ending offsets of current
879 // buffer in the page. 975 // buffer in the page.
880 struct buffer_head *wait[2], **wait_bh=wait; // Buffers for page, if 976 struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if
881 // Page appeared to be not up 977 // Page appeared to be not up
882 // to date. Note how we have 978 // to date. Note how we have
883 // at most 2 buffers, this is 979 // at most 2 buffers, this is
884 // because we at most may 980 // because we at most may
885 // partially overwrite two 981 // partially overwrite two
886 // buffers for one page. One at // the beginning of write area 982 // buffers for one page. One at // the beginning of write area
887 // and one at the end. 983 // and one at the end.
888 // Everything inthe middle gets // overwritten totally. 984 // Everything inthe middle gets // overwritten totally.
889 985
890 struct cpu_key key; // cpu key of item that we are going to deal with 986 struct cpu_key key; // cpu key of item that we are going to deal with
891 struct item_head *ih = NULL; // pointer to item head that we are going to deal with 987 struct item_head *ih = NULL; // pointer to item head that we are going to deal with
892 struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with 988 struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with
893 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 989 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
894 __le32 * item=NULL; // pointer to item we are going to deal with 990 __le32 *item = NULL; // pointer to item we are going to deal with
895 int item_pos=-1; /* Position in indirect item */ 991 int item_pos = -1; /* Position in indirect item */
896 992
897 993 if (num_pages < 1) {
898 if ( num_pages < 1 ) { 994 reiserfs_warning(inode->i_sb,
899 reiserfs_warning (inode->i_sb, 995 "green-9001: reiserfs_prepare_file_region_for_write "
900 "green-9001: reiserfs_prepare_file_region_for_write " 996 "called with zero number of pages to process");
901 "called with zero number of pages to process"); 997 return -EFAULT;
902 return -EFAULT;
903 }
904
905 /* We have 2 loops for pages. In first loop we grab and lock the pages, so
906 that nobody would touch these until we release the pages. Then
907 we'd start to deal with mapping buffers to blocks. */
908 for ( i = 0; i < num_pages; i++) {
909 prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
910 if ( !prepared_pages[i]) {
911 res = -ENOMEM;
912 goto failed_page_grabbing;
913 }
914 if (!page_has_buffers(prepared_pages[i]))
915 create_empty_buffers(prepared_pages[i], inode->i_sb->s_blocksize, 0);
916 }
917
918 /* Let's count amount of blocks for a case where all the blocks
919 overwritten are new (we will substract already allocated blocks later)*/
920 if ( num_pages > 2 )
921 /* These are full-overwritten pages so we count all the blocks in
922 these pages are counted as needed to be allocated */
923 blocks = (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
924
925 /* count blocks needed for first page (possibly partially written) */
926 blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) +
927 !!(from & (inode->i_sb->s_blocksize-1)); /* roundup */
928
929 /* Now we account for last page. If last page == first page (we
930 overwrite only one page), we substract all the blocks past the
931 last writing position in a page out of already calculated number
932 of blocks */
933 blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT-inode->i_blkbits)) -
934 ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
935 /* Note how we do not roundup here since partial blocks still
936 should be allocated */
937
938 /* Now if all the write area lies past the file end, no point in
939 maping blocks, since there is none, so we just zero out remaining
940 parts of first and last pages in write area (if needed) */
941 if ( (pos & ~((loff_t)PAGE_CACHE_SIZE - 1)) > inode->i_size ) {
942 if ( from != 0 ) {/* First page needs to be partially zeroed */
943 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
944 memset(kaddr, 0, from);
945 kunmap_atomic( kaddr, KM_USER0);
946 }
947 if ( to != PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */
948 char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
949 memset(kaddr+to, 0, PAGE_CACHE_SIZE - to);
950 kunmap_atomic( kaddr, KM_USER0);
951 } 998 }
952 999
953 /* Since all blocks are new - use already calculated value */ 1000 /* We have 2 loops for pages. In first loop we grab and lock the pages, so
954 return blocks; 1001 that nobody would touch these until we release the pages. Then
955 } 1002 we'd start to deal with mapping buffers to blocks. */
956 1003 for (i = 0; i < num_pages; i++) {
957 /* Well, since we write somewhere into the middle of a file, there is 1004 prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
958 possibility we are writing over some already allocated blocks, so 1005 if (!prepared_pages[i]) {
959 let's map these blocks and substract number of such blocks out of blocks 1006 res = -ENOMEM;
960 we need to allocate (calculated above) */ 1007 goto failed_page_grabbing;
961 /* Mask write position to start on blocksize, we do it out of the
962 loop for performance reasons */
963 pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
964 /* Set cpu key to the starting position in a file (on left block boundary)*/
965 make_cpu_key (&key, inode, 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), TYPE_ANY, 3/*key length*/);
966
967 reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
968 for ( i = 0; i < num_pages ; i++ ) {
969
970 head = page_buffers(prepared_pages[i]);
971 /* For each buffer in the page */
972 for(bh = head, block_start = 0; bh != head || !block_start;
973 block_start=block_end, bh = bh->b_this_page) {
974 if (!bh)
975 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
976 /* Find where this buffer ends */
977 block_end = block_start+inode->i_sb->s_blocksize;
978 if (i == 0 && block_end <= from )
979 /* if this buffer is before requested data to map, skip it*/
980 continue;
981
982 if (i == num_pages - 1 && block_start >= to) {
983 /* If this buffer is after requested data to map, abort
984 processing of current page */
985 break;
986 } 1008 }
1009 if (!page_has_buffers(prepared_pages[i]))
1010 create_empty_buffers(prepared_pages[i],
1011 inode->i_sb->s_blocksize, 0);
1012 }
987 1013
988 if ( buffer_mapped(bh) && bh->b_blocknr !=0 ) { 1014 /* Let's count amount of blocks for a case where all the blocks
989 /* This is optimisation for a case where buffer is mapped 1015 overwritten are new (we will substract already allocated blocks later) */
990 and have blocknumber assigned. In case significant amount 1016 if (num_pages > 2)
991 of such buffers are present, we may avoid some amount 1017 /* These are full-overwritten pages so we count all the blocks in
992 of search_by_key calls. 1018 these pages are counted as needed to be allocated */
993 Probably it would be possible to move parts of this code 1019 blocks =
994 out of BKL, but I afraid that would overcomplicate code 1020 (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
995 without any noticeable benefit. 1021
996 */ 1022 /* count blocks needed for first page (possibly partially written) */
997 item_pos++; 1023 blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */
998 /* Update the key */ 1024
999 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize); 1025 /* Now we account for last page. If last page == first page (we
1000 blocks--; // Decrease the amount of blocks that need to be 1026 overwrite only one page), we substract all the blocks past the
1001 // allocated 1027 last writing position in a page out of already calculated number
1002 continue; // Go to the next buffer 1028 of blocks */
1029 blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) -
1030 ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
1031 /* Note how we do not roundup here since partial blocks still
1032 should be allocated */
1033
1034 /* Now if all the write area lies past the file end, no point in
1035 maping blocks, since there is none, so we just zero out remaining
1036 parts of first and last pages in write area (if needed) */
1037 if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
1038 if (from != 0) { /* First page needs to be partially zeroed */
1039 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
1040 memset(kaddr, 0, from);
1041 kunmap_atomic(kaddr, KM_USER0);
1042 }
1043 if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */
1044 char *kaddr =
1045 kmap_atomic(prepared_pages[num_pages - 1],
1046 KM_USER0);
1047 memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
1048 kunmap_atomic(kaddr, KM_USER0);
1003 } 1049 }
1004 1050
1005 if ( !itembuf || /* if first iteration */ 1051 /* Since all blocks are new - use already calculated value */
1006 item_pos >= ih_item_len(ih)/UNFM_P_SIZE) 1052 return blocks;
1007 { /* or if we progressed past the 1053 }
1008 current unformatted_item */ 1054
1009 /* Try to find next item */ 1055 /* Well, since we write somewhere into the middle of a file, there is
1010 res = search_for_position_by_key(inode->i_sb, &key, &path); 1056 possibility we are writing over some already allocated blocks, so
1011 /* Abort if no more items */ 1057 let's map these blocks and substract number of such blocks out of blocks
1012 if ( res != POSITION_FOUND ) { 1058 we need to allocate (calculated above) */
1013 /* make sure later loops don't use this item */ 1059 /* Mask write position to start on blocksize, we do it out of the
1014 itembuf = NULL; 1060 loop for performance reasons */
1015 item = NULL; 1061 pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
1016 break; 1062 /* Set cpu key to the starting position in a file (on left block boundary) */
1063 make_cpu_key(&key, inode,
1064 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)),
1065 TYPE_ANY, 3 /*key length */ );
1066
1067 reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
1068 for (i = 0; i < num_pages; i++) {
1069
1070 head = page_buffers(prepared_pages[i]);
1071 /* For each buffer in the page */
1072 for (bh = head, block_start = 0; bh != head || !block_start;
1073 block_start = block_end, bh = bh->b_this_page) {
1074 if (!bh)
1075 reiserfs_panic(inode->i_sb,
1076 "green-9002: Allocated but absent buffer for a page?");
1077 /* Find where this buffer ends */
1078 block_end = block_start + inode->i_sb->s_blocksize;
1079 if (i == 0 && block_end <= from)
1080 /* if this buffer is before requested data to map, skip it */
1081 continue;
1082
1083 if (i == num_pages - 1 && block_start >= to) {
1084 /* If this buffer is after requested data to map, abort
1085 processing of current page */
1086 break;
1017 } 1087 }
1018 1088
1019 /* Update information about current indirect item */ 1089 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1020 itembuf = get_last_bh( &path ); 1090 /* This is optimisation for a case where buffer is mapped
1021 ih = get_ih( &path ); 1091 and have blocknumber assigned. In case significant amount
1022 item = get_item( &path ); 1092 of such buffers are present, we may avoid some amount
1023 item_pos = path.pos_in_item; 1093 of search_by_key calls.
1094 Probably it would be possible to move parts of this code
1095 out of BKL, but I afraid that would overcomplicate code
1096 without any noticeable benefit.
1097 */
1098 item_pos++;
1099 /* Update the key */
1100 set_cpu_key_k_offset(&key,
1101 cpu_key_k_offset(&key) +
1102 inode->i_sb->s_blocksize);
1103 blocks--; // Decrease the amount of blocks that need to be
1104 // allocated
1105 continue; // Go to the next buffer
1106 }
1024 1107
1025 RFALSE( !is_indirect_le_ih (ih), "green-9003: indirect item expected"); 1108 if (!itembuf || /* if first iteration */
1026 } 1109 item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the
1110 current unformatted_item */
1111 /* Try to find next item */
1112 res =
1113 search_for_position_by_key(inode->i_sb,
1114 &key, &path);
1115 /* Abort if no more items */
1116 if (res != POSITION_FOUND) {
1117 /* make sure later loops don't use this item */
1118 itembuf = NULL;
1119 item = NULL;
1120 break;
1121 }
1122
1123 /* Update information about current indirect item */
1124 itembuf = get_last_bh(&path);
1125 ih = get_ih(&path);
1126 item = get_item(&path);
1127 item_pos = path.pos_in_item;
1128
1129 RFALSE(!is_indirect_le_ih(ih),
1130 "green-9003: indirect item expected");
1131 }
1027 1132
1028 /* See if there is some block associated with the file 1133 /* See if there is some block associated with the file
1029 at that position, map the buffer to this block */ 1134 at that position, map the buffer to this block */
1030 if ( get_block_num(item,item_pos) ) { 1135 if (get_block_num(item, item_pos)) {
1031 map_bh(bh, inode->i_sb, get_block_num(item,item_pos)); 1136 map_bh(bh, inode->i_sb,
1032 blocks--; // Decrease the amount of blocks that need to be 1137 get_block_num(item, item_pos));
1033 // allocated 1138 blocks--; // Decrease the amount of blocks that need to be
1139 // allocated
1140 }
1141 item_pos++;
1142 /* Update the key */
1143 set_cpu_key_k_offset(&key,
1144 cpu_key_k_offset(&key) +
1145 inode->i_sb->s_blocksize);
1034 } 1146 }
1035 item_pos++;
1036 /* Update the key */
1037 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
1038 } 1147 }
1039 } 1148 pathrelse(&path); // Free the path
1040 pathrelse(&path); // Free the path 1149 reiserfs_write_unlock(inode->i_sb);
1041 reiserfs_write_unlock(inode->i_sb);
1042 1150
1043 /* Now zero out unmappend buffers for the first and last pages of 1151 /* Now zero out unmappend buffers for the first and last pages of
1044 write area or issue read requests if page is mapped. */ 1152 write area or issue read requests if page is mapped. */
1045 /* First page, see if it is not uptodate */ 1153 /* First page, see if it is not uptodate */
1046 if ( !PageUptodate(prepared_pages[0]) ) { 1154 if (!PageUptodate(prepared_pages[0])) {
1047 head = page_buffers(prepared_pages[0]); 1155 head = page_buffers(prepared_pages[0]);
1048 1156
1049 /* For each buffer in page */ 1157 /* For each buffer in page */
1050 for(bh = head, block_start = 0; bh != head || !block_start; 1158 for (bh = head, block_start = 0; bh != head || !block_start;
1051 block_start=block_end, bh = bh->b_this_page) { 1159 block_start = block_end, bh = bh->b_this_page) {
1052 1160
1053 if (!bh) 1161 if (!bh)
1054 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); 1162 reiserfs_panic(inode->i_sb,
1055 /* Find where this buffer ends */ 1163 "green-9002: Allocated but absent buffer for a page?");
1056 block_end = block_start+inode->i_sb->s_blocksize; 1164 /* Find where this buffer ends */
1057 if ( block_end <= from ) 1165 block_end = block_start + inode->i_sb->s_blocksize;
1058 /* if this buffer is before requested data to map, skip it*/ 1166 if (block_end <= from)
1059 continue; 1167 /* if this buffer is before requested data to map, skip it */
1060 if ( block_start < from ) { /* Aha, our partial buffer */ 1168 continue;
1061 if ( buffer_mapped(bh) ) { /* If it is mapped, we need to 1169 if (block_start < from) { /* Aha, our partial buffer */
1062 issue READ request for it to 1170 if (buffer_mapped(bh)) { /* If it is mapped, we need to
1063 not loose data */ 1171 issue READ request for it to
1064 ll_rw_block(READ, 1, &bh); 1172 not loose data */
1065 *wait_bh++=bh; 1173 ll_rw_block(READ, 1, &bh);
1066 } else { /* Not mapped, zero it */ 1174 *wait_bh++ = bh;
1067 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); 1175 } else { /* Not mapped, zero it */
1068 memset(kaddr+block_start, 0, from-block_start); 1176 char *kaddr =
1069 kunmap_atomic( kaddr, KM_USER0); 1177 kmap_atomic(prepared_pages[0],
1070 set_buffer_uptodate(bh); 1178 KM_USER0);
1071 } 1179 memset(kaddr + block_start, 0,
1180 from - block_start);
1181 kunmap_atomic(kaddr, KM_USER0);
1182 set_buffer_uptodate(bh);
1183 }
1184 }
1072 } 1185 }
1073 }
1074 } 1186 }
1075 1187
1076 /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */ 1188 /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
1077 if ( !PageUptodate(prepared_pages[num_pages-1]) || 1189 if (!PageUptodate(prepared_pages[num_pages - 1]) ||
1078 ((pos+write_bytes)>>PAGE_CACHE_SHIFT) > (inode->i_size>>PAGE_CACHE_SHIFT) ) { 1190 ((pos + write_bytes) >> PAGE_CACHE_SHIFT) >
1079 head = page_buffers(prepared_pages[num_pages-1]); 1191 (inode->i_size >> PAGE_CACHE_SHIFT)) {
1080 1192 head = page_buffers(prepared_pages[num_pages - 1]);
1081 /* for each buffer in page */ 1193
1082 for(bh = head, block_start = 0; bh != head || !block_start; 1194 /* for each buffer in page */
1083 block_start=block_end, bh = bh->b_this_page) { 1195 for (bh = head, block_start = 0; bh != head || !block_start;
1084 1196 block_start = block_end, bh = bh->b_this_page) {
1085 if (!bh) 1197
1086 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); 1198 if (!bh)
1087 /* Find where this buffer ends */ 1199 reiserfs_panic(inode->i_sb,
1088 block_end = block_start+inode->i_sb->s_blocksize; 1200 "green-9002: Allocated but absent buffer for a page?");
1089 if ( block_start >= to ) 1201 /* Find where this buffer ends */
1090 /* if this buffer is after requested data to map, skip it*/ 1202 block_end = block_start + inode->i_sb->s_blocksize;
1091 break; 1203 if (block_start >= to)
1092 if ( block_end > to ) { /* Aha, our partial buffer */ 1204 /* if this buffer is after requested data to map, skip it */
1093 if ( buffer_mapped(bh) ) { /* If it is mapped, we need to 1205 break;
1094 issue READ request for it to 1206 if (block_end > to) { /* Aha, our partial buffer */
1095 not loose data */ 1207 if (buffer_mapped(bh)) { /* If it is mapped, we need to
1096 ll_rw_block(READ, 1, &bh); 1208 issue READ request for it to
1097 *wait_bh++=bh; 1209 not loose data */
1098 } else { /* Not mapped, zero it */ 1210 ll_rw_block(READ, 1, &bh);
1099 char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0); 1211 *wait_bh++ = bh;
1100 memset(kaddr+to, 0, block_end-to); 1212 } else { /* Not mapped, zero it */
1101 kunmap_atomic( kaddr, KM_USER0); 1213 char *kaddr =
1102 set_buffer_uptodate(bh); 1214 kmap_atomic(prepared_pages
1103 } 1215 [num_pages - 1],
1216 KM_USER0);
1217 memset(kaddr + to, 0, block_end - to);
1218 kunmap_atomic(kaddr, KM_USER0);
1219 set_buffer_uptodate(bh);
1220 }
1221 }
1104 } 1222 }
1105 }
1106 } 1223 }
1107 1224
1108 /* Wait for read requests we made to happen, if necessary */ 1225 /* Wait for read requests we made to happen, if necessary */
1109 while(wait_bh > wait) { 1226 while (wait_bh > wait) {
1110 wait_on_buffer(*--wait_bh); 1227 wait_on_buffer(*--wait_bh);
1111 if (!buffer_uptodate(*wait_bh)) { 1228 if (!buffer_uptodate(*wait_bh)) {
1112 res = -EIO; 1229 res = -EIO;
1113 goto failed_read; 1230 goto failed_read;
1231 }
1114 } 1232 }
1115 } 1233
1116 1234 return blocks;
1117 return blocks; 1235 failed_page_grabbing:
1118failed_page_grabbing: 1236 num_pages = i;
1119 num_pages = i; 1237 failed_read:
1120failed_read: 1238 reiserfs_unprepare_pages(prepared_pages, num_pages);
1121 reiserfs_unprepare_pages(prepared_pages, num_pages); 1239 return res;
1122 return res;
1123} 1240}
1124 1241
1125/* Write @count bytes at position @ppos in a file indicated by @file 1242/* Write @count bytes at position @ppos in a file indicated by @file
@@ -1148,262 +1265,305 @@ failed_read:
1148 Future Features: providing search_by_key with hints. 1265 Future Features: providing search_by_key with hints.
1149 1266
1150*/ 1267*/
1151static ssize_t reiserfs_file_write( struct file *file, /* the file we are going to write into */ 1268static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */
1152 const char __user *buf, /* pointer to user supplied data 1269 const char __user * buf, /* pointer to user supplied data
1153(in userspace) */ 1270 (in userspace) */
1154 size_t count, /* amount of bytes to write */ 1271 size_t count, /* amount of bytes to write */
1155 loff_t *ppos /* pointer to position in file that we start writing at. Should be updated to 1272 loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to
1156 * new current position before returning. */ ) 1273 * new current position before returning. */
1274 )
1157{ 1275{
1158 size_t already_written = 0; // Number of bytes already written to the file. 1276 size_t already_written = 0; // Number of bytes already written to the file.
1159 loff_t pos; // Current position in the file. 1277 loff_t pos; // Current position in the file.
1160 ssize_t res; // return value of various functions that we call. 1278 ssize_t res; // return value of various functions that we call.
1161 int err = 0; 1279 int err = 0;
1162 struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to. 1280 struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to.
1163 /* To simplify coding at this time, we store 1281 /* To simplify coding at this time, we store
1164 locked pages in array for now */ 1282 locked pages in array for now */
1165 struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; 1283 struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
1166 struct reiserfs_transaction_handle th; 1284 struct reiserfs_transaction_handle th;
1167 th.t_trans_id = 0; 1285 th.t_trans_id = 0;
1168 1286
1169 if ( file->f_flags & O_DIRECT) { // Direct IO needs treatment 1287 if (file->f_flags & O_DIRECT) { // Direct IO needs treatment
1170 ssize_t result, after_file_end = 0; 1288 ssize_t result, after_file_end = 0;
1171 if ( (*ppos + count >= inode->i_size) || (file->f_flags & O_APPEND) ) { 1289 if ((*ppos + count >= inode->i_size)
1172 /* If we are appending a file, we need to put this savelink in here. 1290 || (file->f_flags & O_APPEND)) {
1173 If we will crash while doing direct io, finish_unfinished will 1291 /* If we are appending a file, we need to put this savelink in here.
1174 cut the garbage from the file end. */ 1292 If we will crash while doing direct io, finish_unfinished will
1175 reiserfs_write_lock(inode->i_sb); 1293 cut the garbage from the file end. */
1176 err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); 1294 reiserfs_write_lock(inode->i_sb);
1177 if (err) { 1295 err =
1178 reiserfs_write_unlock (inode->i_sb); 1296 journal_begin(&th, inode->i_sb,
1179 return err; 1297 JOURNAL_PER_BALANCE_CNT);
1180 } 1298 if (err) {
1181 reiserfs_update_inode_transaction(inode); 1299 reiserfs_write_unlock(inode->i_sb);
1182 add_save_link (&th, inode, 1 /* Truncate */); 1300 return err;
1183 after_file_end = 1; 1301 }
1184 err = journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); 1302 reiserfs_update_inode_transaction(inode);
1185 reiserfs_write_unlock(inode->i_sb); 1303 add_save_link(&th, inode, 1 /* Truncate */ );
1186 if (err) 1304 after_file_end = 1;
1187 return err; 1305 err =
1188 } 1306 journal_end(&th, inode->i_sb,
1189 result = generic_file_write(file, buf, count, ppos); 1307 JOURNAL_PER_BALANCE_CNT);
1190 1308 reiserfs_write_unlock(inode->i_sb);
1191 if ( after_file_end ) { /* Now update i_size and remove the savelink */ 1309 if (err)
1192 struct reiserfs_transaction_handle th; 1310 return err;
1193 reiserfs_write_lock(inode->i_sb); 1311 }
1194 err = journal_begin(&th, inode->i_sb, 1); 1312 result = generic_file_write(file, buf, count, ppos);
1195 if (err) { 1313
1196 reiserfs_write_unlock (inode->i_sb); 1314 if (after_file_end) { /* Now update i_size and remove the savelink */
1197 return err; 1315 struct reiserfs_transaction_handle th;
1198 } 1316 reiserfs_write_lock(inode->i_sb);
1199 reiserfs_update_inode_transaction(inode); 1317 err = journal_begin(&th, inode->i_sb, 1);
1200 reiserfs_update_sd(&th, inode); 1318 if (err) {
1201 err = journal_end(&th, inode->i_sb, 1); 1319 reiserfs_write_unlock(inode->i_sb);
1202 if (err) { 1320 return err;
1203 reiserfs_write_unlock (inode->i_sb); 1321 }
1204 return err; 1322 reiserfs_update_inode_transaction(inode);
1205 } 1323 reiserfs_update_sd(&th, inode);
1206 err = remove_save_link (inode, 1/* truncate */); 1324 err = journal_end(&th, inode->i_sb, 1);
1207 reiserfs_write_unlock(inode->i_sb); 1325 if (err) {
1208 if (err) 1326 reiserfs_write_unlock(inode->i_sb);
1209 return err; 1327 return err;
1210 } 1328 }
1211 1329 err = remove_save_link(inode, 1 /* truncate */ );
1212 return result; 1330 reiserfs_write_unlock(inode->i_sb);
1213 } 1331 if (err)
1214 1332 return err;
1215 if ( unlikely((ssize_t) count < 0 )) 1333 }
1216 return -EINVAL;
1217
1218 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1219 return -EFAULT;
1220
1221 down(&inode->i_sem); // locks the entire file for just us
1222
1223 pos = *ppos;
1224
1225 /* Check if we can write to specified region of file, file
1226 is not overly big and this kind of stuff. Adjust pos and
1227 count, if needed */
1228 res = generic_write_checks(file, &pos, &count, 0);
1229 if (res)
1230 goto out;
1231
1232 if ( count == 0 )
1233 goto out;
1234
1235 res = remove_suid(file->f_dentry);
1236 if (res)
1237 goto out;
1238
1239 inode_update_time(inode, 1); /* Both mtime and ctime */
1240
1241 // Ok, we are done with all the checks.
1242 1334
1243 // Now we should start real work 1335 return result;
1336 }
1244 1337
1245 /* If we are going to write past the file's packed tail or if we are going 1338 if (unlikely((ssize_t) count < 0))
1246 to overwrite part of the tail, we need that tail to be converted into 1339 return -EINVAL;
1247 unformatted node */ 1340
1248 res = reiserfs_check_for_tail_and_convert( inode, pos, count); 1341 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1249 if (res) 1342 return -EFAULT;
1250 goto out; 1343
1344 down(&inode->i_sem); // locks the entire file for just us
1345
1346 pos = *ppos;
1347
1348 /* Check if we can write to specified region of file, file
1349 is not overly big and this kind of stuff. Adjust pos and
1350 count, if needed */
1351 res = generic_write_checks(file, &pos, &count, 0);
1352 if (res)
1353 goto out;
1354
1355 if (count == 0)
1356 goto out;
1357
1358 res = remove_suid(file->f_dentry);
1359 if (res)
1360 goto out;
1361
1362 inode_update_time(inode, 1); /* Both mtime and ctime */
1363
1364 // Ok, we are done with all the checks.
1365
1366 // Now we should start real work
1367
1368 /* If we are going to write past the file's packed tail or if we are going
1369 to overwrite part of the tail, we need that tail to be converted into
1370 unformatted node */
1371 res = reiserfs_check_for_tail_and_convert(inode, pos, count);
1372 if (res)
1373 goto out;
1374
1375 while (count > 0) {
1376 /* This is the main loop in which we running until some error occures
1377 or until we write all of the data. */
1378 size_t num_pages; /* amount of pages we are going to write this iteration */
1379 size_t write_bytes; /* amount of bytes to write during this iteration */
1380 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
1381
1382 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
1383 num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
1384 pages */
1385 ((count +
1386 (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT);
1387 /* convert size to amount of
1388 pages */
1389 reiserfs_write_lock(inode->i_sb);
1390 if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
1391 || num_pages > reiserfs_can_fit_pages(inode->i_sb)) {
1392 /* If we were asked to write more data than we want to or if there
1393 is not that much space, then we shorten amount of data to write
1394 for this iteration. */
1395 num_pages =
1396 min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME,
1397 reiserfs_can_fit_pages(inode->i_sb));
1398 /* Also we should not forget to set size in bytes accordingly */
1399 write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1400 (pos & (PAGE_CACHE_SIZE - 1));
1401 /* If position is not on the
1402 start of the page, we need
1403 to substract the offset
1404 within page */
1405 } else
1406 write_bytes = count;
1407
1408 /* reserve the blocks to be allocated later, so that later on
1409 we still have the space to write the blocks to */
1410 reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1411 num_pages <<
1412 (PAGE_CACHE_SHIFT -
1413 inode->i_blkbits));
1414 reiserfs_write_unlock(inode->i_sb);
1415
1416 if (!num_pages) { /* If we do not have enough space even for a single page... */
1417 if (pos >
1418 inode->i_size + inode->i_sb->s_blocksize -
1419 (pos & (inode->i_sb->s_blocksize - 1))) {
1420 res = -ENOSPC;
1421 break; // In case we are writing past the end of the last file block, break.
1422 }
1423 // Otherwise we are possibly overwriting the file, so
1424 // let's set write size to be equal or less than blocksize.
1425 // This way we get it correctly for file holes.
1426 // But overwriting files on absolutelly full volumes would not
1427 // be very efficient. Well, people are not supposed to fill
1428 // 100% of disk space anyway.
1429 write_bytes =
1430 min_t(size_t, count,
1431 inode->i_sb->s_blocksize -
1432 (pos & (inode->i_sb->s_blocksize - 1)));
1433 num_pages = 1;
1434 // No blocks were claimed before, so do it now.
1435 reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1436 1 <<
1437 (PAGE_CACHE_SHIFT
1438 -
1439 inode->
1440 i_blkbits));
1441 }
1251 1442
1252 while ( count > 0) { 1443 /* Prepare for writing into the region, read in all the
1253 /* This is the main loop in which we running until some error occures 1444 partially overwritten pages, if needed. And lock the pages,
1254 or until we write all of the data. */ 1445 so that nobody else can access these until we are done.
1255 size_t num_pages;/* amount of pages we are going to write this iteration */ 1446 We get number of actual blocks needed as a result. */
1256 size_t write_bytes; /* amount of bytes to write during this iteration */ 1447 blocks_to_allocate =
1257 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ 1448 reiserfs_prepare_file_region_for_write(inode, pos,
1258 1449 num_pages,
1259 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/ 1450 write_bytes,
1260 num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial 1451 prepared_pages);
1261 pages */ 1452 if (blocks_to_allocate < 0) {
1262 ((count + (pos & (PAGE_CACHE_SIZE-1))) >> PAGE_CACHE_SHIFT); 1453 res = blocks_to_allocate;
1263 /* convert size to amount of 1454 reiserfs_release_claimed_blocks(inode->i_sb,
1264 pages */ 1455 num_pages <<
1265 reiserfs_write_lock(inode->i_sb); 1456 (PAGE_CACHE_SHIFT -
1266 if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME 1457 inode->i_blkbits));
1267 || num_pages > reiserfs_can_fit_pages(inode->i_sb) ) { 1458 break;
1268 /* If we were asked to write more data than we want to or if there 1459 }
1269 is not that much space, then we shorten amount of data to write
1270 for this iteration. */
1271 num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
1272 /* Also we should not forget to set size in bytes accordingly */
1273 write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1274 (pos & (PAGE_CACHE_SIZE-1));
1275 /* If position is not on the
1276 start of the page, we need
1277 to substract the offset
1278 within page */
1279 } else
1280 write_bytes = count;
1281 1460
1282 /* reserve the blocks to be allocated later, so that later on 1461 /* First we correct our estimate of how many blocks we need */
1283 we still have the space to write the blocks to */ 1462 reiserfs_release_claimed_blocks(inode->i_sb,
1284 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); 1463 (num_pages <<
1285 reiserfs_write_unlock(inode->i_sb); 1464 (PAGE_CACHE_SHIFT -
1465 inode->i_sb->
1466 s_blocksize_bits)) -
1467 blocks_to_allocate);
1468
1469 if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */
1470 /* Fill in all the possible holes and append the file if needed */
1471 res =
1472 reiserfs_allocate_blocks_for_region(&th, inode, pos,
1473 num_pages,
1474 write_bytes,
1475 prepared_pages,
1476 blocks_to_allocate);
1477 }
1286 1478
1287 if ( !num_pages ) { /* If we do not have enough space even for a single page... */ 1479 /* well, we have allocated the blocks, so it is time to free
1288 if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) { 1480 the reservation we made earlier. */
1289 res = -ENOSPC; 1481 reiserfs_release_claimed_blocks(inode->i_sb,
1290 break; // In case we are writing past the end of the last file block, break. 1482 blocks_to_allocate);
1291 } 1483 if (res) {
1292 // Otherwise we are possibly overwriting the file, so 1484 reiserfs_unprepare_pages(prepared_pages, num_pages);
1293 // let's set write size to be equal or less than blocksize. 1485 break;
1294 // This way we get it correctly for file holes. 1486 }
1295 // But overwriting files on absolutelly full volumes would not
1296 // be very efficient. Well, people are not supposed to fill
1297 // 100% of disk space anyway.
1298 write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
1299 num_pages = 1;
1300 // No blocks were claimed before, so do it now.
1301 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1302 }
1303 1487
1304 /* Prepare for writing into the region, read in all the 1488/* NOTE that allocating blocks and filling blocks can be done in reverse order
1305 partially overwritten pages, if needed. And lock the pages, 1489 and probably we would do that just to get rid of garbage in files after a
1306 so that nobody else can access these until we are done. 1490 crash */
1307 We get number of actual blocks needed as a result.*/
1308 blocks_to_allocate = reiserfs_prepare_file_region_for_write(inode, pos, num_pages, write_bytes, prepared_pages);
1309 if ( blocks_to_allocate < 0 ) {
1310 res = blocks_to_allocate;
1311 reiserfs_release_claimed_blocks(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1312 break;
1313 }
1314 1491
1315 /* First we correct our estimate of how many blocks we need */ 1492 /* Copy data from user-supplied buffer to file's pages */
1316 reiserfs_release_claimed_blocks(inode->i_sb, (num_pages << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - blocks_to_allocate ); 1493 res =
1494 reiserfs_copy_from_user_to_file_region(pos, num_pages,
1495 write_bytes,
1496 prepared_pages, buf);
1497 if (res) {
1498 reiserfs_unprepare_pages(prepared_pages, num_pages);
1499 break;
1500 }
1317 1501
1318 if ( blocks_to_allocate > 0) {/*We only allocate blocks if we need to*/ 1502 /* Send the pages to disk and unlock them. */
1319 /* Fill in all the possible holes and append the file if needed */ 1503 res =
1320 res = reiserfs_allocate_blocks_for_region(&th, inode, pos, num_pages, write_bytes, prepared_pages, blocks_to_allocate); 1504 reiserfs_submit_file_region_for_write(&th, inode, pos,
1505 num_pages,
1506 write_bytes,
1507 prepared_pages);
1508 if (res)
1509 break;
1510
1511 already_written += write_bytes;
1512 buf += write_bytes;
1513 *ppos = pos += write_bytes;
1514 count -= write_bytes;
1515 balance_dirty_pages_ratelimited(inode->i_mapping);
1321 } 1516 }
1322 1517
1323 /* well, we have allocated the blocks, so it is time to free 1518 /* this is only true on error */
1324 the reservation we made earlier. */ 1519 if (th.t_trans_id) {
1325 reiserfs_release_claimed_blocks(inode->i_sb, blocks_to_allocate); 1520 reiserfs_write_lock(inode->i_sb);
1326 if ( res ) { 1521 err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1327 reiserfs_unprepare_pages(prepared_pages, num_pages); 1522 reiserfs_write_unlock(inode->i_sb);
1328 break; 1523 if (err) {
1524 res = err;
1525 goto out;
1526 }
1329 } 1527 }
1330 1528
1331/* NOTE that allocating blocks and filling blocks can be done in reverse order 1529 if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
1332 and probably we would do that just to get rid of garbage in files after a 1530 res =
1333 crash */ 1531 generic_osync_inode(inode, file->f_mapping,
1532 OSYNC_METADATA | OSYNC_DATA);
1334 1533
1335 /* Copy data from user-supplied buffer to file's pages */ 1534 up(&inode->i_sem);
1336 res = reiserfs_copy_from_user_to_file_region(pos, num_pages, write_bytes, prepared_pages, buf); 1535 reiserfs_async_progress_wait(inode->i_sb);
1337 if ( res ) { 1536 return (already_written != 0) ? already_written : res;
1338 reiserfs_unprepare_pages(prepared_pages, num_pages);
1339 break;
1340 }
1341 1537
1342 /* Send the pages to disk and unlock them. */ 1538 out:
1343 res = reiserfs_submit_file_region_for_write(&th, inode, pos, num_pages, 1539 up(&inode->i_sem); // unlock the file on exit.
1344 write_bytes,prepared_pages); 1540 return res;
1345 if ( res )
1346 break;
1347
1348 already_written += write_bytes;
1349 buf += write_bytes;
1350 *ppos = pos += write_bytes;
1351 count -= write_bytes;
1352 balance_dirty_pages_ratelimited(inode->i_mapping);
1353 }
1354
1355 /* this is only true on error */
1356 if (th.t_trans_id) {
1357 reiserfs_write_lock(inode->i_sb);
1358 err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1359 reiserfs_write_unlock(inode->i_sb);
1360 if (err) {
1361 res = err;
1362 goto out;
1363 }
1364 }
1365
1366 if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
1367 res = generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA|OSYNC_DATA);
1368
1369 up(&inode->i_sem);
1370 reiserfs_async_progress_wait(inode->i_sb);
1371 return (already_written != 0)?already_written:res;
1372
1373out:
1374 up(&inode->i_sem); // unlock the file on exit.
1375 return res;
1376} 1541}
1377 1542
1378static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user *buf, 1543static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
1379 size_t count, loff_t pos) 1544 size_t count, loff_t pos)
1380{ 1545{
1381 return generic_file_aio_write(iocb, buf, count, pos); 1546 return generic_file_aio_write(iocb, buf, count, pos);
1382} 1547}
1383 1548
1384
1385
1386struct file_operations reiserfs_file_operations = { 1549struct file_operations reiserfs_file_operations = {
1387 .read = generic_file_read, 1550 .read = generic_file_read,
1388 .write = reiserfs_file_write, 1551 .write = reiserfs_file_write,
1389 .ioctl = reiserfs_ioctl, 1552 .ioctl = reiserfs_ioctl,
1390 .mmap = generic_file_mmap, 1553 .mmap = generic_file_mmap,
1391 .release = reiserfs_file_release, 1554 .release = reiserfs_file_release,
1392 .fsync = reiserfs_sync_file, 1555 .fsync = reiserfs_sync_file,
1393 .sendfile = generic_file_sendfile, 1556 .sendfile = generic_file_sendfile,
1394 .aio_read = generic_file_aio_read, 1557 .aio_read = generic_file_aio_read,
1395 .aio_write = reiserfs_aio_write, 1558 .aio_write = reiserfs_aio_write,
1396}; 1559};
1397 1560
1398 1561struct inode_operations reiserfs_file_inode_operations = {
1399struct inode_operations reiserfs_file_inode_operations = { 1562 .truncate = reiserfs_vfs_truncate_file,
1400 .truncate = reiserfs_vfs_truncate_file, 1563 .setattr = reiserfs_setattr,
1401 .setattr = reiserfs_setattr, 1564 .setxattr = reiserfs_setxattr,
1402 .setxattr = reiserfs_setxattr, 1565 .getxattr = reiserfs_getxattr,
1403 .getxattr = reiserfs_getxattr, 1566 .listxattr = reiserfs_listxattr,
1404 .listxattr = reiserfs_listxattr, 1567 .removexattr = reiserfs_removexattr,
1405 .removexattr = reiserfs_removexattr, 1568 .permission = reiserfs_permission,
1406 .permission = reiserfs_permission,
1407}; 1569};
1408
1409
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index e4f64be9e15b..2706e2adffab 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -34,14 +34,12 @@
34 ** 34 **
35 **/ 35 **/
36 36
37
38#include <linux/config.h> 37#include <linux/config.h>
39#include <linux/time.h> 38#include <linux/time.h>
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/reiserfs_fs.h> 40#include <linux/reiserfs_fs.h>
42#include <linux/buffer_head.h> 41#include <linux/buffer_head.h>
43 42
44
45/* To make any changes in the tree we find a node, that contains item 43/* To make any changes in the tree we find a node, that contains item
46 to be changed/deleted or position in the node we insert a new item 44 to be changed/deleted or position in the node we insert a new item
47 to. We call this node S. To do balancing we need to decide what we 45 to. We call this node S. To do balancing we need to decide what we
@@ -56,490 +54,522 @@
56 have to have if we do not any shiftings, if we shift to left/right 54 have to have if we do not any shiftings, if we shift to left/right
57 neighbor or to both. */ 55 neighbor or to both. */
58 56
59
60/* taking item number in virtual node, returns number of item, that it has in source buffer */ 57/* taking item number in virtual node, returns number of item, that it has in source buffer */
61static inline int old_item_num (int new_num, int affected_item_num, int mode) 58static inline int old_item_num(int new_num, int affected_item_num, int mode)
62{ 59{
63 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) 60 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
64 return new_num; 61 return new_num;
65 62
66 if (mode == M_INSERT) { 63 if (mode == M_INSERT) {
67 64
68 RFALSE( new_num == 0, 65 RFALSE(new_num == 0,
69 "vs-8005: for INSERT mode and item number of inserted item"); 66 "vs-8005: for INSERT mode and item number of inserted item");
70 67
71 return new_num - 1; 68 return new_num - 1;
72 } 69 }
73 70
74 RFALSE( mode != M_DELETE, 71 RFALSE(mode != M_DELETE,
75 "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode); 72 "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
76 /* delete mode */ 73 mode);
77 return new_num + 1; 74 /* delete mode */
75 return new_num + 1;
78} 76}
79 77
80static void create_virtual_node (struct tree_balance * tb, int h) 78static void create_virtual_node(struct tree_balance *tb, int h)
81{ 79{
82 struct item_head * ih; 80 struct item_head *ih;
83 struct virtual_node * vn = tb->tb_vn; 81 struct virtual_node *vn = tb->tb_vn;
84 int new_num; 82 int new_num;
85 struct buffer_head * Sh; /* this comes from tb->S[h] */ 83 struct buffer_head *Sh; /* this comes from tb->S[h] */
86 84
87 Sh = PATH_H_PBUFFER (tb->tb_path, h); 85 Sh = PATH_H_PBUFFER(tb->tb_path, h);
88 86
89 /* size of changed node */ 87 /* size of changed node */
90 vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h]; 88 vn->vn_size =
89 MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
91 90
92 /* for internal nodes array if virtual items is not created */ 91 /* for internal nodes array if virtual items is not created */
93 if (h) { 92 if (h) {
94 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); 93 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
95 return; 94 return;
96 }
97
98 /* number of items in virtual node */
99 vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 1 : 0) - ((vn->vn_mode == M_DELETE)? 1 : 0);
100
101 /* first virtual item */
102 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
103 memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item));
104 vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item);
105
106
107 /* first item in the node */
108 ih = B_N_PITEM_HEAD (Sh, 0);
109
110 /* define the mergeability for 0-th item (if it is not being deleted) */
111 if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
112 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
113
114 /* go through all items those remain in the virtual node (except for the new (inserted) one) */
115 for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) {
116 int j;
117 struct virtual_item * vi = vn->vn_vi + new_num;
118 int is_affected = ((new_num != vn->vn_affected_item_num) ? 0 : 1);
119
120
121 if (is_affected && vn->vn_mode == M_INSERT)
122 continue;
123
124 /* get item number in source node */
125 j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode);
126
127 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
128 vi->vi_ih = ih + j;
129 vi->vi_item = B_I_PITEM (Sh, ih + j);
130 vi->vi_uarea = vn->vn_free_ptr;
131
132 // FIXME: there is no check, that item operation did not
133 // consume too much memory
134 vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]);
135 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
136 reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: "
137 "virtual node space consumed");
138
139 if (!is_affected)
140 /* this is not being changed */
141 continue;
142
143 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
144 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
145 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
146 } 95 }
147 }
148
149
150 /* virtual inserted item is not defined yet */
151 if (vn->vn_mode == M_INSERT) {
152 struct virtual_item * vi = vn->vn_vi + vn->vn_affected_item_num;
153
154 RFALSE( vn->vn_ins_ih == 0,
155 "vs-8040: item header of inserted item is not specified");
156 vi->vi_item_len = tb->insert_size[0];
157 vi->vi_ih = vn->vn_ins_ih;
158 vi->vi_item = vn->vn_data;
159 vi->vi_uarea = vn->vn_free_ptr;
160
161 op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]);
162 }
163
164 /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
165 if (tb->CFR[0]) {
166 struct reiserfs_key * key;
167
168 key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]);
169 if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE ||
170 vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1))
171 vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE;
172 96
173#ifdef CONFIG_REISERFS_CHECK 97 /* number of items in virtual node */
174 if (op_is_left_mergeable (key, Sh->b_size) && 98 vn->vn_nr_item =
175 !(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) { 99 B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
176 /* we delete last item and it could be merged with right neighbor's first item */ 100 ((vn->vn_mode == M_DELETE) ? 1 : 0);
177 if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD (Sh, 0)) && 101
178 I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) { 102 /* first virtual item */
179 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ 103 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
180 print_block (Sh, 0, -1, -1); 104 memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
181 reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", 105 vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
182 key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE); 106
183 } else 107 /* first item in the node */
184 /* we can delete directory item, that has only one directory entry in it */ 108 ih = B_N_PITEM_HEAD(Sh, 0);
185 ; 109
110 /* define the mergeability for 0-th item (if it is not being deleted) */
111 if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
112 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
113 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
114
115 /* go through all items those remain in the virtual node (except for the new (inserted) one) */
116 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
117 int j;
118 struct virtual_item *vi = vn->vn_vi + new_num;
119 int is_affected =
120 ((new_num != vn->vn_affected_item_num) ? 0 : 1);
121
122 if (is_affected && vn->vn_mode == M_INSERT)
123 continue;
124
125 /* get item number in source node */
126 j = old_item_num(new_num, vn->vn_affected_item_num,
127 vn->vn_mode);
128
129 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
130 vi->vi_ih = ih + j;
131 vi->vi_item = B_I_PITEM(Sh, ih + j);
132 vi->vi_uarea = vn->vn_free_ptr;
133
134 // FIXME: there is no check, that item operation did not
135 // consume too much memory
136 vn->vn_free_ptr +=
137 op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
138 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
139 reiserfs_panic(tb->tb_sb,
140 "vs-8030: create_virtual_node: "
141 "virtual node space consumed");
142
143 if (!is_affected)
144 /* this is not being changed */
145 continue;
146
147 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
148 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
149 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
150 }
186 } 151 }
152
153 /* virtual inserted item is not defined yet */
154 if (vn->vn_mode == M_INSERT) {
155 struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
156
157 RFALSE(vn->vn_ins_ih == 0,
158 "vs-8040: item header of inserted item is not specified");
159 vi->vi_item_len = tb->insert_size[0];
160 vi->vi_ih = vn->vn_ins_ih;
161 vi->vi_item = vn->vn_data;
162 vi->vi_uarea = vn->vn_free_ptr;
163
164 op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
165 tb->insert_size[0]);
166 }
167
168 /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
169 if (tb->CFR[0]) {
170 struct reiserfs_key *key;
171
172 key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
173 if (op_is_left_mergeable(key, Sh->b_size)
174 && (vn->vn_mode != M_DELETE
175 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
176 vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
177 VI_TYPE_RIGHT_MERGEABLE;
178
179#ifdef CONFIG_REISERFS_CHECK
180 if (op_is_left_mergeable(key, Sh->b_size) &&
181 !(vn->vn_mode != M_DELETE
182 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
183 /* we delete last item and it could be merged with right neighbor's first item */
184 if (!
185 (B_NR_ITEMS(Sh) == 1
186 && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
187 && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
188 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
189 print_block(Sh, 0, -1, -1);
190 reiserfs_panic(tb->tb_sb,
191 "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
192 key, vn->vn_affected_item_num,
193 vn->vn_mode, M_DELETE);
194 } else
195 /* we can delete directory item, that has only one directory entry in it */
196 ;
197 }
187#endif 198#endif
188
189 }
190}
191 199
200 }
201}
192 202
193/* using virtual node check, how many items can be shifted to left 203/* using virtual node check, how many items can be shifted to left
194 neighbor */ 204 neighbor */
195static void check_left (struct tree_balance * tb, int h, int cur_free) 205static void check_left(struct tree_balance *tb, int h, int cur_free)
196{ 206{
197 int i; 207 int i;
198 struct virtual_node * vn = tb->tb_vn; 208 struct virtual_node *vn = tb->tb_vn;
199 struct virtual_item * vi; 209 struct virtual_item *vi;
200 int d_size, ih_size; 210 int d_size, ih_size;
201 211
202 RFALSE( cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); 212 RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
203 213
204 /* internal level */ 214 /* internal level */
205 if (h > 0) { 215 if (h > 0) {
206 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 216 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
207 return; 217 return;
208 } 218 }
209 219
210 /* leaf level */ 220 /* leaf level */
211 221
212 if (!cur_free || !vn->vn_nr_item) { 222 if (!cur_free || !vn->vn_nr_item) {
213 /* no free space or nothing to move */ 223 /* no free space or nothing to move */
214 tb->lnum[h] = 0; 224 tb->lnum[h] = 0;
215 tb->lbytes = -1; 225 tb->lbytes = -1;
216 return; 226 return;
217 } 227 }
218 228
219 RFALSE( !PATH_H_PPARENT (tb->tb_path, 0), 229 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
220 "vs-8055: parent does not exist or invalid"); 230 "vs-8055: parent does not exist or invalid");
221 231
222 vi = vn->vn_vi; 232 vi = vn->vn_vi;
223 if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) { 233 if ((unsigned int)cur_free >=
224 /* all contents of S[0] fits into L[0] */ 234 (vn->vn_size -
235 ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
236 /* all contents of S[0] fits into L[0] */
225 237
226 RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, 238 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
227 "vs-8055: invalid mode or balance condition failed"); 239 "vs-8055: invalid mode or balance condition failed");
228 240
229 tb->lnum[0] = vn->vn_nr_item; 241 tb->lnum[0] = vn->vn_nr_item;
230 tb->lbytes = -1; 242 tb->lbytes = -1;
231 return; 243 return;
232 }
233
234
235 d_size = 0, ih_size = IH_SIZE;
236
237 /* first item may be merge with last item in left neighbor */
238 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
239 d_size = -((int)IH_SIZE), ih_size = 0;
240
241 tb->lnum[0] = 0;
242 for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) {
243 d_size += vi->vi_item_len;
244 if (cur_free >= d_size) {
245 /* the item can be shifted entirely */
246 cur_free -= d_size;
247 tb->lnum[0] ++;
248 continue;
249 } 244 }
250 245
251 /* the item cannot be shifted entirely, try to split it */ 246 d_size = 0, ih_size = IH_SIZE;
252 /* check whether L[0] can hold ih and at least one byte of the item body */ 247
253 if (cur_free <= ih_size) { 248 /* first item may be merge with last item in left neighbor */
254 /* cannot shift even a part of the current item */ 249 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
255 tb->lbytes = -1; 250 d_size = -((int)IH_SIZE), ih_size = 0;
256 return; 251
252 tb->lnum[0] = 0;
253 for (i = 0; i < vn->vn_nr_item;
254 i++, ih_size = IH_SIZE, d_size = 0, vi++) {
255 d_size += vi->vi_item_len;
256 if (cur_free >= d_size) {
257 /* the item can be shifted entirely */
258 cur_free -= d_size;
259 tb->lnum[0]++;
260 continue;
261 }
262
263 /* the item cannot be shifted entirely, try to split it */
264 /* check whether L[0] can hold ih and at least one byte of the item body */
265 if (cur_free <= ih_size) {
266 /* cannot shift even a part of the current item */
267 tb->lbytes = -1;
268 return;
269 }
270 cur_free -= ih_size;
271
272 tb->lbytes = op_check_left(vi, cur_free, 0, 0);
273 if (tb->lbytes != -1)
274 /* count partially shifted item */
275 tb->lnum[0]++;
276
277 break;
257 } 278 }
258 cur_free -= ih_size;
259
260 tb->lbytes = op_check_left (vi, cur_free, 0, 0);
261 if (tb->lbytes != -1)
262 /* count partially shifted item */
263 tb->lnum[0] ++;
264
265 break;
266 }
267
268 return;
269}
270 279
280 return;
281}
271 282
272/* using virtual node check, how many items can be shifted to right 283/* using virtual node check, how many items can be shifted to right
273 neighbor */ 284 neighbor */
274static void check_right (struct tree_balance * tb, int h, int cur_free) 285static void check_right(struct tree_balance *tb, int h, int cur_free)
275{ 286{
276 int i; 287 int i;
277 struct virtual_node * vn = tb->tb_vn; 288 struct virtual_node *vn = tb->tb_vn;
278 struct virtual_item * vi; 289 struct virtual_item *vi;
279 int d_size, ih_size; 290 int d_size, ih_size;
280 291
281 RFALSE( cur_free < 0, "vs-8070: cur_free < 0"); 292 RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
282 293
283 /* internal level */ 294 /* internal level */
284 if (h > 0) { 295 if (h > 0) {
285 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 296 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
286 return; 297 return;
287 }
288
289 /* leaf level */
290
291 if (!cur_free || !vn->vn_nr_item) {
292 /* no free space */
293 tb->rnum[h] = 0;
294 tb->rbytes = -1;
295 return;
296 }
297
298 RFALSE( !PATH_H_PPARENT (tb->tb_path, 0),
299 "vs-8075: parent does not exist or invalid");
300
301 vi = vn->vn_vi + vn->vn_nr_item - 1;
302 if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
303 /* all contents of S[0] fits into R[0] */
304
305 RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
306 "vs-8080: invalid mode or balance condition failed");
307
308 tb->rnum[h] = vn->vn_nr_item;
309 tb->rbytes = -1;
310 return;
311 }
312
313 d_size = 0, ih_size = IH_SIZE;
314
315 /* last item may be merge with first item in right neighbor */
316 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
317 d_size = -(int)IH_SIZE, ih_size = 0;
318
319 tb->rnum[0] = 0;
320 for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) {
321 d_size += vi->vi_item_len;
322 if (cur_free >= d_size) {
323 /* the item can be shifted entirely */
324 cur_free -= d_size;
325 tb->rnum[0] ++;
326 continue;
327 } 298 }
328 299
329 /* check whether R[0] can hold ih and at least one byte of the item body */ 300 /* leaf level */
330 if ( cur_free <= ih_size ) { /* cannot shift even a part of the current item */ 301
331 tb->rbytes = -1; 302 if (!cur_free || !vn->vn_nr_item) {
332 return; 303 /* no free space */
304 tb->rnum[h] = 0;
305 tb->rbytes = -1;
306 return;
333 } 307 }
334
335 /* R[0] can hold the header of the item and at least one byte of its body */
336 cur_free -= ih_size; /* cur_free is still > 0 */
337
338 tb->rbytes = op_check_right (vi, cur_free);
339 if (tb->rbytes != -1)
340 /* count partially shifted item */
341 tb->rnum[0] ++;
342
343 break;
344 }
345
346 return;
347}
348 308
309 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
310 "vs-8075: parent does not exist or invalid");
311
312 vi = vn->vn_vi + vn->vn_nr_item - 1;
313 if ((unsigned int)cur_free >=
314 (vn->vn_size -
315 ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
316 /* all contents of S[0] fits into R[0] */
317
318 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
319 "vs-8080: invalid mode or balance condition failed");
320
321 tb->rnum[h] = vn->vn_nr_item;
322 tb->rbytes = -1;
323 return;
324 }
325
326 d_size = 0, ih_size = IH_SIZE;
327
328 /* last item may be merge with first item in right neighbor */
329 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
330 d_size = -(int)IH_SIZE, ih_size = 0;
331
332 tb->rnum[0] = 0;
333 for (i = vn->vn_nr_item - 1; i >= 0;
334 i--, d_size = 0, ih_size = IH_SIZE, vi--) {
335 d_size += vi->vi_item_len;
336 if (cur_free >= d_size) {
337 /* the item can be shifted entirely */
338 cur_free -= d_size;
339 tb->rnum[0]++;
340 continue;
341 }
342
343 /* check whether R[0] can hold ih and at least one byte of the item body */
344 if (cur_free <= ih_size) { /* cannot shift even a part of the current item */
345 tb->rbytes = -1;
346 return;
347 }
348
349 /* R[0] can hold the header of the item and at least one byte of its body */
350 cur_free -= ih_size; /* cur_free is still > 0 */
351
352 tb->rbytes = op_check_right(vi, cur_free);
353 if (tb->rbytes != -1)
354 /* count partially shifted item */
355 tb->rnum[0]++;
356
357 break;
358 }
359
360 return;
361}
349 362
350/* 363/*
351 * from - number of items, which are shifted to left neighbor entirely 364 * from - number of items, which are shifted to left neighbor entirely
352 * to - number of item, which are shifted to right neighbor entirely 365 * to - number of item, which are shifted to right neighbor entirely
353 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor 366 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
354 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ 367 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
355static int get_num_ver (int mode, struct tree_balance * tb, int h, 368static int get_num_ver(int mode, struct tree_balance *tb, int h,
356 int from, int from_bytes, 369 int from, int from_bytes,
357 int to, int to_bytes, 370 int to, int to_bytes, short *snum012, int flow)
358 short * snum012, int flow
359 )
360{ 371{
361 int i; 372 int i;
362 int cur_free; 373 int cur_free;
363 // int bytes; 374 // int bytes;
364 int units; 375 int units;
365 struct virtual_node * vn = tb->tb_vn; 376 struct virtual_node *vn = tb->tb_vn;
366 // struct virtual_item * vi; 377 // struct virtual_item * vi;
367 378
368 int total_node_size, max_node_size, current_item_size; 379 int total_node_size, max_node_size, current_item_size;
369 int needed_nodes; 380 int needed_nodes;
370 int start_item, /* position of item we start filling node from */ 381 int start_item, /* position of item we start filling node from */
371 end_item, /* position of item we finish filling node by */ 382 end_item, /* position of item we finish filling node by */
372 start_bytes,/* number of first bytes (entries for directory) of start_item-th item 383 start_bytes, /* number of first bytes (entries for directory) of start_item-th item
373 we do not include into node that is being filled */ 384 we do not include into node that is being filled */
374 end_bytes; /* number of last bytes (entries for directory) of end_item-th item 385 end_bytes; /* number of last bytes (entries for directory) of end_item-th item
375 we do node include into node that is being filled */ 386 we do node include into node that is being filled */
376 int split_item_positions[2]; /* these are positions in virtual item of 387 int split_item_positions[2]; /* these are positions in virtual item of
377 items, that are split between S[0] and 388 items, that are split between S[0] and
378 S1new and S1new and S2new */ 389 S1new and S1new and S2new */
379 390
380 split_item_positions[0] = -1; 391 split_item_positions[0] = -1;
381 split_item_positions[1] = -1; 392 split_item_positions[1] = -1;
382 393
383 /* We only create additional nodes if we are in insert or paste mode 394 /* We only create additional nodes if we are in insert or paste mode
384 or we are in replace mode at the internal level. If h is 0 and 395 or we are in replace mode at the internal level. If h is 0 and
385 the mode is M_REPLACE then in fix_nodes we change the mode to 396 the mode is M_REPLACE then in fix_nodes we change the mode to
386 paste or insert before we get here in the code. */ 397 paste or insert before we get here in the code. */
387 RFALSE( tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), 398 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
388 "vs-8100: insert_size < 0 in overflow"); 399 "vs-8100: insert_size < 0 in overflow");
389 400
390 max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h)); 401 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
391 402
392 /* snum012 [0-2] - number of items, that lay 403 /* snum012 [0-2] - number of items, that lay
393 to S[0], first new node and second new node */ 404 to S[0], first new node and second new node */
394 snum012[3] = -1; /* s1bytes */ 405 snum012[3] = -1; /* s1bytes */
395 snum012[4] = -1; /* s2bytes */ 406 snum012[4] = -1; /* s2bytes */
396 407
397 /* internal level */ 408 /* internal level */
398 if (h > 0) { 409 if (h > 0) {
399 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); 410 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
400 if (i == max_node_size) 411 if (i == max_node_size)
401 return 1; 412 return 1;
402 return (i / max_node_size + 1); 413 return (i / max_node_size + 1);
403 }
404
405 /* leaf level */
406 needed_nodes = 1;
407 total_node_size = 0;
408 cur_free = max_node_size;
409
410 // start from 'from'-th item
411 start_item = from;
412 // skip its first 'start_bytes' units
413 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
414
415 // last included item is the 'end_item'-th one
416 end_item = vn->vn_nr_item - to - 1;
417 // do not count last 'end_bytes' units of 'end_item'-th item
418 end_bytes = (to_bytes != -1) ? to_bytes : 0;
419
420 /* go through all item beginning from the start_item-th item and ending by
421 the end_item-th item. Do not count first 'start_bytes' units of
422 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
423
424 for (i = start_item; i <= end_item; i ++) {
425 struct virtual_item * vi = vn->vn_vi + i;
426 int skip_from_end = ((i == end_item) ? end_bytes : 0);
427
428 RFALSE( needed_nodes > 3, "vs-8105: too many nodes are needed");
429
430 /* get size of current item */
431 current_item_size = vi->vi_item_len;
432
433 /* do not take in calculation head part (from_bytes) of from-th item */
434 current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes);
435
436 /* do not take in calculation tail part of last item */
437 current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end);
438
439 /* if item fits into current node entierly */
440 if (total_node_size + current_item_size <= max_node_size) {
441 snum012[needed_nodes - 1] ++;
442 total_node_size += current_item_size;
443 start_bytes = 0;
444 continue;
445 } 414 }
446 415
447 if (current_item_size > max_node_size) { 416 /* leaf level */
448 /* virtual item length is longer, than max size of item in 417 needed_nodes = 1;
449 a node. It is impossible for direct item */ 418 total_node_size = 0;
450 RFALSE( is_direct_le_ih (vi->vi_ih), 419 cur_free = max_node_size;
451 "vs-8110: " 420
452 "direct item length is %d. It can not be longer than %d", 421 // start from 'from'-th item
453 current_item_size, max_node_size); 422 start_item = from;
454 /* we will try to split it */ 423 // skip its first 'start_bytes' units
455 flow = 1; 424 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
425
426 // last included item is the 'end_item'-th one
427 end_item = vn->vn_nr_item - to - 1;
428 // do not count last 'end_bytes' units of 'end_item'-th item
429 end_bytes = (to_bytes != -1) ? to_bytes : 0;
430
431 /* go through all item beginning from the start_item-th item and ending by
432 the end_item-th item. Do not count first 'start_bytes' units of
433 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
434
435 for (i = start_item; i <= end_item; i++) {
436 struct virtual_item *vi = vn->vn_vi + i;
437 int skip_from_end = ((i == end_item) ? end_bytes : 0);
438
439 RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
440
441 /* get size of current item */
442 current_item_size = vi->vi_item_len;
443
444 /* do not take in calculation head part (from_bytes) of from-th item */
445 current_item_size -=
446 op_part_size(vi, 0 /*from start */ , start_bytes);
447
448 /* do not take in calculation tail part of last item */
449 current_item_size -=
450 op_part_size(vi, 1 /*from end */ , skip_from_end);
451
452 /* if item fits into current node entierly */
453 if (total_node_size + current_item_size <= max_node_size) {
454 snum012[needed_nodes - 1]++;
455 total_node_size += current_item_size;
456 start_bytes = 0;
457 continue;
458 }
459
460 if (current_item_size > max_node_size) {
461 /* virtual item length is longer, than max size of item in
462 a node. It is impossible for direct item */
463 RFALSE(is_direct_le_ih(vi->vi_ih),
464 "vs-8110: "
465 "direct item length is %d. It can not be longer than %d",
466 current_item_size, max_node_size);
467 /* we will try to split it */
468 flow = 1;
469 }
470
471 if (!flow) {
472 /* as we do not split items, take new node and continue */
473 needed_nodes++;
474 i--;
475 total_node_size = 0;
476 continue;
477 }
478 // calculate number of item units which fit into node being
479 // filled
480 {
481 int free_space;
482
483 free_space = max_node_size - total_node_size - IH_SIZE;
484 units =
485 op_check_left(vi, free_space, start_bytes,
486 skip_from_end);
487 if (units == -1) {
488 /* nothing fits into current node, take new node and continue */
489 needed_nodes++, i--, total_node_size = 0;
490 continue;
491 }
492 }
493
494 /* something fits into the current node */
495 //if (snum012[3] != -1 || needed_nodes != 1)
496 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
497 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
498 start_bytes += units;
499 snum012[needed_nodes - 1 + 3] = units;
500
501 if (needed_nodes > 2)
502 reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
503 "split_item_position is out of boundary");
504 snum012[needed_nodes - 1]++;
505 split_item_positions[needed_nodes - 1] = i;
506 needed_nodes++;
507 /* continue from the same item with start_bytes != -1 */
508 start_item = i;
509 i--;
510 total_node_size = 0;
456 } 511 }
457 512
458 if (!flow) { 513 // sum012[4] (if it is not -1) contains number of units of which
459 /* as we do not split items, take new node and continue */ 514 // are to be in S1new, snum012[3] - to be in S0. They are supposed
460 needed_nodes ++; i --; total_node_size = 0; 515 // to be S1bytes and S2bytes correspondingly, so recalculate
461 continue; 516 if (snum012[4] > 0) {
517 int split_item_num;
518 int bytes_to_r, bytes_to_l;
519 int bytes_to_S1new;
520
521 split_item_num = split_item_positions[1];
522 bytes_to_l =
523 ((from == split_item_num
524 && from_bytes != -1) ? from_bytes : 0);
525 bytes_to_r =
526 ((end_item == split_item_num
527 && end_bytes != -1) ? end_bytes : 0);
528 bytes_to_S1new =
529 ((split_item_positions[0] ==
530 split_item_positions[1]) ? snum012[3] : 0);
531
532 // s2bytes
533 snum012[4] =
534 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
535 bytes_to_r - bytes_to_l - bytes_to_S1new;
536
537 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
538 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
539 reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
540 "directory or indirect item");
462 } 541 }
463 542
464 // calculate number of item units which fit into node being 543 /* now we know S2bytes, calculate S1bytes */
465 // filled 544 if (snum012[3] > 0) {
466 { 545 int split_item_num;
467 int free_space; 546 int bytes_to_r, bytes_to_l;
468 547 int bytes_to_S2new;
469 free_space = max_node_size - total_node_size - IH_SIZE; 548
470 units = op_check_left (vi, free_space, start_bytes, skip_from_end); 549 split_item_num = split_item_positions[0];
471 if (units == -1) { 550 bytes_to_l =
472 /* nothing fits into current node, take new node and continue */ 551 ((from == split_item_num
473 needed_nodes ++, i--, total_node_size = 0; 552 && from_bytes != -1) ? from_bytes : 0);
474 continue; 553 bytes_to_r =
475 } 554 ((end_item == split_item_num
555 && end_bytes != -1) ? end_bytes : 0);
556 bytes_to_S2new =
557 ((split_item_positions[0] == split_item_positions[1]
558 && snum012[4] != -1) ? snum012[4] : 0);
559
560 // s1bytes
561 snum012[3] =
562 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
563 bytes_to_r - bytes_to_l - bytes_to_S2new;
476 } 564 }
477 565
478 /* something fits into the current node */ 566 return needed_nodes;
479 //if (snum012[3] != -1 || needed_nodes != 1)
480 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
481 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
482 start_bytes += units;
483 snum012[needed_nodes - 1 + 3] = units;
484
485 if (needed_nodes > 2)
486 reiserfs_warning (tb->tb_sb, "vs-8111: get_num_ver: "
487 "split_item_position is out of boundary");
488 snum012[needed_nodes - 1] ++;
489 split_item_positions[needed_nodes - 1] = i;
490 needed_nodes ++;
491 /* continue from the same item with start_bytes != -1 */
492 start_item = i;
493 i --;
494 total_node_size = 0;
495 }
496
497 // sum012[4] (if it is not -1) contains number of units of which
498 // are to be in S1new, snum012[3] - to be in S0. They are supposed
499 // to be S1bytes and S2bytes correspondingly, so recalculate
500 if (snum012[4] > 0) {
501 int split_item_num;
502 int bytes_to_r, bytes_to_l;
503 int bytes_to_S1new;
504
505 split_item_num = split_item_positions[1];
506 bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
507 bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
508 bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? snum012[3] : 0);
509
510 // s2bytes
511 snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new;
512
513 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
514 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
515 reiserfs_warning (tb->tb_sb, "vs-8115: get_num_ver: not "
516 "directory or indirect item");
517 }
518
519 /* now we know S2bytes, calculate S1bytes */
520 if (snum012[3] > 0) {
521 int split_item_num;
522 int bytes_to_r, bytes_to_l;
523 int bytes_to_S2new;
524
525 split_item_num = split_item_positions[0];
526 bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
527 bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
528 bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0);
529
530 // s1bytes
531 snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new;
532 }
533
534 return needed_nodes;
535} 567}
536 568
537
538#ifdef CONFIG_REISERFS_CHECK 569#ifdef CONFIG_REISERFS_CHECK
539extern struct tree_balance * cur_tb; 570extern struct tree_balance *cur_tb;
540#endif 571#endif
541 572
542
543/* Set parameters for balancing. 573/* Set parameters for balancing.
544 * Performs write of results of analysis of balancing into structure tb, 574 * Performs write of results of analysis of balancing into structure tb,
545 * where it will later be used by the functions that actually do the balancing. 575 * where it will later be used by the functions that actually do the balancing.
@@ -557,131 +587,130 @@ extern struct tree_balance * cur_tb;
557 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) 587 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array)
558 */ 588 */
559 589
560static void set_parameters (struct tree_balance * tb, int h, int lnum, 590static void set_parameters(struct tree_balance *tb, int h, int lnum,
561 int rnum, int blk_num, short * s012, int lb, int rb) 591 int rnum, int blk_num, short *s012, int lb, int rb)
562{ 592{
563 593
564 tb->lnum[h] = lnum; 594 tb->lnum[h] = lnum;
565 tb->rnum[h] = rnum; 595 tb->rnum[h] = rnum;
566 tb->blknum[h] = blk_num; 596 tb->blknum[h] = blk_num;
567 597
568 if (h == 0) 598 if (h == 0) { /* only for leaf level */
569 { /* only for leaf level */ 599 if (s012 != NULL) {
570 if (s012 != NULL) 600 tb->s0num = *s012++,
571 { 601 tb->s1num = *s012++, tb->s2num = *s012++;
572 tb->s0num = * s012 ++, 602 tb->s1bytes = *s012++;
573 tb->s1num = * s012 ++, 603 tb->s2bytes = *s012;
574 tb->s2num = * s012 ++; 604 }
575 tb->s1bytes = * s012 ++; 605 tb->lbytes = lb;
576 tb->s2bytes = * s012; 606 tb->rbytes = rb;
577 } 607 }
578 tb->lbytes = lb; 608 PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
579 tb->rbytes = rb; 609 PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
580 }
581 PROC_INFO_ADD( tb -> tb_sb, lnum[ h ], lnum );
582 PROC_INFO_ADD( tb -> tb_sb, rnum[ h ], rnum );
583
584 PROC_INFO_ADD( tb -> tb_sb, lbytes[ h ], lb );
585 PROC_INFO_ADD( tb -> tb_sb, rbytes[ h ], rb );
586}
587
588 610
611 PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
612 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
613}
589 614
590/* check, does node disappear if we shift tb->lnum[0] items to left 615/* check, does node disappear if we shift tb->lnum[0] items to left
591 neighbor and tb->rnum[0] to the right one. */ 616 neighbor and tb->rnum[0] to the right one. */
592static int is_leaf_removable (struct tree_balance * tb) 617static int is_leaf_removable(struct tree_balance *tb)
593{ 618{
594 struct virtual_node * vn = tb->tb_vn; 619 struct virtual_node *vn = tb->tb_vn;
595 int to_left, to_right; 620 int to_left, to_right;
596 int size; 621 int size;
597 int remain_items; 622 int remain_items;
598 623
599 /* number of items, that will be shifted to left (right) neighbor 624 /* number of items, that will be shifted to left (right) neighbor
600 entirely */ 625 entirely */
601 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); 626 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
602 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); 627 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
603 remain_items = vn->vn_nr_item; 628 remain_items = vn->vn_nr_item;
604 629
605 /* how many items remain in S[0] after shiftings to neighbors */ 630 /* how many items remain in S[0] after shiftings to neighbors */
606 remain_items -= (to_left + to_right); 631 remain_items -= (to_left + to_right);
607 632
608 if (remain_items < 1) { 633 if (remain_items < 1) {
609 /* all content of node can be shifted to neighbors */ 634 /* all content of node can be shifted to neighbors */
610 set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1); 635 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
611 return 1; 636 NULL, -1, -1);
612 } 637 return 1;
613 638 }
614 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
615 /* S[0] is not removable */
616 return 0;
617
618 /* check, whether we can divide 1 remaining item between neighbors */
619
620 /* get size of remaining item (in item units) */
621 size = op_unit_num (&(vn->vn_vi[to_left]));
622
623 if (tb->lbytes + tb->rbytes >= size) {
624 set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1);
625 return 1;
626 }
627
628 return 0;
629}
630 639
640 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
641 /* S[0] is not removable */
642 return 0;
643
644 /* check, whether we can divide 1 remaining item between neighbors */
645
646 /* get size of remaining item (in item units) */
647 size = op_unit_num(&(vn->vn_vi[to_left]));
648
649 if (tb->lbytes + tb->rbytes >= size) {
650 set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
651 tb->lbytes, -1);
652 return 1;
653 }
654
655 return 0;
656}
631 657
632/* check whether L, S, R can be joined in one node */ 658/* check whether L, S, R can be joined in one node */
633static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree) 659static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
634{ 660{
635 struct virtual_node * vn = tb->tb_vn; 661 struct virtual_node *vn = tb->tb_vn;
636 int ih_size; 662 int ih_size;
637 struct buffer_head *S0; 663 struct buffer_head *S0;
638 664
639 S0 = PATH_H_PBUFFER (tb->tb_path, 0); 665 S0 = PATH_H_PBUFFER(tb->tb_path, 0);
640 666
641 ih_size = 0; 667 ih_size = 0;
642 if (vn->vn_nr_item) { 668 if (vn->vn_nr_item) {
643 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) 669 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
644 ih_size += IH_SIZE; 670 ih_size += IH_SIZE;
645 671
646 if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE) 672 if (vn->vn_vi[vn->vn_nr_item - 1].
647 ih_size += IH_SIZE; 673 vi_type & VI_TYPE_RIGHT_MERGEABLE)
648 } else { 674 ih_size += IH_SIZE;
649 /* there was only one item and it will be deleted */ 675 } else {
650 struct item_head * ih; 676 /* there was only one item and it will be deleted */
651 677 struct item_head *ih;
652 RFALSE( B_NR_ITEMS (S0) != 1, 678
653 "vs-8125: item number must be 1: it is %d", B_NR_ITEMS(S0)); 679 RFALSE(B_NR_ITEMS(S0) != 1,
654 680 "vs-8125: item number must be 1: it is %d",
655 ih = B_N_PITEM_HEAD (S0, 0); 681 B_NR_ITEMS(S0));
656 if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]))) 682
657 if (is_direntry_le_ih (ih)) { 683 ih = B_N_PITEM_HEAD(S0, 0);
658 /* Directory must be in correct state here: that is 684 if (tb->CFR[0]
659 somewhere at the left side should exist first directory 685 && !comp_short_le_keys(&(ih->ih_key),
660 item. But the item being deleted can not be that first 686 B_N_PDELIM_KEY(tb->CFR[0],
661 one because its right neighbor is item of the same 687 tb->rkey[0])))
662 directory. (But first item always gets deleted in last 688 if (is_direntry_le_ih(ih)) {
663 turn). So, neighbors of deleted item can be merged, so 689 /* Directory must be in correct state here: that is
664 we can save ih_size */ 690 somewhere at the left side should exist first directory
665 ih_size = IH_SIZE; 691 item. But the item being deleted can not be that first
666 692 one because its right neighbor is item of the same
667 /* we might check that left neighbor exists and is of the 693 directory. (But first item always gets deleted in last
668 same directory */ 694 turn). So, neighbors of deleted item can be merged, so
669 RFALSE(le_ih_k_offset (ih) == DOT_OFFSET, 695 we can save ih_size */
670 "vs-8130: first directory item can not be removed until directory is not empty"); 696 ih_size = IH_SIZE;
671 } 697
672 698 /* we might check that left neighbor exists and is of the
673 } 699 same directory */
674 700 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
675 if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) { 701 "vs-8130: first directory item can not be removed until directory is not empty");
676 set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1); 702 }
677 PROC_INFO_INC( tb -> tb_sb, leaves_removable );
678 return 1;
679 }
680 return 0;
681
682}
683 703
704 }
705
706 if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
707 set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
708 PROC_INFO_INC(tb->tb_sb, leaves_removable);
709 return 1;
710 }
711 return 0;
684 712
713}
685 714
686/* when we do not split item, lnum and rnum are numbers of entire items */ 715/* when we do not split item, lnum and rnum are numbers of entire items */
687#define SET_PAR_SHIFT_LEFT \ 716#define SET_PAR_SHIFT_LEFT \
@@ -704,7 +733,6 @@ else \
704 -1, -1);\ 733 -1, -1);\
705} 734}
706 735
707
708#define SET_PAR_SHIFT_RIGHT \ 736#define SET_PAR_SHIFT_RIGHT \
709if (h)\ 737if (h)\
710{\ 738{\
@@ -724,214 +752,199 @@ else \
724 -1, -1);\ 752 -1, -1);\
725} 753}
726 754
727 755static void free_buffers_in_tb(struct tree_balance *p_s_tb)
728static void free_buffers_in_tb ( 756{
729 struct tree_balance * p_s_tb 757 int n_counter;
730 ) { 758
731 int n_counter; 759 decrement_counters_in_path(p_s_tb->tb_path);
732 760
733 decrement_counters_in_path(p_s_tb->tb_path); 761 for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
734 762 decrement_bcount(p_s_tb->L[n_counter]);
735 for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) { 763 p_s_tb->L[n_counter] = NULL;
736 decrement_bcount(p_s_tb->L[n_counter]); 764 decrement_bcount(p_s_tb->R[n_counter]);
737 p_s_tb->L[n_counter] = NULL; 765 p_s_tb->R[n_counter] = NULL;
738 decrement_bcount(p_s_tb->R[n_counter]); 766 decrement_bcount(p_s_tb->FL[n_counter]);
739 p_s_tb->R[n_counter] = NULL; 767 p_s_tb->FL[n_counter] = NULL;
740 decrement_bcount(p_s_tb->FL[n_counter]); 768 decrement_bcount(p_s_tb->FR[n_counter]);
741 p_s_tb->FL[n_counter] = NULL; 769 p_s_tb->FR[n_counter] = NULL;
742 decrement_bcount(p_s_tb->FR[n_counter]); 770 decrement_bcount(p_s_tb->CFL[n_counter]);
743 p_s_tb->FR[n_counter] = NULL; 771 p_s_tb->CFL[n_counter] = NULL;
744 decrement_bcount(p_s_tb->CFL[n_counter]); 772 decrement_bcount(p_s_tb->CFR[n_counter]);
745 p_s_tb->CFL[n_counter] = NULL; 773 p_s_tb->CFR[n_counter] = NULL;
746 decrement_bcount(p_s_tb->CFR[n_counter]); 774 }
747 p_s_tb->CFR[n_counter] = NULL;
748 }
749} 775}
750 776
751
752/* Get new buffers for storing new nodes that are created while balancing. 777/* Get new buffers for storing new nodes that are created while balancing.
753 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 778 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
754 * CARRY_ON - schedule didn't occur while the function worked; 779 * CARRY_ON - schedule didn't occur while the function worked;
755 * NO_DISK_SPACE - no disk space. 780 * NO_DISK_SPACE - no disk space.
756 */ 781 */
757/* The function is NOT SCHEDULE-SAFE! */ 782/* The function is NOT SCHEDULE-SAFE! */
758static int get_empty_nodes( 783static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
759 struct tree_balance * p_s_tb, 784{
760 int n_h 785 struct buffer_head *p_s_new_bh,
761 ) { 786 *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
762 struct buffer_head * p_s_new_bh, 787 b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
763 * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h); 788 int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */
764 b_blocknr_t * p_n_blocknr, 789 n_retval = CARRY_ON;
765 a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, }; 790 struct super_block *p_s_sb = p_s_tb->tb_sb;
766 int n_counter, 791
767 n_number_of_freeblk, 792 /* number_of_freeblk is the number of empty blocks which have been
768 n_amount_needed,/* number of needed empty blocks */ 793 acquired for use by the balancing algorithm minus the number of
769 n_retval = CARRY_ON; 794 empty blocks used in the previous levels of the analysis,
770 struct super_block * p_s_sb = p_s_tb->tb_sb; 795 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
771 796 after empty blocks are acquired, and the balancing analysis is
772 797 then restarted, amount_needed is the number needed by this level
773 /* number_of_freeblk is the number of empty blocks which have been 798 (n_h) of the balancing analysis.
774 acquired for use by the balancing algorithm minus the number of 799
775 empty blocks used in the previous levels of the analysis, 800 Note that for systems with many processes writing, it would be
776 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs 801 more layout optimal to calculate the total number needed by all
777 after empty blocks are acquired, and the balancing analysis is 802 levels and then to run reiserfs_new_blocks to get all of them at once. */
778 then restarted, amount_needed is the number needed by this level 803
779 (n_h) of the balancing analysis. 804 /* Initiate number_of_freeblk to the amount acquired prior to the restart of
780 805 the analysis or 0 if not restarted, then subtract the amount needed
781 Note that for systems with many processes writing, it would be 806 by all of the levels of the tree below n_h. */
782 more layout optimal to calculate the total number needed by all 807 /* blknum includes S[n_h], so we subtract 1 in this calculation */
783 levels and then to run reiserfs_new_blocks to get all of them at once. */ 808 for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
784 809 n_counter < n_h; n_counter++)
785 /* Initiate number_of_freeblk to the amount acquired prior to the restart of 810 n_number_of_freeblk -=
786 the analysis or 0 if not restarted, then subtract the amount needed 811 (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
787 by all of the levels of the tree below n_h. */ 812 1) : 0;
788 /* blknum includes S[n_h], so we subtract 1 in this calculation */ 813
789 for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ ) 814 /* Allocate missing empty blocks. */
790 n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0; 815 /* if p_s_Sh == 0 then we are getting a new root */
791 816 n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
792 /* Allocate missing empty blocks. */ 817 /* Amount_needed = the amount that we need more than the amount that we have. */
793 /* if p_s_Sh == 0 then we are getting a new root */ 818 if (n_amount_needed > n_number_of_freeblk)
794 n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1; 819 n_amount_needed -= n_number_of_freeblk;
795 /* Amount_needed = the amount that we need more than the amount that we have. */ 820 else /* If we have enough already then there is nothing to do. */
796 if ( n_amount_needed > n_number_of_freeblk ) 821 return CARRY_ON;
797 n_amount_needed -= n_number_of_freeblk; 822
798 else /* If we have enough already then there is nothing to do. */ 823 /* No need to check quota - is not allocated for blocks used for formatted nodes */
799 return CARRY_ON; 824 if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
800 825 n_amount_needed) == NO_DISK_SPACE)
801 /* No need to check quota - is not allocated for blocks used for formatted nodes */ 826 return NO_DISK_SPACE;
802 if (reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs, 827
803 n_amount_needed) == NO_DISK_SPACE) 828 /* for each blocknumber we just got, get a buffer and stick it on FEB */
804 return NO_DISK_SPACE; 829 for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
805 830 n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {
806 /* for each blocknumber we just got, get a buffer and stick it on FEB */ 831
807 for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed; 832 RFALSE(!*p_n_blocknr,
808 p_n_blocknr++, n_counter++ ) { 833 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
809 834
810 RFALSE( ! *p_n_blocknr, 835 p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
811 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); 836 RFALSE(buffer_dirty(p_s_new_bh) ||
812 837 buffer_journaled(p_s_new_bh) ||
813 p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr); 838 buffer_journal_dirty(p_s_new_bh),
814 RFALSE (buffer_dirty (p_s_new_bh) || 839 "PAP-8140: journlaled or dirty buffer %b for the new block",
815 buffer_journaled (p_s_new_bh) || 840 p_s_new_bh);
816 buffer_journal_dirty (p_s_new_bh), 841
817 "PAP-8140: journlaled or dirty buffer %b for the new block", 842 /* Put empty buffers into the array. */
818 p_s_new_bh); 843 RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
819 844 "PAP-8141: busy slot for new buffer");
820 /* Put empty buffers into the array. */ 845
821 RFALSE (p_s_tb->FEB[p_s_tb->cur_blknum], 846 set_buffer_journal_new(p_s_new_bh);
822 "PAP-8141: busy slot for new buffer"); 847 p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
823 848 }
824 set_buffer_journal_new (p_s_new_bh); 849
825 p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; 850 if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
826 } 851 n_retval = REPEAT_SEARCH;
827
828 if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) )
829 n_retval = REPEAT_SEARCH ;
830
831 return n_retval;
832}
833 852
853 return n_retval;
854}
834 855
835/* Get free space of the left neighbor, which is stored in the parent 856/* Get free space of the left neighbor, which is stored in the parent
836 * node of the left neighbor. */ 857 * node of the left neighbor. */
837static int get_lfree (struct tree_balance * tb, int h) 858static int get_lfree(struct tree_balance *tb, int h)
838{ 859{
839 struct buffer_head * l, * f; 860 struct buffer_head *l, *f;
840 int order; 861 int order;
841 862
842 if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) 863 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
843 return 0; 864 return 0;
844 865
845 if (f == l) 866 if (f == l)
846 order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1; 867 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
847 else { 868 else {
848 order = B_NR_ITEMS (l); 869 order = B_NR_ITEMS(l);
849 f = l; 870 f = l;
850 } 871 }
851 872
852 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f,order))); 873 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
853} 874}
854 875
855
856/* Get free space of the right neighbor, 876/* Get free space of the right neighbor,
857 * which is stored in the parent node of the right neighbor. 877 * which is stored in the parent node of the right neighbor.
858 */ 878 */
859static int get_rfree (struct tree_balance * tb, int h) 879static int get_rfree(struct tree_balance *tb, int h)
860{ 880{
861 struct buffer_head * r, * f; 881 struct buffer_head *r, *f;
862 int order; 882 int order;
863 883
864 if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) 884 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
865 return 0; 885 return 0;
866 886
867 if (f == r) 887 if (f == r)
868 order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1; 888 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
869 else { 889 else {
870 order = 0; 890 order = 0;
871 f = r; 891 f = r;
872 } 892 }
873 893
874 return (MAX_CHILD_SIZE(f) - dc_size( B_N_CHILD(f,order))); 894 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
875 895
876} 896}
877 897
878
879/* Check whether left neighbor is in memory. */ 898/* Check whether left neighbor is in memory. */
880static int is_left_neighbor_in_cache( 899static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
881 struct tree_balance * p_s_tb, 900{
882 int n_h 901 struct buffer_head *p_s_father, *left;
883 ) { 902 struct super_block *p_s_sb = p_s_tb->tb_sb;
884 struct buffer_head * p_s_father, * left; 903 b_blocknr_t n_left_neighbor_blocknr;
885 struct super_block * p_s_sb = p_s_tb->tb_sb; 904 int n_left_neighbor_position;
886 b_blocknr_t n_left_neighbor_blocknr; 905
887 int n_left_neighbor_position; 906 if (!p_s_tb->FL[n_h]) /* Father of the left neighbor does not exist. */
888 907 return 0;
889 if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */ 908
890 return 0; 909 /* Calculate father of the node to be balanced. */
891 910 p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
892 /* Calculate father of the node to be balanced. */ 911
893 p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); 912 RFALSE(!p_s_father ||
894 913 !B_IS_IN_TREE(p_s_father) ||
895 RFALSE( ! p_s_father || 914 !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
896 ! B_IS_IN_TREE (p_s_father) || 915 !buffer_uptodate(p_s_father) ||
897 ! B_IS_IN_TREE (p_s_tb->FL[n_h]) || 916 !buffer_uptodate(p_s_tb->FL[n_h]),
898 ! buffer_uptodate (p_s_father) || 917 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
899 ! buffer_uptodate (p_s_tb->FL[n_h]), 918 p_s_father, p_s_tb->FL[n_h]);
900 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", 919
901 p_s_father, p_s_tb->FL[n_h]); 920 /* Get position of the pointer to the left neighbor into the left father. */
902 921 n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
903 922 p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
904 /* Get position of the pointer to the left neighbor into the left father. */ 923 /* Get left neighbor block number. */
905 n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ? 924 n_left_neighbor_blocknr =
906 p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); 925 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
907 /* Get left neighbor block number. */ 926 /* Look for the left neighbor in the cache. */
908 n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); 927 if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {
909 /* Look for the left neighbor in the cache. */ 928
910 if ( (left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr)) ) { 929 RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
911 930 "vs-8170: left neighbor (%b %z) is not in the tree",
912 RFALSE( buffer_uptodate (left) && ! B_IS_IN_TREE(left), 931 left, left);
913 "vs-8170: left neighbor (%b %z) is not in the tree", left, left); 932 put_bh(left);
914 put_bh(left) ; 933 return 1;
915 return 1; 934 }
916 }
917
918 return 0;
919}
920 935
936 return 0;
937}
921 938
922#define LEFT_PARENTS 'l' 939#define LEFT_PARENTS 'l'
923#define RIGHT_PARENTS 'r' 940#define RIGHT_PARENTS 'r'
924 941
925 942static void decrement_key(struct cpu_key *p_s_key)
926static void decrement_key (struct cpu_key * p_s_key)
927{ 943{
928 // call item specific function for this key 944 // call item specific function for this key
929 item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key); 945 item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
930} 946}
931 947
932
933
934
935/* Calculate far left/right parent of the left/right neighbor of the current node, that 948/* Calculate far left/right parent of the left/right neighbor of the current node, that
936 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. 949 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
937 * Calculate left/right common parent of the current node and L[h]/R[h]. 950 * Calculate left/right common parent of the current node and L[h]/R[h].
@@ -940,111 +953,121 @@ static void decrement_key (struct cpu_key * p_s_key)
940 SCHEDULE_OCCURRED - schedule occurred while the function worked; 953 SCHEDULE_OCCURRED - schedule occurred while the function worked;
941 * CARRY_ON - schedule didn't occur while the function worked; 954 * CARRY_ON - schedule didn't occur while the function worked;
942 */ 955 */
943static int get_far_parent (struct tree_balance * p_s_tb, 956static int get_far_parent(struct tree_balance *p_s_tb,
944 int n_h, 957 int n_h,
945 struct buffer_head ** pp_s_father, 958 struct buffer_head **pp_s_father,
946 struct buffer_head ** pp_s_com_father, 959 struct buffer_head **pp_s_com_father, char c_lr_par)
947 char c_lr_par)
948{ 960{
949 struct buffer_head * p_s_parent; 961 struct buffer_head *p_s_parent;
950 INITIALIZE_PATH (s_path_to_neighbor_father); 962 INITIALIZE_PATH(s_path_to_neighbor_father);
951 struct path * p_s_path = p_s_tb->tb_path; 963 struct path *p_s_path = p_s_tb->tb_path;
952 struct cpu_key s_lr_father_key; 964 struct cpu_key s_lr_father_key;
953 int n_counter, 965 int n_counter,
954 n_position = INT_MAX, 966 n_position = INT_MAX,
955 n_first_last_position = 0, 967 n_first_last_position = 0,
956 n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); 968 n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);
957 969
958 /* Starting from F[n_h] go upwards in the tree, and look for the common 970 /* Starting from F[n_h] go upwards in the tree, and look for the common
959 ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ 971 ancestor of F[n_h], and its neighbor l/r, that should be obtained. */
960 972
961 n_counter = n_path_offset; 973 n_counter = n_path_offset;
962 974
963 RFALSE( n_counter < FIRST_PATH_ELEMENT_OFFSET, 975 RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
964 "PAP-8180: invalid path length"); 976 "PAP-8180: invalid path length");
965 977
966 978 for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
967 for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter-- ) { 979 /* Check whether parent of the current buffer in the path is really parent in the tree. */
968 /* Check whether parent of the current buffer in the path is really parent in the tree. */ 980 if (!B_IS_IN_TREE
969 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) ) 981 (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
970 return REPEAT_SEARCH; 982 return REPEAT_SEARCH;
971 /* Check whether position in the parent is correct. */ 983 /* Check whether position in the parent is correct. */
972 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) ) 984 if ((n_position =
973 return REPEAT_SEARCH; 985 PATH_OFFSET_POSITION(p_s_path,
974 /* Check whether parent at the path really points to the child. */ 986 n_counter - 1)) >
975 if ( B_N_CHILD_NUM(p_s_parent, n_position) != 987 B_NR_ITEMS(p_s_parent))
976 PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr ) 988 return REPEAT_SEARCH;
977 return REPEAT_SEARCH; 989 /* Check whether parent at the path really points to the child. */
978 /* Return delimiting key if position in the parent is not equal to first/last one. */ 990 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
979 if ( c_lr_par == RIGHT_PARENTS ) 991 PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
980 n_first_last_position = B_NR_ITEMS (p_s_parent); 992 return REPEAT_SEARCH;
981 if ( n_position != n_first_last_position ) { 993 /* Return delimiting key if position in the parent is not equal to first/last one. */
982 *pp_s_com_father = p_s_parent; 994 if (c_lr_par == RIGHT_PARENTS)
983 get_bh(*pp_s_com_father) ; 995 n_first_last_position = B_NR_ITEMS(p_s_parent);
984 /*(*pp_s_com_father = p_s_parent)->b_count++;*/ 996 if (n_position != n_first_last_position) {
985 break; 997 *pp_s_com_father = p_s_parent;
998 get_bh(*pp_s_com_father);
999 /*(*pp_s_com_father = p_s_parent)->b_count++; */
1000 break;
1001 }
986 } 1002 }
987 } 1003
988 1004 /* if we are in the root of the tree, then there is no common father */
989 /* if we are in the root of the tree, then there is no common father */ 1005 if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
990 if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) { 1006 /* Check whether first buffer in the path is the root of the tree. */
991 /* Check whether first buffer in the path is the root of the tree. */ 1007 if (PATH_OFFSET_PBUFFER
992 if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 1008 (p_s_tb->tb_path,
993 SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { 1009 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
994 *pp_s_father = *pp_s_com_father = NULL; 1010 SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
995 return CARRY_ON; 1011 *pp_s_father = *pp_s_com_father = NULL;
1012 return CARRY_ON;
1013 }
1014 return REPEAT_SEARCH;
996 } 1015 }
997 return REPEAT_SEARCH;
998 }
999 1016
1000 RFALSE( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL, 1017 RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
1001 "PAP-8185: (%b %z) level too small", 1018 "PAP-8185: (%b %z) level too small",
1002 *pp_s_com_father, *pp_s_com_father); 1019 *pp_s_com_father, *pp_s_com_father);
1003 1020
1004 /* Check whether the common parent is locked. */ 1021 /* Check whether the common parent is locked. */
1005 1022
1006 if ( buffer_locked (*pp_s_com_father) ) { 1023 if (buffer_locked(*pp_s_com_father)) {
1007 __wait_on_buffer(*pp_s_com_father); 1024 __wait_on_buffer(*pp_s_com_father);
1008 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 1025 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1009 decrement_bcount(*pp_s_com_father); 1026 decrement_bcount(*pp_s_com_father);
1010 return REPEAT_SEARCH; 1027 return REPEAT_SEARCH;
1028 }
1011 } 1029 }
1012 }
1013
1014 /* So, we got common parent of the current node and its left/right neighbor.
1015 Now we are geting the parent of the left/right neighbor. */
1016 1030
1017 /* Form key to get parent of the left/right neighbor. */ 1031 /* So, we got common parent of the current node and its left/right neighbor.
1018 le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ? 1032 Now we are geting the parent of the left/right neighbor. */
1019 (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position)));
1020 1033
1034 /* Form key to get parent of the left/right neighbor. */
1035 le_key2cpu_key(&s_lr_father_key,
1036 B_N_PDELIM_KEY(*pp_s_com_father,
1037 (c_lr_par ==
1038 LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
1039 n_position -
1040 1) : (p_s_tb->rkey[n_h -
1041 1] =
1042 n_position)));
1021 1043
1022 if ( c_lr_par == LEFT_PARENTS ) 1044 if (c_lr_par == LEFT_PARENTS)
1023 decrement_key(&s_lr_father_key); 1045 decrement_key(&s_lr_father_key);
1024 1046
1025 if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR) 1047 if (search_by_key
1026 // path is released 1048 (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1027 return IO_ERROR; 1049 n_h + 1) == IO_ERROR)
1050 // path is released
1051 return IO_ERROR;
1028 1052
1029 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 1053 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1030 decrement_counters_in_path(&s_path_to_neighbor_father); 1054 decrement_counters_in_path(&s_path_to_neighbor_father);
1031 decrement_bcount(*pp_s_com_father); 1055 decrement_bcount(*pp_s_com_father);
1032 return REPEAT_SEARCH; 1056 return REPEAT_SEARCH;
1033 } 1057 }
1034 1058
1035 *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); 1059 *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
1036 1060
1037 RFALSE( B_LEVEL (*pp_s_father) != n_h + 1, 1061 RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
1038 "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father); 1062 "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
1039 RFALSE( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET, 1063 RFALSE(s_path_to_neighbor_father.path_length <
1040 "PAP-8192: path length is too small"); 1064 FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
1041 1065
1042 s_path_to_neighbor_father.path_length--; 1066 s_path_to_neighbor_father.path_length--;
1043 decrement_counters_in_path(&s_path_to_neighbor_father); 1067 decrement_counters_in_path(&s_path_to_neighbor_father);
1044 return CARRY_ON; 1068 return CARRY_ON;
1045} 1069}
1046 1070
1047
1048/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of 1071/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
1049 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset], 1072 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
1050 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset]. 1073 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
@@ -1052,122 +1075,127 @@ static int get_far_parent (struct tree_balance * p_s_tb,
1052 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1075 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1053 * CARRY_ON - schedule didn't occur while the function worked; 1076 * CARRY_ON - schedule didn't occur while the function worked;
1054 */ 1077 */
1055static int get_parents (struct tree_balance * p_s_tb, int n_h) 1078static int get_parents(struct tree_balance *p_s_tb, int n_h)
1056{ 1079{
1057 struct path * p_s_path = p_s_tb->tb_path; 1080 struct path *p_s_path = p_s_tb->tb_path;
1058 int n_position, 1081 int n_position,
1059 n_ret_value, 1082 n_ret_value,
1060 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); 1083 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1061 struct buffer_head * p_s_curf, 1084 struct buffer_head *p_s_curf, *p_s_curcf;
1062 * p_s_curcf; 1085
1063 1086 /* Current node is the root of the tree or will be root of the tree */
1064 /* Current node is the root of the tree or will be root of the tree */ 1087 if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1065 if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { 1088 /* The root can not have parents.
1066 /* The root can not have parents. 1089 Release nodes which previously were obtained as parents of the current node neighbors. */
1067 Release nodes which previously were obtained as parents of the current node neighbors. */ 1090 decrement_bcount(p_s_tb->FL[n_h]);
1091 decrement_bcount(p_s_tb->CFL[n_h]);
1092 decrement_bcount(p_s_tb->FR[n_h]);
1093 decrement_bcount(p_s_tb->CFR[n_h]);
1094 p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
1095 p_s_tb->CFR[n_h] = NULL;
1096 return CARRY_ON;
1097 }
1098
1099 /* Get parent FL[n_path_offset] of L[n_path_offset]. */
1100 if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
1101 /* Current node is not the first child of its parent. */
1102 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
1103 p_s_curf = p_s_curcf =
1104 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1105 get_bh(p_s_curf);
1106 get_bh(p_s_curf);
1107 p_s_tb->lkey[n_h] = n_position - 1;
1108 } else {
1109 /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
1110 Calculate current common parent of L[n_path_offset] and the current node. Note that
1111 CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
1112 Calculate lkey[n_path_offset]. */
1113 if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
1114 &p_s_curcf,
1115 LEFT_PARENTS)) != CARRY_ON)
1116 return n_ret_value;
1117 }
1118
1068 decrement_bcount(p_s_tb->FL[n_h]); 1119 decrement_bcount(p_s_tb->FL[n_h]);
1120 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */
1069 decrement_bcount(p_s_tb->CFL[n_h]); 1121 decrement_bcount(p_s_tb->CFL[n_h]);
1070 decrement_bcount(p_s_tb->FR[n_h]); 1122 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */
1071 decrement_bcount(p_s_tb->CFR[n_h]); 1123
1072 p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL; 1124 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
1073 return CARRY_ON; 1125 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
1074 } 1126 "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);
1075
1076 /* Get parent FL[n_path_offset] of L[n_path_offset]. */
1077 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) ) {
1078 /* Current node is not the first child of its parent. */
1079 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/
1080 p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1081 get_bh(p_s_curf) ;
1082 get_bh(p_s_curf) ;
1083 p_s_tb->lkey[n_h] = n_position - 1;
1084 }
1085 else {
1086 /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
1087 Calculate current common parent of L[n_path_offset] and the current node. Note that
1088 CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
1089 Calculate lkey[n_path_offset]. */
1090 if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
1091 &p_s_curcf, LEFT_PARENTS)) != CARRY_ON )
1092 return n_ret_value;
1093 }
1094
1095 decrement_bcount(p_s_tb->FL[n_h]);
1096 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */
1097 decrement_bcount(p_s_tb->CFL[n_h]);
1098 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */
1099
1100 RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) ||
1101 (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)),
1102 "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);
1103 1127
1104/* Get parent FR[n_h] of R[n_h]. */ 1128/* Get parent FR[n_h] of R[n_h]. */
1105 1129
1106/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ 1130/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
1107 if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) { 1131 if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
1108/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. 1132/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
1109 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] 1133 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
1110 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ 1134 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
1111 if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON ) 1135 if ((n_ret_value =
1112 return n_ret_value; 1136 get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
1113 } 1137 RIGHT_PARENTS)) != CARRY_ON)
1114 else { 1138 return n_ret_value;
1139 } else {
1115/* Current node is not the last child of its parent F[n_h]. */ 1140/* Current node is not the last child of its parent F[n_h]. */
1116 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ 1141 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
1117 p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); 1142 p_s_curf = p_s_curcf =
1118 get_bh(p_s_curf) ; 1143 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1119 get_bh(p_s_curf) ; 1144 get_bh(p_s_curf);
1120 p_s_tb->rkey[n_h] = n_position; 1145 get_bh(p_s_curf);
1121 } 1146 p_s_tb->rkey[n_h] = n_position;
1122 1147 }
1123 decrement_bcount(p_s_tb->FR[n_h]);
1124 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */
1125
1126 decrement_bcount(p_s_tb->CFR[n_h]);
1127 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */
1128
1129 RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) ||
1130 (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)),
1131 "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
1132
1133 return CARRY_ON;
1134}
1135 1148
1149 decrement_bcount(p_s_tb->FR[n_h]);
1150 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */
1151
1152 decrement_bcount(p_s_tb->CFR[n_h]);
1153 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */
1154
1155 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
1156 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
1157 "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
1158
1159 return CARRY_ON;
1160}
1136 1161
1137/* it is possible to remove node as result of shiftings to 1162/* it is possible to remove node as result of shiftings to
1138 neighbors even when we insert or paste item. */ 1163 neighbors even when we insert or paste item. */
1139static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h) 1164static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1165 struct tree_balance *tb, int h)
1140{ 1166{
1141 struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h); 1167 struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
1142 int levbytes = tb->insert_size[h]; 1168 int levbytes = tb->insert_size[h];
1143 struct item_head * ih; 1169 struct item_head *ih;
1144 struct reiserfs_key * r_key = NULL; 1170 struct reiserfs_key *r_key = NULL;
1145 1171
1146 ih = B_N_PITEM_HEAD (Sh, 0); 1172 ih = B_N_PITEM_HEAD(Sh, 0);
1147 if ( tb->CFR[h] ) 1173 if (tb->CFR[h])
1148 r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]); 1174 r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
1149 1175
1150 if ( 1176 if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1151 lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes 1177 /* shifting may merge items which might save space */
1152 /* shifting may merge items which might save space */ 1178 -
1153 - (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0) 1179 ((!h
1154 - (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0) 1180 && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
1155 + (( h ) ? KEY_SIZE : 0)) 1181 -
1156 { 1182 ((!h && r_key
1157 /* node can not be removed */ 1183 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1158 if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */ 1184 + ((h) ? KEY_SIZE : 0)) {
1159 if ( ! h ) 1185 /* node can not be removed */
1160 tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0); 1186 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1161 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1187 if (!h)
1162 return NO_BALANCING_NEEDED; 1188 tb->s0num =
1189 B_NR_ITEMS(Sh) +
1190 ((mode == M_INSERT) ? 1 : 0);
1191 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1192 return NO_BALANCING_NEEDED;
1193 }
1163 } 1194 }
1164 } 1195 PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
1165 PROC_INFO_INC( tb -> tb_sb, can_node_be_removed[ h ] ); 1196 return !NO_BALANCING_NEEDED;
1166 return !NO_BALANCING_NEEDED;
1167} 1197}
1168 1198
1169
1170
1171/* Check whether current node S[h] is balanced when increasing its size by 1199/* Check whether current node S[h] is balanced when increasing its size by
1172 * Inserting or Pasting. 1200 * Inserting or Pasting.
1173 * Calculate parameters for balancing for current level h. 1201 * Calculate parameters for balancing for current level h.
@@ -1182,154 +1210,157 @@ static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree
1182 * -2 - no disk space. 1210 * -2 - no disk space.
1183 */ 1211 */
1184/* ip means Inserting or Pasting */ 1212/* ip means Inserting or Pasting */
1185static int ip_check_balance (struct tree_balance * tb, int h) 1213static int ip_check_balance(struct tree_balance *tb, int h)
1186{ 1214{
1187 struct virtual_node * vn = tb->tb_vn; 1215 struct virtual_node *vn = tb->tb_vn;
1188 int levbytes, /* Number of bytes that must be inserted into (value 1216 int levbytes, /* Number of bytes that must be inserted into (value
1189 is negative if bytes are deleted) buffer which 1217 is negative if bytes are deleted) buffer which
1190 contains node being balanced. The mnemonic is 1218 contains node being balanced. The mnemonic is
1191 that the attempted change in node space used level 1219 that the attempted change in node space used level
1192 is levbytes bytes. */ 1220 is levbytes bytes. */
1193 n_ret_value; 1221 n_ret_value;
1194 1222
1195 int lfree, sfree, rfree /* free space in L, S and R */; 1223 int lfree, sfree, rfree /* free space in L, S and R */ ;
1196 1224
1197 /* nver is short for number of vertixes, and lnver is the number if 1225 /* nver is short for number of vertixes, and lnver is the number if
1198 we shift to the left, rnver is the number if we shift to the 1226 we shift to the left, rnver is the number if we shift to the
1199 right, and lrnver is the number if we shift in both directions. 1227 right, and lrnver is the number if we shift in both directions.
1200 The goal is to minimize first the number of vertixes, and second, 1228 The goal is to minimize first the number of vertixes, and second,
1201 the number of vertixes whose contents are changed by shifting, 1229 the number of vertixes whose contents are changed by shifting,
1202 and third the number of uncached vertixes whose contents are 1230 and third the number of uncached vertixes whose contents are
1203 changed by shifting and must be read from disk. */ 1231 changed by shifting and must be read from disk. */
1204 int nver, lnver, rnver, lrnver; 1232 int nver, lnver, rnver, lrnver;
1205 1233
1206 /* used at leaf level only, S0 = S[0] is the node being balanced, 1234 /* used at leaf level only, S0 = S[0] is the node being balanced,
1207 sInum [ I = 0,1,2 ] is the number of items that will 1235 sInum [ I = 0,1,2 ] is the number of items that will
1208 remain in node SI after balancing. S1 and S2 are new 1236 remain in node SI after balancing. S1 and S2 are new
1209 nodes that might be created. */ 1237 nodes that might be created. */
1210 1238
1211 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. 1239 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters.
1212 where 4th parameter is s1bytes and 5th - s2bytes 1240 where 4th parameter is s1bytes and 5th - s2bytes
1213 */ 1241 */
1214 short snum012[40] = {0,}; /* s0num, s1num, s2num for 8 cases 1242 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases
1215 0,1 - do not shift and do not shift but bottle 1243 0,1 - do not shift and do not shift but bottle
1216 2 - shift only whole item to left 1244 2 - shift only whole item to left
1217 3 - shift to left and bottle as much as possible 1245 3 - shift to left and bottle as much as possible
1218 4,5 - shift to right (whole items and as much as possible 1246 4,5 - shift to right (whole items and as much as possible
1219 6,7 - shift to both directions (whole items and as much as possible) 1247 6,7 - shift to both directions (whole items and as much as possible)
1220 */ 1248 */
1221 1249
1222 /* Sh is the node whose balance is currently being checked */ 1250 /* Sh is the node whose balance is currently being checked */
1223 struct buffer_head * Sh; 1251 struct buffer_head *Sh;
1224 1252
1225 Sh = PATH_H_PBUFFER (tb->tb_path, h); 1253 Sh = PATH_H_PBUFFER(tb->tb_path, h);
1226 levbytes = tb->insert_size[h]; 1254 levbytes = tb->insert_size[h];
1227 1255
1228 /* Calculate balance parameters for creating new root. */ 1256 /* Calculate balance parameters for creating new root. */
1229 if ( ! Sh ) { 1257 if (!Sh) {
1230 if ( ! h ) 1258 if (!h)
1231 reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0"); 1259 reiserfs_panic(tb->tb_sb,
1232 switch ( n_ret_value = get_empty_nodes (tb, h) ) { 1260 "vs-8210: ip_check_balance: S[0] can not be 0");
1233 case CARRY_ON: 1261 switch (n_ret_value = get_empty_nodes(tb, h)) {
1234 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1262 case CARRY_ON:
1235 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1263 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1236 1264 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1237 case NO_DISK_SPACE: 1265
1238 case REPEAT_SEARCH: 1266 case NO_DISK_SPACE:
1239 return n_ret_value; 1267 case REPEAT_SEARCH:
1240 default: 1268 return n_ret_value;
1241 reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); 1269 default:
1270 reiserfs_panic(tb->tb_sb,
1271 "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
1272 }
1242 } 1273 }
1243 }
1244
1245 if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. */
1246 return n_ret_value;
1247
1248 sfree = B_FREE_SPACE (Sh);
1249
1250 /* get free space of neighbors */
1251 rfree = get_rfree (tb, h);
1252 lfree = get_lfree (tb, h);
1253
1254 if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED)
1255 /* and new item fits into node S[h] without any shifting */
1256 return NO_BALANCING_NEEDED;
1257
1258 create_virtual_node (tb, h);
1259
1260 /*
1261 determine maximal number of items we can shift to the left neighbor (in tb structure)
1262 and the maximal number of bytes that can flow to the left neighbor
1263 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1264 */
1265 check_left (tb, h, lfree);
1266
1267 /*
1268 determine maximal number of items we can shift to the right neighbor (in tb structure)
1269 and the maximal number of bytes that can flow to the right neighbor
1270 from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1271 */
1272 check_right (tb, h, rfree);
1273
1274
1275 /* all contents of internal node S[h] can be moved into its
1276 neighbors, S[h] will be removed after balancing */
1277 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1278 int to_r;
1279
1280 /* Since we are working on internal nodes, and our internal
1281 nodes have fixed size entries, then we can balance by the
1282 number of items rather than the space they consume. In this
1283 routine we set the left node equal to the right node,
1284 allowing a difference of less than or equal to 1 child
1285 pointer. */
1286 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1287 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1288 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1289 return CARRY_ON;
1290 }
1291
1292 /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1293 RFALSE( h &&
1294 ( tb->lnum[h] >= vn->vn_nr_item + 1 ||
1295 tb->rnum[h] >= vn->vn_nr_item + 1),
1296 "vs-8220: tree is not balanced on internal level");
1297 RFALSE( ! h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1298 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) ),
1299 "vs-8225: tree is not balanced on leaf level");
1300
1301 /* all contents of S[0] can be moved into its neighbors
1302 S[0] will be removed after balancing. */
1303 if (!h && is_leaf_removable (tb))
1304 return CARRY_ON;
1305 1274
1275 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */
1276 return n_ret_value;
1306 1277
1307 /* why do we perform this check here rather than earlier?? 1278 sfree = B_FREE_SPACE(Sh);
1308 Answer: we can win 1 node in some cases above. Moreover we 1279
1309 checked it above, when we checked, that S[0] is not removable 1280 /* get free space of neighbors */
1310 in principle */ 1281 rfree = get_rfree(tb, h);
1311 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ 1282 lfree = get_lfree(tb, h);
1312 if ( ! h ) 1283
1313 tb->s0num = vn->vn_nr_item; 1284 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1314 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1285 NO_BALANCING_NEEDED)
1315 return NO_BALANCING_NEEDED; 1286 /* and new item fits into node S[h] without any shifting */
1316 } 1287 return NO_BALANCING_NEEDED;
1317 1288
1289 create_virtual_node(tb, h);
1318 1290
1319 { 1291 /*
1320 int lpar, rpar, nset, lset, rset, lrset; 1292 determine maximal number of items we can shift to the left neighbor (in tb structure)
1321 /* 1293 and the maximal number of bytes that can flow to the left neighbor
1322 * regular overflowing of the node 1294 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1323 */ 1295 */
1296 check_left(tb, h, lfree);
1324 1297
1325 /* get_num_ver works in 2 modes (FLOW & NO_FLOW) 1298 /*
1326 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) 1299 determine maximal number of items we can shift to the right neighbor (in tb structure)
1327 nset, lset, rset, lrset - shows, whether flowing items give better packing 1300 and the maximal number of bytes that can flow to the right neighbor
1328 */ 1301 from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1302 */
1303 check_right(tb, h, rfree);
1304
1305 /* all contents of internal node S[h] can be moved into its
1306 neighbors, S[h] will be removed after balancing */
1307 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1308 int to_r;
1309
1310 /* Since we are working on internal nodes, and our internal
1311 nodes have fixed size entries, then we can balance by the
1312 number of items rather than the space they consume. In this
1313 routine we set the left node equal to the right node,
1314 allowing a difference of less than or equal to 1 child
1315 pointer. */
1316 to_r =
1317 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1318 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1319 tb->rnum[h]);
1320 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1321 -1, -1);
1322 return CARRY_ON;
1323 }
1324
1325 /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1326 RFALSE(h &&
1327 (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1328 tb->rnum[h] >= vn->vn_nr_item + 1),
1329 "vs-8220: tree is not balanced on internal level");
1330 RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1331 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1332 "vs-8225: tree is not balanced on leaf level");
1333
1334 /* all contents of S[0] can be moved into its neighbors
1335 S[0] will be removed after balancing. */
1336 if (!h && is_leaf_removable(tb))
1337 return CARRY_ON;
1338
1339 /* why do we perform this check here rather than earlier??
1340 Answer: we can win 1 node in some cases above. Moreover we
1341 checked it above, when we checked, that S[0] is not removable
1342 in principle */
1343 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1344 if (!h)
1345 tb->s0num = vn->vn_nr_item;
1346 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1347 return NO_BALANCING_NEEDED;
1348 }
1349
1350 {
1351 int lpar, rpar, nset, lset, rset, lrset;
1352 /*
1353 * regular overflowing of the node
1354 */
1355
1356 /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1357 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1358 nset, lset, rset, lrset - shows, whether flowing items give better packing
1359 */
1329#define FLOW 1 1360#define FLOW 1
1330#define NO_FLOW 0 /* do not any splitting */ 1361#define NO_FLOW 0 /* do not any splitting */
1331 1362
1332 /* we choose one the following */ 1363 /* we choose one the following */
1333#define NOTHING_SHIFT_NO_FLOW 0 1364#define NOTHING_SHIFT_NO_FLOW 0
1334#define NOTHING_SHIFT_FLOW 5 1365#define NOTHING_SHIFT_FLOW 5
1335#define LEFT_SHIFT_NO_FLOW 10 1366#define LEFT_SHIFT_NO_FLOW 10
@@ -1339,164 +1370,173 @@ static int ip_check_balance (struct tree_balance * tb, int h)
1339#define LR_SHIFT_NO_FLOW 30 1370#define LR_SHIFT_NO_FLOW 30
1340#define LR_SHIFT_FLOW 35 1371#define LR_SHIFT_FLOW 35
1341 1372
1373 lpar = tb->lnum[h];
1374 rpar = tb->rnum[h];
1375
1376 /* calculate number of blocks S[h] must be split into when
1377 nothing is shifted to the neighbors,
1378 as well as number of items in each part of the split node (s012 numbers),
1379 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
1380 nset = NOTHING_SHIFT_NO_FLOW;
1381 nver = get_num_ver(vn->vn_mode, tb, h,
1382 0, -1, h ? vn->vn_nr_item : 0, -1,
1383 snum012, NO_FLOW);
1384
1385 if (!h) {
1386 int nver1;
1387
1388 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1389 nver1 = get_num_ver(vn->vn_mode, tb, h,
1390 0, -1, 0, -1,
1391 snum012 + NOTHING_SHIFT_FLOW, FLOW);
1392 if (nver > nver1)
1393 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1394 }
1342 1395
1343 lpar = tb->lnum[h]; 1396 /* calculate number of blocks S[h] must be split into when
1344 rpar = tb->rnum[h]; 1397 l_shift_num first items and l_shift_bytes of the right most
1345 1398 liquid item to be shifted are shifted to the left neighbor,
1346 1399 as well as number of items in each part of the splitted node (s012 numbers),
1347 /* calculate number of blocks S[h] must be split into when 1400 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1348 nothing is shifted to the neighbors, 1401 */
1349 as well as number of items in each part of the split node (s012 numbers), 1402 lset = LEFT_SHIFT_NO_FLOW;
1350 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ 1403 lnver = get_num_ver(vn->vn_mode, tb, h,
1351 nset = NOTHING_SHIFT_NO_FLOW; 1404 lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1352 nver = get_num_ver (vn->vn_mode, tb, h, 1405 -1, h ? vn->vn_nr_item : 0, -1,
1353 0, -1, h?vn->vn_nr_item:0, -1, 1406 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1354 snum012, NO_FLOW); 1407 if (!h) {
1355 1408 int lnver1;
1356 if (!h) 1409
1357 { 1410 lnver1 = get_num_ver(vn->vn_mode, tb, h,
1358 int nver1; 1411 lpar -
1359 1412 ((tb->lbytes != -1) ? 1 : 0),
1360 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ 1413 tb->lbytes, 0, -1,
1361 nver1 = get_num_ver (vn->vn_mode, tb, h, 1414 snum012 + LEFT_SHIFT_FLOW, FLOW);
1362 0, -1, 0, -1, 1415 if (lnver > lnver1)
1363 snum012 + NOTHING_SHIFT_FLOW, FLOW); 1416 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1364 if (nver > nver1) 1417 }
1365 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1366 }
1367
1368
1369 /* calculate number of blocks S[h] must be split into when
1370 l_shift_num first items and l_shift_bytes of the right most
1371 liquid item to be shifted are shifted to the left neighbor,
1372 as well as number of items in each part of the splitted node (s012 numbers),
1373 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1374 */
1375 lset = LEFT_SHIFT_NO_FLOW;
1376 lnver = get_num_ver (vn->vn_mode, tb, h,
1377 lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? vn->vn_nr_item:0, -1,
1378 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1379 if (!h)
1380 {
1381 int lnver1;
1382
1383 lnver1 = get_num_ver (vn->vn_mode, tb, h,
1384 lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, 0, -1,
1385 snum012 + LEFT_SHIFT_FLOW, FLOW);
1386 if (lnver > lnver1)
1387 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1388 }
1389
1390
1391 /* calculate number of blocks S[h] must be split into when
1392 r_shift_num first items and r_shift_bytes of the left most
1393 liquid item to be shifted are shifted to the right neighbor,
1394 as well as number of items in each part of the splitted node (s012 numbers),
1395 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1396 */
1397 rset = RIGHT_SHIFT_NO_FLOW;
1398 rnver = get_num_ver (vn->vn_mode, tb, h,
1399 0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1,
1400 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1401 if (!h)
1402 {
1403 int rnver1;
1404
1405 rnver1 = get_num_ver (vn->vn_mode, tb, h,
1406 0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
1407 snum012 + RIGHT_SHIFT_FLOW, FLOW);
1408
1409 if (rnver > rnver1)
1410 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1411 }
1412
1413
1414 /* calculate number of blocks S[h] must be split into when
1415 items are shifted in both directions,
1416 as well as number of items in each part of the splitted node (s012 numbers),
1417 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1418 */
1419 lrset = LR_SHIFT_NO_FLOW;
1420 lrnver = get_num_ver (vn->vn_mode, tb, h,
1421 lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1,
1422 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1423 if (!h)
1424 {
1425 int lrnver1;
1426
1427 lrnver1 = get_num_ver (vn->vn_mode, tb, h,
1428 lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
1429 snum012 + LR_SHIFT_FLOW, FLOW);
1430 if (lrnver > lrnver1)
1431 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1432 }
1433
1434
1435 1418
1436 /* Our general shifting strategy is: 1419 /* calculate number of blocks S[h] must be split into when
1437 1) to minimized number of new nodes; 1420 r_shift_num first items and r_shift_bytes of the left most
1438 2) to minimized number of neighbors involved in shifting; 1421 liquid item to be shifted are shifted to the right neighbor,
1439 3) to minimized number of disk reads; */ 1422 as well as number of items in each part of the splitted node (s012 numbers),
1423 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1424 */
1425 rset = RIGHT_SHIFT_NO_FLOW;
1426 rnver = get_num_ver(vn->vn_mode, tb, h,
1427 0, -1,
1428 h ? (vn->vn_nr_item - rpar) : (rpar -
1429 ((tb->
1430 rbytes !=
1431 -1) ? 1 :
1432 0)), -1,
1433 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1434 if (!h) {
1435 int rnver1;
1436
1437 rnver1 = get_num_ver(vn->vn_mode, tb, h,
1438 0, -1,
1439 (rpar -
1440 ((tb->rbytes != -1) ? 1 : 0)),
1441 tb->rbytes,
1442 snum012 + RIGHT_SHIFT_FLOW, FLOW);
1443
1444 if (rnver > rnver1)
1445 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1446 }
1440 1447
1441 /* we can win TWO or ONE nodes by shifting in both directions */ 1448 /* calculate number of blocks S[h] must be split into when
1442 if (lrnver < lnver && lrnver < rnver) 1449 items are shifted in both directions,
1443 { 1450 as well as number of items in each part of the splitted node (s012 numbers),
1444 RFALSE( h && 1451 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1445 (tb->lnum[h] != 1 || 1452 */
1446 tb->rnum[h] != 1 || 1453 lrset = LR_SHIFT_NO_FLOW;
1447 lrnver != 1 || rnver != 2 || lnver != 2 || h != 1), 1454 lrnver = get_num_ver(vn->vn_mode, tb, h,
1448 "vs-8230: bad h"); 1455 lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1449 if (lrset == LR_SHIFT_FLOW) 1456 -1,
1450 set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset, 1457 h ? (vn->vn_nr_item - rpar) : (rpar -
1451 tb->lbytes, tb->rbytes); 1458 ((tb->
1452 else 1459 rbytes !=
1453 set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1), 1460 -1) ? 1 :
1454 tb->rnum[h] - ((tb->rbytes == -1) ? 0 : 1), lrnver, snum012 + lrset, -1, -1); 1461 0)), -1,
1455 1462 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1456 return CARRY_ON; 1463 if (!h) {
1457 } 1464 int lrnver1;
1465
1466 lrnver1 = get_num_ver(vn->vn_mode, tb, h,
1467 lpar -
1468 ((tb->lbytes != -1) ? 1 : 0),
1469 tb->lbytes,
1470 (rpar -
1471 ((tb->rbytes != -1) ? 1 : 0)),
1472 tb->rbytes,
1473 snum012 + LR_SHIFT_FLOW, FLOW);
1474 if (lrnver > lrnver1)
1475 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1476 }
1458 1477
1459 /* if shifting doesn't lead to better packing then don't shift */ 1478 /* Our general shifting strategy is:
1460 if (nver == lrnver) 1479 1) to minimized number of new nodes;
1461 { 1480 2) to minimized number of neighbors involved in shifting;
1462 set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1); 1481 3) to minimized number of disk reads; */
1463 return CARRY_ON; 1482
1464 } 1483 /* we can win TWO or ONE nodes by shifting in both directions */
1484 if (lrnver < lnver && lrnver < rnver) {
1485 RFALSE(h &&
1486 (tb->lnum[h] != 1 ||
1487 tb->rnum[h] != 1 ||
1488 lrnver != 1 || rnver != 2 || lnver != 2
1489 || h != 1), "vs-8230: bad h");
1490 if (lrset == LR_SHIFT_FLOW)
1491 set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
1492 lrnver, snum012 + lrset,
1493 tb->lbytes, tb->rbytes);
1494 else
1495 set_parameters(tb, h,
1496 tb->lnum[h] -
1497 ((tb->lbytes == -1) ? 0 : 1),
1498 tb->rnum[h] -
1499 ((tb->rbytes == -1) ? 0 : 1),
1500 lrnver, snum012 + lrset, -1, -1);
1501
1502 return CARRY_ON;
1503 }
1465 1504
1505 /* if shifting doesn't lead to better packing then don't shift */
1506 if (nver == lrnver) {
1507 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1508 -1);
1509 return CARRY_ON;
1510 }
1466 1511
1467 /* now we know that for better packing shifting in only one 1512 /* now we know that for better packing shifting in only one
1468 direction either to the left or to the right is required */ 1513 direction either to the left or to the right is required */
1469 1514
1470 /* if shifting to the left is better than shifting to the right */ 1515 /* if shifting to the left is better than shifting to the right */
1471 if (lnver < rnver) 1516 if (lnver < rnver) {
1472 { 1517 SET_PAR_SHIFT_LEFT;
1473 SET_PAR_SHIFT_LEFT; 1518 return CARRY_ON;
1474 return CARRY_ON; 1519 }
1475 }
1476 1520
1477 /* if shifting to the right is better than shifting to the left */ 1521 /* if shifting to the right is better than shifting to the left */
1478 if (lnver > rnver) 1522 if (lnver > rnver) {
1479 { 1523 SET_PAR_SHIFT_RIGHT;
1480 SET_PAR_SHIFT_RIGHT; 1524 return CARRY_ON;
1481 return CARRY_ON; 1525 }
1482 }
1483 1526
1527 /* now shifting in either direction gives the same number
1528 of nodes and we can make use of the cached neighbors */
1529 if (is_left_neighbor_in_cache(tb, h)) {
1530 SET_PAR_SHIFT_LEFT;
1531 return CARRY_ON;
1532 }
1484 1533
1485 /* now shifting in either direction gives the same number 1534 /* shift to the right independently on whether the right neighbor in cache or not */
1486 of nodes and we can make use of the cached neighbors */ 1535 SET_PAR_SHIFT_RIGHT;
1487 if (is_left_neighbor_in_cache (tb,h)) 1536 return CARRY_ON;
1488 {
1489 SET_PAR_SHIFT_LEFT;
1490 return CARRY_ON;
1491 } 1537 }
1492
1493 /* shift to the right independently on whether the right neighbor in cache or not */
1494 SET_PAR_SHIFT_RIGHT;
1495 return CARRY_ON;
1496 }
1497} 1538}
1498 1539
1499
1500/* Check whether current node S[h] is balanced when Decreasing its size by 1540/* Check whether current node S[h] is balanced when Decreasing its size by
1501 * Deleting or Cutting for INTERNAL node of S+tree. 1541 * Deleting or Cutting for INTERNAL node of S+tree.
1502 * Calculate parameters for balancing for current level h. 1542 * Calculate parameters for balancing for current level h.
@@ -1513,157 +1553,173 @@ static int ip_check_balance (struct tree_balance * tb, int h)
1513 * Note: Items of internal nodes have fixed size, so the balance condition for 1553 * Note: Items of internal nodes have fixed size, so the balance condition for
1514 * the internal part of S+tree is as for the B-trees. 1554 * the internal part of S+tree is as for the B-trees.
1515 */ 1555 */
1516static int dc_check_balance_internal (struct tree_balance * tb, int h) 1556static int dc_check_balance_internal(struct tree_balance *tb, int h)
1517{ 1557{
1518 struct virtual_node * vn = tb->tb_vn; 1558 struct virtual_node *vn = tb->tb_vn;
1519 1559
1520 /* Sh is the node whose balance is currently being checked, 1560 /* Sh is the node whose balance is currently being checked,
1521 and Fh is its father. */ 1561 and Fh is its father. */
1522 struct buffer_head * Sh, * Fh; 1562 struct buffer_head *Sh, *Fh;
1523 int maxsize, 1563 int maxsize, n_ret_value;
1524 n_ret_value; 1564 int lfree, rfree /* free space in L and R */ ;
1525 int lfree, rfree /* free space in L and R */;
1526 1565
1527 Sh = PATH_H_PBUFFER (tb->tb_path, h); 1566 Sh = PATH_H_PBUFFER(tb->tb_path, h);
1528 Fh = PATH_H_PPARENT (tb->tb_path, h); 1567 Fh = PATH_H_PPARENT(tb->tb_path, h);
1529 1568
1530 maxsize = MAX_CHILD_SIZE(Sh); 1569 maxsize = MAX_CHILD_SIZE(Sh);
1531 1570
1532/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ 1571/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1533/* new_nr_item = number of items node would have if operation is */ 1572/* new_nr_item = number of items node would have if operation is */
1534/* performed without balancing (new_nr_item); */ 1573/* performed without balancing (new_nr_item); */
1535 create_virtual_node (tb, h); 1574 create_virtual_node(tb, h);
1536 1575
1537 if ( ! Fh ) 1576 if (!Fh) { /* S[h] is the root. */
1538 { /* S[h] is the root. */ 1577 if (vn->vn_nr_item > 0) {
1539 if ( vn->vn_nr_item > 0 ) 1578 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1540 { 1579 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1541 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1580 }
1542 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1581 /* new_nr_item == 0.
1582 * Current root will be deleted resulting in
1583 * decrementing the tree height. */
1584 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1585 return CARRY_ON;
1586 }
1587
1588 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1589 return n_ret_value;
1590
1591 /* get free space of neighbors */
1592 rfree = get_rfree(tb, h);
1593 lfree = get_lfree(tb, h);
1594
1595 /* determine maximal number of items we can fit into neighbors */
1596 check_left(tb, h, lfree);
1597 check_right(tb, h, rfree);
1598
1599 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
1600 * In this case we balance only if it leads to better packing. */
1601 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
1602 * which is impossible with greater values of new_nr_item. */
1603 if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1604 /* All contents of S[h] can be moved to L[h]. */
1605 int n;
1606 int order_L;
1607
1608 order_L =
1609 ((n =
1610 PATH_H_B_ITEM_ORDER(tb->tb_path,
1611 h)) ==
1612 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1613 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1614 (DC_SIZE + KEY_SIZE);
1615 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1616 -1);
1617 return CARRY_ON;
1618 }
1619
1620 if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1621 /* All contents of S[h] can be moved to R[h]. */
1622 int n;
1623 int order_R;
1624
1625 order_R =
1626 ((n =
1627 PATH_H_B_ITEM_ORDER(tb->tb_path,
1628 h)) ==
1629 B_NR_ITEMS(Fh)) ? 0 : n + 1;
1630 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1631 (DC_SIZE + KEY_SIZE);
1632 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1633 -1);
1634 return CARRY_ON;
1635 }
1636 }
1637
1638 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1639 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1640 int to_r;
1641
1642 to_r =
1643 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1644 tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1645 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1646 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1647 0, NULL, -1, -1);
1648 return CARRY_ON;
1649 }
1650
1651 /* Balancing does not lead to better packing. */
1652 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1653 return NO_BALANCING_NEEDED;
1543 } 1654 }
1544 /* new_nr_item == 0. 1655
1545 * Current root will be deleted resulting in 1656 /* Current node contain insufficient number of items. Balancing is required. */
1546 * decrementing the tree height. */ 1657 /* Check whether we can merge S[h] with left neighbor. */
1547 set_parameters (tb, h, 0, 0, 0, NULL, -1, -1); 1658 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1548 return CARRY_ON; 1659 if (is_left_neighbor_in_cache(tb, h)
1549 } 1660 || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1550 1661 int n;
1551 if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) 1662 int order_L;
1552 return n_ret_value; 1663
1553 1664 order_L =
1554 1665 ((n =
1555 /* get free space of neighbors */ 1666 PATH_H_B_ITEM_ORDER(tb->tb_path,
1556 rfree = get_rfree (tb, h); 1667 h)) ==
1557 lfree = get_lfree (tb, h); 1668 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1558 1669 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1559 /* determine maximal number of items we can fit into neighbors */ 1670 KEY_SIZE);
1560 check_left (tb, h, lfree); 1671 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1561 check_right (tb, h, rfree); 1672 return CARRY_ON;
1562 1673 }
1563 1674
1564 if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) ) 1675 /* Check whether we can merge S[h] with right neighbor. */
1565 { /* Balance condition for the internal node is valid. 1676 if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1566 * In this case we balance only if it leads to better packing. */ 1677 int n;
1567 if ( vn->vn_nr_item == MIN_NR_KEY(Sh) ) 1678 int order_R;
1568 { /* Here we join S[h] with one of its neighbors, 1679
1569 * which is impossible with greater values of new_nr_item. */ 1680 order_R =
1570 if ( tb->lnum[h] >= vn->vn_nr_item + 1 ) 1681 ((n =
1571 { 1682 PATH_H_B_ITEM_ORDER(tb->tb_path,
1572 /* All contents of S[h] can be moved to L[h]. */ 1683 h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1573 int n; 1684 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1574 int order_L; 1685 KEY_SIZE);
1575 1686 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1576 order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1687 return CARRY_ON;
1577 n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE);
1578 set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
1579 return CARRY_ON;
1580 }
1581
1582 if ( tb->rnum[h] >= vn->vn_nr_item + 1 )
1583 {
1584 /* All contents of S[h] can be moved to R[h]. */
1585 int n;
1586 int order_R;
1587
1588 order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1;
1589 n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE);
1590 set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
1591 return CARRY_ON;
1592 }
1593 } 1688 }
1594 1689
1595 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) 1690 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1596 { 1691 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1597 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ 1692 int to_r;
1598 int to_r; 1693
1694 to_r =
1695 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1696 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1697 tb->rnum[h]);
1698 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1699 -1, -1);
1700 return CARRY_ON;
1701 }
1599 1702
1600 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - 1703 /* For internal nodes try to borrow item from a neighbor */
1601 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); 1704 RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1602 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); 1705
1603 return CARRY_ON; 1706 /* Borrow one or two items from caching neighbor */
1707 if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1708 int from_l;
1709
1710 from_l =
1711 (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1712 1) / 2 - (vn->vn_nr_item + 1);
1713 set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1714 return CARRY_ON;
1604 } 1715 }
1605 1716
1606 /* Balancing does not lead to better packing. */ 1717 set_parameters(tb, h, 0,
1607 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1718 -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1608 return NO_BALANCING_NEEDED; 1719 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1609 }
1610
1611 /* Current node contain insufficient number of items. Balancing is required. */
1612 /* Check whether we can merge S[h] with left neighbor. */
1613 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1614 if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h])
1615 {
1616 int n;
1617 int order_L;
1618
1619 order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1620 n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE);
1621 set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
1622 return CARRY_ON; 1720 return CARRY_ON;
1623 }
1624
1625 /* Check whether we can merge S[h] with right neighbor. */
1626 if (tb->rnum[h] >= vn->vn_nr_item + 1)
1627 {
1628 int n;
1629 int order_R;
1630
1631 order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1632 n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE);
1633 set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
1634 return CARRY_ON;
1635 }
1636
1637 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1638 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)
1639 {
1640 int to_r;
1641
1642 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1643 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1644 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1645 return CARRY_ON;
1646 }
1647
1648 /* For internal nodes try to borrow item from a neighbor */
1649 RFALSE( !tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1650
1651 /* Borrow one or two items from caching neighbor */
1652 if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h])
1653 {
1654 int from_l;
1655
1656 from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 - (vn->vn_nr_item + 1);
1657 set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1);
1658 return CARRY_ON;
1659 }
1660
1661 set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1,
1662 NULL, -1, -1);
1663 return CARRY_ON;
1664} 1721}
1665 1722
1666
1667/* Check whether current node S[h] is balanced when Decreasing its size by 1723/* Check whether current node S[h] is balanced when Decreasing its size by
1668 * Deleting or Truncating for LEAF node of S+tree. 1724 * Deleting or Truncating for LEAF node of S+tree.
1669 * Calculate parameters for balancing for current level h. 1725 * Calculate parameters for balancing for current level h.
@@ -1677,90 +1733,86 @@ static int dc_check_balance_internal (struct tree_balance * tb, int h)
1677 * -1 - no balancing for higher levels needed; 1733 * -1 - no balancing for higher levels needed;
1678 * -2 - no disk space. 1734 * -2 - no disk space.
1679 */ 1735 */
1680static int dc_check_balance_leaf (struct tree_balance * tb, int h) 1736static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1681{ 1737{
1682 struct virtual_node * vn = tb->tb_vn; 1738 struct virtual_node *vn = tb->tb_vn;
1683 1739
1684 /* Number of bytes that must be deleted from 1740 /* Number of bytes that must be deleted from
1685 (value is negative if bytes are deleted) buffer which 1741 (value is negative if bytes are deleted) buffer which
1686 contains node being balanced. The mnemonic is that the 1742 contains node being balanced. The mnemonic is that the
1687 attempted change in node space used level is levbytes bytes. */ 1743 attempted change in node space used level is levbytes bytes. */
1688 int levbytes; 1744 int levbytes;
1689 /* the maximal item size */ 1745 /* the maximal item size */
1690 int maxsize, 1746 int maxsize, n_ret_value;
1691 n_ret_value; 1747 /* S0 is the node whose balance is currently being checked,
1692 /* S0 is the node whose balance is currently being checked, 1748 and F0 is its father. */
1693 and F0 is its father. */ 1749 struct buffer_head *S0, *F0;
1694 struct buffer_head * S0, * F0; 1750 int lfree, rfree /* free space in L and R */ ;
1695 int lfree, rfree /* free space in L and R */; 1751
1696 1752 S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1697 S0 = PATH_H_PBUFFER (tb->tb_path, 0); 1753 F0 = PATH_H_PPARENT(tb->tb_path, 0);
1698 F0 = PATH_H_PPARENT (tb->tb_path, 0);
1699
1700 levbytes = tb->insert_size[h];
1701
1702 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1703
1704 if ( ! F0 )
1705 { /* S[0] is the root now. */
1706
1707 RFALSE( -levbytes >= maxsize - B_FREE_SPACE (S0),
1708 "vs-8240: attempt to create empty buffer tree");
1709
1710 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1711 return NO_BALANCING_NEEDED;
1712 }
1713
1714 if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON )
1715 return n_ret_value;
1716
1717 /* get free space of neighbors */
1718 rfree = get_rfree (tb, h);
1719 lfree = get_lfree (tb, h);
1720
1721 create_virtual_node (tb, h);
1722
1723 /* if 3 leaves can be merge to one, set parameters and return */
1724 if (are_leaves_removable (tb, lfree, rfree))
1725 return CARRY_ON;
1726
1727 /* determine maximal number of items we can shift to the left/right neighbor
1728 and the maximal number of bytes that can flow to the left/right neighbor
1729 from the left/right most liquid item that cannot be shifted from S[0] entirely
1730 */
1731 check_left (tb, h, lfree);
1732 check_right (tb, h, rfree);
1733
1734 /* check whether we can merge S with left neighbor. */
1735 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1736 if (is_left_neighbor_in_cache (tb,h) ||
1737 ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1738 !tb->FR[h]) {
1739
1740 RFALSE( !tb->FL[h], "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1741
1742 /* set parameter to merge S[0] with its left neighbor */
1743 set_parameters (tb, h, -1, 0, 0, NULL, -1, -1);
1744 return CARRY_ON;
1745 }
1746
1747 /* check whether we can merge S[0] with right neighbor. */
1748 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1749 set_parameters (tb, h, 0, -1, 0, NULL, -1, -1);
1750 return CARRY_ON;
1751 }
1752
1753 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1754 if (is_leaf_removable (tb))
1755 return CARRY_ON;
1756
1757 /* Balancing is not required. */
1758 tb->s0num = vn->vn_nr_item;
1759 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1760 return NO_BALANCING_NEEDED;
1761}
1762 1754
1755 levbytes = tb->insert_size[h];
1763 1756
1757 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1758
1759 if (!F0) { /* S[0] is the root now. */
1760
1761 RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1762 "vs-8240: attempt to create empty buffer tree");
1763
1764 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1765 return NO_BALANCING_NEEDED;
1766 }
1767
1768 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1769 return n_ret_value;
1770
1771 /* get free space of neighbors */
1772 rfree = get_rfree(tb, h);
1773 lfree = get_lfree(tb, h);
1774
1775 create_virtual_node(tb, h);
1776
1777 /* if 3 leaves can be merge to one, set parameters and return */
1778 if (are_leaves_removable(tb, lfree, rfree))
1779 return CARRY_ON;
1780
1781 /* determine maximal number of items we can shift to the left/right neighbor
1782 and the maximal number of bytes that can flow to the left/right neighbor
1783 from the left/right most liquid item that cannot be shifted from S[0] entirely
1784 */
1785 check_left(tb, h, lfree);
1786 check_right(tb, h, rfree);
1787
1788 /* check whether we can merge S with left neighbor. */
1789 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1790 if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1791 !tb->FR[h]) {
1792
1793 RFALSE(!tb->FL[h],
1794 "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1795
1796 /* set parameter to merge S[0] with its left neighbor */
1797 set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1798 return CARRY_ON;
1799 }
1800
1801 /* check whether we can merge S[0] with right neighbor. */
1802 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1803 set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
1804 return CARRY_ON;
1805 }
1806
1807 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1808 if (is_leaf_removable(tb))
1809 return CARRY_ON;
1810
1811 /* Balancing is not required. */
1812 tb->s0num = vn->vn_nr_item;
1813 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1814 return NO_BALANCING_NEEDED;
1815}
1764 1816
1765/* Check whether current node S[h] is balanced when Decreasing its size by 1817/* Check whether current node S[h] is balanced when Decreasing its size by
1766 * Deleting or Cutting. 1818 * Deleting or Cutting.
@@ -1775,18 +1827,17 @@ static int dc_check_balance_leaf (struct tree_balance * tb, int h)
1775 * -1 - no balancing for higher levels needed; 1827 * -1 - no balancing for higher levels needed;
1776 * -2 - no disk space. 1828 * -2 - no disk space.
1777 */ 1829 */
1778static int dc_check_balance (struct tree_balance * tb, int h) 1830static int dc_check_balance(struct tree_balance *tb, int h)
1779{ 1831{
1780 RFALSE( ! (PATH_H_PBUFFER (tb->tb_path, h)), "vs-8250: S is not initialized"); 1832 RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1833 "vs-8250: S is not initialized");
1781 1834
1782 if ( h ) 1835 if (h)
1783 return dc_check_balance_internal (tb, h); 1836 return dc_check_balance_internal(tb, h);
1784 else 1837 else
1785 return dc_check_balance_leaf (tb, h); 1838 return dc_check_balance_leaf(tb, h);
1786} 1839}
1787 1840
1788
1789
1790/* Check whether current node S[h] is balanced. 1841/* Check whether current node S[h] is balanced.
1791 * Calculate parameters for balancing for current level h. 1842 * Calculate parameters for balancing for current level h.
1792 * Parameters: 1843 * Parameters:
@@ -1805,83 +1856,80 @@ static int dc_check_balance (struct tree_balance * tb, int h)
1805 * -1 - no balancing for higher levels needed; 1856 * -1 - no balancing for higher levels needed;
1806 * -2 - no disk space. 1857 * -2 - no disk space.
1807 */ 1858 */
1808static int check_balance (int mode, 1859static int check_balance(int mode,
1809 struct tree_balance * tb, 1860 struct tree_balance *tb,
1810 int h, 1861 int h,
1811 int inum, 1862 int inum,
1812 int pos_in_item, 1863 int pos_in_item,
1813 struct item_head * ins_ih, 1864 struct item_head *ins_ih, const void *data)
1814 const void * data
1815 )
1816{ 1865{
1817 struct virtual_node * vn; 1866 struct virtual_node *vn;
1818 1867
1819 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); 1868 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1820 vn->vn_free_ptr = (char *)(tb->tb_vn + 1); 1869 vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1821 vn->vn_mode = mode; 1870 vn->vn_mode = mode;
1822 vn->vn_affected_item_num = inum; 1871 vn->vn_affected_item_num = inum;
1823 vn->vn_pos_in_item = pos_in_item; 1872 vn->vn_pos_in_item = pos_in_item;
1824 vn->vn_ins_ih = ins_ih; 1873 vn->vn_ins_ih = ins_ih;
1825 vn->vn_data = data; 1874 vn->vn_data = data;
1826 1875
1827 RFALSE( mode == M_INSERT && !vn->vn_ins_ih, 1876 RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1828 "vs-8255: ins_ih can not be 0 in insert mode"); 1877 "vs-8255: ins_ih can not be 0 in insert mode");
1829 1878
1830 if ( tb->insert_size[h] > 0 ) 1879 if (tb->insert_size[h] > 0)
1831 /* Calculate balance parameters when size of node is increasing. */ 1880 /* Calculate balance parameters when size of node is increasing. */
1832 return ip_check_balance (tb, h); 1881 return ip_check_balance(tb, h);
1833 1882
1834 /* Calculate balance parameters when size of node is decreasing. */ 1883 /* Calculate balance parameters when size of node is decreasing. */
1835 return dc_check_balance (tb, h); 1884 return dc_check_balance(tb, h);
1836} 1885}
1837 1886
1887/* Check whether parent at the path is the really parent of the current node.*/
1888static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
1889{
1890 struct buffer_head *p_s_bh;
1891 struct path *p_s_path = p_s_tb->tb_path;
1892 int n_position,
1893 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1894
1895 /* We are in the root or in the new root. */
1896 if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1897
1898 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1899 "PAP-8260: invalid offset in the path");
1900
1901 if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
1902 b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
1903 /* Root is not changed. */
1904 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1905 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1906 return CARRY_ON;
1907 }
1908 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1909 }
1910
1911 if (!B_IS_IN_TREE
1912 (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
1913 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1838 1914
1915 if ((n_position =
1916 PATH_OFFSET_POSITION(p_s_path,
1917 n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
1918 return REPEAT_SEARCH;
1839 1919
1840/* Check whether parent at the path is the really parent of the current node.*/ 1920 if (B_N_CHILD_NUM(p_s_bh, n_position) !=
1841static int get_direct_parent( 1921 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
1842 struct tree_balance * p_s_tb, 1922 /* Parent in the path is not parent of the current node in the tree. */
1843 int n_h 1923 return REPEAT_SEARCH;
1844 ) { 1924
1845 struct buffer_head * p_s_bh; 1925 if (buffer_locked(p_s_bh)) {
1846 struct path * p_s_path = p_s_tb->tb_path; 1926 __wait_on_buffer(p_s_bh);
1847 int n_position, 1927 if (FILESYSTEM_CHANGED_TB(p_s_tb))
1848 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); 1928 return REPEAT_SEARCH;
1849
1850 /* We are in the root or in the new root. */
1851 if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) {
1852
1853 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1854 "PAP-8260: invalid offset in the path");
1855
1856 if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1857 SB_ROOT_BLOCK (p_s_tb->tb_sb) ) {
1858 /* Root is not changed. */
1859 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1860 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1861 return CARRY_ON;
1862 } 1929 }
1863 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1864 }
1865
1866 if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) )
1867 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1868
1869 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) )
1870 return REPEAT_SEARCH;
1871
1872 if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr )
1873 /* Parent in the path is not parent of the current node in the tree. */
1874 return REPEAT_SEARCH;
1875
1876 if ( buffer_locked(p_s_bh) ) {
1877 __wait_on_buffer(p_s_bh);
1878 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
1879 return REPEAT_SEARCH;
1880 }
1881
1882 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1883}
1884 1930
1931 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1932}
1885 1933
1886/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors 1934/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
1887 * of S[n_h] we 1935 * of S[n_h] we
@@ -1889,356 +1937,401 @@ static int get_direct_parent(
1889 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1937 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1890 * CARRY_ON - schedule didn't occur while the function worked; 1938 * CARRY_ON - schedule didn't occur while the function worked;
1891 */ 1939 */
1892static int get_neighbors( 1940static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
1893 struct tree_balance * p_s_tb, 1941{
1894 int n_h 1942 int n_child_position,
1895 ) { 1943 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
1896 int n_child_position, 1944 unsigned long n_son_number;
1897 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); 1945 struct super_block *p_s_sb = p_s_tb->tb_sb;
1898 unsigned long n_son_number; 1946 struct buffer_head *p_s_bh;
1899 struct super_block * p_s_sb = p_s_tb->tb_sb; 1947
1900 struct buffer_head * p_s_bh; 1948 PROC_INFO_INC(p_s_sb, get_neighbors[n_h]);
1901 1949
1902 1950 if (p_s_tb->lnum[n_h]) {
1903 PROC_INFO_INC( p_s_sb, get_neighbors[ n_h ] ); 1951 /* We need left neighbor to balance S[n_h]. */
1904 1952 PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]);
1905 if ( p_s_tb->lnum[n_h] ) { 1953 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1906 /* We need left neighbor to balance S[n_h]. */ 1954
1907 PROC_INFO_INC( p_s_sb, need_l_neighbor[ n_h ] ); 1955 RFALSE(p_s_bh == p_s_tb->FL[n_h] &&
1908 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1956 !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
1909 1957 "PAP-8270: invalid position in the parent");
1910 RFALSE( p_s_bh == p_s_tb->FL[n_h] && 1958
1911 ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), 1959 n_child_position =
1912 "PAP-8270: invalid position in the parent"); 1960 (p_s_bh ==
1913 1961 p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->
1914 n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); 1962 FL[n_h]);
1915 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); 1963 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
1916 p_s_bh = sb_bread(p_s_sb, n_son_number); 1964 p_s_bh = sb_bread(p_s_sb, n_son_number);
1917 if (!p_s_bh) 1965 if (!p_s_bh)
1918 return IO_ERROR; 1966 return IO_ERROR;
1919 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 1967 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1920 decrement_bcount(p_s_bh); 1968 decrement_bcount(p_s_bh);
1921 PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] ); 1969 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
1922 return REPEAT_SEARCH; 1970 return REPEAT_SEARCH;
1971 }
1972
1973 RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
1974 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
1975 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
1976 p_s_bh->b_blocknr, "PAP-8275: invalid parent");
1977 RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
1978 RFALSE(!n_h &&
1979 B_FREE_SPACE(p_s_bh) !=
1980 MAX_CHILD_SIZE(p_s_bh) -
1981 dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)),
1982 "PAP-8290: invalid child size of left neighbor");
1983
1984 decrement_bcount(p_s_tb->L[n_h]);
1985 p_s_tb->L[n_h] = p_s_bh;
1923 } 1986 }
1924 1987
1925 RFALSE( ! B_IS_IN_TREE(p_s_tb->FL[n_h]) || 1988 if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */
1926 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || 1989 PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]);
1927 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != 1990 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1928 p_s_bh->b_blocknr, "PAP-8275: invalid parent"); 1991
1929 RFALSE( ! B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); 1992 RFALSE(p_s_bh == p_s_tb->FR[n_h] &&
1930 RFALSE( ! n_h && 1993 PATH_OFFSET_POSITION(p_s_tb->tb_path,
1931 B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FL[0],n_child_position)), 1994 n_path_offset) >=
1932 "PAP-8290: invalid child size of left neighbor"); 1995 B_NR_ITEMS(p_s_bh),
1933 1996 "PAP-8295: invalid position in the parent");
1934 decrement_bcount(p_s_tb->L[n_h]); 1997
1935 p_s_tb->L[n_h] = p_s_bh; 1998 n_child_position =
1936 } 1999 (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0;
1937 2000 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
1938 2001 p_s_bh = sb_bread(p_s_sb, n_son_number);
1939 if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. */ 2002 if (!p_s_bh)
1940 PROC_INFO_INC( p_s_sb, need_r_neighbor[ n_h ] ); 2003 return IO_ERROR;
1941 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 2004 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1942 2005 decrement_bcount(p_s_bh);
1943 RFALSE( p_s_bh == p_s_tb->FR[n_h] && 2006 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
1944 PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh), 2007 return REPEAT_SEARCH;
1945 "PAP-8295: invalid position in the parent"); 2008 }
1946 2009 decrement_bcount(p_s_tb->R[n_h]);
1947 n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0; 2010 p_s_tb->R[n_h] = p_s_bh;
1948 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); 2011
1949 p_s_bh = sb_bread(p_s_sb, n_son_number); 2012 RFALSE(!n_h
1950 if (!p_s_bh) 2013 && B_FREE_SPACE(p_s_bh) !=
1951 return IO_ERROR; 2014 MAX_CHILD_SIZE(p_s_bh) -
1952 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 2015 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)),
1953 decrement_bcount(p_s_bh); 2016 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
1954 PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] ); 2017 B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh),
1955 return REPEAT_SEARCH; 2018 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)));
2019
1956 } 2020 }
1957 decrement_bcount(p_s_tb->R[n_h]); 2021 return CARRY_ON;
1958 p_s_tb->R[n_h] = p_s_bh;
1959
1960 RFALSE( ! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)),
1961 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
1962 B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh),
1963 dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)));
1964
1965 }
1966 return CARRY_ON;
1967} 2022}
1968 2023
1969#ifdef CONFIG_REISERFS_CHECK 2024#ifdef CONFIG_REISERFS_CHECK
1970void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s) 2025void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s)
1971{ 2026{
1972 void * vp; 2027 void *vp;
1973 static size_t malloced; 2028 static size_t malloced;
1974 2029
1975 2030 vp = kmalloc(size, flags);
1976 vp = kmalloc (size, flags); 2031 if (vp) {
1977 if (vp) { 2032 REISERFS_SB(s)->s_kmallocs += size;
1978 REISERFS_SB(s)->s_kmallocs += size; 2033 if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) {
1979 if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) { 2034 reiserfs_warning(s,
1980 reiserfs_warning (s, 2035 "vs-8301: reiserfs_kmalloc: allocated memory %d",
1981 "vs-8301: reiserfs_kmalloc: allocated memory %d", 2036 REISERFS_SB(s)->s_kmallocs);
1982 REISERFS_SB(s)->s_kmallocs); 2037 malloced = REISERFS_SB(s)->s_kmallocs;
1983 malloced = REISERFS_SB(s)->s_kmallocs; 2038 }
1984 } 2039 }
1985 } 2040 return vp;
1986 return vp;
1987} 2041}
1988 2042
1989void reiserfs_kfree (const void * vp, size_t size, struct super_block * s) 2043void reiserfs_kfree(const void *vp, size_t size, struct super_block *s)
1990{ 2044{
1991 kfree (vp); 2045 kfree(vp);
1992 2046
1993 REISERFS_SB(s)->s_kmallocs -= size; 2047 REISERFS_SB(s)->s_kmallocs -= size;
1994 if (REISERFS_SB(s)->s_kmallocs < 0) 2048 if (REISERFS_SB(s)->s_kmallocs < 0)
1995 reiserfs_warning (s, "vs-8302: reiserfs_kfree: allocated memory %d", 2049 reiserfs_warning(s,
1996 REISERFS_SB(s)->s_kmallocs); 2050 "vs-8302: reiserfs_kfree: allocated memory %d",
2051 REISERFS_SB(s)->s_kmallocs);
1997 2052
1998} 2053}
1999#endif 2054#endif
2000 2055
2001 2056static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2002static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh)
2003{ 2057{
2004 int max_num_of_items; 2058 int max_num_of_items;
2005 int max_num_of_entries; 2059 int max_num_of_entries;
2006 unsigned long blocksize = sb->s_blocksize; 2060 unsigned long blocksize = sb->s_blocksize;
2007 2061
2008#define MIN_NAME_LEN 1 2062#define MIN_NAME_LEN 1
2009 2063
2010 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); 2064 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2011 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / 2065 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2012 (DEH_SIZE + MIN_NAME_LEN); 2066 (DEH_SIZE + MIN_NAME_LEN);
2013 2067
2014 return sizeof(struct virtual_node) + 2068 return sizeof(struct virtual_node) +
2015 max(max_num_of_items * sizeof (struct virtual_item), 2069 max(max_num_of_items * sizeof(struct virtual_item),
2016 sizeof (struct virtual_item) + sizeof(struct direntry_uarea) + 2070 sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2017 (max_num_of_entries - 1) * sizeof (__u16)); 2071 (max_num_of_entries - 1) * sizeof(__u16));
2018} 2072}
2019 2073
2020
2021
2022/* maybe we should fail balancing we are going to perform when kmalloc 2074/* maybe we should fail balancing we are going to perform when kmalloc
2023 fails several times. But now it will loop until kmalloc gets 2075 fails several times. But now it will loop until kmalloc gets
2024 required memory */ 2076 required memory */
2025static int get_mem_for_virtual_node (struct tree_balance * tb) 2077static int get_mem_for_virtual_node(struct tree_balance *tb)
2026{ 2078{
2027 int check_fs = 0; 2079 int check_fs = 0;
2028 int size; 2080 int size;
2029 char * buf; 2081 char *buf;
2030 2082
2031 size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path)); 2083 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2032 2084
2033 if (size > tb->vn_buf_size) { 2085 if (size > tb->vn_buf_size) {
2034 /* we have to allocate more memory for virtual node */ 2086 /* we have to allocate more memory for virtual node */
2035 if (tb->vn_buf) { 2087 if (tb->vn_buf) {
2036 /* free memory allocated before */ 2088 /* free memory allocated before */
2037 reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); 2089 reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2038 /* this is not needed if kfree is atomic */ 2090 /* this is not needed if kfree is atomic */
2039 check_fs = 1; 2091 check_fs = 1;
2040 } 2092 }
2041 2093
2042 /* virtual node requires now more memory */ 2094 /* virtual node requires now more memory */
2043 tb->vn_buf_size = size; 2095 tb->vn_buf_size = size;
2044 2096
2045 /* get memory for virtual item */ 2097 /* get memory for virtual item */
2046 buf = reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN, tb->tb_sb); 2098 buf =
2047 if ( ! buf ) { 2099 reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN,
2048 /* getting memory with GFP_KERNEL priority may involve 2100 tb->tb_sb);
2049 balancing now (due to indirect_to_direct conversion on 2101 if (!buf) {
2050 dcache shrinking). So, release path and collected 2102 /* getting memory with GFP_KERNEL priority may involve
2051 resources here */ 2103 balancing now (due to indirect_to_direct conversion on
2052 free_buffers_in_tb (tb); 2104 dcache shrinking). So, release path and collected
2053 buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb); 2105 resources here */
2054 if ( !buf ) { 2106 free_buffers_in_tb(tb);
2107 buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb);
2108 if (!buf) {
2055#ifdef CONFIG_REISERFS_CHECK 2109#ifdef CONFIG_REISERFS_CHECK
2056 reiserfs_warning (tb->tb_sb, 2110 reiserfs_warning(tb->tb_sb,
2057 "vs-8345: get_mem_for_virtual_node: " 2111 "vs-8345: get_mem_for_virtual_node: "
2058 "kmalloc failed. reiserfs kmalloced %d bytes", 2112 "kmalloc failed. reiserfs kmalloced %d bytes",
2059 REISERFS_SB(tb->tb_sb)->s_kmallocs); 2113 REISERFS_SB(tb->tb_sb)->
2114 s_kmallocs);
2060#endif 2115#endif
2061 tb->vn_buf_size = 0; 2116 tb->vn_buf_size = 0;
2062 } 2117 }
2063 tb->vn_buf = buf; 2118 tb->vn_buf = buf;
2064 schedule() ; 2119 schedule();
2065 return REPEAT_SEARCH; 2120 return REPEAT_SEARCH;
2066 } 2121 }
2067 2122
2068 tb->vn_buf = buf; 2123 tb->vn_buf = buf;
2069 } 2124 }
2070 2125
2071 if ( check_fs && FILESYSTEM_CHANGED_TB (tb) ) 2126 if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2072 return REPEAT_SEARCH; 2127 return REPEAT_SEARCH;
2073 2128
2074 return CARRY_ON; 2129 return CARRY_ON;
2075} 2130}
2076 2131
2077
2078#ifdef CONFIG_REISERFS_CHECK 2132#ifdef CONFIG_REISERFS_CHECK
2079static void tb_buffer_sanity_check (struct super_block * p_s_sb, 2133static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2080 struct buffer_head * p_s_bh, 2134 struct buffer_head *p_s_bh,
2081 const char *descr, int level) { 2135 const char *descr, int level)
2082 if (p_s_bh) {
2083 if (atomic_read (&(p_s_bh->b_count)) <= 0) {
2084
2085 reiserfs_panic (p_s_sb, "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh);
2086 }
2087
2088 if ( ! buffer_uptodate (p_s_bh) ) {
2089 reiserfs_panic (p_s_sb, "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh);
2090 }
2091
2092 if ( ! B_IS_IN_TREE (p_s_bh) ) {
2093 reiserfs_panic (p_s_sb, "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh);
2094 }
2095
2096 if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2097 reiserfs_panic (p_s_sb, "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", descr, level, p_s_bh);
2098 }
2099
2100 if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2101 reiserfs_panic (p_s_sb, "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", descr, level, p_s_bh);
2102 }
2103
2104 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2105 reiserfs_panic (p_s_sb, "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n", descr, level, p_s_bh);
2106 }
2107 }
2108}
2109#else
2110static void tb_buffer_sanity_check (struct super_block * p_s_sb,
2111 struct buffer_head * p_s_bh,
2112 const char *descr, int level)
2113{;}
2114#endif
2115
2116static int clear_all_dirty_bits(struct super_block *s,
2117 struct buffer_head *bh) {
2118 return reiserfs_prepare_for_journal(s, bh, 0) ;
2119}
2120
2121static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
2122{ 2136{
2123 struct buffer_head * locked; 2137 if (p_s_bh) {
2124#ifdef CONFIG_REISERFS_CHECK 2138 if (atomic_read(&(p_s_bh->b_count)) <= 0) {
2125 int repeat_counter = 0;
2126#endif
2127 int i;
2128 2139
2129 do { 2140 reiserfs_panic(p_s_sb,
2130 2141 "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n",
2131 locked = NULL; 2142 descr, level, p_s_bh);
2132
2133 for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) {
2134 if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) {
2135 /* if I understand correctly, we can only be sure the last buffer
2136 ** in the path is in the tree --clm
2137 */
2138#ifdef CONFIG_REISERFS_CHECK
2139 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2140 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2141 tb_buffer_sanity_check (p_s_tb->tb_sb,
2142 PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i),
2143 "S",
2144 p_s_tb->tb_path->path_length - i);
2145 } 2143 }
2146#endif
2147 if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2148 PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)))
2149 {
2150 locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i);
2151 }
2152 }
2153 }
2154 2144
2155 for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) { 2145 if (!buffer_uptodate(p_s_bh)) {
2146 reiserfs_panic(p_s_sb,
2147 "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n",
2148 descr, level, p_s_bh);
2149 }
2156 2150
2157 if (p_s_tb->lnum[i] ) { 2151 if (!B_IS_IN_TREE(p_s_bh)) {
2152 reiserfs_panic(p_s_sb,
2153 "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n",
2154 descr, level, p_s_bh);
2155 }
2158 2156
2159 if ( p_s_tb->L[i] ) { 2157 if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2160 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i); 2158 reiserfs_panic(p_s_sb,
2161 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i])) 2159 "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n",
2162 locked = p_s_tb->L[i]; 2160 descr, level, p_s_bh);
2163 } 2161 }
2164 2162
2165 if ( !locked && p_s_tb->FL[i] ) { 2163 if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2166 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i); 2164 reiserfs_panic(p_s_sb,
2167 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i])) 2165 "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n",
2168 locked = p_s_tb->FL[i]; 2166 descr, level, p_s_bh);
2169 } 2167 }
2170 2168
2171 if ( !locked && p_s_tb->CFL[i] ) { 2169 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2172 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i); 2170 reiserfs_panic(p_s_sb,
2173 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i])) 2171 "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2174 locked = p_s_tb->CFL[i]; 2172 descr, level, p_s_bh);
2175 } 2173 }
2174 }
2175}
2176#else
2177static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2178 struct buffer_head *p_s_bh,
2179 const char *descr, int level)
2180{;
2181}
2182#endif
2176 2183
2177 } 2184static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2185{
2186 return reiserfs_prepare_for_journal(s, bh, 0);
2187}
2178 2188
2179 if ( !locked && (p_s_tb->rnum[i]) ) { 2189static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2190{
2191 struct buffer_head *locked;
2192#ifdef CONFIG_REISERFS_CHECK
2193 int repeat_counter = 0;
2194#endif
2195 int i;
2180 2196
2181 if ( p_s_tb->R[i] ) { 2197 do {
2182 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i);
2183 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i]))
2184 locked = p_s_tb->R[i];
2185 }
2186 2198
2187 2199 locked = NULL;
2188 if ( !locked && p_s_tb->FR[i] ) { 2200
2189 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i); 2201 for (i = p_s_tb->tb_path->path_length;
2190 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i])) 2202 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2191 locked = p_s_tb->FR[i]; 2203 if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2204 /* if I understand correctly, we can only be sure the last buffer
2205 ** in the path is in the tree --clm
2206 */
2207#ifdef CONFIG_REISERFS_CHECK
2208 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2209 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2210 tb_buffer_sanity_check(p_s_tb->tb_sb,
2211 PATH_OFFSET_PBUFFER
2212 (p_s_tb->tb_path,
2213 i), "S",
2214 p_s_tb->tb_path->
2215 path_length - i);
2216 }
2217#endif
2218 if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2219 PATH_OFFSET_PBUFFER
2220 (p_s_tb->tb_path,
2221 i))) {
2222 locked =
2223 PATH_OFFSET_PBUFFER(p_s_tb->tb_path,
2224 i);
2225 }
2226 }
2192 } 2227 }
2193 2228
2194 if ( !locked && p_s_tb->CFR[i] ) { 2229 for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i];
2195 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i); 2230 i++) {
2196 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i])) 2231
2197 locked = p_s_tb->CFR[i]; 2232 if (p_s_tb->lnum[i]) {
2233
2234 if (p_s_tb->L[i]) {
2235 tb_buffer_sanity_check(p_s_tb->tb_sb,
2236 p_s_tb->L[i],
2237 "L", i);
2238 if (!clear_all_dirty_bits
2239 (p_s_tb->tb_sb, p_s_tb->L[i]))
2240 locked = p_s_tb->L[i];
2241 }
2242
2243 if (!locked && p_s_tb->FL[i]) {
2244 tb_buffer_sanity_check(p_s_tb->tb_sb,
2245 p_s_tb->FL[i],
2246 "FL", i);
2247 if (!clear_all_dirty_bits
2248 (p_s_tb->tb_sb, p_s_tb->FL[i]))
2249 locked = p_s_tb->FL[i];
2250 }
2251
2252 if (!locked && p_s_tb->CFL[i]) {
2253 tb_buffer_sanity_check(p_s_tb->tb_sb,
2254 p_s_tb->CFL[i],
2255 "CFL", i);
2256 if (!clear_all_dirty_bits
2257 (p_s_tb->tb_sb, p_s_tb->CFL[i]))
2258 locked = p_s_tb->CFL[i];
2259 }
2260
2261 }
2262
2263 if (!locked && (p_s_tb->rnum[i])) {
2264
2265 if (p_s_tb->R[i]) {
2266 tb_buffer_sanity_check(p_s_tb->tb_sb,
2267 p_s_tb->R[i],
2268 "R", i);
2269 if (!clear_all_dirty_bits
2270 (p_s_tb->tb_sb, p_s_tb->R[i]))
2271 locked = p_s_tb->R[i];
2272 }
2273
2274 if (!locked && p_s_tb->FR[i]) {
2275 tb_buffer_sanity_check(p_s_tb->tb_sb,
2276 p_s_tb->FR[i],
2277 "FR", i);
2278 if (!clear_all_dirty_bits
2279 (p_s_tb->tb_sb, p_s_tb->FR[i]))
2280 locked = p_s_tb->FR[i];
2281 }
2282
2283 if (!locked && p_s_tb->CFR[i]) {
2284 tb_buffer_sanity_check(p_s_tb->tb_sb,
2285 p_s_tb->CFR[i],
2286 "CFR", i);
2287 if (!clear_all_dirty_bits
2288 (p_s_tb->tb_sb, p_s_tb->CFR[i]))
2289 locked = p_s_tb->CFR[i];
2290 }
2291 }
2292 }
2293 /* as far as I can tell, this is not required. The FEB list seems
2294 ** to be full of newly allocated nodes, which will never be locked,
2295 ** dirty, or anything else.
2296 ** To be safe, I'm putting in the checks and waits in. For the moment,
2297 ** they are needed to keep the code in journal.c from complaining
2298 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
2299 ** --clm
2300 */
2301 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2302 if (p_s_tb->FEB[i]) {
2303 if (!clear_all_dirty_bits
2304 (p_s_tb->tb_sb, p_s_tb->FEB[i]))
2305 locked = p_s_tb->FEB[i];
2306 }
2198 } 2307 }
2199 }
2200 }
2201 /* as far as I can tell, this is not required. The FEB list seems
2202 ** to be full of newly allocated nodes, which will never be locked,
2203 ** dirty, or anything else.
2204 ** To be safe, I'm putting in the checks and waits in. For the moment,
2205 ** they are needed to keep the code in journal.c from complaining
2206 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
2207 ** --clm
2208 */
2209 for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) {
2210 if ( p_s_tb->FEB[i] ) {
2211 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i]))
2212 locked = p_s_tb->FEB[i] ;
2213 }
2214 }
2215 2308
2216 if (locked) { 2309 if (locked) {
2217#ifdef CONFIG_REISERFS_CHECK 2310#ifdef CONFIG_REISERFS_CHECK
2218 repeat_counter++; 2311 repeat_counter++;
2219 if ( (repeat_counter % 10000) == 0) { 2312 if ((repeat_counter % 10000) == 0) {
2220 reiserfs_warning (p_s_tb->tb_sb, 2313 reiserfs_warning(p_s_tb->tb_sb,
2221 "wait_tb_buffers_until_released(): too many " 2314 "wait_tb_buffers_until_released(): too many "
2222 "iterations waiting for buffer to unlock " 2315 "iterations waiting for buffer to unlock "
2223 "(%b)", locked); 2316 "(%b)", locked);
2224 2317
2225 /* Don't loop forever. Try to recover from possible error. */ 2318 /* Don't loop forever. Try to recover from possible error. */
2226 2319
2227 return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON; 2320 return (FILESYSTEM_CHANGED_TB(p_s_tb)) ?
2228 } 2321 REPEAT_SEARCH : CARRY_ON;
2322 }
2229#endif 2323#endif
2230 __wait_on_buffer (locked); 2324 __wait_on_buffer(locked);
2231 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 2325 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2232 return REPEAT_SEARCH; 2326 return REPEAT_SEARCH;
2233 } 2327 }
2234 } 2328 }
2235 2329
2236 } while (locked); 2330 } while (locked);
2237 2331
2238 return CARRY_ON; 2332 return CARRY_ON;
2239} 2333}
2240 2334
2241
2242/* Prepare for balancing, that is 2335/* Prepare for balancing, that is
2243 * get all necessary parents, and neighbors; 2336 * get all necessary parents, and neighbors;
2244 * analyze what and where should be moved; 2337 * analyze what and where should be moved;
@@ -2267,252 +2360,266 @@ static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
2267 * -1 - if no_disk_space 2360 * -1 - if no_disk_space
2268 */ 2361 */
2269 2362
2363int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted
2364 const void *data // inserted item or data to be pasted
2365 )
2366{
2367 int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2368 int n_pos_in_item;
2270 2369
2271int fix_nodes (int n_op_mode, 2370 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2272 struct tree_balance * p_s_tb, 2371 ** during wait_tb_buffers_run
2273 struct item_head * p_s_ins_ih, // item head of item being inserted 2372 */
2274 const void * data // inserted item or data to be pasted 2373 int wait_tb_buffers_run = 0;
2275 ) { 2374 struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2276 int n_ret_value,
2277 n_h,
2278 n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2279 int n_pos_in_item;
2280
2281 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2282 ** during wait_tb_buffers_run
2283 */
2284 int wait_tb_buffers_run = 0 ;
2285 struct buffer_head * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2286
2287 ++ REISERFS_SB(p_s_tb -> tb_sb) -> s_fix_nodes;
2288
2289 n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2290
2291
2292 p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb);
2293
2294 /* we prepare and log the super here so it will already be in the
2295 ** transaction when do_balance needs to change it.
2296 ** This way do_balance won't have to schedule when trying to prepare
2297 ** the super for logging
2298 */
2299 reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2300 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1) ;
2301 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2302 SB_BUFFER_WITH_SB(p_s_tb->tb_sb)) ;
2303 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2304 return REPEAT_SEARCH;
2305
2306 /* if it possible in indirect_to_direct conversion */
2307 if (buffer_locked (p_s_tbS0)) {
2308 __wait_on_buffer (p_s_tbS0);
2309 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2310 return REPEAT_SEARCH;
2311 }
2312 2375
2313#ifdef CONFIG_REISERFS_CHECK 2376 ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes;
2314 if ( cur_tb ) { 2377
2315 print_cur_tb ("fix_nodes"); 2378 n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2316 reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes: there is pending do_balance"); 2379
2317 } 2380 p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb);
2318
2319 if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) {
2320 reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2321 "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode);
2322 }
2323
2324 /* Check parameters. */
2325 switch (n_op_mode) {
2326 case M_INSERT:
2327 if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) )
2328 reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2329 n_item_num, B_NR_ITEMS(p_s_tbS0));
2330 break;
2331 case M_PASTE:
2332 case M_DELETE:
2333 case M_CUT:
2334 if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) {
2335 print_block (p_s_tbS0, 0, -1, -1);
2336 reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", n_item_num, n_op_mode, p_s_tb->insert_size[0]);
2337 }
2338 break;
2339 default:
2340 reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation");
2341 }
2342#endif
2343 2381
2344 if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH) 2382 /* we prepare and log the super here so it will already be in the
2345 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat 2383 ** transaction when do_balance needs to change it.
2346 return REPEAT_SEARCH; 2384 ** This way do_balance won't have to schedule when trying to prepare
2385 ** the super for logging
2386 */
2387 reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2388 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1);
2389 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2390 SB_BUFFER_WITH_SB(p_s_tb->tb_sb));
2391 if (FILESYSTEM_CHANGED_TB(p_s_tb))
2392 return REPEAT_SEARCH;
2347 2393
2394 /* if it possible in indirect_to_direct conversion */
2395 if (buffer_locked(p_s_tbS0)) {
2396 __wait_on_buffer(p_s_tbS0);
2397 if (FILESYSTEM_CHANGED_TB(p_s_tb))
2398 return REPEAT_SEARCH;
2399 }
2400#ifdef CONFIG_REISERFS_CHECK
2401 if (cur_tb) {
2402 print_cur_tb("fix_nodes");
2403 reiserfs_panic(p_s_tb->tb_sb,
2404 "PAP-8305: fix_nodes: there is pending do_balance");
2405 }
2348 2406
2349 /* Starting from the leaf level; for all levels n_h of the tree. */ 2407 if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) {
2350 for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) { 2408 reiserfs_panic(p_s_tb->tb_sb,
2351 if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) { 2409 "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2352 goto repeat; 2410 "at the beginning of fix_nodes or not in tree (mode %c)",
2411 p_s_tbS0, p_s_tbS0, n_op_mode);
2353 } 2412 }
2354 2413
2355 if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num, 2414 /* Check parameters. */
2356 n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) { 2415 switch (n_op_mode) {
2357 if ( n_ret_value == NO_BALANCING_NEEDED ) { 2416 case M_INSERT:
2358 /* No balancing for higher levels needed. */ 2417 if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0))
2359 if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { 2418 reiserfs_panic(p_s_tb->tb_sb,
2360 goto repeat; 2419 "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2420 n_item_num, B_NR_ITEMS(p_s_tbS0));
2421 break;
2422 case M_PASTE:
2423 case M_DELETE:
2424 case M_CUT:
2425 if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) {
2426 print_block(p_s_tbS0, 0, -1, -1);
2427 reiserfs_panic(p_s_tb->tb_sb,
2428 "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n",
2429 n_item_num, n_op_mode,
2430 p_s_tb->insert_size[0]);
2361 } 2431 }
2362 if ( n_h != MAX_HEIGHT - 1 )
2363 p_s_tb->insert_size[n_h + 1] = 0;
2364 /* ok, analysis and resource gathering are complete */
2365 break; 2432 break;
2366 } 2433 default:
2367 goto repeat; 2434 reiserfs_panic(p_s_tb->tb_sb,
2435 "PAP-8340: fix_nodes: Incorrect mode of operation");
2368 } 2436 }
2437#endif
2369 2438
2370 if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { 2439 if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH)
2371 goto repeat; 2440 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2372 } 2441 return REPEAT_SEARCH;
2373 2442
2374 if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) { 2443 /* Starting from the leaf level; for all levels n_h of the tree. */
2375 goto repeat; /* No disk space, or schedule occurred and 2444 for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) {
2376 analysis may be invalid and needs to be redone. */ 2445 if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) {
2377 } 2446 goto repeat;
2378 2447 }
2379 if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) {
2380 /* We have a positive insert size but no nodes exist on this
2381 level, this means that we are creating a new root. */
2382 2448
2383 RFALSE( p_s_tb->blknum[n_h] != 1, 2449 if ((n_ret_value =
2384 "PAP-8350: creating new empty root"); 2450 check_balance(n_op_mode, p_s_tb, n_h, n_item_num,
2451 n_pos_in_item, p_s_ins_ih,
2452 data)) != CARRY_ON) {
2453 if (n_ret_value == NO_BALANCING_NEEDED) {
2454 /* No balancing for higher levels needed. */
2455 if ((n_ret_value =
2456 get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2457 goto repeat;
2458 }
2459 if (n_h != MAX_HEIGHT - 1)
2460 p_s_tb->insert_size[n_h + 1] = 0;
2461 /* ok, analysis and resource gathering are complete */
2462 break;
2463 }
2464 goto repeat;
2465 }
2385 2466
2386 if ( n_h < MAX_HEIGHT - 1 ) 2467 if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2387 p_s_tb->insert_size[n_h + 1] = 0; 2468 goto repeat;
2388 }
2389 else
2390 if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) {
2391 if ( p_s_tb->blknum[n_h] > 1 ) {
2392 /* The tree needs to be grown, so this node S[n_h]
2393 which is the root node is split into two nodes,
2394 and a new node (S[n_h+1]) will be created to
2395 become the root node. */
2396
2397 RFALSE( n_h == MAX_HEIGHT - 1,
2398 "PAP-8355: attempt to create too high of a tree");
2399
2400 p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE;
2401 } 2469 }
2402 else 2470
2403 if ( n_h < MAX_HEIGHT - 1 ) 2471 if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) {
2404 p_s_tb->insert_size[n_h + 1] = 0; 2472 goto repeat; /* No disk space, or schedule occurred and
2405 } 2473 analysis may be invalid and needs to be redone. */
2406 else 2474 }
2407 p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); 2475
2408 } 2476 if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) {
2409 2477 /* We have a positive insert size but no nodes exist on this
2410 if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) { 2478 level, this means that we are creating a new root. */
2411 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2479
2412 wait_tb_buffers_run = 1 ; 2480 RFALSE(p_s_tb->blknum[n_h] != 1,
2413 n_ret_value = REPEAT_SEARCH ; 2481 "PAP-8350: creating new empty root");
2414 goto repeat; 2482
2415 } else { 2483 if (n_h < MAX_HEIGHT - 1)
2416 return CARRY_ON; 2484 p_s_tb->insert_size[n_h + 1] = 0;
2485 } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) {
2486 if (p_s_tb->blknum[n_h] > 1) {
2487 /* The tree needs to be grown, so this node S[n_h]
2488 which is the root node is split into two nodes,
2489 and a new node (S[n_h+1]) will be created to
2490 become the root node. */
2491
2492 RFALSE(n_h == MAX_HEIGHT - 1,
2493 "PAP-8355: attempt to create too high of a tree");
2494
2495 p_s_tb->insert_size[n_h + 1] =
2496 (DC_SIZE +
2497 KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) +
2498 DC_SIZE;
2499 } else if (n_h < MAX_HEIGHT - 1)
2500 p_s_tb->insert_size[n_h + 1] = 0;
2501 } else
2502 p_s_tb->insert_size[n_h + 1] =
2503 (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
2417 } 2504 }
2418 } else {
2419 wait_tb_buffers_run = 1 ;
2420 goto repeat;
2421 }
2422
2423 repeat:
2424 // fix_nodes was unable to perform its calculation due to
2425 // filesystem got changed under us, lack of free disk space or i/o
2426 // failure. If the first is the case - the search will be
2427 // repeated. For now - free all resources acquired so far except
2428 // for the new allocated nodes
2429 {
2430 int i;
2431 2505
2432 /* Release path buffers. */ 2506 if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) {
2433 if (wait_tb_buffers_run) { 2507 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2434 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ; 2508 wait_tb_buffers_run = 1;
2509 n_ret_value = REPEAT_SEARCH;
2510 goto repeat;
2511 } else {
2512 return CARRY_ON;
2513 }
2435 } else { 2514 } else {
2436 pathrelse (p_s_tb->tb_path); 2515 wait_tb_buffers_run = 1;
2437 } 2516 goto repeat;
2438 /* brelse all resources collected for balancing */
2439 for ( i = 0; i < MAX_HEIGHT; i++ ) {
2440 if (wait_tb_buffers_run) {
2441 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]);
2442 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]);
2443 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]);
2444 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]);
2445 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]);
2446 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]);
2447 }
2448
2449 brelse (p_s_tb->L[i]);p_s_tb->L[i] = NULL;
2450 brelse (p_s_tb->R[i]);p_s_tb->R[i] = NULL;
2451 brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = NULL;
2452 brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = NULL;
2453 brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = NULL;
2454 brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = NULL;
2455 } 2517 }
2456 2518
2457 if (wait_tb_buffers_run) { 2519 repeat:
2458 for ( i = 0; i < MAX_FEB_SIZE; i++ ) { 2520 // fix_nodes was unable to perform its calculation due to
2459 if ( p_s_tb->FEB[i] ) { 2521 // filesystem got changed under us, lack of free disk space or i/o
2460 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2522 // failure. If the first is the case - the search will be
2461 p_s_tb->FEB[i]) ; 2523 // repeated. For now - free all resources acquired so far except
2524 // for the new allocated nodes
2525 {
2526 int i;
2527
2528 /* Release path buffers. */
2529 if (wait_tb_buffers_run) {
2530 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path);
2531 } else {
2532 pathrelse(p_s_tb->tb_path);
2533 }
2534 /* brelse all resources collected for balancing */
2535 for (i = 0; i < MAX_HEIGHT; i++) {
2536 if (wait_tb_buffers_run) {
2537 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2538 p_s_tb->L[i]);
2539 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2540 p_s_tb->R[i]);
2541 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2542 p_s_tb->FL[i]);
2543 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2544 p_s_tb->FR[i]);
2545 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2546 p_s_tb->
2547 CFL[i]);
2548 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2549 p_s_tb->
2550 CFR[i]);
2551 }
2552
2553 brelse(p_s_tb->L[i]);
2554 p_s_tb->L[i] = NULL;
2555 brelse(p_s_tb->R[i]);
2556 p_s_tb->R[i] = NULL;
2557 brelse(p_s_tb->FL[i]);
2558 p_s_tb->FL[i] = NULL;
2559 brelse(p_s_tb->FR[i]);
2560 p_s_tb->FR[i] = NULL;
2561 brelse(p_s_tb->CFL[i]);
2562 p_s_tb->CFL[i] = NULL;
2563 brelse(p_s_tb->CFR[i]);
2564 p_s_tb->CFR[i] = NULL;
2565 }
2566
2567 if (wait_tb_buffers_run) {
2568 for (i = 0; i < MAX_FEB_SIZE; i++) {
2569 if (p_s_tb->FEB[i]) {
2570 reiserfs_restore_prepared_buffer
2571 (p_s_tb->tb_sb, p_s_tb->FEB[i]);
2572 }
2573 }
2462 } 2574 }
2463 } 2575 return n_ret_value;
2464 } 2576 }
2465 return n_ret_value;
2466 }
2467 2577
2468} 2578}
2469 2579
2470
2471/* Anatoly will probably forgive me renaming p_s_tb to tb. I just 2580/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2472 wanted to make lines shorter */ 2581 wanted to make lines shorter */
2473void unfix_nodes (struct tree_balance * tb) 2582void unfix_nodes(struct tree_balance *tb)
2474{ 2583{
2475 int i; 2584 int i;
2476
2477 /* Release path buffers. */
2478 pathrelse_and_restore (tb->tb_sb, tb->tb_path);
2479
2480 /* brelse all resources collected for balancing */
2481 for ( i = 0; i < MAX_HEIGHT; i++ ) {
2482 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]);
2483 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]);
2484 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]);
2485 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]);
2486 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]);
2487 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]);
2488
2489 brelse (tb->L[i]);
2490 brelse (tb->R[i]);
2491 brelse (tb->FL[i]);
2492 brelse (tb->FR[i]);
2493 brelse (tb->CFL[i]);
2494 brelse (tb->CFR[i]);
2495 }
2496
2497 /* deal with list of allocated (used and unused) nodes */
2498 for ( i = 0; i < MAX_FEB_SIZE; i++ ) {
2499 if ( tb->FEB[i] ) {
2500 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr ;
2501 /* de-allocated block which was not used by balancing and
2502 bforget about buffer for it */
2503 brelse (tb->FEB[i]);
2504 reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
2505 }
2506 if (tb->used[i]) {
2507 /* release used as new nodes including a new root */
2508 brelse (tb->used[i]);
2509 }
2510 }
2511 2585
2512 if (tb->vn_buf) 2586 /* Release path buffers. */
2513 reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); 2587 pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2514 2588
2515} 2589 /* brelse all resources collected for balancing */
2590 for (i = 0; i < MAX_HEIGHT; i++) {
2591 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2592 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2593 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2594 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2595 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2596 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2597
2598 brelse(tb->L[i]);
2599 brelse(tb->R[i]);
2600 brelse(tb->FL[i]);
2601 brelse(tb->FR[i]);
2602 brelse(tb->CFL[i]);
2603 brelse(tb->CFR[i]);
2604 }
2516 2605
2606 /* deal with list of allocated (used and unused) nodes */
2607 for (i = 0; i < MAX_FEB_SIZE; i++) {
2608 if (tb->FEB[i]) {
2609 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2610 /* de-allocated block which was not used by balancing and
2611 bforget about buffer for it */
2612 brelse(tb->FEB[i]);
2613 reiserfs_free_block(tb->transaction_handle, NULL,
2614 blocknr, 0);
2615 }
2616 if (tb->used[i]) {
2617 /* release used as new nodes including a new root */
2618 brelse(tb->used[i]);
2619 }
2620 }
2517 2621
2622 if (tb->vn_buf)
2623 reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2518 2624
2625}
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 08d0508c2d39..37c1306eb9b7 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -22,7 +22,6 @@
22#include <asm/types.h> 22#include <asm/types.h>
23#include <asm/bug.h> 23#include <asm/bug.h>
24 24
25
26#define DELTA 0x9E3779B9 25#define DELTA 0x9E3779B9
27#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ 26#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
28#define PARTROUNDS 6 /* 6 gets complete mixing */ 27#define PARTROUNDS 6 /* 6 gets complete mixing */
@@ -48,105 +47,75 @@
48 h1 += b1; \ 47 h1 += b1; \
49 } while(0) 48 } while(0)
50 49
51
52u32 keyed_hash(const signed char *msg, int len) 50u32 keyed_hash(const signed char *msg, int len)
53{ 51{
54 u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3}; 52 u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 };
55 53
56 u32 h0 = k[0], h1 = k[1]; 54 u32 h0 = k[0], h1 = k[1];
57 u32 a, b, c, d; 55 u32 a, b, c, d;
58 u32 pad; 56 u32 pad;
59 int i; 57 int i;
60
61 // assert(len >= 0 && len < 256);
62 58
63 pad = (u32)len | ((u32)len << 8); 59 // assert(len >= 0 && len < 256);
60
61 pad = (u32) len | ((u32) len << 8);
64 pad |= pad << 16; 62 pad |= pad << 16;
65 63
66 while(len >= 16) 64 while (len >= 16) {
67 { 65 a = (u32) msg[0] |
68 a = (u32)msg[ 0] | 66 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
69 (u32)msg[ 1] << 8 | 67 b = (u32) msg[4] |
70 (u32)msg[ 2] << 16| 68 (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
71 (u32)msg[ 3] << 24; 69 c = (u32) msg[8] |
72 b = (u32)msg[ 4] | 70 (u32) msg[9] << 8 |
73 (u32)msg[ 5] << 8 | 71 (u32) msg[10] << 16 | (u32) msg[11] << 24;
74 (u32)msg[ 6] << 16| 72 d = (u32) msg[12] |
75 (u32)msg[ 7] << 24; 73 (u32) msg[13] << 8 |
76 c = (u32)msg[ 8] | 74 (u32) msg[14] << 16 | (u32) msg[15] << 24;
77 (u32)msg[ 9] << 8 | 75
78 (u32)msg[10] << 16|
79 (u32)msg[11] << 24;
80 d = (u32)msg[12] |
81 (u32)msg[13] << 8 |
82 (u32)msg[14] << 16|
83 (u32)msg[15] << 24;
84
85 TEACORE(PARTROUNDS); 76 TEACORE(PARTROUNDS);
86 77
87 len -= 16; 78 len -= 16;
88 msg += 16; 79 msg += 16;
89 } 80 }
90 81
91 if (len >= 12) 82 if (len >= 12) {
92 { 83 a = (u32) msg[0] |
93 a = (u32)msg[ 0] | 84 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
94 (u32)msg[ 1] << 8 | 85 b = (u32) msg[4] |
95 (u32)msg[ 2] << 16| 86 (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
96 (u32)msg[ 3] << 24; 87 c = (u32) msg[8] |
97 b = (u32)msg[ 4] | 88 (u32) msg[9] << 8 |
98 (u32)msg[ 5] << 8 | 89 (u32) msg[10] << 16 | (u32) msg[11] << 24;
99 (u32)msg[ 6] << 16|
100 (u32)msg[ 7] << 24;
101 c = (u32)msg[ 8] |
102 (u32)msg[ 9] << 8 |
103 (u32)msg[10] << 16|
104 (u32)msg[11] << 24;
105 90
106 d = pad; 91 d = pad;
107 for(i = 12; i < len; i++) 92 for (i = 12; i < len; i++) {
108 {
109 d <<= 8; 93 d <<= 8;
110 d |= msg[i]; 94 d |= msg[i];
111 } 95 }
112 } 96 } else if (len >= 8) {
113 else if (len >= 8) 97 a = (u32) msg[0] |
114 { 98 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
115 a = (u32)msg[ 0] | 99 b = (u32) msg[4] |
116 (u32)msg[ 1] << 8 | 100 (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
117 (u32)msg[ 2] << 16|
118 (u32)msg[ 3] << 24;
119 b = (u32)msg[ 4] |
120 (u32)msg[ 5] << 8 |
121 (u32)msg[ 6] << 16|
122 (u32)msg[ 7] << 24;
123 101
124 c = d = pad; 102 c = d = pad;
125 for(i = 8; i < len; i++) 103 for (i = 8; i < len; i++) {
126 {
127 c <<= 8; 104 c <<= 8;
128 c |= msg[i]; 105 c |= msg[i];
129 } 106 }
130 } 107 } else if (len >= 4) {
131 else if (len >= 4) 108 a = (u32) msg[0] |
132 { 109 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
133 a = (u32)msg[ 0] |
134 (u32)msg[ 1] << 8 |
135 (u32)msg[ 2] << 16|
136 (u32)msg[ 3] << 24;
137 110
138 b = c = d = pad; 111 b = c = d = pad;
139 for(i = 4; i < len; i++) 112 for (i = 4; i < len; i++) {
140 {
141 b <<= 8; 113 b <<= 8;
142 b |= msg[i]; 114 b |= msg[i];
143 } 115 }
144 } 116 } else {
145 else
146 {
147 a = b = c = d = pad; 117 a = b = c = d = pad;
148 for(i = 0; i < len; i++) 118 for (i = 0; i < len; i++) {
149 {
150 a <<= 8; 119 a <<= 8;
151 a |= msg[i]; 120 a |= msg[i];
152 } 121 }
@@ -155,55 +124,59 @@ u32 keyed_hash(const signed char *msg, int len)
155 TEACORE(FULLROUNDS); 124 TEACORE(FULLROUNDS);
156 125
157/* return 0;*/ 126/* return 0;*/
158 return h0^h1; 127 return h0 ^ h1;
159} 128}
160 129
161/* What follows in this file is copyright 2000 by Hans Reiser, and the 130/* What follows in this file is copyright 2000 by Hans Reiser, and the
162 * licensing of what follows is governed by reiserfs/README */ 131 * licensing of what follows is governed by reiserfs/README */
163 132
164u32 yura_hash (const signed char *msg, int len) 133u32 yura_hash(const signed char *msg, int len)
165{ 134{
166 int j, pow; 135 int j, pow;
167 u32 a, c; 136 u32 a, c;
168 int i; 137 int i;
169 138
170 for (pow=1,i=1; i < len; i++) pow = pow * 10; 139 for (pow = 1, i = 1; i < len; i++)
171 140 pow = pow * 10;
172 if (len == 1) 141
173 a = msg[0]-48; 142 if (len == 1)
174 else 143 a = msg[0] - 48;
175 a = (msg[0] - 48) * pow; 144 else
176 145 a = (msg[0] - 48) * pow;
177 for (i=1; i < len; i++) { 146
178 c = msg[i] - 48; 147 for (i = 1; i < len; i++) {
179 for (pow=1,j=i; j < len-1; j++) pow = pow * 10; 148 c = msg[i] - 48;
180 a = a + c * pow; 149 for (pow = 1, j = i; j < len - 1; j++)
181 } 150 pow = pow * 10;
182 151 a = a + c * pow;
183 for (; i < 40; i++) { 152 }
184 c = '0' - 48; 153
185 for (pow=1,j=i; j < len-1; j++) pow = pow * 10; 154 for (; i < 40; i++) {
186 a = a + c * pow; 155 c = '0' - 48;
187 } 156 for (pow = 1, j = i; j < len - 1; j++)
188 157 pow = pow * 10;
189 for (; i < 256; i++) { 158 a = a + c * pow;
190 c = i; 159 }
191 for (pow=1,j=i; j < len-1; j++) pow = pow * 10; 160
192 a = a + c * pow; 161 for (; i < 256; i++) {
193 } 162 c = i;
194 163 for (pow = 1, j = i; j < len - 1; j++)
195 a = a << 7; 164 pow = pow * 10;
196 return a; 165 a = a + c * pow;
166 }
167
168 a = a << 7;
169 return a;
197} 170}
198 171
199u32 r5_hash (const signed char *msg, int len) 172u32 r5_hash(const signed char *msg, int len)
200{ 173{
201 u32 a=0; 174 u32 a = 0;
202 while(*msg) { 175 while (*msg) {
203 a += *msg << 4; 176 a += *msg << 4;
204 a += *msg >> 4; 177 a += *msg >> 4;
205 a *= 11; 178 a *= 11;
206 msg++; 179 msg++;
207 } 180 }
208 return a; 181 return a;
209} 182}
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index a362125da0d8..6c5a726fd34b 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -10,13 +10,8 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11 11
12/* this is one and only function that is used outside (do_balance.c) */ 12/* this is one and only function that is used outside (do_balance.c) */
13int balance_internal ( 13int balance_internal(struct tree_balance *,
14 struct tree_balance * , 14 int, int, struct item_head *, struct buffer_head **);
15 int,
16 int,
17 struct item_head * ,
18 struct buffer_head **
19 );
20 15
21/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ 16/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */
22#define INTERNAL_SHIFT_FROM_S_TO_L 0 17#define INTERNAL_SHIFT_FROM_S_TO_L 0
@@ -27,464 +22,474 @@ int balance_internal (
27#define INTERNAL_INSERT_TO_L 5 22#define INTERNAL_INSERT_TO_L 5
28#define INTERNAL_INSERT_TO_R 6 23#define INTERNAL_INSERT_TO_R 6
29 24
30static void internal_define_dest_src_infos ( 25static void internal_define_dest_src_infos(int shift_mode,
31 int shift_mode, 26 struct tree_balance *tb,
32 struct tree_balance * tb, 27 int h,
33 int h, 28 struct buffer_info *dest_bi,
34 struct buffer_info * dest_bi, 29 struct buffer_info *src_bi,
35 struct buffer_info * src_bi, 30 int *d_key, struct buffer_head **cf)
36 int * d_key,
37 struct buffer_head ** cf
38 )
39{ 31{
40 memset (dest_bi, 0, sizeof (struct buffer_info)); 32 memset(dest_bi, 0, sizeof(struct buffer_info));
41 memset (src_bi, 0, sizeof (struct buffer_info)); 33 memset(src_bi, 0, sizeof(struct buffer_info));
42 /* define dest, src, dest parent, dest position */ 34 /* define dest, src, dest parent, dest position */
43 switch (shift_mode) { 35 switch (shift_mode) {
44 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ 36 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */
45 src_bi->tb = tb; 37 src_bi->tb = tb;
46 src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 38 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
47 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 39 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
48 src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 40 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
49 dest_bi->tb = tb; 41 dest_bi->tb = tb;
50 dest_bi->bi_bh = tb->L[h]; 42 dest_bi->bi_bh = tb->L[h];
51 dest_bi->bi_parent = tb->FL[h]; 43 dest_bi->bi_parent = tb->FL[h];
52 dest_bi->bi_position = get_left_neighbor_position (tb, h); 44 dest_bi->bi_position = get_left_neighbor_position(tb, h);
53 *d_key = tb->lkey[h]; 45 *d_key = tb->lkey[h];
54 *cf = tb->CFL[h]; 46 *cf = tb->CFL[h];
55 break; 47 break;
56 case INTERNAL_SHIFT_FROM_L_TO_S: 48 case INTERNAL_SHIFT_FROM_L_TO_S:
57 src_bi->tb = tb; 49 src_bi->tb = tb;
58 src_bi->bi_bh = tb->L[h]; 50 src_bi->bi_bh = tb->L[h];
59 src_bi->bi_parent = tb->FL[h]; 51 src_bi->bi_parent = tb->FL[h];
60 src_bi->bi_position = get_left_neighbor_position (tb, h); 52 src_bi->bi_position = get_left_neighbor_position(tb, h);
61 dest_bi->tb = tb; 53 dest_bi->tb = tb;
62 dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 54 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
63 dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 55 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
64 dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ 56 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */
65 *d_key = tb->lkey[h]; 57 *d_key = tb->lkey[h];
66 *cf = tb->CFL[h]; 58 *cf = tb->CFL[h];
67 break; 59 break;
68 60
69 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ 61 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */
70 src_bi->tb = tb; 62 src_bi->tb = tb;
71 src_bi->bi_bh = tb->R[h]; 63 src_bi->bi_bh = tb->R[h];
72 src_bi->bi_parent = tb->FR[h]; 64 src_bi->bi_parent = tb->FR[h];
73 src_bi->bi_position = get_right_neighbor_position (tb, h); 65 src_bi->bi_position = get_right_neighbor_position(tb, h);
74 dest_bi->tb = tb; 66 dest_bi->tb = tb;
75 dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 67 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
76 dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 68 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
77 dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 69 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
78 *d_key = tb->rkey[h]; 70 *d_key = tb->rkey[h];
79 *cf = tb->CFR[h]; 71 *cf = tb->CFR[h];
80 break; 72 break;
81 73
82 case INTERNAL_SHIFT_FROM_S_TO_R: 74 case INTERNAL_SHIFT_FROM_S_TO_R:
83 src_bi->tb = tb; 75 src_bi->tb = tb;
84 src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 76 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
85 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 77 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
86 src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 78 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
87 dest_bi->tb = tb; 79 dest_bi->tb = tb;
88 dest_bi->bi_bh = tb->R[h]; 80 dest_bi->bi_bh = tb->R[h];
89 dest_bi->bi_parent = tb->FR[h]; 81 dest_bi->bi_parent = tb->FR[h];
90 dest_bi->bi_position = get_right_neighbor_position (tb, h); 82 dest_bi->bi_position = get_right_neighbor_position(tb, h);
91 *d_key = tb->rkey[h]; 83 *d_key = tb->rkey[h];
92 *cf = tb->CFR[h]; 84 *cf = tb->CFR[h];
93 break; 85 break;
94 86
95 case INTERNAL_INSERT_TO_L: 87 case INTERNAL_INSERT_TO_L:
96 dest_bi->tb = tb; 88 dest_bi->tb = tb;
97 dest_bi->bi_bh = tb->L[h]; 89 dest_bi->bi_bh = tb->L[h];
98 dest_bi->bi_parent = tb->FL[h]; 90 dest_bi->bi_parent = tb->FL[h];
99 dest_bi->bi_position = get_left_neighbor_position (tb, h); 91 dest_bi->bi_position = get_left_neighbor_position(tb, h);
100 break; 92 break;
101 93
102 case INTERNAL_INSERT_TO_S: 94 case INTERNAL_INSERT_TO_S:
103 dest_bi->tb = tb; 95 dest_bi->tb = tb;
104 dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 96 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
105 dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 97 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
106 dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 98 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
107 break; 99 break;
108 100
109 case INTERNAL_INSERT_TO_R: 101 case INTERNAL_INSERT_TO_R:
110 dest_bi->tb = tb; 102 dest_bi->tb = tb;
111 dest_bi->bi_bh = tb->R[h]; 103 dest_bi->bi_bh = tb->R[h];
112 dest_bi->bi_parent = tb->FR[h]; 104 dest_bi->bi_parent = tb->FR[h];
113 dest_bi->bi_position = get_right_neighbor_position (tb, h); 105 dest_bi->bi_position = get_right_neighbor_position(tb, h);
114 break; 106 break;
115 107
116 default: 108 default:
117 reiserfs_panic (tb->tb_sb, "internal_define_dest_src_infos: shift type is unknown (%d)", shift_mode); 109 reiserfs_panic(tb->tb_sb,
118 } 110 "internal_define_dest_src_infos: shift type is unknown (%d)",
111 shift_mode);
112 }
119} 113}
120 114
121
122
123/* Insert count node pointers into buffer cur before position to + 1. 115/* Insert count node pointers into buffer cur before position to + 1.
124 * Insert count items into buffer cur before position to. 116 * Insert count items into buffer cur before position to.
125 * Items and node pointers are specified by inserted and bh respectively. 117 * Items and node pointers are specified by inserted and bh respectively.
126 */ 118 */
127static void internal_insert_childs (struct buffer_info * cur_bi, 119static void internal_insert_childs(struct buffer_info *cur_bi,
128 int to, int count, 120 int to, int count,
129 struct item_head * inserted, 121 struct item_head *inserted,
130 struct buffer_head ** bh 122 struct buffer_head **bh)
131 )
132{ 123{
133 struct buffer_head * cur = cur_bi->bi_bh; 124 struct buffer_head *cur = cur_bi->bi_bh;
134 struct block_head * blkh; 125 struct block_head *blkh;
135 int nr; 126 int nr;
136 struct reiserfs_key * ih; 127 struct reiserfs_key *ih;
137 struct disk_child new_dc[2]; 128 struct disk_child new_dc[2];
138 struct disk_child * dc; 129 struct disk_child *dc;
139 int i; 130 int i;
140 131
141 if (count <= 0) 132 if (count <= 0)
142 return; 133 return;
143 134
144 blkh = B_BLK_HEAD(cur); 135 blkh = B_BLK_HEAD(cur);
145 nr = blkh_nr_item(blkh); 136 nr = blkh_nr_item(blkh);
146 137
147 RFALSE( count > 2, 138 RFALSE(count > 2, "too many children (%d) are to be inserted", count);
148 "too many children (%d) are to be inserted", count); 139 RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE),
149 RFALSE( B_FREE_SPACE (cur) < count * (KEY_SIZE + DC_SIZE), 140 "no enough free space (%d), needed %d bytes",
150 "no enough free space (%d), needed %d bytes", 141 B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE));
151 B_FREE_SPACE (cur), count * (KEY_SIZE + DC_SIZE)); 142
152 143 /* prepare space for count disk_child */
153 /* prepare space for count disk_child */ 144 dc = B_N_CHILD(cur, to + 1);
154 dc = B_N_CHILD(cur,to+1); 145
155 146 memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE);
156 memmove (dc + count, dc, (nr+1-(to+1)) * DC_SIZE); 147
157 148 /* copy to_be_insert disk children */
158 /* copy to_be_insert disk children */ 149 for (i = 0; i < count; i++) {
159 for (i = 0; i < count; i ++) { 150 put_dc_size(&(new_dc[i]),
160 put_dc_size( &(new_dc[i]), MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); 151 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
161 put_dc_block_number( &(new_dc[i]), bh[i]->b_blocknr ); 152 put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr);
162 } 153 }
163 memcpy (dc, new_dc, DC_SIZE * count); 154 memcpy(dc, new_dc, DC_SIZE * count);
164 155
165 156 /* prepare space for count items */
166 /* prepare space for count items */ 157 ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to));
167 ih = B_N_PDELIM_KEY (cur, ((to == -1) ? 0 : to)); 158
168 159 memmove(ih + count, ih,
169 memmove (ih + count, ih, (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); 160 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
170 161
171 /* copy item headers (keys) */ 162 /* copy item headers (keys) */
172 memcpy (ih, inserted, KEY_SIZE); 163 memcpy(ih, inserted, KEY_SIZE);
173 if ( count > 1 ) 164 if (count > 1)
174 memcpy (ih + 1, inserted + 1, KEY_SIZE); 165 memcpy(ih + 1, inserted + 1, KEY_SIZE);
175 166
176 /* sizes, item number */ 167 /* sizes, item number */
177 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + count ); 168 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count);
178 set_blkh_free_space( blkh, 169 set_blkh_free_space(blkh,
179 blkh_free_space(blkh) - count * (DC_SIZE + KEY_SIZE ) ); 170 blkh_free_space(blkh) - count * (DC_SIZE +
180 171 KEY_SIZE));
181 do_balance_mark_internal_dirty (cur_bi->tb, cur,0); 172
182 173 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
183 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 174
184 check_internal (cur); 175 /*&&&&&&&&&&&&&&&&&&&&&&&& */
185 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 176 check_internal(cur);
186 177 /*&&&&&&&&&&&&&&&&&&&&&&&& */
187 if (cur_bi->bi_parent) { 178
188 struct disk_child *t_dc = B_N_CHILD (cur_bi->bi_parent,cur_bi->bi_position); 179 if (cur_bi->bi_parent) {
189 put_dc_size( t_dc, dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE))); 180 struct disk_child *t_dc =
190 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, 0); 181 B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
191 182 put_dc_size(t_dc,
192 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 183 dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE)));
193 check_internal (cur_bi->bi_parent); 184 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
194 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 185 0);
195 } 186
187 /*&&&&&&&&&&&&&&&&&&&&&&&& */
188 check_internal(cur_bi->bi_parent);
189 /*&&&&&&&&&&&&&&&&&&&&&&&& */
190 }
196 191
197} 192}
198 193
199
200/* Delete del_num items and node pointers from buffer cur starting from * 194/* Delete del_num items and node pointers from buffer cur starting from *
201 * the first_i'th item and first_p'th pointers respectively. */ 195 * the first_i'th item and first_p'th pointers respectively. */
202static void internal_delete_pointers_items ( 196static void internal_delete_pointers_items(struct buffer_info *cur_bi,
203 struct buffer_info * cur_bi, 197 int first_p,
204 int first_p, 198 int first_i, int del_num)
205 int first_i,
206 int del_num
207 )
208{ 199{
209 struct buffer_head * cur = cur_bi->bi_bh; 200 struct buffer_head *cur = cur_bi->bi_bh;
210 int nr; 201 int nr;
211 struct block_head * blkh; 202 struct block_head *blkh;
212 struct reiserfs_key * key; 203 struct reiserfs_key *key;
213 struct disk_child * dc; 204 struct disk_child *dc;
214 205
215 RFALSE( cur == NULL, "buffer is 0"); 206 RFALSE(cur == NULL, "buffer is 0");
216 RFALSE( del_num < 0, 207 RFALSE(del_num < 0,
217 "negative number of items (%d) can not be deleted", del_num); 208 "negative number of items (%d) can not be deleted", del_num);
218 RFALSE( first_p < 0 || first_p + del_num > B_NR_ITEMS (cur) + 1 || first_i < 0, 209 RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1
219 "first pointer order (%d) < 0 or " 210 || first_i < 0,
220 "no so many pointers (%d), only (%d) or " 211 "first pointer order (%d) < 0 or "
221 "first key order %d < 0", first_p, 212 "no so many pointers (%d), only (%d) or "
222 first_p + del_num, B_NR_ITEMS (cur) + 1, first_i); 213 "first key order %d < 0", first_p, first_p + del_num,
223 if ( del_num == 0 ) 214 B_NR_ITEMS(cur) + 1, first_i);
224 return; 215 if (del_num == 0)
225 216 return;
226 blkh = B_BLK_HEAD(cur); 217
227 nr = blkh_nr_item(blkh); 218 blkh = B_BLK_HEAD(cur);
228 219 nr = blkh_nr_item(blkh);
229 if ( first_p == 0 && del_num == nr + 1 ) { 220
230 RFALSE( first_i != 0, "1st deleted key must have order 0, not %d", first_i); 221 if (first_p == 0 && del_num == nr + 1) {
231 make_empty_node (cur_bi); 222 RFALSE(first_i != 0,
232 return; 223 "1st deleted key must have order 0, not %d", first_i);
233 } 224 make_empty_node(cur_bi);
234 225 return;
235 RFALSE( first_i + del_num > B_NR_ITEMS (cur), 226 }
236 "first_i = %d del_num = %d "
237 "no so many keys (%d) in the node (%b)(%z)",
238 first_i, del_num, first_i + del_num, cur, cur);
239
240
241 /* deleting */
242 dc = B_N_CHILD (cur, first_p);
243
244 memmove (dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
245 key = B_N_PDELIM_KEY (cur, first_i);
246 memmove (key, key + del_num, (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - del_num) * DC_SIZE);
247
248
249 /* sizes, item number */
250 set_blkh_nr_item( blkh, blkh_nr_item(blkh) - del_num );
251 set_blkh_free_space( blkh,
252 blkh_free_space(blkh) + (del_num * (KEY_SIZE + DC_SIZE) ) );
253
254 do_balance_mark_internal_dirty (cur_bi->tb, cur, 0);
255 /*&&&&&&&&&&&&&&&&&&&&&&&*/
256 check_internal (cur);
257 /*&&&&&&&&&&&&&&&&&&&&&&&*/
258
259 if (cur_bi->bi_parent) {
260 struct disk_child *t_dc;
261 t_dc = B_N_CHILD (cur_bi->bi_parent, cur_bi->bi_position);
262 put_dc_size( t_dc, dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE) ) );
263
264 do_balance_mark_internal_dirty (cur_bi->tb, cur_bi->bi_parent,0);
265 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
266 check_internal (cur_bi->bi_parent);
267 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
268 }
269}
270 227
228 RFALSE(first_i + del_num > B_NR_ITEMS(cur),
229 "first_i = %d del_num = %d "
230 "no so many keys (%d) in the node (%b)(%z)",
231 first_i, del_num, first_i + del_num, cur, cur);
232
233 /* deleting */
234 dc = B_N_CHILD(cur, first_p);
235
236 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
237 key = B_N_PDELIM_KEY(cur, first_i);
238 memmove(key, key + del_num,
239 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
240 del_num) * DC_SIZE);
241
242 /* sizes, item number */
243 set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
244 set_blkh_free_space(blkh,
245 blkh_free_space(blkh) +
246 (del_num * (KEY_SIZE + DC_SIZE)));
247
248 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
249 /*&&&&&&&&&&&&&&&&&&&&&&& */
250 check_internal(cur);
251 /*&&&&&&&&&&&&&&&&&&&&&&& */
252
253 if (cur_bi->bi_parent) {
254 struct disk_child *t_dc;
255 t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
256 put_dc_size(t_dc,
257 dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE)));
258
259 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
260 0);
261 /*&&&&&&&&&&&&&&&&&&&&&&&& */
262 check_internal(cur_bi->bi_parent);
263 /*&&&&&&&&&&&&&&&&&&&&&&&& */
264 }
265}
271 266
272/* delete n node pointers and items starting from given position */ 267/* delete n node pointers and items starting from given position */
273static void internal_delete_childs (struct buffer_info * cur_bi, 268static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
274 int from, int n)
275{ 269{
276 int i_from; 270 int i_from;
277 271
278 i_from = (from == 0) ? from : from - 1; 272 i_from = (from == 0) ? from : from - 1;
279 273
280 /* delete n pointers starting from `from' position in CUR; 274 /* delete n pointers starting from `from' position in CUR;
281 delete n keys starting from 'i_from' position in CUR; 275 delete n keys starting from 'i_from' position in CUR;
282 */ 276 */
283 internal_delete_pointers_items (cur_bi, from, i_from, n); 277 internal_delete_pointers_items(cur_bi, from, i_from, n);
284} 278}
285 279
286
287/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest 280/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
288* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest 281* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest
289 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest 282 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest
290 */ 283 */
291static void internal_copy_pointers_items ( 284static void internal_copy_pointers_items(struct buffer_info *dest_bi,
292 struct buffer_info * dest_bi, 285 struct buffer_head *src,
293 struct buffer_head * src, 286 int last_first, int cpy_num)
294 int last_first, int cpy_num
295 )
296{ 287{
297 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * 288 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST *
298 * as delimiting key have already inserted to buffer dest.*/ 289 * as delimiting key have already inserted to buffer dest.*/
299 struct buffer_head * dest = dest_bi->bi_bh; 290 struct buffer_head *dest = dest_bi->bi_bh;
300 int nr_dest, nr_src; 291 int nr_dest, nr_src;
301 int dest_order, src_order; 292 int dest_order, src_order;
302 struct block_head * blkh; 293 struct block_head *blkh;
303 struct reiserfs_key * key; 294 struct reiserfs_key *key;
304 struct disk_child * dc; 295 struct disk_child *dc;
305 296
306 nr_src = B_NR_ITEMS (src); 297 nr_src = B_NR_ITEMS(src);
307 298
308 RFALSE( dest == NULL || src == NULL, 299 RFALSE(dest == NULL || src == NULL,
309 "src (%p) or dest (%p) buffer is 0", src, dest); 300 "src (%p) or dest (%p) buffer is 0", src, dest);
310 RFALSE( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, 301 RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
311 "invalid last_first parameter (%d)", last_first); 302 "invalid last_first parameter (%d)", last_first);
312 RFALSE( nr_src < cpy_num - 1, 303 RFALSE(nr_src < cpy_num - 1,
313 "no so many items (%d) in src (%d)", cpy_num, nr_src); 304 "no so many items (%d) in src (%d)", cpy_num, nr_src);
314 RFALSE( cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num); 305 RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num);
315 RFALSE( cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest), 306 RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest),
316 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)", 307 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)",
317 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); 308 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest));
318 309
319 if ( cpy_num == 0 ) 310 if (cpy_num == 0)
320 return; 311 return;
321 312
322 /* coping */ 313 /* coping */
323 blkh = B_BLK_HEAD(dest); 314 blkh = B_BLK_HEAD(dest);
324 nr_dest = blkh_nr_item(blkh); 315 nr_dest = blkh_nr_item(blkh);
325 316
326 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest;*/ 317 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */
327 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0;*/ 318 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */
328 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order = nr_src - cpy_num + 1) : 319 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order =
329 (dest_order = nr_dest, src_order = 0); 320 nr_src - cpy_num + 1) : (dest_order =
321 nr_dest,
322 src_order =
323 0);
330 324
331 /* prepare space for cpy_num pointers */ 325 /* prepare space for cpy_num pointers */
332 dc = B_N_CHILD (dest, dest_order); 326 dc = B_N_CHILD(dest, dest_order);
333 327
334 memmove (dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); 328 memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE);
335 329
336 /* insert pointers */ 330 /* insert pointers */
337 memcpy (dc, B_N_CHILD (src, src_order), DC_SIZE * cpy_num); 331 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
338 332
339 333 /* prepare space for cpy_num - 1 item headers */
340 /* prepare space for cpy_num - 1 item headers */ 334 key = B_N_PDELIM_KEY(dest, dest_order);
341 key = B_N_PDELIM_KEY(dest, dest_order); 335 memmove(key + cpy_num - 1, key,
342 memmove (key + cpy_num - 1, key, 336 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
343 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + cpy_num)); 337 cpy_num));
344 338
345 339 /* insert headers */
346 /* insert headers */ 340 memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1));
347 memcpy (key, B_N_PDELIM_KEY (src, src_order), KEY_SIZE * (cpy_num - 1)); 341
348 342 /* sizes, item number */
349 /* sizes, item number */ 343 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
350 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + (cpy_num - 1 ) ); 344 set_blkh_free_space(blkh,
351 set_blkh_free_space( blkh, 345 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) +
352 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num ) ); 346 DC_SIZE * cpy_num));
353 347
354 do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); 348 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
355 349
356 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 350 /*&&&&&&&&&&&&&&&&&&&&&&&& */
357 check_internal (dest); 351 check_internal(dest);
358 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 352 /*&&&&&&&&&&&&&&&&&&&&&&&& */
359 353
360 if (dest_bi->bi_parent) { 354 if (dest_bi->bi_parent) {
361 struct disk_child *t_dc; 355 struct disk_child *t_dc;
362 t_dc = B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position); 356 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
363 put_dc_size( t_dc, dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num) ); 357 put_dc_size(t_dc,
364 358 dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) +
365 do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); 359 DC_SIZE * cpy_num));
366 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 360
367 check_internal (dest_bi->bi_parent); 361 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
368 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 362 0);
369 } 363 /*&&&&&&&&&&&&&&&&&&&&&&&& */
364 check_internal(dest_bi->bi_parent);
365 /*&&&&&&&&&&&&&&&&&&&&&&&& */
366 }
370 367
371} 368}
372 369
373
374/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. 370/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
375 * Delete cpy_num - del_par items and node pointers from buffer src. 371 * Delete cpy_num - del_par items and node pointers from buffer src.
376 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. 372 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
377 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 373 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
378 */ 374 */
379static void internal_move_pointers_items (struct buffer_info * dest_bi, 375static void internal_move_pointers_items(struct buffer_info *dest_bi,
380 struct buffer_info * src_bi, 376 struct buffer_info *src_bi,
381 int last_first, int cpy_num, int del_par) 377 int last_first, int cpy_num,
378 int del_par)
382{ 379{
383 int first_pointer; 380 int first_pointer;
384 int first_item; 381 int first_item;
385 382
386 internal_copy_pointers_items (dest_bi, src_bi->bi_bh, last_first, cpy_num); 383 internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first,
387 384 cpy_num);
388 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ 385
389 first_pointer = 0; 386 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
390 first_item = 0; 387 first_pointer = 0;
391 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, 388 first_item = 0;
392 for key - with first_item */ 389 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
393 internal_delete_pointers_items (src_bi, first_pointer, first_item, cpy_num - del_par); 390 for key - with first_item */
394 } else { /* shift_right occurs */ 391 internal_delete_pointers_items(src_bi, first_pointer,
395 int i, j; 392 first_item, cpy_num - del_par);
396 393 } else { /* shift_right occurs */
397 i = ( cpy_num - del_par == ( j = B_NR_ITEMS(src_bi->bi_bh)) + 1 ) ? 0 : j - cpy_num + del_par; 394 int i, j;
398 395
399 internal_delete_pointers_items (src_bi, j + 1 - cpy_num + del_par, i, cpy_num - del_par); 396 i = (cpy_num - del_par ==
400 } 397 (j =
398 B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num +
399 del_par;
400
401 internal_delete_pointers_items(src_bi,
402 j + 1 - cpy_num + del_par, i,
403 cpy_num - del_par);
404 }
401} 405}
402 406
403/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ 407/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
404static void internal_insert_key (struct buffer_info * dest_bi, 408static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */
405 int dest_position_before, /* insert key before key with n_dest number */ 409 struct buffer_head *src, int src_position)
406 struct buffer_head * src,
407 int src_position)
408{ 410{
409 struct buffer_head * dest = dest_bi->bi_bh; 411 struct buffer_head *dest = dest_bi->bi_bh;
410 int nr; 412 int nr;
411 struct block_head * blkh; 413 struct block_head *blkh;
412 struct reiserfs_key * key; 414 struct reiserfs_key *key;
413 415
414 RFALSE( dest == NULL || src == NULL, 416 RFALSE(dest == NULL || src == NULL,
415 "source(%p) or dest(%p) buffer is 0", src, dest); 417 "source(%p) or dest(%p) buffer is 0", src, dest);
416 RFALSE( dest_position_before < 0 || src_position < 0, 418 RFALSE(dest_position_before < 0 || src_position < 0,
417 "source(%d) or dest(%d) key number less than 0", 419 "source(%d) or dest(%d) key number less than 0",
418 src_position, dest_position_before); 420 src_position, dest_position_before);
419 RFALSE( dest_position_before > B_NR_ITEMS (dest) || 421 RFALSE(dest_position_before > B_NR_ITEMS(dest) ||
420 src_position >= B_NR_ITEMS(src), 422 src_position >= B_NR_ITEMS(src),
421 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", 423 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))",
422 dest_position_before, B_NR_ITEMS (dest), 424 dest_position_before, B_NR_ITEMS(dest),
423 src_position, B_NR_ITEMS(src)); 425 src_position, B_NR_ITEMS(src));
424 RFALSE( B_FREE_SPACE (dest) < KEY_SIZE, 426 RFALSE(B_FREE_SPACE(dest) < KEY_SIZE,
425 "no enough free space (%d) in dest buffer", B_FREE_SPACE (dest)); 427 "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest));
426 428
427 blkh = B_BLK_HEAD(dest); 429 blkh = B_BLK_HEAD(dest);
428 nr = blkh_nr_item(blkh); 430 nr = blkh_nr_item(blkh);
429 431
430 /* prepare space for inserting key */ 432 /* prepare space for inserting key */
431 key = B_N_PDELIM_KEY (dest, dest_position_before); 433 key = B_N_PDELIM_KEY(dest, dest_position_before);
432 memmove (key + 1, key, (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); 434 memmove(key + 1, key,
433 435 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
434 /* insert key */ 436
435 memcpy (key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); 437 /* insert key */
436 438 memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE);
437 /* Change dirt, free space, item number fields. */ 439
438 440 /* Change dirt, free space, item number fields. */
439 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + 1 ); 441
440 set_blkh_free_space( blkh, blkh_free_space(blkh) - KEY_SIZE ); 442 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
441 443 set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE);
442 do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); 444
443 445 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
444 if (dest_bi->bi_parent) { 446
445 struct disk_child *t_dc; 447 if (dest_bi->bi_parent) {
446 t_dc = B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position); 448 struct disk_child *t_dc;
447 put_dc_size( t_dc, dc_size(t_dc) + KEY_SIZE ); 449 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
448 450 put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE);
449 do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); 451
450 } 452 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
453 0);
454 }
451} 455}
452 456
453
454
455/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 457/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
456 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. 458 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
457 * Replace d_key'th key in buffer cfl. 459 * Replace d_key'th key in buffer cfl.
458 * Delete pointer_amount items and node pointers from buffer src. 460 * Delete pointer_amount items and node pointers from buffer src.
459 */ 461 */
460/* this can be invoked both to shift from S to L and from R to S */ 462/* this can be invoked both to shift from S to L and from R to S */
461static void internal_shift_left ( 463static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */
462 int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ 464 struct tree_balance *tb,
463 struct tree_balance * tb, 465 int h, int pointer_amount)
464 int h,
465 int pointer_amount
466 )
467{ 466{
468 struct buffer_info dest_bi, src_bi; 467 struct buffer_info dest_bi, src_bi;
469 struct buffer_head * cf; 468 struct buffer_head *cf;
470 int d_key_position; 469 int d_key_position;
471 470
472 internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 471 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
473 472 &d_key_position, &cf);
474 /*printk("pointer_amount = %d\n",pointer_amount);*/ 473
475 474 /*printk("pointer_amount = %d\n",pointer_amount); */
476 if (pointer_amount) { 475
477 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ 476 if (pointer_amount) {
478 internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); 477 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */
479 478 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
480 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { 479 d_key_position);
481 if (src_bi.bi_position/*src->b_item_order*/ == 0) 480
482 replace_key (tb, cf, d_key_position, src_bi.bi_parent/*src->b_parent*/, 0); 481 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) {
483 } else 482 if (src_bi.bi_position /*src->b_item_order */ == 0)
484 replace_key (tb, cf, d_key_position, src_bi.bi_bh, pointer_amount - 1); 483 replace_key(tb, cf, d_key_position,
485 } 484 src_bi.
486 /* last parameter is del_parameter */ 485 bi_parent /*src->b_parent */ , 0);
487 internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 0); 486 } else
487 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
488 pointer_amount - 1);
489 }
490 /* last parameter is del_parameter */
491 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
492 pointer_amount, 0);
488 493
489} 494}
490 495
@@ -493,67 +498,66 @@ static void internal_shift_left (
493 * Delete n - 1 items and node pointers from buffer S[h]. 498 * Delete n - 1 items and node pointers from buffer S[h].
494 */ 499 */
495/* it always shifts from S[h] to L[h] */ 500/* it always shifts from S[h] to L[h] */
496static void internal_shift1_left ( 501static void internal_shift1_left(struct tree_balance *tb,
497 struct tree_balance * tb, 502 int h, int pointer_amount)
498 int h,
499 int pointer_amount
500 )
501{ 503{
502 struct buffer_info dest_bi, src_bi; 504 struct buffer_info dest_bi, src_bi;
503 struct buffer_head * cf; 505 struct buffer_head *cf;
504 int d_key_position; 506 int d_key_position;
505 507
506 internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 508 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
509 &dest_bi, &src_bi, &d_key_position, &cf);
507 510
508 if ( pointer_amount > 0 ) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ 511 if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
509 internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); 512 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
510 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]);*/ 513 d_key_position);
514 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
511 515
512 /* last parameter is del_parameter */ 516 /* last parameter is del_parameter */
513 internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 1); 517 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
514 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1);*/ 518 pointer_amount, 1);
519 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
515} 520}
516 521
517
518/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. 522/* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
519 * Copy n node pointers and n - 1 items from buffer src to buffer dest. 523 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
520 * Replace d_key'th key in buffer cfr. 524 * Replace d_key'th key in buffer cfr.
521 * Delete n items and node pointers from buffer src. 525 * Delete n items and node pointers from buffer src.
522 */ 526 */
523static void internal_shift_right ( 527static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */
524 int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ 528 struct tree_balance *tb,
525 struct tree_balance * tb, 529 int h, int pointer_amount)
526 int h,
527 int pointer_amount
528 )
529{ 530{
530 struct buffer_info dest_bi, src_bi; 531 struct buffer_info dest_bi, src_bi;
531 struct buffer_head * cf; 532 struct buffer_head *cf;
532 int d_key_position; 533 int d_key_position;
533 int nr; 534 int nr;
534 535
535 536 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
536 internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 537 &d_key_position, &cf);
537 538
538 nr = B_NR_ITEMS (src_bi.bi_bh); 539 nr = B_NR_ITEMS(src_bi.bi_bh);
539 540
540 if (pointer_amount > 0) { 541 if (pointer_amount > 0) {
541 /* insert delimiting key from common father of dest and src to dest node into position 0 */ 542 /* insert delimiting key from common father of dest and src to dest node into position 0 */
542 internal_insert_key (&dest_bi, 0, cf, d_key_position); 543 internal_insert_key(&dest_bi, 0, cf, d_key_position);
543 if (nr == pointer_amount - 1) { 544 if (nr == pointer_amount - 1) {
544 RFALSE( src_bi.bi_bh != PATH_H_PBUFFER (tb->tb_path, h)/*tb->S[h]*/ || 545 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
545 dest_bi.bi_bh != tb->R[h], 546 dest_bi.bi_bh != tb->R[h],
546 "src (%p) must be == tb->S[h](%p) when it disappears", 547 "src (%p) must be == tb->S[h](%p) when it disappears",
547 src_bi.bi_bh, PATH_H_PBUFFER (tb->tb_path, h)); 548 src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h));
548 /* when S[h] disappers replace left delemiting key as well */ 549 /* when S[h] disappers replace left delemiting key as well */
549 if (tb->CFL[h]) 550 if (tb->CFL[h])
550 replace_key (tb, cf, d_key_position, tb->CFL[h], tb->lkey[h]); 551 replace_key(tb, cf, d_key_position, tb->CFL[h],
551 } else 552 tb->lkey[h]);
552 replace_key (tb, cf, d_key_position, src_bi.bi_bh, nr - pointer_amount); 553 } else
553 } 554 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
554 555 nr - pointer_amount);
555 /* last parameter is del_parameter */ 556 }
556 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 0); 557
558 /* last parameter is del_parameter */
559 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
560 pointer_amount, 0);
557} 561}
558 562
559/* Insert delimiting key to R[h]. 563/* Insert delimiting key to R[h].
@@ -561,498 +565,526 @@ static void internal_shift_right (
561 * Delete n - 1 items and node pointers from buffer S[h]. 565 * Delete n - 1 items and node pointers from buffer S[h].
562 */ 566 */
563/* it always shift from S[h] to R[h] */ 567/* it always shift from S[h] to R[h] */
564static void internal_shift1_right ( 568static void internal_shift1_right(struct tree_balance *tb,
565 struct tree_balance * tb, 569 int h, int pointer_amount)
566 int h,
567 int pointer_amount
568 )
569{ 570{
570 struct buffer_info dest_bi, src_bi; 571 struct buffer_info dest_bi, src_bi;
571 struct buffer_head * cf; 572 struct buffer_head *cf;
572 int d_key_position; 573 int d_key_position;
573 574
574 internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 575 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
575 576 &dest_bi, &src_bi, &d_key_position, &cf);
576 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ 577
577 internal_insert_key (&dest_bi, 0, cf, d_key_position); 578 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */
578 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]);*/ 579 internal_insert_key(&dest_bi, 0, cf, d_key_position);
579 580 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
580 /* last parameter is del_parameter */
581 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 1);
582 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1);*/
583}
584 581
582 /* last parameter is del_parameter */
583 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
584 pointer_amount, 1);
585 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
586}
585 587
586/* Delete insert_num node pointers together with their left items 588/* Delete insert_num node pointers together with their left items
587 * and balance current node.*/ 589 * and balance current node.*/
588static void balance_internal_when_delete (struct tree_balance * tb, 590static void balance_internal_when_delete(struct tree_balance *tb,
589 int h, int child_pos) 591 int h, int child_pos)
590{ 592{
591 int insert_num; 593 int insert_num;
592 int n; 594 int n;
593 struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); 595 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
594 struct buffer_info bi; 596 struct buffer_info bi;
595 597
596 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); 598 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE));
597 599
598 /* delete child-node-pointer(s) together with their left item(s) */ 600 /* delete child-node-pointer(s) together with their left item(s) */
599 bi.tb = tb; 601 bi.tb = tb;
600 bi.bi_bh = tbSh; 602 bi.bi_bh = tbSh;
601 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); 603 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
602 bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 604 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
603 605
604 internal_delete_childs (&bi, child_pos, -insert_num); 606 internal_delete_childs(&bi, child_pos, -insert_num);
605 607
606 RFALSE( tb->blknum[h] > 1, 608 RFALSE(tb->blknum[h] > 1,
607 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]); 609 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]);
608 610
609 n = B_NR_ITEMS(tbSh); 611 n = B_NR_ITEMS(tbSh);
610 612
611 if ( tb->lnum[h] == 0 && tb->rnum[h] == 0 ) { 613 if (tb->lnum[h] == 0 && tb->rnum[h] == 0) {
612 if ( tb->blknum[h] == 0 ) { 614 if (tb->blknum[h] == 0) {
613 /* node S[h] (root of the tree) is empty now */ 615 /* node S[h] (root of the tree) is empty now */
614 struct buffer_head *new_root; 616 struct buffer_head *new_root;
615 617
616 RFALSE( n || B_FREE_SPACE (tbSh) != MAX_CHILD_SIZE(tbSh) - DC_SIZE, 618 RFALSE(n
617 "buffer must have only 0 keys (%d)", n); 619 || B_FREE_SPACE(tbSh) !=
618 RFALSE( bi.bi_parent, "root has parent (%p)", bi.bi_parent); 620 MAX_CHILD_SIZE(tbSh) - DC_SIZE,
619 621 "buffer must have only 0 keys (%d)", n);
620 /* choose a new root */ 622 RFALSE(bi.bi_parent, "root has parent (%p)",
621 if ( ! tb->L[h-1] || ! B_NR_ITEMS(tb->L[h-1]) ) 623 bi.bi_parent);
622 new_root = tb->R[h-1]; 624
623 else 625 /* choose a new root */
624 new_root = tb->L[h-1]; 626 if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1]))
625 /* switch super block's tree root block number to the new value */ 627 new_root = tb->R[h - 1];
626 PUT_SB_ROOT_BLOCK( tb->tb_sb, new_root->b_blocknr ); 628 else
627 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; 629 new_root = tb->L[h - 1];
628 PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) - 1 ); 630 /* switch super block's tree root block number to the new value */
629 631 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
630 do_balance_mark_sb_dirty (tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); 632 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --;
631 /*&&&&&&&&&&&&&&&&&&&&&&*/ 633 PUT_SB_TREE_HEIGHT(tb->tb_sb,
632 if (h > 1) 634 SB_TREE_HEIGHT(tb->tb_sb) - 1);
633 /* use check_internal if new root is an internal node */ 635
634 check_internal (new_root); 636 do_balance_mark_sb_dirty(tb,
635 /*&&&&&&&&&&&&&&&&&&&&&&*/ 637 REISERFS_SB(tb->tb_sb)->s_sbh,
636 638 1);
637 /* do what is needed for buffer thrown from tree */ 639 /*&&&&&&&&&&&&&&&&&&&&&& */
638 reiserfs_invalidate_buffer(tb, tbSh); 640 if (h > 1)
639 return; 641 /* use check_internal if new root is an internal node */
642 check_internal(new_root);
643 /*&&&&&&&&&&&&&&&&&&&&&& */
644
645 /* do what is needed for buffer thrown from tree */
646 reiserfs_invalidate_buffer(tb, tbSh);
647 return;
648 }
649 return;
650 }
651
652 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */
653
654 RFALSE(tb->rnum[h] != 0,
655 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
656 h, tb->rnum[h]);
657
658 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
659 reiserfs_invalidate_buffer(tb, tbSh);
660
661 return;
662 }
663
664 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */
665 RFALSE(tb->lnum[h] != 0,
666 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
667 h, tb->lnum[h]);
668
669 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
670
671 reiserfs_invalidate_buffer(tb, tbSh);
672 return;
640 } 673 }
641 return;
642 }
643
644 if ( tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1 ) { /* join S[h] with L[h] */
645
646 RFALSE( tb->rnum[h] != 0,
647 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
648 h, tb->rnum[h]);
649
650 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
651 reiserfs_invalidate_buffer(tb, tbSh);
652
653 return;
654 }
655
656 if ( tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1 ) { /* join S[h] with R[h] */
657 RFALSE( tb->lnum[h] != 0,
658 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
659 h, tb->lnum[h]);
660
661 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
662
663 reiserfs_invalidate_buffer(tb,tbSh);
664 return;
665 }
666
667 if ( tb->lnum[h] < 0 ) { /* borrow from left neighbor L[h] */
668 RFALSE( tb->rnum[h] != 0,
669 "wrong tb->rnum[%d]==%d when borrow from L[h]", h, tb->rnum[h]);
670 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]);*/
671 internal_shift_right (INTERNAL_SHIFT_FROM_L_TO_S, tb, h, -tb->lnum[h]);
672 return;
673 }
674
675 if ( tb->rnum[h] < 0 ) { /* borrow from right neighbor R[h] */
676 RFALSE( tb->lnum[h] != 0,
677 "invalid tb->lnum[%d]==%d when borrow from R[h]",
678 h, tb->lnum[h]);
679 internal_shift_left (INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]);/*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]);*/
680 return;
681 }
682
683 if ( tb->lnum[h] > 0 ) { /* split S[h] into two parts and put them into neighbors */
684 RFALSE( tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
685 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
686 h, tb->lnum[h], h, tb->rnum[h], n);
687
688 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);/*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]);*/
689 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]);
690
691 reiserfs_invalidate_buffer (tb, tbSh);
692
693 return;
694 }
695 reiserfs_panic (tb->tb_sb, "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
696 h, tb->lnum[h], h, tb->rnum[h]);
697}
698 674
675 if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */
676 RFALSE(tb->rnum[h] != 0,
677 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
678 tb->rnum[h]);
679 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
680 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
681 -tb->lnum[h]);
682 return;
683 }
684
685 if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */
686 RFALSE(tb->lnum[h] != 0,
687 "invalid tb->lnum[%d]==%d when borrow from R[h]",
688 h, tb->lnum[h]);
689 internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */
690 return;
691 }
692
693 if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */
694 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
695 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
696 h, tb->lnum[h], h, tb->rnum[h], n);
697
698 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */
699 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
700 tb->rnum[h]);
701
702 reiserfs_invalidate_buffer(tb, tbSh);
703
704 return;
705 }
706 reiserfs_panic(tb->tb_sb,
707 "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
708 h, tb->lnum[h], h, tb->rnum[h]);
709}
699 710
700/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/ 711/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/
701static void replace_lkey ( 712static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
702 struct tree_balance * tb,
703 int h,
704 struct item_head * key
705 )
706{ 713{
707 RFALSE( tb->L[h] == NULL || tb->CFL[h] == NULL, 714 RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL,
708 "L[h](%p) and CFL[h](%p) must exist in replace_lkey", 715 "L[h](%p) and CFL[h](%p) must exist in replace_lkey",
709 tb->L[h], tb->CFL[h]); 716 tb->L[h], tb->CFL[h]);
710 717
711 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) 718 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
712 return; 719 return;
713 720
714 memcpy (B_N_PDELIM_KEY(tb->CFL[h],tb->lkey[h]), key, KEY_SIZE); 721 memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
715 722
716 do_balance_mark_internal_dirty (tb, tb->CFL[h],0); 723 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
717} 724}
718 725
719
720/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ 726/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/
721static void replace_rkey ( 727static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
722 struct tree_balance * tb,
723 int h,
724 struct item_head * key
725 )
726{ 728{
727 RFALSE( tb->R[h] == NULL || tb->CFR[h] == NULL, 729 RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL,
728 "R[h](%p) and CFR[h](%p) must exist in replace_rkey", 730 "R[h](%p) and CFR[h](%p) must exist in replace_rkey",
729 tb->R[h], tb->CFR[h]); 731 tb->R[h], tb->CFR[h]);
730 RFALSE( B_NR_ITEMS(tb->R[h]) == 0, 732 RFALSE(B_NR_ITEMS(tb->R[h]) == 0,
731 "R[h] can not be empty if it exists (item number=%d)", 733 "R[h] can not be empty if it exists (item number=%d)",
732 B_NR_ITEMS(tb->R[h])); 734 B_NR_ITEMS(tb->R[h]));
733 735
734 memcpy (B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]), key, KEY_SIZE); 736 memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
735 737
736 do_balance_mark_internal_dirty (tb, tb->CFR[h], 0); 738 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
737} 739}
738 740
739 741int balance_internal(struct tree_balance *tb, /* tree_balance structure */
740int balance_internal (struct tree_balance * tb, /* tree_balance structure */ 742 int h, /* level of the tree */
741 int h, /* level of the tree */ 743 int child_pos, struct item_head *insert_key, /* key for insertion on higher level */
742 int child_pos, 744 struct buffer_head **insert_ptr /* node for insertion on higher level */
743 struct item_head * insert_key, /* key for insertion on higher level */
744 struct buffer_head ** insert_ptr /* node for insertion on higher level*/
745 ) 745 )
746 /* if inserting/pasting 746 /* if inserting/pasting
747 { 747 {
748 child_pos is the position of the node-pointer in S[h] that * 748 child_pos is the position of the node-pointer in S[h] that *
749 pointed to S[h-1] before balancing of the h-1 level; * 749 pointed to S[h-1] before balancing of the h-1 level; *
750 this means that new pointers and items must be inserted AFTER * 750 this means that new pointers and items must be inserted AFTER *
751 child_pos 751 child_pos
752 } 752 }
753 else 753 else
754 { 754 {
755 it is the position of the leftmost pointer that must be deleted (together with 755 it is the position of the leftmost pointer that must be deleted (together with
756 its corresponding key to the left of the pointer) 756 its corresponding key to the left of the pointer)
757 as a result of the previous level's balancing. 757 as a result of the previous level's balancing.
758 } 758 }
759*/ 759 */
760{ 760{
761 struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); 761 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
762 struct buffer_info bi; 762 struct buffer_info bi;
763 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ 763 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */
764 int insert_num, n, k; 764 int insert_num, n, k;
765 struct buffer_head * S_new; 765 struct buffer_head *S_new;
766 struct item_head new_insert_key; 766 struct item_head new_insert_key;
767 struct buffer_head * new_insert_ptr = NULL; 767 struct buffer_head *new_insert_ptr = NULL;
768 struct item_head * new_insert_key_addr = insert_key; 768 struct item_head *new_insert_key_addr = insert_key;
769 769
770 RFALSE( h < 1, "h (%d) can not be < 1 on internal level", h); 770 RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h);
771 771
772 PROC_INFO_INC( tb -> tb_sb, balance_at[ h ] ); 772 PROC_INFO_INC(tb->tb_sb, balance_at[h]);
773 773
774 order = ( tbSh ) ? PATH_H_POSITION (tb->tb_path, h + 1)/*tb->S[h]->b_item_order*/ : 0; 774 order =
775 775 (tbSh) ? PATH_H_POSITION(tb->tb_path,
776 /* Using insert_size[h] calculate the number insert_num of items 776 h + 1) /*tb->S[h]->b_item_order */ : 0;
777 that must be inserted to or deleted from S[h]. */ 777
778 insert_num = tb->insert_size[h]/((int)(KEY_SIZE + DC_SIZE)); 778 /* Using insert_size[h] calculate the number insert_num of items
779 779 that must be inserted to or deleted from S[h]. */
780 /* Check whether insert_num is proper **/ 780 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
781 RFALSE( insert_num < -2 || insert_num > 2, 781
782 "incorrect number of items inserted to the internal node (%d)", 782 /* Check whether insert_num is proper * */
783 insert_num); 783 RFALSE(insert_num < -2 || insert_num > 2,
784 RFALSE( h > 1 && (insert_num > 1 || insert_num < -1), 784 "incorrect number of items inserted to the internal node (%d)",
785 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", 785 insert_num);
786 insert_num, h); 786 RFALSE(h > 1 && (insert_num > 1 || insert_num < -1),
787 787 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level",
788 /* Make balance in case insert_num < 0 */ 788 insert_num, h);
789 if ( insert_num < 0 ) { 789
790 balance_internal_when_delete (tb, h, child_pos); 790 /* Make balance in case insert_num < 0 */
791 return order; 791 if (insert_num < 0) {
792 } 792 balance_internal_when_delete(tb, h, child_pos);
793 793 return order;
794 k = 0;
795 if ( tb->lnum[h] > 0 ) {
796 /* shift lnum[h] items from S[h] to the left neighbor L[h].
797 check how many of new items fall into L[h] or CFL[h] after
798 shifting */
799 n = B_NR_ITEMS (tb->L[h]); /* number of items in L[h] */
800 if ( tb->lnum[h] <= child_pos ) {
801 /* new items don't fall into L[h] or CFL[h] */
802 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);
803 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]);*/
804 child_pos -= tb->lnum[h];
805 } else if ( tb->lnum[h] > child_pos + insert_num ) {
806 /* all new items fall into L[h] */
807 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h] - insert_num);
808 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
809 tb->lnum[h]-insert_num);
810 */
811 /* insert insert_num keys and node-pointers into L[h] */
812 bi.tb = tb;
813 bi.bi_bh = tb->L[h];
814 bi.bi_parent = tb->FL[h];
815 bi.bi_position = get_left_neighbor_position (tb, h);
816 internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next*/ n + child_pos + 1,
817 insert_num,insert_key,insert_ptr);
818
819 insert_num = 0;
820 } else {
821 struct disk_child * dc;
822
823 /* some items fall into L[h] or CFL[h], but some don't fall */
824 internal_shift1_left(tb,h,child_pos+1);
825 /* calculate number of new items that fall into L[h] */
826 k = tb->lnum[h] - child_pos - 1;
827 bi.tb = tb;
828 bi.bi_bh = tb->L[h];
829 bi.bi_parent = tb->FL[h];
830 bi.bi_position = get_left_neighbor_position (tb, h);
831 internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next,*/ n + child_pos + 1,k,
832 insert_key,insert_ptr);
833
834 replace_lkey(tb,h,insert_key + k);
835
836 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
837 dc = B_N_CHILD(tbSh, 0);
838 put_dc_size( dc, MAX_CHILD_SIZE(insert_ptr[k]) - B_FREE_SPACE (insert_ptr[k]));
839 put_dc_block_number( dc, insert_ptr[k]->b_blocknr );
840
841 do_balance_mark_internal_dirty (tb, tbSh, 0);
842
843 k++;
844 insert_key += k;
845 insert_ptr += k;
846 insert_num -= k;
847 child_pos = 0;
848 } 794 }
849 } /* tb->lnum[h] > 0 */
850
851 if ( tb->rnum[h] > 0 ) {
852 /*shift rnum[h] items from S[h] to the right neighbor R[h]*/
853 /* check how many of new items fall into R or CFR after shifting */
854 n = B_NR_ITEMS (tbSh); /* number of items in S[h] */
855 if ( n - tb->rnum[h] >= child_pos )
856 /* new items fall into S[h] */
857 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]);*/
858 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]);
859 else
860 if ( n + insert_num - tb->rnum[h] < child_pos )
861 {
862 /* all new items fall into R[h] */
863 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
864 tb->rnum[h] - insert_num);*/
865 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h] - insert_num);
866
867 /* insert insert_num keys and node-pointers into R[h] */
868 bi.tb = tb;
869 bi.bi_bh = tb->R[h];
870 bi.bi_parent = tb->FR[h];
871 bi.bi_position = get_right_neighbor_position (tb, h);
872 internal_insert_childs (&bi, /*tb->R[h],tb->S[h-1]->b_next*/ child_pos - n - insert_num + tb->rnum[h] - 1,
873 insert_num,insert_key,insert_ptr);
874 insert_num = 0;
875 }
876 else
877 {
878 struct disk_child * dc;
879
880 /* one of the items falls into CFR[h] */
881 internal_shift1_right(tb,h,n - child_pos + 1);
882 /* calculate number of new items that fall into R[h] */
883 k = tb->rnum[h] - n + child_pos - 1;
884 bi.tb = tb;
885 bi.bi_bh = tb->R[h];
886 bi.bi_parent = tb->FR[h];
887 bi.bi_position = get_right_neighbor_position (tb, h);
888 internal_insert_childs (&bi, /*tb->R[h], tb->R[h]->b_child,*/ 0, k, insert_key + 1, insert_ptr + 1);
889 795
890 replace_rkey(tb,h,insert_key + insert_num - k - 1); 796 k = 0;
797 if (tb->lnum[h] > 0) {
798 /* shift lnum[h] items from S[h] to the left neighbor L[h].
799 check how many of new items fall into L[h] or CFL[h] after
800 shifting */
801 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
802 if (tb->lnum[h] <= child_pos) {
803 /* new items don't fall into L[h] or CFL[h] */
804 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
805 tb->lnum[h]);
806 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
807 child_pos -= tb->lnum[h];
808 } else if (tb->lnum[h] > child_pos + insert_num) {
809 /* all new items fall into L[h] */
810 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
811 tb->lnum[h] - insert_num);
812 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
813 tb->lnum[h]-insert_num);
814 */
815 /* insert insert_num keys and node-pointers into L[h] */
816 bi.tb = tb;
817 bi.bi_bh = tb->L[h];
818 bi.bi_parent = tb->FL[h];
819 bi.bi_position = get_left_neighbor_position(tb, h);
820 internal_insert_childs(&bi,
821 /*tb->L[h], tb->S[h-1]->b_next */
822 n + child_pos + 1,
823 insert_num, insert_key,
824 insert_ptr);
825
826 insert_num = 0;
827 } else {
828 struct disk_child *dc;
829
830 /* some items fall into L[h] or CFL[h], but some don't fall */
831 internal_shift1_left(tb, h, child_pos + 1);
832 /* calculate number of new items that fall into L[h] */
833 k = tb->lnum[h] - child_pos - 1;
834 bi.tb = tb;
835 bi.bi_bh = tb->L[h];
836 bi.bi_parent = tb->FL[h];
837 bi.bi_position = get_left_neighbor_position(tb, h);
838 internal_insert_childs(&bi,
839 /*tb->L[h], tb->S[h-1]->b_next, */
840 n + child_pos + 1, k,
841 insert_key, insert_ptr);
842
843 replace_lkey(tb, h, insert_key + k);
844
845 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
846 dc = B_N_CHILD(tbSh, 0);
847 put_dc_size(dc,
848 MAX_CHILD_SIZE(insert_ptr[k]) -
849 B_FREE_SPACE(insert_ptr[k]));
850 put_dc_block_number(dc, insert_ptr[k]->b_blocknr);
851
852 do_balance_mark_internal_dirty(tb, tbSh, 0);
853
854 k++;
855 insert_key += k;
856 insert_ptr += k;
857 insert_num -= k;
858 child_pos = 0;
859 }
860 }
861 /* tb->lnum[h] > 0 */
862 if (tb->rnum[h] > 0) {
863 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
864 /* check how many of new items fall into R or CFR after shifting */
865 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
866 if (n - tb->rnum[h] >= child_pos)
867 /* new items fall into S[h] */
868 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
869 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
870 tb->rnum[h]);
871 else if (n + insert_num - tb->rnum[h] < child_pos) {
872 /* all new items fall into R[h] */
873 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
874 tb->rnum[h] - insert_num); */
875 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
876 tb->rnum[h] - insert_num);
877
878 /* insert insert_num keys and node-pointers into R[h] */
879 bi.tb = tb;
880 bi.bi_bh = tb->R[h];
881 bi.bi_parent = tb->FR[h];
882 bi.bi_position = get_right_neighbor_position(tb, h);
883 internal_insert_childs(&bi,
884 /*tb->R[h],tb->S[h-1]->b_next */
885 child_pos - n - insert_num +
886 tb->rnum[h] - 1,
887 insert_num, insert_key,
888 insert_ptr);
889 insert_num = 0;
890 } else {
891 struct disk_child *dc;
892
893 /* one of the items falls into CFR[h] */
894 internal_shift1_right(tb, h, n - child_pos + 1);
895 /* calculate number of new items that fall into R[h] */
896 k = tb->rnum[h] - n + child_pos - 1;
897 bi.tb = tb;
898 bi.bi_bh = tb->R[h];
899 bi.bi_parent = tb->FR[h];
900 bi.bi_position = get_right_neighbor_position(tb, h);
901 internal_insert_childs(&bi,
902 /*tb->R[h], tb->R[h]->b_child, */
903 0, k, insert_key + 1,
904 insert_ptr + 1);
905
906 replace_rkey(tb, h, insert_key + insert_num - k - 1);
907
908 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */
909 dc = B_N_CHILD(tb->R[h], 0);
910 put_dc_size(dc,
911 MAX_CHILD_SIZE(insert_ptr
912 [insert_num - k - 1]) -
913 B_FREE_SPACE(insert_ptr
914 [insert_num - k - 1]));
915 put_dc_block_number(dc,
916 insert_ptr[insert_num - k -
917 1]->b_blocknr);
918
919 do_balance_mark_internal_dirty(tb, tb->R[h], 0);
920
921 insert_num -= (k + 1);
922 }
923 }
891 924
892 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1]*/ 925 /** Fill new node that appears instead of S[h] **/
893 dc = B_N_CHILD(tb->R[h], 0); 926 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
894 put_dc_size( dc, MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - 927 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
895 B_FREE_SPACE (insert_ptr[insert_num-k-1]));
896 put_dc_block_number( dc, insert_ptr[insert_num-k-1]->b_blocknr );
897 928
898 do_balance_mark_internal_dirty (tb, tb->R[h],0); 929 if (!tb->blknum[h]) { /* node S[h] is empty now */
930 RFALSE(!tbSh, "S[h] is equal NULL");
899 931
900 insert_num -= (k + 1); 932 /* do what is needed for buffer thrown from tree */
901 } 933 reiserfs_invalidate_buffer(tb, tbSh);
902 } 934 return order;
935 }
903 936
904 /** Fill new node that appears instead of S[h] **/ 937 if (!tbSh) {
905 RFALSE( tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); 938 /* create new root */
906 RFALSE( tb->blknum[h] < 0, "blknum can not be < 0"); 939 struct disk_child *dc;
940 struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1);
941 struct block_head *blkh;
907 942
908 if ( ! tb->blknum[h] ) 943 if (tb->blknum[h] != 1)
909 { /* node S[h] is empty now */ 944 reiserfs_panic(NULL,
910 RFALSE( ! tbSh, "S[h] is equal NULL"); 945 "balance_internal: One new node required for creating the new root");
946 /* S[h] = empty buffer from the list FEB. */
947 tbSh = get_FEB(tb);
948 blkh = B_BLK_HEAD(tbSh);
949 set_blkh_level(blkh, h + 1);
911 950
912 /* do what is needed for buffer thrown from tree */ 951 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
913 reiserfs_invalidate_buffer(tb,tbSh); 952
914 return order; 953 dc = B_N_CHILD(tbSh, 0);
915 } 954 put_dc_block_number(dc, tbSh_1->b_blocknr);
916 955 put_dc_size(dc,
917 if ( ! tbSh ) { 956 (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1)));
918 /* create new root */ 957
919 struct disk_child * dc; 958 tb->insert_size[h] -= DC_SIZE;
920 struct buffer_head * tbSh_1 = PATH_H_PBUFFER (tb->tb_path, h - 1); 959 set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE);
921 struct block_head * blkh;
922
923
924 if ( tb->blknum[h] != 1 )
925 reiserfs_panic(NULL, "balance_internal: One new node required for creating the new root");
926 /* S[h] = empty buffer from the list FEB. */
927 tbSh = get_FEB (tb);
928 blkh = B_BLK_HEAD(tbSh);
929 set_blkh_level( blkh, h + 1 );
930
931 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
932
933 dc = B_N_CHILD(tbSh, 0);
934 put_dc_block_number( dc, tbSh_1->b_blocknr );
935 put_dc_size( dc, (MAX_CHILD_SIZE (tbSh_1) - B_FREE_SPACE (tbSh_1)));
936
937 tb->insert_size[h] -= DC_SIZE;
938 set_blkh_free_space( blkh, blkh_free_space(blkh) - DC_SIZE );
939
940 do_balance_mark_internal_dirty (tb, tbSh, 0);
941
942 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
943 check_internal (tbSh);
944 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
945
946 /* put new root into path structure */
947 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = tbSh;
948
949 /* Change root in structure super block. */
950 PUT_SB_ROOT_BLOCK( tb->tb_sb, tbSh->b_blocknr );
951 PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1 );
952 do_balance_mark_sb_dirty (tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
953 }
954
955 if ( tb->blknum[h] == 2 ) {
956 int snum;
957 struct buffer_info dest_bi, src_bi;
958 960
961 do_balance_mark_internal_dirty(tb, tbSh, 0);
959 962
960 /* S_new = free buffer from list FEB */ 963 /*&&&&&&&&&&&&&&&&&&&&&&&& */
961 S_new = get_FEB(tb); 964 check_internal(tbSh);
962 965 /*&&&&&&&&&&&&&&&&&&&&&&&& */
963 set_blkh_level( B_BLK_HEAD(S_new), h + 1 ); 966
964 967 /* put new root into path structure */
965 dest_bi.tb = tb; 968 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) =
966 dest_bi.bi_bh = S_new; 969 tbSh;
967 dest_bi.bi_parent = NULL; 970
968 dest_bi.bi_position = 0; 971 /* Change root in structure super block. */
969 src_bi.tb = tb; 972 PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr);
970 src_bi.bi_bh = tbSh; 973 PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1);
971 src_bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); 974 do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
972 src_bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1);
973
974 n = B_NR_ITEMS (tbSh); /* number of items in S[h] */
975 snum = (insert_num + n + 1)/2;
976 if ( n - snum >= child_pos ) {
977 /* new items don't fall into S_new */
978 /* store the delimiting key for the next level */
979 /* new_insert_key = (n - snum)'th key in S[h] */
980 memcpy (&new_insert_key,B_N_PDELIM_KEY(tbSh,n - snum),
981 KEY_SIZE);
982 /* last parameter is del_par */
983 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum, 0);
984 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0);*/
985 } else if ( n + insert_num - snum < child_pos ) {
986 /* all new items fall into S_new */
987 /* store the delimiting key for the next level */
988 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */
989 memcpy(&new_insert_key,B_N_PDELIM_KEY(tbSh,n + insert_num - snum),
990 KEY_SIZE);
991 /* last parameter is del_par */
992 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum - insert_num, 0);
993 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0);*/
994
995 /* insert insert_num keys and node-pointers into S_new */
996 internal_insert_childs (&dest_bi, /*S_new,tb->S[h-1]->b_next,*/child_pos - n - insert_num + snum - 1,
997 insert_num,insert_key,insert_ptr);
998
999 insert_num = 0;
1000 } else {
1001 struct disk_child * dc;
1002
1003 /* some items fall into S_new, but some don't fall */
1004 /* last parameter is del_par */
1005 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, n - child_pos + 1, 1);
1006 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1);*/
1007 /* calculate number of new items that fall into S_new */
1008 k = snum - n + child_pos - 1;
1009
1010 internal_insert_childs (&dest_bi, /*S_new,*/ 0, k, insert_key + 1, insert_ptr+1);
1011
1012 /* new_insert_key = insert_key[insert_num - k - 1] */
1013 memcpy(&new_insert_key,insert_key + insert_num - k - 1,
1014 KEY_SIZE);
1015 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
1016
1017 dc = B_N_CHILD(S_new,0);
1018 put_dc_size( dc, (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) -
1019 B_FREE_SPACE(insert_ptr[insert_num-k-1])) );
1020 put_dc_block_number( dc, insert_ptr[insert_num-k-1]->b_blocknr );
1021
1022 do_balance_mark_internal_dirty (tb, S_new,0);
1023
1024 insert_num -= (k + 1);
1025 } 975 }
1026 /* new_insert_ptr = node_pointer to S_new */ 976
1027 new_insert_ptr = S_new; 977 if (tb->blknum[h] == 2) {
1028 978 int snum;
1029 RFALSE (!buffer_journaled(S_new) || buffer_journal_dirty(S_new) || 979 struct buffer_info dest_bi, src_bi;
1030 buffer_dirty (S_new), 980
1031 "cm-00001: bad S_new (%b)", S_new); 981 /* S_new = free buffer from list FEB */
1032 982 S_new = get_FEB(tb);
1033 // S_new is released in unfix_nodes 983
1034 } 984 set_blkh_level(B_BLK_HEAD(S_new), h + 1);
1035 985
1036 n = B_NR_ITEMS (tbSh); /*number of items in S[h] */ 986 dest_bi.tb = tb;
1037 987 dest_bi.bi_bh = S_new;
1038 if ( 0 <= child_pos && child_pos <= n && insert_num > 0 ) { 988 dest_bi.bi_parent = NULL;
1039 bi.tb = tb; 989 dest_bi.bi_position = 0;
1040 bi.bi_bh = tbSh; 990 src_bi.tb = tb;
1041 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); 991 src_bi.bi_bh = tbSh;
1042 bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 992 src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1043 internal_insert_childs ( 993 src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1044 &bi,/*tbSh,*/ 994
1045 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next,*/ 995 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
1046 child_pos,insert_num,insert_key,insert_ptr 996 snum = (insert_num + n + 1) / 2;
1047 ); 997 if (n - snum >= child_pos) {
998 /* new items don't fall into S_new */
999 /* store the delimiting key for the next level */
1000 /* new_insert_key = (n - snum)'th key in S[h] */
1001 memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum),
1002 KEY_SIZE);
1003 /* last parameter is del_par */
1004 internal_move_pointers_items(&dest_bi, &src_bi,
1005 LAST_TO_FIRST, snum, 0);
1006 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
1007 } else if (n + insert_num - snum < child_pos) {
1008 /* all new items fall into S_new */
1009 /* store the delimiting key for the next level */
1010 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */
1011 memcpy(&new_insert_key,
1012 B_N_PDELIM_KEY(tbSh, n + insert_num - snum),
1013 KEY_SIZE);
1014 /* last parameter is del_par */
1015 internal_move_pointers_items(&dest_bi, &src_bi,
1016 LAST_TO_FIRST,
1017 snum - insert_num, 0);
1018 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
1019
1020 /* insert insert_num keys and node-pointers into S_new */
1021 internal_insert_childs(&dest_bi,
1022 /*S_new,tb->S[h-1]->b_next, */
1023 child_pos - n - insert_num +
1024 snum - 1,
1025 insert_num, insert_key,
1026 insert_ptr);
1027
1028 insert_num = 0;
1029 } else {
1030 struct disk_child *dc;
1031
1032 /* some items fall into S_new, but some don't fall */
1033 /* last parameter is del_par */
1034 internal_move_pointers_items(&dest_bi, &src_bi,
1035 LAST_TO_FIRST,
1036 n - child_pos + 1, 1);
1037 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
1038 /* calculate number of new items that fall into S_new */
1039 k = snum - n + child_pos - 1;
1040
1041 internal_insert_childs(&dest_bi, /*S_new, */ 0, k,
1042 insert_key + 1, insert_ptr + 1);
1043
1044 /* new_insert_key = insert_key[insert_num - k - 1] */
1045 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1046 KEY_SIZE);
1047 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
1048
1049 dc = B_N_CHILD(S_new, 0);
1050 put_dc_size(dc,
1051 (MAX_CHILD_SIZE
1052 (insert_ptr[insert_num - k - 1]) -
1053 B_FREE_SPACE(insert_ptr
1054 [insert_num - k - 1])));
1055 put_dc_block_number(dc,
1056 insert_ptr[insert_num - k -
1057 1]->b_blocknr);
1058
1059 do_balance_mark_internal_dirty(tb, S_new, 0);
1060
1061 insert_num -= (k + 1);
1062 }
1063 /* new_insert_ptr = node_pointer to S_new */
1064 new_insert_ptr = S_new;
1065
1066 RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new)
1067 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1068 S_new);
1069
1070 // S_new is released in unfix_nodes
1048 } 1071 }
1049 1072
1073 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
1050 1074
1051 memcpy (new_insert_key_addr,&new_insert_key,KEY_SIZE); 1075 if (0 <= child_pos && child_pos <= n && insert_num > 0) {
1076 bi.tb = tb;
1077 bi.bi_bh = tbSh;
1078 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1079 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1080 internal_insert_childs(&bi, /*tbSh, */
1081 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */
1082 child_pos, insert_num, insert_key,
1083 insert_ptr);
1084 }
1085
1086 memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
1052 insert_ptr[0] = new_insert_ptr; 1087 insert_ptr[0] = new_insert_ptr;
1053 1088
1054 return order; 1089 return order;
1055 } 1090}
1056
1057
1058
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2711dff1b7b4..ff291c973a56 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,107 +18,109 @@
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20 20
21extern int reiserfs_default_io_size; /* default io size devuned in super.c */ 21extern int reiserfs_default_io_size; /* default io size devuned in super.c */
22 22
23static int reiserfs_commit_write(struct file *f, struct page *page, 23static int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
25static int reiserfs_prepare_write(struct file *f, struct page *page, 25static int reiserfs_prepare_write(struct file *f, struct page *page,
26 unsigned from, unsigned to); 26 unsigned from, unsigned to);
27 27
28void reiserfs_delete_inode (struct inode * inode) 28void reiserfs_delete_inode(struct inode *inode)
29{ 29{
30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */
31 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS; 31 int jbegin_count =
32 struct reiserfs_transaction_handle th ; 32 JOURNAL_PER_BALANCE_CNT * 2 +
33 33 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
34 reiserfs_write_lock(inode->i_sb); 34 struct reiserfs_transaction_handle th;
35 35
36 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 36 reiserfs_write_lock(inode->i_sb);
37 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
38 down (&inode->i_sem);
39 37
40 reiserfs_delete_xattrs (inode); 38 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
39 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
40 down(&inode->i_sem);
41 41
42 if (journal_begin(&th, inode->i_sb, jbegin_count)) { 42 reiserfs_delete_xattrs(inode);
43 up (&inode->i_sem);
44 goto out;
45 }
46 reiserfs_update_inode_transaction(inode) ;
47 43
48 if (reiserfs_delete_object (&th, inode)) { 44 if (journal_begin(&th, inode->i_sb, jbegin_count)) {
49 up (&inode->i_sem); 45 up(&inode->i_sem);
50 goto out; 46 goto out;
51 } 47 }
48 reiserfs_update_inode_transaction(inode);
52 49
53 /* Do quota update inside a transaction for journaled quotas. We must do that 50 if (reiserfs_delete_object(&th, inode)) {
54 * after delete_object so that quota updates go into the same transaction as 51 up(&inode->i_sem);
55 * stat data deletion */ 52 goto out;
56 DQUOT_FREE_INODE(inode); 53 }
57 54
58 if (journal_end(&th, inode->i_sb, jbegin_count)) { 55 /* Do quota update inside a transaction for journaled quotas. We must do that
59 up (&inode->i_sem); 56 * after delete_object so that quota updates go into the same transaction as
60 goto out; 57 * stat data deletion */
61 } 58 DQUOT_FREE_INODE(inode);
59
60 if (journal_end(&th, inode->i_sb, jbegin_count)) {
61 up(&inode->i_sem);
62 goto out;
63 }
62 64
63 up (&inode->i_sem); 65 up(&inode->i_sem);
64 66
65 /* all items of file are deleted, so we can remove "save" link */ 67 /* all items of file are deleted, so we can remove "save" link */
66 remove_save_link (inode, 0/* not truncate */); /* we can't do anything 68 remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything
67 * about an error here */ 69 * about an error here */
68 } else { 70 } else {
69 /* no object items are in the tree */ 71 /* no object items are in the tree */
70 ; 72 ;
71 } 73 }
72out: 74 out:
73 clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ 75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
74 inode->i_blocks = 0; 76 inode->i_blocks = 0;
75 reiserfs_write_unlock(inode->i_sb); 77 reiserfs_write_unlock(inode->i_sb);
76} 78}
77 79
78static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, 80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
79 loff_t offset, int type, int length ) 81 __u32 objectid, loff_t offset, int type, int length)
80{ 82{
81 key->version = version; 83 key->version = version;
82 84
83 key->on_disk_key.k_dir_id = dirid; 85 key->on_disk_key.k_dir_id = dirid;
84 key->on_disk_key.k_objectid = objectid; 86 key->on_disk_key.k_objectid = objectid;
85 set_cpu_key_k_offset (key, offset); 87 set_cpu_key_k_offset(key, offset);
86 set_cpu_key_k_type (key, type); 88 set_cpu_key_k_type(key, type);
87 key->key_length = length; 89 key->key_length = length;
88} 90}
89 91
90
91/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set 92/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
92 offset and type of key */ 93 offset and type of key */
93void make_cpu_key (struct cpu_key * key, struct inode * inode, loff_t offset, 94void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
94 int type, int length ) 95 int type, int length)
95{ 96{
96 _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id), 97 _make_cpu_key(key, get_inode_item_key_version(inode),
97 le32_to_cpu (INODE_PKEY (inode)->k_objectid), 98 le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
98 offset, type, length); 99 le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
100 length);
99} 101}
100 102
101
102// 103//
103// when key is 0, do not set version and short key 104// when key is 0, do not set version and short key
104// 105//
105inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key, 106inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
106 int version, 107 int version,
107 loff_t offset, int type, int length, 108 loff_t offset, int type, int length,
108 int entry_count/*or ih_free_space*/) 109 int entry_count /*or ih_free_space */ )
109{ 110{
110 if (key) { 111 if (key) {
111 ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id); 112 ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
112 ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid); 113 ih->ih_key.k_objectid =
113 } 114 cpu_to_le32(key->on_disk_key.k_objectid);
114 put_ih_version( ih, version ); 115 }
115 set_le_ih_k_offset (ih, offset); 116 put_ih_version(ih, version);
116 set_le_ih_k_type (ih, type); 117 set_le_ih_k_offset(ih, offset);
117 put_ih_item_len( ih, length ); 118 set_le_ih_k_type(ih, type);
118 /* set_ih_free_space (ih, 0);*/ 119 put_ih_item_len(ih, length);
119 // for directory items it is entry count, for directs and stat 120 /* set_ih_free_space (ih, 0); */
120 // datas - 0xffff, for indirects - 0 121 // for directory items it is entry count, for directs and stat
121 put_ih_entry_count( ih, entry_count ); 122 // datas - 0xffff, for indirects - 0
123 put_ih_entry_count(ih, entry_count);
122} 124}
123 125
124// 126//
@@ -153,84 +155,84 @@ inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key
153** to be unmapped, so that block_prepare_write will correctly call 155** to be unmapped, so that block_prepare_write will correctly call
154** reiserfs_get_block to convert the tail into an unformatted node 156** reiserfs_get_block to convert the tail into an unformatted node
155*/ 157*/
156static inline void fix_tail_page_for_writing(struct page *page) { 158static inline void fix_tail_page_for_writing(struct page *page)
157 struct buffer_head *head, *next, *bh ; 159{
158 160 struct buffer_head *head, *next, *bh;
159 if (page && page_has_buffers(page)) { 161
160 head = page_buffers(page) ; 162 if (page && page_has_buffers(page)) {
161 bh = head ; 163 head = page_buffers(page);
162 do { 164 bh = head;
163 next = bh->b_this_page ; 165 do {
164 if (buffer_mapped(bh) && bh->b_blocknr == 0) { 166 next = bh->b_this_page;
165 reiserfs_unmap_buffer(bh) ; 167 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
166 } 168 reiserfs_unmap_buffer(bh);
167 bh = next ; 169 }
168 } while (bh != head) ; 170 bh = next;
169 } 171 } while (bh != head);
172 }
170} 173}
171 174
172/* reiserfs_get_block does not need to allocate a block only if it has been 175/* reiserfs_get_block does not need to allocate a block only if it has been
173 done already or non-hole position has been found in the indirect item */ 176 done already or non-hole position has been found in the indirect item */
174static inline int allocation_needed (int retval, b_blocknr_t allocated, 177static inline int allocation_needed(int retval, b_blocknr_t allocated,
175 struct item_head * ih, 178 struct item_head *ih,
176 __le32 * item, int pos_in_item) 179 __le32 * item, int pos_in_item)
177{ 180{
178 if (allocated) 181 if (allocated)
179 return 0; 182 return 0;
180 if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && 183 if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
181 get_block_num(item, pos_in_item)) 184 get_block_num(item, pos_in_item))
182 return 0; 185 return 0;
183 return 1; 186 return 1;
184} 187}
185 188
186static inline int indirect_item_found (int retval, struct item_head * ih) 189static inline int indirect_item_found(int retval, struct item_head *ih)
187{ 190{
188 return (retval == POSITION_FOUND) && is_indirect_le_ih (ih); 191 return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
189} 192}
190 193
191 194static inline void set_block_dev_mapped(struct buffer_head *bh,
192static inline void set_block_dev_mapped (struct buffer_head * bh, 195 b_blocknr_t block, struct inode *inode)
193 b_blocknr_t block, struct inode * inode)
194{ 196{
195 map_bh(bh, inode->i_sb, block); 197 map_bh(bh, inode->i_sb, block);
196} 198}
197 199
198
199// 200//
200// files which were created in the earlier version can not be longer, 201// files which were created in the earlier version can not be longer,
201// than 2 gb 202// than 2 gb
202// 203//
203static int file_capable (struct inode * inode, long block) 204static int file_capable(struct inode *inode, long block)
204{ 205{
205 if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file. 206 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file.
206 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb 207 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
207 return 1; 208 return 1;
208 209
209 return 0; 210 return 0;
210} 211}
211 212
212/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, 213/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th,
213 struct inode *inode, struct path *path) { 214 struct inode *inode, struct path *path)
214 struct super_block *s = th->t_super ; 215{
215 int len = th->t_blocks_allocated ; 216 struct super_block *s = th->t_super;
216 int err; 217 int len = th->t_blocks_allocated;
217 218 int err;
218 BUG_ON (!th->t_trans_id); 219
219 BUG_ON (!th->t_refcount); 220 BUG_ON(!th->t_trans_id);
220 221 BUG_ON(!th->t_refcount);
221 /* we cannot restart while nested */ 222
222 if (th->t_refcount > 1) { 223 /* we cannot restart while nested */
223 return 0 ; 224 if (th->t_refcount > 1) {
224 } 225 return 0;
225 pathrelse(path) ; 226 }
226 reiserfs_update_sd(th, inode) ; 227 pathrelse(path);
227 err = journal_end(th, s, len) ; 228 reiserfs_update_sd(th, inode);
228 if (!err) { 229 err = journal_end(th, s, len);
229 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6) ; 230 if (!err) {
230 if (!err) 231 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
231 reiserfs_update_inode_transaction(inode) ; 232 if (!err)
232 } 233 reiserfs_update_inode_transaction(inode);
233 return err; 234 }
235 return err;
234} 236}
235 237
236// it is called by get_block when create == 0. Returns block number 238// it is called by get_block when create == 0. Returns block number
@@ -241,181 +243,192 @@ static int file_capable (struct inode * inode, long block)
241// Please improve the english/clarity in the comment above, as it is 243// Please improve the english/clarity in the comment above, as it is
242// hard to understand. 244// hard to understand.
243 245
244static int _get_block_create_0 (struct inode * inode, long block, 246static int _get_block_create_0(struct inode *inode, long block,
245 struct buffer_head * bh_result, 247 struct buffer_head *bh_result, int args)
246 int args)
247{ 248{
248 INITIALIZE_PATH (path); 249 INITIALIZE_PATH(path);
249 struct cpu_key key; 250 struct cpu_key key;
250 struct buffer_head * bh; 251 struct buffer_head *bh;
251 struct item_head * ih, tmp_ih; 252 struct item_head *ih, tmp_ih;
252 int fs_gen ; 253 int fs_gen;
253 int blocknr; 254 int blocknr;
254 char * p = NULL; 255 char *p = NULL;
255 int chars; 256 int chars;
256 int ret ; 257 int ret;
257 int done = 0 ; 258 int result;
258 unsigned long offset ; 259 int done = 0;
259 260 unsigned long offset;
260 // prepare the key to look for the 'block'-th block of file 261
261 make_cpu_key (&key, inode, 262 // prepare the key to look for the 'block'-th block of file
262 (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); 263 make_cpu_key(&key, inode,
263 264 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
264research: 265 3);
265 if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { 266
266 pathrelse (&path); 267 research:
267 if (p) 268 result = search_for_position_by_key(inode->i_sb, &key, &path);
268 kunmap(bh_result->b_page) ; 269 if (result != POSITION_FOUND) {
269 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 270 pathrelse(&path);
270 // That there is some MMAPED data associated with it that is yet to be written to disk. 271 if (p)
271 if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { 272 kunmap(bh_result->b_page);
272 return -ENOENT ; 273 if (result == IO_ERROR)
273 } 274 return -EIO;
274 return 0 ; 275 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
275 } 276 // That there is some MMAPED data associated with it that is yet to be written to disk.
276 277 if ((args & GET_BLOCK_NO_HOLE)
277 // 278 && !PageUptodate(bh_result->b_page)) {
278 bh = get_last_bh (&path); 279 return -ENOENT;
279 ih = get_ih (&path); 280 }
280 if (is_indirect_le_ih (ih)) { 281 return 0;
281 __le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih); 282 }
282 283 //
283 /* FIXME: here we could cache indirect item or part of it in 284 bh = get_last_bh(&path);
284 the inode to avoid search_by_key in case of subsequent 285 ih = get_ih(&path);
285 access to file */ 286 if (is_indirect_le_ih(ih)) {
286 blocknr = get_block_num(ind_item, path.pos_in_item) ; 287 __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);
287 ret = 0 ; 288
288 if (blocknr) { 289 /* FIXME: here we could cache indirect item or part of it in
289 map_bh(bh_result, inode->i_sb, blocknr); 290 the inode to avoid search_by_key in case of subsequent
290 if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { 291 access to file */
291 set_buffer_boundary(bh_result); 292 blocknr = get_block_num(ind_item, path.pos_in_item);
292 } 293 ret = 0;
293 } else 294 if (blocknr) {
294 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 295 map_bh(bh_result, inode->i_sb, blocknr);
295 // That there is some MMAPED data associated with it that is yet to be written to disk. 296 if (path.pos_in_item ==
296 if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { 297 ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
297 ret = -ENOENT ; 298 set_buffer_boundary(bh_result);
298 } 299 }
299 300 } else
300 pathrelse (&path); 301 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
301 if (p) 302 // That there is some MMAPED data associated with it that is yet to be written to disk.
302 kunmap(bh_result->b_page) ; 303 if ((args & GET_BLOCK_NO_HOLE)
303 return ret ; 304 && !PageUptodate(bh_result->b_page)) {
304 } 305 ret = -ENOENT;
305 306 }
306 // requested data are in direct item(s) 307
307 if (!(args & GET_BLOCK_READ_DIRECT)) { 308 pathrelse(&path);
308 // we are called by bmap. FIXME: we can not map block of file 309 if (p)
309 // when it is stored in direct item(s) 310 kunmap(bh_result->b_page);
310 pathrelse (&path); 311 return ret;
311 if (p) 312 }
312 kunmap(bh_result->b_page) ; 313 // requested data are in direct item(s)
313 return -ENOENT; 314 if (!(args & GET_BLOCK_READ_DIRECT)) {
314 } 315 // we are called by bmap. FIXME: we can not map block of file
315 316 // when it is stored in direct item(s)
316 /* if we've got a direct item, and the buffer or page was uptodate, 317 pathrelse(&path);
317 ** we don't want to pull data off disk again. skip to the 318 if (p)
318 ** end, where we map the buffer and return 319 kunmap(bh_result->b_page);
319 */ 320 return -ENOENT;
320 if (buffer_uptodate(bh_result)) { 321 }
321 goto finished ; 322
322 } else 323 /* if we've got a direct item, and the buffer or page was uptodate,
323 /* 324 ** we don't want to pull data off disk again. skip to the
324 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date 325 ** end, where we map the buffer and return
325 ** pages without any buffers. If the page is up to date, we don't want 326 */
326 ** read old data off disk. Set the up to date bit on the buffer instead 327 if (buffer_uptodate(bh_result)) {
327 ** and jump to the end 328 goto finished;
328 */ 329 } else
329 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { 330 /*
331 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
332 ** pages without any buffers. If the page is up to date, we don't want
333 ** read old data off disk. Set the up to date bit on the buffer instead
334 ** and jump to the end
335 */
336 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
330 set_buffer_uptodate(bh_result); 337 set_buffer_uptodate(bh_result);
331 goto finished ; 338 goto finished;
332 } 339 }
333 340 // read file tail into part of page
334 // read file tail into part of page 341 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
335 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ; 342 fs_gen = get_generation(inode->i_sb);
336 fs_gen = get_generation(inode->i_sb) ; 343 copy_item_head(&tmp_ih, ih);
337 copy_item_head (&tmp_ih, ih); 344
338 345 /* we only want to kmap if we are reading the tail into the page.
339 /* we only want to kmap if we are reading the tail into the page. 346 ** this is not the common case, so we don't kmap until we are
340 ** this is not the common case, so we don't kmap until we are 347 ** sure we need to. But, this means the item might move if
341 ** sure we need to. But, this means the item might move if 348 ** kmap schedules
342 ** kmap schedules 349 */
343 */ 350 if (!p) {
344 if (!p) { 351 p = (char *)kmap(bh_result->b_page);
345 p = (char *)kmap(bh_result->b_page) ; 352 if (fs_changed(fs_gen, inode->i_sb)
346 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { 353 && item_moved(&tmp_ih, &path)) {
347 goto research; 354 goto research;
348 } 355 }
349 } 356 }
350 p += offset ; 357 p += offset;
351 memset (p, 0, inode->i_sb->s_blocksize); 358 memset(p, 0, inode->i_sb->s_blocksize);
352 do { 359 do {
353 if (!is_direct_le_ih (ih)) { 360 if (!is_direct_le_ih(ih)) {
354 BUG (); 361 BUG();
355 } 362 }
356 /* make sure we don't read more bytes than actually exist in 363 /* make sure we don't read more bytes than actually exist in
357 ** the file. This can happen in odd cases where i_size isn't 364 ** the file. This can happen in odd cases where i_size isn't
358 ** correct, and when direct item padding results in a few 365 ** correct, and when direct item padding results in a few
359 ** extra bytes at the end of the direct item 366 ** extra bytes at the end of the direct item
360 */ 367 */
361 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) 368 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
362 break ; 369 break;
363 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { 370 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
364 chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item; 371 chars =
365 done = 1 ; 372 inode->i_size - (le_ih_k_offset(ih) - 1) -
366 } else { 373 path.pos_in_item;
367 chars = ih_item_len(ih) - path.pos_in_item; 374 done = 1;
368 } 375 } else {
369 memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); 376 chars = ih_item_len(ih) - path.pos_in_item;
370 377 }
371 if (done) 378 memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);
372 break ; 379
373 380 if (done)
374 p += chars; 381 break;
375 382
376 if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) 383 p += chars;
377 // we done, if read direct item is not the last item of
378 // node FIXME: we could try to check right delimiting key
379 // to see whether direct item continues in the right
380 // neighbor or rely on i_size
381 break;
382
383 // update key to look for the next piece
384 set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
385 if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
386 // we read something from tail, even if now we got IO_ERROR
387 break;
388 bh = get_last_bh (&path);
389 ih = get_ih (&path);
390 } while (1);
391
392 flush_dcache_page(bh_result->b_page) ;
393 kunmap(bh_result->b_page) ;
394
395finished:
396 pathrelse (&path);
397 /* this buffer has valid data, but isn't valid for io. mapping it to
398 * block #0 tells the rest of reiserfs it just has a tail in it
399 */
400 map_bh(bh_result, inode->i_sb, 0);
401 set_buffer_uptodate (bh_result);
402 return 0;
403}
404 384
385 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
386 // we done, if read direct item is not the last item of
387 // node FIXME: we could try to check right delimiting key
388 // to see whether direct item continues in the right
389 // neighbor or rely on i_size
390 break;
391
392 // update key to look for the next piece
393 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
394 result = search_for_position_by_key(inode->i_sb, &key, &path);
395 if (result != POSITION_FOUND)
396 // i/o error most likely
397 break;
398 bh = get_last_bh(&path);
399 ih = get_ih(&path);
400 } while (1);
401
402 flush_dcache_page(bh_result->b_page);
403 kunmap(bh_result->b_page);
404
405 finished:
406 pathrelse(&path);
407
408 if (result == IO_ERROR)
409 return -EIO;
410
411 /* this buffer has valid data, but isn't valid for io. mapping it to
412 * block #0 tells the rest of reiserfs it just has a tail in it
413 */
414 map_bh(bh_result, inode->i_sb, 0);
415 set_buffer_uptodate(bh_result);
416 return 0;
417}
405 418
406// this is called to create file map. So, _get_block_create_0 will not 419// this is called to create file map. So, _get_block_create_0 will not
407// read direct item 420// read direct item
408static int reiserfs_bmap (struct inode * inode, sector_t block, 421static int reiserfs_bmap(struct inode *inode, sector_t block,
409 struct buffer_head * bh_result, int create) 422 struct buffer_head *bh_result, int create)
410{ 423{
411 if (!file_capable (inode, block)) 424 if (!file_capable(inode, block))
412 return -EFBIG; 425 return -EFBIG;
413 426
414 reiserfs_write_lock(inode->i_sb); 427 reiserfs_write_lock(inode->i_sb);
415 /* do not read the direct item */ 428 /* do not read the direct item */
416 _get_block_create_0 (inode, block, bh_result, 0) ; 429 _get_block_create_0(inode, block, bh_result, 0);
417 reiserfs_write_unlock(inode->i_sb); 430 reiserfs_write_unlock(inode->i_sb);
418 return 0; 431 return 0;
419} 432}
420 433
421/* special version of get_block that is only used by grab_tail_page right 434/* special version of get_block that is only used by grab_tail_page right
@@ -435,9 +448,11 @@ static int reiserfs_bmap (struct inode * inode, sector_t block,
435** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, 448** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
436** don't use this function. 449** don't use this function.
437*/ 450*/
438static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, 451static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
439 struct buffer_head * bh_result, int create) { 452 struct buffer_head *bh_result,
440 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; 453 int create)
454{
455 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
441} 456}
442 457
443/* This is special helper for reiserfs_get_block in case we are executing 458/* This is special helper for reiserfs_get_block in case we are executing
@@ -448,43 +463,42 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
448 struct buffer_head *bh_result, 463 struct buffer_head *bh_result,
449 int create) 464 int create)
450{ 465{
451 int ret ; 466 int ret;
452 467
453 bh_result->b_page = NULL; 468 bh_result->b_page = NULL;
454 469
455 /* We set the b_size before reiserfs_get_block call since it is 470 /* We set the b_size before reiserfs_get_block call since it is
456 referenced in convert_tail_for_hole() that may be called from 471 referenced in convert_tail_for_hole() that may be called from
457 reiserfs_get_block() */ 472 reiserfs_get_block() */
458 bh_result->b_size = (1 << inode->i_blkbits); 473 bh_result->b_size = (1 << inode->i_blkbits);
459 474
460 ret = reiserfs_get_block(inode, iblock, bh_result, 475 ret = reiserfs_get_block(inode, iblock, bh_result,
461 create | GET_BLOCK_NO_DANGLE) ; 476 create | GET_BLOCK_NO_DANGLE);
462 if (ret) 477 if (ret)
463 goto out; 478 goto out;
464
465 /* don't allow direct io onto tail pages */
466 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
467 /* make sure future calls to the direct io funcs for this offset
468 ** in the file fail by unmapping the buffer
469 */
470 clear_buffer_mapped(bh_result);
471 ret = -EINVAL ;
472 }
473 /* Possible unpacked tail. Flush the data before pages have
474 disappeared */
475 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
476 int err;
477 lock_kernel();
478 err = reiserfs_commit_for_inode(inode);
479 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
480 unlock_kernel();
481 if (err < 0)
482 ret = err;
483 }
484out:
485 return ret ;
486}
487 479
480 /* don't allow direct io onto tail pages */
481 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
482 /* make sure future calls to the direct io funcs for this offset
483 ** in the file fail by unmapping the buffer
484 */
485 clear_buffer_mapped(bh_result);
486 ret = -EINVAL;
487 }
488 /* Possible unpacked tail. Flush the data before pages have
489 disappeared */
490 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
491 int err;
492 lock_kernel();
493 err = reiserfs_commit_for_inode(inode);
494 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
495 unlock_kernel();
496 if (err < 0)
497 ret = err;
498 }
499 out:
500 return ret;
501}
488 502
489/* 503/*
490** helper function for when reiserfs_get_block is called for a hole 504** helper function for when reiserfs_get_block is called for a hole
@@ -496,490 +510,547 @@ out:
496** you should not be in a transaction, or have any paths held when you 510** you should not be in a transaction, or have any paths held when you
497** call this. 511** call this.
498*/ 512*/
499static int convert_tail_for_hole(struct inode *inode, 513static int convert_tail_for_hole(struct inode *inode,
500 struct buffer_head *bh_result, 514 struct buffer_head *bh_result,
501 loff_t tail_offset) { 515 loff_t tail_offset)
502 unsigned long index ; 516{
503 unsigned long tail_end ; 517 unsigned long index;
504 unsigned long tail_start ; 518 unsigned long tail_end;
505 struct page * tail_page ; 519 unsigned long tail_start;
506 struct page * hole_page = bh_result->b_page ; 520 struct page *tail_page;
507 int retval = 0 ; 521 struct page *hole_page = bh_result->b_page;
508 522 int retval = 0;
509 if ((tail_offset & (bh_result->b_size - 1)) != 1) 523
510 return -EIO ; 524 if ((tail_offset & (bh_result->b_size - 1)) != 1)
511 525 return -EIO;
512 /* always try to read until the end of the block */ 526
513 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ; 527 /* always try to read until the end of the block */
514 tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; 528 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
515 529 tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
516 index = tail_offset >> PAGE_CACHE_SHIFT ; 530
517 /* hole_page can be zero in case of direct_io, we are sure 531 index = tail_offset >> PAGE_CACHE_SHIFT;
518 that we cannot get here if we write with O_DIRECT into 532 /* hole_page can be zero in case of direct_io, we are sure
519 tail page */ 533 that we cannot get here if we write with O_DIRECT into
520 if (!hole_page || index != hole_page->index) { 534 tail page */
521 tail_page = grab_cache_page(inode->i_mapping, index) ; 535 if (!hole_page || index != hole_page->index) {
522 retval = -ENOMEM; 536 tail_page = grab_cache_page(inode->i_mapping, index);
523 if (!tail_page) { 537 retval = -ENOMEM;
524 goto out ; 538 if (!tail_page) {
525 } 539 goto out;
526 } else { 540 }
527 tail_page = hole_page ; 541 } else {
528 } 542 tail_page = hole_page;
529 543 }
530 /* we don't have to make sure the conversion did not happen while 544
531 ** we were locking the page because anyone that could convert 545 /* we don't have to make sure the conversion did not happen while
532 ** must first take i_sem. 546 ** we were locking the page because anyone that could convert
533 ** 547 ** must first take i_sem.
534 ** We must fix the tail page for writing because it might have buffers 548 **
535 ** that are mapped, but have a block number of 0. This indicates tail 549 ** We must fix the tail page for writing because it might have buffers
536 ** data that has been read directly into the page, and block_prepare_write 550 ** that are mapped, but have a block number of 0. This indicates tail
537 ** won't trigger a get_block in this case. 551 ** data that has been read directly into the page, and block_prepare_write
538 */ 552 ** won't trigger a get_block in this case.
539 fix_tail_page_for_writing(tail_page) ; 553 */
540 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); 554 fix_tail_page_for_writing(tail_page);
541 if (retval) 555 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
542 goto unlock ; 556 if (retval)
543 557 goto unlock;
544 /* tail conversion might change the data in the page */ 558
545 flush_dcache_page(tail_page) ; 559 /* tail conversion might change the data in the page */
546 560 flush_dcache_page(tail_page);
547 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end) ; 561
548 562 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
549unlock: 563
550 if (tail_page != hole_page) { 564 unlock:
551 unlock_page(tail_page) ; 565 if (tail_page != hole_page) {
552 page_cache_release(tail_page) ; 566 unlock_page(tail_page);
553 } 567 page_cache_release(tail_page);
554out: 568 }
555 return retval ; 569 out:
570 return retval;
556} 571}
557 572
558static inline int _allocate_block(struct reiserfs_transaction_handle *th, 573static inline int _allocate_block(struct reiserfs_transaction_handle *th,
559 long block, 574 long block,
560 struct inode *inode, 575 struct inode *inode,
561 b_blocknr_t *allocated_block_nr, 576 b_blocknr_t * allocated_block_nr,
562 struct path * path, 577 struct path *path, int flags)
563 int flags) { 578{
564 BUG_ON (!th->t_trans_id); 579 BUG_ON(!th->t_trans_id);
565 580
566#ifdef REISERFS_PREALLOCATE 581#ifdef REISERFS_PREALLOCATE
567 if (!(flags & GET_BLOCK_NO_ISEM)) { 582 if (!(flags & GET_BLOCK_NO_ISEM)) {
568 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); 583 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
569 } 584 path, block);
585 }
570#endif 586#endif
571 return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block); 587 return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
588 block);
572} 589}
573 590
574int reiserfs_get_block (struct inode * inode, sector_t block, 591int reiserfs_get_block(struct inode *inode, sector_t block,
575 struct buffer_head * bh_result, int create) 592 struct buffer_head *bh_result, int create)
576{ 593{
577 int repeat, retval = 0; 594 int repeat, retval = 0;
578 b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is (unsigned) 32 bit int 595 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int
579 INITIALIZE_PATH(path); 596 INITIALIZE_PATH(path);
580 int pos_in_item; 597 int pos_in_item;
581 struct cpu_key key; 598 struct cpu_key key;
582 struct buffer_head * bh, * unbh = NULL; 599 struct buffer_head *bh, *unbh = NULL;
583 struct item_head * ih, tmp_ih; 600 struct item_head *ih, tmp_ih;
584 __le32 * item; 601 __le32 *item;
585 int done; 602 int done;
586 int fs_gen; 603 int fs_gen;
587 struct reiserfs_transaction_handle *th = NULL; 604 struct reiserfs_transaction_handle *th = NULL;
588 /* space reserved in transaction batch: 605 /* space reserved in transaction batch:
589 . 3 balancings in direct->indirect conversion 606 . 3 balancings in direct->indirect conversion
590 . 1 block involved into reiserfs_update_sd() 607 . 1 block involved into reiserfs_update_sd()
591 XXX in practically impossible worst case direct2indirect() 608 XXX in practically impossible worst case direct2indirect()
592 can incur (much) more than 3 balancings. 609 can incur (much) more than 3 balancings.
593 quota update for user, group */ 610 quota update for user, group */
594 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; 611 int jbegin_count =
595 int version; 612 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
596 int dangle = 1; 613 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
597 loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; 614 int version;
598 615 int dangle = 1;
599 /* bad.... */ 616 loff_t new_offset =
600 reiserfs_write_lock(inode->i_sb); 617 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
601 version = get_inode_item_key_version (inode); 618
602 619 /* bad.... */
603 if (block < 0) { 620 reiserfs_write_lock(inode->i_sb);
604 reiserfs_write_unlock(inode->i_sb); 621 version = get_inode_item_key_version(inode);
605 return -EIO;
606 }
607 622
608 if (!file_capable (inode, block)) { 623 if (block < 0) {
609 reiserfs_write_unlock(inode->i_sb); 624 reiserfs_write_unlock(inode->i_sb);
610 return -EFBIG; 625 return -EIO;
611 } 626 }
612
613 /* if !create, we aren't changing the FS, so we don't need to
614 ** log anything, so we don't need to start a transaction
615 */
616 if (!(create & GET_BLOCK_CREATE)) {
617 int ret ;
618 /* find number of block-th logical block of the file */
619 ret = _get_block_create_0 (inode, block, bh_result,
620 create | GET_BLOCK_READ_DIRECT) ;
621 reiserfs_write_unlock(inode->i_sb);
622 return ret;
623 }
624 /*
625 * if we're already in a transaction, make sure to close
626 * any new transactions we start in this func
627 */
628 if ((create & GET_BLOCK_NO_DANGLE) ||
629 reiserfs_transaction_running(inode->i_sb))
630 dangle = 0;
631
632 /* If file is of such a size, that it might have a tail and tails are enabled
633 ** we should mark it as possibly needing tail packing on close
634 */
635 if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) ||
636 (have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) )
637 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
638
639 /* set the key of the first byte in the 'block'-th block of file */
640 make_cpu_key (&key, inode, new_offset,
641 TYPE_ANY, 3/*key length*/);
642 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
643start_trans:
644 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
645 if (!th) {
646 retval = -ENOMEM;
647 goto failure;
648 }
649 reiserfs_update_inode_transaction(inode) ;
650 }
651 research:
652
653 retval = search_for_position_by_key (inode->i_sb, &key, &path);
654 if (retval == IO_ERROR) {
655 retval = -EIO;
656 goto failure;
657 }
658
659 bh = get_last_bh (&path);
660 ih = get_ih (&path);
661 item = get_item (&path);
662 pos_in_item = path.pos_in_item;
663
664 fs_gen = get_generation (inode->i_sb);
665 copy_item_head (&tmp_ih, ih);
666
667 if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
668 /* we have to allocate block for the unformatted node */
669 if (!th) {
670 pathrelse(&path) ;
671 goto start_trans;
672 }
673
674 repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create);
675
676 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
677 /* restart the transaction to give the journal a chance to free
678 ** some blocks. releases the path, so we have to go back to
679 ** research if we succeed on the second try
680 */
681 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
682 retval = restart_transaction(th, inode, &path) ;
683 if (retval)
684 goto failure;
685 repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create);
686
687 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
688 goto research ;
689 }
690 if (repeat == QUOTA_EXCEEDED)
691 retval = -EDQUOT;
692 else
693 retval = -ENOSPC;
694 goto failure;
695 }
696
697 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
698 goto research;
699 }
700 }
701
702 if (indirect_item_found (retval, ih)) {
703 b_blocknr_t unfm_ptr;
704 /* 'block'-th block is in the file already (there is
705 corresponding cell in some indirect item). But it may be
706 zero unformatted node pointer (hole) */
707 unfm_ptr = get_block_num (item, pos_in_item);
708 if (unfm_ptr == 0) {
709 /* use allocated block to plug the hole */
710 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
711 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
712 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
713 goto research;
714 }
715 set_buffer_new(bh_result);
716 if (buffer_dirty(bh_result) && reiserfs_data_ordered(inode->i_sb))
717 reiserfs_add_ordered_list(inode, bh_result);
718 put_block_num(item, pos_in_item, allocated_block_nr) ;
719 unfm_ptr = allocated_block_nr;
720 journal_mark_dirty (th, inode->i_sb, bh);
721 reiserfs_update_sd(th, inode) ;
722 }
723 set_block_dev_mapped(bh_result, unfm_ptr, inode);
724 pathrelse (&path);
725 retval = 0;
726 if (!dangle && th)
727 retval = reiserfs_end_persistent_transaction(th);
728 627
729 reiserfs_write_unlock(inode->i_sb); 628 if (!file_capable(inode, block)) {
730 629 reiserfs_write_unlock(inode->i_sb);
731 /* the item was found, so new blocks were not added to the file 630 return -EFBIG;
732 ** there is no need to make sure the inode is updated with this 631 }
733 ** transaction 632
734 */ 633 /* if !create, we aren't changing the FS, so we don't need to
735 return retval; 634 ** log anything, so we don't need to start a transaction
736 } 635 */
737 636 if (!(create & GET_BLOCK_CREATE)) {
738 if (!th) { 637 int ret;
739 pathrelse(&path) ; 638 /* find number of block-th logical block of the file */
740 goto start_trans; 639 ret = _get_block_create_0(inode, block, bh_result,
741 } 640 create | GET_BLOCK_READ_DIRECT);
742 641 reiserfs_write_unlock(inode->i_sb);
743 /* desired position is not found or is in the direct item. We have 642 return ret;
744 to append file with holes up to 'block'-th block converting 643 }
745 direct items to indirect one if necessary */ 644 /*
746 done = 0; 645 * if we're already in a transaction, make sure to close
747 do { 646 * any new transactions we start in this func
748 if (is_statdata_le_ih (ih)) { 647 */
749 __le32 unp = 0; 648 if ((create & GET_BLOCK_NO_DANGLE) ||
750 struct cpu_key tmp_key; 649 reiserfs_transaction_running(inode->i_sb))
751 650 dangle = 0;
752 /* indirect item has to be inserted */ 651
753 make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, 652 /* If file is of such a size, that it might have a tail and tails are enabled
754 UNFM_P_SIZE, 0/* free_space */); 653 ** we should mark it as possibly needing tail packing on close
755 654 */
756 if (cpu_key_k_offset (&key) == 1) { 655 if ((have_large_tails(inode->i_sb)
757 /* we are going to add 'block'-th block to the file. Use 656 && inode->i_size < i_block_size(inode) * 4)
758 allocated block for that */ 657 || (have_small_tails(inode->i_sb)
759 unp = cpu_to_le32 (allocated_block_nr); 658 && inode->i_size < i_block_size(inode)))
760 set_block_dev_mapped (bh_result, allocated_block_nr, inode); 659 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
761 set_buffer_new(bh_result); 660
762 done = 1; 661 /* set the key of the first byte in the 'block'-th block of file */
763 } 662 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
764 tmp_key = key; // ;) 663 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
765 set_cpu_key_k_offset (&tmp_key, 1); 664 start_trans:
766 PATH_LAST_POSITION(&path) ++; 665 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
767 666 if (!th) {
768 retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp); 667 retval = -ENOMEM;
769 if (retval) {
770 reiserfs_free_block (th, inode, allocated_block_nr, 1);
771 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
772 }
773 //mark_tail_converted (inode);
774 } else if (is_direct_le_ih (ih)) {
775 /* direct item has to be converted */
776 loff_t tail_offset;
777
778 tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
779 if (tail_offset == cpu_key_k_offset (&key)) {
780 /* direct item we just found fits into block we have
781 to map. Convert it into unformatted node: use
782 bh_result for the conversion */
783 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
784 unbh = bh_result;
785 done = 1;
786 } else {
787 /* we have to padd file tail stored in direct item(s)
788 up to block size and convert it to unformatted
789 node. FIXME: this should also get into page cache */
790
791 pathrelse(&path) ;
792 /*
793 * ugly, but we can only end the transaction if
794 * we aren't nested
795 */
796 BUG_ON (!th->t_refcount);
797 if (th->t_refcount == 1) {
798 retval = reiserfs_end_persistent_transaction(th);
799 th = NULL;
800 if (retval)
801 goto failure; 668 goto failure;
802 } 669 }
670 reiserfs_update_inode_transaction(inode);
671 }
672 research:
803 673
804 retval = convert_tail_for_hole(inode, bh_result, tail_offset) ; 674 retval = search_for_position_by_key(inode->i_sb, &key, &path);
805 if (retval) {
806 if ( retval != -ENOSPC )
807 reiserfs_warning (inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d", inode->i_ino, retval) ;
808 if (allocated_block_nr) {
809 /* the bitmap, the super, and the stat data == 3 */
810 if (!th)
811 th = reiserfs_persistent_transaction(inode->i_sb,3);
812 if (th)
813 reiserfs_free_block (th,inode,allocated_block_nr,1);
814 }
815 goto failure ;
816 }
817 goto research ;
818 }
819 retval = direct2indirect (th, inode, &path, unbh, tail_offset);
820 if (retval) {
821 reiserfs_unmap_buffer(unbh);
822 reiserfs_free_block (th, inode, allocated_block_nr, 1);
823 goto failure;
824 }
825 /* it is important the set_buffer_uptodate is done after
826 ** the direct2indirect. The buffer might contain valid
827 ** data newer than the data on disk (read by readpage, changed,
828 ** and then sent here by writepage). direct2indirect needs
829 ** to know if unbh was already up to date, so it can decide
830 ** if the data in unbh needs to be replaced with data from
831 ** the disk
832 */
833 set_buffer_uptodate (unbh);
834
835 /* unbh->b_page == NULL in case of DIRECT_IO request, this means
836 buffer will disappear shortly, so it should not be added to
837 */
838 if ( unbh->b_page ) {
839 /* we've converted the tail, so we must
840 ** flush unbh before the transaction commits
841 */
842 reiserfs_add_tail_list(inode, unbh) ;
843
844 /* mark it dirty now to prevent commit_write from adding
845 ** this buffer to the inode's dirty buffer list
846 */
847 /*
848 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
849 * It's still atomic, but it sets the page dirty too,
850 * which makes it eligible for writeback at any time by the
851 * VM (which was also the case with __mark_buffer_dirty())
852 */
853 mark_buffer_dirty(unbh) ;
854 }
855 } else {
856 /* append indirect item with holes if needed, when appending
857 pointer to 'block'-th block use block, which is already
858 allocated */
859 struct cpu_key tmp_key;
860 unp_t unf_single=0; // We use this in case we need to allocate only
861 // one block which is a fastpath
862 unp_t *un;
863 __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
864 __u64 blocks_needed;
865
866 RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
867 "vs-804: invalid position for append");
868 /* indirect item has to be appended, set up key of that position */
869 make_cpu_key (&tmp_key, inode,
870 le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
871 //pos_in_item * inode->i_sb->s_blocksize,
872 TYPE_INDIRECT, 3);// key type is unimportant
873
874 blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
875 RFALSE( blocks_needed < 0, "green-805: invalid offset");
876
877 if ( blocks_needed == 1 ) {
878 un = &unf_single;
879 } else {
880 un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
881 GFP_ATOMIC); // We need to avoid scheduling.
882 if ( !un) {
883 un = &unf_single;
884 blocks_needed = 1;
885 max_to_insert = 0;
886 } else
887 memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
888 }
889 if ( blocks_needed <= max_to_insert) {
890 /* we are going to add target block to the file. Use allocated
891 block for that */
892 un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
893 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
894 set_buffer_new(bh_result);
895 done = 1;
896 } else {
897 /* paste hole to the indirect item */
898 /* If kmalloc failed, max_to_insert becomes zero and it means we
899 only have space for one block */
900 blocks_needed=max_to_insert?max_to_insert:1;
901 }
902 retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char *)un, UNFM_P_SIZE * blocks_needed);
903
904 if (blocks_needed != 1)
905 kfree(un);
906
907 if (retval) {
908 reiserfs_free_block (th, inode, allocated_block_nr, 1);
909 goto failure;
910 }
911 if (!done) {
912 /* We need to mark new file size in case this function will be
913 interrupted/aborted later on. And we may do this only for
914 holes. */
915 inode->i_size += inode->i_sb->s_blocksize * blocks_needed;
916 }
917 }
918
919 if (done == 1)
920 break;
921
922 /* this loop could log more blocks than we had originally asked
923 ** for. So, we have to allow the transaction to end if it is
924 ** too big or too full. Update the inode so things are
925 ** consistent if we crash before the function returns
926 **
927 ** release the path so that anybody waiting on the path before
928 ** ending their transaction will be able to continue.
929 */
930 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
931 retval = restart_transaction(th, inode, &path) ;
932 if (retval)
933 goto failure;
934 }
935 /* inserting indirect pointers for a hole can take a
936 ** long time. reschedule if needed
937 */
938 cond_resched();
939
940 retval = search_for_position_by_key (inode->i_sb, &key, &path);
941 if (retval == IO_ERROR) { 675 if (retval == IO_ERROR) {
942 retval = -EIO; 676 retval = -EIO;
943 goto failure; 677 goto failure;
944 } 678 }
945 if (retval == POSITION_FOUND) { 679
946 reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: " 680 bh = get_last_bh(&path);
947 "%K should not be found", &key); 681 ih = get_ih(&path);
948 retval = -EEXIST; 682 item = get_item(&path);
949 if (allocated_block_nr)
950 reiserfs_free_block (th, inode, allocated_block_nr, 1);
951 pathrelse(&path) ;
952 goto failure;
953 }
954 bh = get_last_bh (&path);
955 ih = get_ih (&path);
956 item = get_item (&path);
957 pos_in_item = path.pos_in_item; 683 pos_in_item = path.pos_in_item;
958 } while (1);
959 684
685 fs_gen = get_generation(inode->i_sb);
686 copy_item_head(&tmp_ih, ih);
687
688 if (allocation_needed
689 (retval, allocated_block_nr, ih, item, pos_in_item)) {
690 /* we have to allocate block for the unformatted node */
691 if (!th) {
692 pathrelse(&path);
693 goto start_trans;
694 }
695
696 repeat =
697 _allocate_block(th, block, inode, &allocated_block_nr,
698 &path, create);
699
700 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
701 /* restart the transaction to give the journal a chance to free
702 ** some blocks. releases the path, so we have to go back to
703 ** research if we succeed on the second try
704 */
705 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
706 retval = restart_transaction(th, inode, &path);
707 if (retval)
708 goto failure;
709 repeat =
710 _allocate_block(th, block, inode,
711 &allocated_block_nr, NULL, create);
712
713 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
714 goto research;
715 }
716 if (repeat == QUOTA_EXCEEDED)
717 retval = -EDQUOT;
718 else
719 retval = -ENOSPC;
720 goto failure;
721 }
722
723 if (fs_changed(fs_gen, inode->i_sb)
724 && item_moved(&tmp_ih, &path)) {
725 goto research;
726 }
727 }
728
729 if (indirect_item_found(retval, ih)) {
730 b_blocknr_t unfm_ptr;
731 /* 'block'-th block is in the file already (there is
732 corresponding cell in some indirect item). But it may be
733 zero unformatted node pointer (hole) */
734 unfm_ptr = get_block_num(item, pos_in_item);
735 if (unfm_ptr == 0) {
736 /* use allocated block to plug the hole */
737 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
738 if (fs_changed(fs_gen, inode->i_sb)
739 && item_moved(&tmp_ih, &path)) {
740 reiserfs_restore_prepared_buffer(inode->i_sb,
741 bh);
742 goto research;
743 }
744 set_buffer_new(bh_result);
745 if (buffer_dirty(bh_result)
746 && reiserfs_data_ordered(inode->i_sb))
747 reiserfs_add_ordered_list(inode, bh_result);
748 put_block_num(item, pos_in_item, allocated_block_nr);
749 unfm_ptr = allocated_block_nr;
750 journal_mark_dirty(th, inode->i_sb, bh);
751 reiserfs_update_sd(th, inode);
752 }
753 set_block_dev_mapped(bh_result, unfm_ptr, inode);
754 pathrelse(&path);
755 retval = 0;
756 if (!dangle && th)
757 retval = reiserfs_end_persistent_transaction(th);
758
759 reiserfs_write_unlock(inode->i_sb);
760
761 /* the item was found, so new blocks were not added to the file
762 ** there is no need to make sure the inode is updated with this
763 ** transaction
764 */
765 return retval;
766 }
767
768 if (!th) {
769 pathrelse(&path);
770 goto start_trans;
771 }
772
773 /* desired position is not found or is in the direct item. We have
774 to append file with holes up to 'block'-th block converting
775 direct items to indirect one if necessary */
776 done = 0;
777 do {
778 if (is_statdata_le_ih(ih)) {
779 __le32 unp = 0;
780 struct cpu_key tmp_key;
781
782 /* indirect item has to be inserted */
783 make_le_item_head(&tmp_ih, &key, version, 1,
784 TYPE_INDIRECT, UNFM_P_SIZE,
785 0 /* free_space */ );
786
787 if (cpu_key_k_offset(&key) == 1) {
788 /* we are going to add 'block'-th block to the file. Use
789 allocated block for that */
790 unp = cpu_to_le32(allocated_block_nr);
791 set_block_dev_mapped(bh_result,
792 allocated_block_nr, inode);
793 set_buffer_new(bh_result);
794 done = 1;
795 }
796 tmp_key = key; // ;)
797 set_cpu_key_k_offset(&tmp_key, 1);
798 PATH_LAST_POSITION(&path)++;
799
800 retval =
801 reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
802 inode, (char *)&unp);
803 if (retval) {
804 reiserfs_free_block(th, inode,
805 allocated_block_nr, 1);
806 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
807 }
808 //mark_tail_converted (inode);
809 } else if (is_direct_le_ih(ih)) {
810 /* direct item has to be converted */
811 loff_t tail_offset;
812
813 tail_offset =
814 ((le_ih_k_offset(ih) -
815 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
816 if (tail_offset == cpu_key_k_offset(&key)) {
817 /* direct item we just found fits into block we have
818 to map. Convert it into unformatted node: use
819 bh_result for the conversion */
820 set_block_dev_mapped(bh_result,
821 allocated_block_nr, inode);
822 unbh = bh_result;
823 done = 1;
824 } else {
825 /* we have to padd file tail stored in direct item(s)
826 up to block size and convert it to unformatted
827 node. FIXME: this should also get into page cache */
828
829 pathrelse(&path);
830 /*
831 * ugly, but we can only end the transaction if
832 * we aren't nested
833 */
834 BUG_ON(!th->t_refcount);
835 if (th->t_refcount == 1) {
836 retval =
837 reiserfs_end_persistent_transaction
838 (th);
839 th = NULL;
840 if (retval)
841 goto failure;
842 }
843
844 retval =
845 convert_tail_for_hole(inode, bh_result,
846 tail_offset);
847 if (retval) {
848 if (retval != -ENOSPC)
849 reiserfs_warning(inode->i_sb,
850 "clm-6004: convert tail failed inode %lu, error %d",
851 inode->i_ino,
852 retval);
853 if (allocated_block_nr) {
854 /* the bitmap, the super, and the stat data == 3 */
855 if (!th)
856 th = reiserfs_persistent_transaction(inode->i_sb, 3);
857 if (th)
858 reiserfs_free_block(th,
859 inode,
860 allocated_block_nr,
861 1);
862 }
863 goto failure;
864 }
865 goto research;
866 }
867 retval =
868 direct2indirect(th, inode, &path, unbh,
869 tail_offset);
870 if (retval) {
871 reiserfs_unmap_buffer(unbh);
872 reiserfs_free_block(th, inode,
873 allocated_block_nr, 1);
874 goto failure;
875 }
876 /* it is important the set_buffer_uptodate is done after
877 ** the direct2indirect. The buffer might contain valid
878 ** data newer than the data on disk (read by readpage, changed,
879 ** and then sent here by writepage). direct2indirect needs
880 ** to know if unbh was already up to date, so it can decide
881 ** if the data in unbh needs to be replaced with data from
882 ** the disk
883 */
884 set_buffer_uptodate(unbh);
885
886 /* unbh->b_page == NULL in case of DIRECT_IO request, this means
887 buffer will disappear shortly, so it should not be added to
888 */
889 if (unbh->b_page) {
890 /* we've converted the tail, so we must
891 ** flush unbh before the transaction commits
892 */
893 reiserfs_add_tail_list(inode, unbh);
894
895 /* mark it dirty now to prevent commit_write from adding
896 ** this buffer to the inode's dirty buffer list
897 */
898 /*
899 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
900 * It's still atomic, but it sets the page dirty too,
901 * which makes it eligible for writeback at any time by the
902 * VM (which was also the case with __mark_buffer_dirty())
903 */
904 mark_buffer_dirty(unbh);
905 }
906 } else {
907 /* append indirect item with holes if needed, when appending
908 pointer to 'block'-th block use block, which is already
909 allocated */
910 struct cpu_key tmp_key;
911 unp_t unf_single = 0; // We use this in case we need to allocate only
912 // one block which is a fastpath
913 unp_t *un;
914 __u64 max_to_insert =
915 MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
916 UNFM_P_SIZE;
917 __u64 blocks_needed;
918
919 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
920 "vs-804: invalid position for append");
921 /* indirect item has to be appended, set up key of that position */
922 make_cpu_key(&tmp_key, inode,
923 le_key_k_offset(version,
924 &(ih->ih_key)) +
925 op_bytes_number(ih,
926 inode->i_sb->s_blocksize),
927 //pos_in_item * inode->i_sb->s_blocksize,
928 TYPE_INDIRECT, 3); // key type is unimportant
929
930 blocks_needed =
931 1 +
932 ((cpu_key_k_offset(&key) -
933 cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
934 s_blocksize_bits);
935 RFALSE(blocks_needed < 0, "green-805: invalid offset");
936
937 if (blocks_needed == 1) {
938 un = &unf_single;
939 } else {
940 un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling.
941 if (!un) {
942 un = &unf_single;
943 blocks_needed = 1;
944 max_to_insert = 0;
945 } else
946 memset(un, 0,
947 UNFM_P_SIZE * min(blocks_needed,
948 max_to_insert));
949 }
950 if (blocks_needed <= max_to_insert) {
951 /* we are going to add target block to the file. Use allocated
952 block for that */
953 un[blocks_needed - 1] =
954 cpu_to_le32(allocated_block_nr);
955 set_block_dev_mapped(bh_result,
956 allocated_block_nr, inode);
957 set_buffer_new(bh_result);
958 done = 1;
959 } else {
960 /* paste hole to the indirect item */
961 /* If kmalloc failed, max_to_insert becomes zero and it means we
962 only have space for one block */
963 blocks_needed =
964 max_to_insert ? max_to_insert : 1;
965 }
966 retval =
967 reiserfs_paste_into_item(th, &path, &tmp_key, inode,
968 (char *)un,
969 UNFM_P_SIZE *
970 blocks_needed);
971
972 if (blocks_needed != 1)
973 kfree(un);
974
975 if (retval) {
976 reiserfs_free_block(th, inode,
977 allocated_block_nr, 1);
978 goto failure;
979 }
980 if (!done) {
981 /* We need to mark new file size in case this function will be
982 interrupted/aborted later on. And we may do this only for
983 holes. */
984 inode->i_size +=
985 inode->i_sb->s_blocksize * blocks_needed;
986 }
987 }
960 988
961 retval = 0; 989 if (done == 1)
990 break;
962 991
963 failure: 992 /* this loop could log more blocks than we had originally asked
964 if (th && (!dangle || (retval && !th->t_trans_id))) { 993 ** for. So, we have to allow the transaction to end if it is
965 int err; 994 ** too big or too full. Update the inode so things are
966 if (th->t_trans_id) 995 ** consistent if we crash before the function returns
967 reiserfs_update_sd(th, inode); 996 **
968 err = reiserfs_end_persistent_transaction(th); 997 ** release the path so that anybody waiting on the path before
969 if (err) 998 ** ending their transaction will be able to continue.
970 retval = err; 999 */
971 } 1000 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1001 retval = restart_transaction(th, inode, &path);
1002 if (retval)
1003 goto failure;
1004 }
1005 /* inserting indirect pointers for a hole can take a
1006 ** long time. reschedule if needed
1007 */
1008 cond_resched();
1009
1010 retval = search_for_position_by_key(inode->i_sb, &key, &path);
1011 if (retval == IO_ERROR) {
1012 retval = -EIO;
1013 goto failure;
1014 }
1015 if (retval == POSITION_FOUND) {
1016 reiserfs_warning(inode->i_sb,
1017 "vs-825: reiserfs_get_block: "
1018 "%K should not be found", &key);
1019 retval = -EEXIST;
1020 if (allocated_block_nr)
1021 reiserfs_free_block(th, inode,
1022 allocated_block_nr, 1);
1023 pathrelse(&path);
1024 goto failure;
1025 }
1026 bh = get_last_bh(&path);
1027 ih = get_ih(&path);
1028 item = get_item(&path);
1029 pos_in_item = path.pos_in_item;
1030 } while (1);
972 1031
973 reiserfs_write_unlock(inode->i_sb); 1032 retval = 0;
974 reiserfs_check_path(&path) ; 1033
975 return retval; 1034 failure:
1035 if (th && (!dangle || (retval && !th->t_trans_id))) {
1036 int err;
1037 if (th->t_trans_id)
1038 reiserfs_update_sd(th, inode);
1039 err = reiserfs_end_persistent_transaction(th);
1040 if (err)
1041 retval = err;
1042 }
1043
1044 reiserfs_write_unlock(inode->i_sb);
1045 reiserfs_check_path(&path);
1046 return retval;
976} 1047}
977 1048
978static int 1049static int
979reiserfs_readpages(struct file *file, struct address_space *mapping, 1050reiserfs_readpages(struct file *file, struct address_space *mapping,
980 struct list_head *pages, unsigned nr_pages) 1051 struct list_head *pages, unsigned nr_pages)
981{ 1052{
982 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); 1053 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
983} 1054}
984 1055
985/* Compute real number of used bytes by file 1056/* Compute real number of used bytes by file
@@ -987,51 +1058,56 @@ reiserfs_readpages(struct file *file, struct address_space *mapping,
987 */ 1058 */
988static int real_space_diff(struct inode *inode, int sd_size) 1059static int real_space_diff(struct inode *inode, int sd_size)
989{ 1060{
990 int bytes; 1061 int bytes;
991 loff_t blocksize = inode->i_sb->s_blocksize ; 1062 loff_t blocksize = inode->i_sb->s_blocksize;
992 1063
993 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) 1064 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
994 return sd_size ; 1065 return sd_size;
995 1066
996 /* End of file is also in full block with indirect reference, so round 1067 /* End of file is also in full block with indirect reference, so round
997 ** up to the next block. 1068 ** up to the next block.
998 ** 1069 **
999 ** there is just no way to know if the tail is actually packed 1070 ** there is just no way to know if the tail is actually packed
1000 ** on the file, so we have to assume it isn't. When we pack the 1071 ** on the file, so we have to assume it isn't. When we pack the
1001 ** tail, we add 4 bytes to pretend there really is an unformatted 1072 ** tail, we add 4 bytes to pretend there really is an unformatted
1002 ** node pointer 1073 ** node pointer
1003 */ 1074 */
1004 bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size; 1075 bytes =
1005 return bytes ; 1076 ((inode->i_size +
1077 (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
1078 sd_size;
1079 return bytes;
1006} 1080}
1007 1081
1008static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, 1082static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
1009 int sd_size) 1083 int sd_size)
1010{ 1084{
1011 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { 1085 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1012 return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ; 1086 return inode->i_size +
1013 } 1087 (loff_t) (real_space_diff(inode, sd_size));
1014 return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9); 1088 }
1089 return ((loff_t) real_space_diff(inode, sd_size)) +
1090 (((loff_t) blocks) << 9);
1015} 1091}
1016 1092
1017/* Compute number of blocks used by file in ReiserFS counting */ 1093/* Compute number of blocks used by file in ReiserFS counting */
1018static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) 1094static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1019{ 1095{
1020 loff_t bytes = inode_get_bytes(inode) ; 1096 loff_t bytes = inode_get_bytes(inode);
1021 loff_t real_space = real_space_diff(inode, sd_size) ; 1097 loff_t real_space = real_space_diff(inode, sd_size);
1022 1098
1023 /* keeps fsck and non-quota versions of reiserfs happy */ 1099 /* keeps fsck and non-quota versions of reiserfs happy */
1024 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { 1100 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1025 bytes += (loff_t)511 ; 1101 bytes += (loff_t) 511;
1026 } 1102 }
1027 1103
1028 /* files from before the quota patch might i_blocks such that 1104 /* files from before the quota patch might i_blocks such that
1029 ** bytes < real_space. Deal with that here to prevent it from 1105 ** bytes < real_space. Deal with that here to prevent it from
1030 ** going negative. 1106 ** going negative.
1031 */ 1107 */
1032 if (bytes < real_space) 1108 if (bytes < real_space)
1033 return 0 ; 1109 return 0;
1034 return (bytes - real_space) >> 9; 1110 return (bytes - real_space) >> 9;
1035} 1111}
1036 1112
1037// 1113//
@@ -1042,263 +1118,269 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1042// 1118//
1043 1119
1044// called by read_locked_inode 1120// called by read_locked_inode
1045static void init_inode (struct inode * inode, struct path * path) 1121static void init_inode(struct inode *inode, struct path *path)
1046{ 1122{
1047 struct buffer_head * bh; 1123 struct buffer_head *bh;
1048 struct item_head * ih; 1124 struct item_head *ih;
1049 __u32 rdev; 1125 __u32 rdev;
1050 //int version = ITEM_VERSION_1; 1126 //int version = ITEM_VERSION_1;
1051 1127
1052 bh = PATH_PLAST_BUFFER (path); 1128 bh = PATH_PLAST_BUFFER(path);
1053 ih = PATH_PITEM_HEAD (path); 1129 ih = PATH_PITEM_HEAD(path);
1054 1130
1055 1131 copy_key(INODE_PKEY(inode), &(ih->ih_key));
1056 copy_key (INODE_PKEY (inode), &(ih->ih_key)); 1132 inode->i_blksize = reiserfs_default_io_size;
1057 inode->i_blksize = reiserfs_default_io_size; 1133
1058 1134 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1059 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); 1135 REISERFS_I(inode)->i_flags = 0;
1060 REISERFS_I(inode)->i_flags = 0; 1136 REISERFS_I(inode)->i_prealloc_block = 0;
1061 REISERFS_I(inode)->i_prealloc_block = 0; 1137 REISERFS_I(inode)->i_prealloc_count = 0;
1062 REISERFS_I(inode)->i_prealloc_count = 0; 1138 REISERFS_I(inode)->i_trans_id = 0;
1063 REISERFS_I(inode)->i_trans_id = 0; 1139 REISERFS_I(inode)->i_jl = NULL;
1064 REISERFS_I(inode)->i_jl = NULL; 1140 REISERFS_I(inode)->i_acl_access = NULL;
1065 REISERFS_I(inode)->i_acl_access = NULL; 1141 REISERFS_I(inode)->i_acl_default = NULL;
1066 REISERFS_I(inode)->i_acl_default = NULL; 1142 init_rwsem(&REISERFS_I(inode)->xattr_sem);
1067 init_rwsem (&REISERFS_I(inode)->xattr_sem); 1143
1068 1144 if (stat_data_v1(ih)) {
1069 if (stat_data_v1 (ih)) { 1145 struct stat_data_v1 *sd =
1070 struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); 1146 (struct stat_data_v1 *)B_I_PITEM(bh, ih);
1071 unsigned long blocks; 1147 unsigned long blocks;
1072 1148
1073 set_inode_item_key_version (inode, KEY_FORMAT_3_5); 1149 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1074 set_inode_sd_version (inode, STAT_DATA_V1); 1150 set_inode_sd_version(inode, STAT_DATA_V1);
1075 inode->i_mode = sd_v1_mode(sd); 1151 inode->i_mode = sd_v1_mode(sd);
1076 inode->i_nlink = sd_v1_nlink(sd); 1152 inode->i_nlink = sd_v1_nlink(sd);
1077 inode->i_uid = sd_v1_uid(sd); 1153 inode->i_uid = sd_v1_uid(sd);
1078 inode->i_gid = sd_v1_gid(sd); 1154 inode->i_gid = sd_v1_gid(sd);
1079 inode->i_size = sd_v1_size(sd); 1155 inode->i_size = sd_v1_size(sd);
1080 inode->i_atime.tv_sec = sd_v1_atime(sd); 1156 inode->i_atime.tv_sec = sd_v1_atime(sd);
1081 inode->i_mtime.tv_sec = sd_v1_mtime(sd); 1157 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
1082 inode->i_ctime.tv_sec = sd_v1_ctime(sd); 1158 inode->i_ctime.tv_sec = sd_v1_ctime(sd);
1083 inode->i_atime.tv_nsec = 0; 1159 inode->i_atime.tv_nsec = 0;
1084 inode->i_ctime.tv_nsec = 0; 1160 inode->i_ctime.tv_nsec = 0;
1085 inode->i_mtime.tv_nsec = 0; 1161 inode->i_mtime.tv_nsec = 0;
1086 1162
1087 inode->i_blocks = sd_v1_blocks(sd); 1163 inode->i_blocks = sd_v1_blocks(sd);
1088 inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id); 1164 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1089 blocks = (inode->i_size + 511) >> 9; 1165 blocks = (inode->i_size + 511) >> 9;
1090 blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9); 1166 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1091 if (inode->i_blocks > blocks) { 1167 if (inode->i_blocks > blocks) {
1092 // there was a bug in <=3.5.23 when i_blocks could take negative 1168 // there was a bug in <=3.5.23 when i_blocks could take negative
1093 // values. Starting from 3.5.17 this value could even be stored in 1169 // values. Starting from 3.5.17 this value could even be stored in
1094 // stat data. For such files we set i_blocks based on file 1170 // stat data. For such files we set i_blocks based on file
1095 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be 1171 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
1096 // only updated if file's inode will ever change 1172 // only updated if file's inode will ever change
1097 inode->i_blocks = blocks; 1173 inode->i_blocks = blocks;
1098 } 1174 }
1099
1100 rdev = sd_v1_rdev(sd);
1101 REISERFS_I(inode)->i_first_direct_byte = sd_v1_first_direct_byte(sd);
1102 /* an early bug in the quota code can give us an odd number for the
1103 ** block count. This is incorrect, fix it here.
1104 */
1105 if (inode->i_blocks & 1) {
1106 inode->i_blocks++ ;
1107 }
1108 inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks,
1109 SD_V1_SIZE));
1110 /* nopack is initially zero for v1 objects. For v2 objects,
1111 nopack is initialised from sd_attrs */
1112 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1113 } else {
1114 // new stat data found, but object may have old items
1115 // (directories and symlinks)
1116 struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
1117
1118 inode->i_mode = sd_v2_mode(sd);
1119 inode->i_nlink = sd_v2_nlink(sd);
1120 inode->i_uid = sd_v2_uid(sd);
1121 inode->i_size = sd_v2_size(sd);
1122 inode->i_gid = sd_v2_gid(sd);
1123 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1124 inode->i_atime.tv_sec = sd_v2_atime(sd);
1125 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1126 inode->i_ctime.tv_nsec = 0;
1127 inode->i_mtime.tv_nsec = 0;
1128 inode->i_atime.tv_nsec = 0;
1129 inode->i_blocks = sd_v2_blocks(sd);
1130 rdev = sd_v2_rdev(sd);
1131 if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
1132 inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
1133 else
1134 inode->i_generation = sd_v2_generation(sd);
1135 1175
1136 if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode)) 1176 rdev = sd_v1_rdev(sd);
1137 set_inode_item_key_version (inode, KEY_FORMAT_3_5); 1177 REISERFS_I(inode)->i_first_direct_byte =
1138 else 1178 sd_v1_first_direct_byte(sd);
1139 set_inode_item_key_version (inode, KEY_FORMAT_3_6); 1179 /* an early bug in the quota code can give us an odd number for the
1140 REISERFS_I(inode)->i_first_direct_byte = 0; 1180 ** block count. This is incorrect, fix it here.
1141 set_inode_sd_version (inode, STAT_DATA_V2); 1181 */
1142 inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, 1182 if (inode->i_blocks & 1) {
1143 SD_V2_SIZE)); 1183 inode->i_blocks++;
1144 /* read persistent inode attributes from sd and initalise 1184 }
1145 generic inode flags from them */ 1185 inode_set_bytes(inode,
1146 REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd ); 1186 to_real_used_space(inode, inode->i_blocks,
1147 sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode ); 1187 SD_V1_SIZE));
1148 } 1188 /* nopack is initially zero for v1 objects. For v2 objects,
1149 1189 nopack is initialised from sd_attrs */
1150 pathrelse (path); 1190 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1151 if (S_ISREG (inode->i_mode)) { 1191 } else {
1152 inode->i_op = &reiserfs_file_inode_operations; 1192 // new stat data found, but object may have old items
1153 inode->i_fop = &reiserfs_file_operations; 1193 // (directories and symlinks)
1154 inode->i_mapping->a_ops = &reiserfs_address_space_operations ; 1194 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1155 } else if (S_ISDIR (inode->i_mode)) { 1195
1156 inode->i_op = &reiserfs_dir_inode_operations; 1196 inode->i_mode = sd_v2_mode(sd);
1157 inode->i_fop = &reiserfs_dir_operations; 1197 inode->i_nlink = sd_v2_nlink(sd);
1158 } else if (S_ISLNK (inode->i_mode)) { 1198 inode->i_uid = sd_v2_uid(sd);
1159 inode->i_op = &reiserfs_symlink_inode_operations; 1199 inode->i_size = sd_v2_size(sd);
1160 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1200 inode->i_gid = sd_v2_gid(sd);
1161 } else { 1201 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1162 inode->i_blocks = 0; 1202 inode->i_atime.tv_sec = sd_v2_atime(sd);
1163 inode->i_op = &reiserfs_special_inode_operations; 1203 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1164 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 1204 inode->i_ctime.tv_nsec = 0;
1165 } 1205 inode->i_mtime.tv_nsec = 0;
1166} 1206 inode->i_atime.tv_nsec = 0;
1207 inode->i_blocks = sd_v2_blocks(sd);
1208 rdev = sd_v2_rdev(sd);
1209 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1210 inode->i_generation =
1211 le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1212 else
1213 inode->i_generation = sd_v2_generation(sd);
1167 1214
1215 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1216 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1217 else
1218 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1219 REISERFS_I(inode)->i_first_direct_byte = 0;
1220 set_inode_sd_version(inode, STAT_DATA_V2);
1221 inode_set_bytes(inode,
1222 to_real_used_space(inode, inode->i_blocks,
1223 SD_V2_SIZE));
1224 /* read persistent inode attributes from sd and initalise
1225 generic inode flags from them */
1226 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1227 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1228 }
1229
1230 pathrelse(path);
1231 if (S_ISREG(inode->i_mode)) {
1232 inode->i_op = &reiserfs_file_inode_operations;
1233 inode->i_fop = &reiserfs_file_operations;
1234 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1235 } else if (S_ISDIR(inode->i_mode)) {
1236 inode->i_op = &reiserfs_dir_inode_operations;
1237 inode->i_fop = &reiserfs_dir_operations;
1238 } else if (S_ISLNK(inode->i_mode)) {
1239 inode->i_op = &reiserfs_symlink_inode_operations;
1240 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1241 } else {
1242 inode->i_blocks = 0;
1243 inode->i_op = &reiserfs_special_inode_operations;
1244 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
1245 }
1246}
1168 1247
1169// update new stat data with inode fields 1248// update new stat data with inode fields
1170static void inode2sd (void * sd, struct inode * inode, loff_t size) 1249static void inode2sd(void *sd, struct inode *inode, loff_t size)
1171{ 1250{
1172 struct stat_data * sd_v2 = (struct stat_data *)sd; 1251 struct stat_data *sd_v2 = (struct stat_data *)sd;
1173 __u16 flags; 1252 __u16 flags;
1174 1253
1175 set_sd_v2_mode(sd_v2, inode->i_mode ); 1254 set_sd_v2_mode(sd_v2, inode->i_mode);
1176 set_sd_v2_nlink(sd_v2, inode->i_nlink ); 1255 set_sd_v2_nlink(sd_v2, inode->i_nlink);
1177 set_sd_v2_uid(sd_v2, inode->i_uid ); 1256 set_sd_v2_uid(sd_v2, inode->i_uid);
1178 set_sd_v2_size(sd_v2, size ); 1257 set_sd_v2_size(sd_v2, size);
1179 set_sd_v2_gid(sd_v2, inode->i_gid ); 1258 set_sd_v2_gid(sd_v2, inode->i_gid);
1180 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec ); 1259 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
1181 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec ); 1260 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
1182 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec ); 1261 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
1183 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); 1262 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
1184 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1263 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1185 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); 1264 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
1186 else 1265 else
1187 set_sd_v2_generation(sd_v2, inode->i_generation); 1266 set_sd_v2_generation(sd_v2, inode->i_generation);
1188 flags = REISERFS_I(inode)->i_attrs; 1267 flags = REISERFS_I(inode)->i_attrs;
1189 i_attrs_to_sd_attrs( inode, &flags ); 1268 i_attrs_to_sd_attrs(inode, &flags);
1190 set_sd_v2_attrs( sd_v2, flags ); 1269 set_sd_v2_attrs(sd_v2, flags);
1191} 1270}
1192 1271
1193
1194// used to copy inode's fields to old stat data 1272// used to copy inode's fields to old stat data
1195static void inode2sd_v1 (void * sd, struct inode * inode, loff_t size) 1273static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1196{ 1274{
1197 struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd; 1275 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
1198 1276
1199 set_sd_v1_mode(sd_v1, inode->i_mode ); 1277 set_sd_v1_mode(sd_v1, inode->i_mode);
1200 set_sd_v1_uid(sd_v1, inode->i_uid ); 1278 set_sd_v1_uid(sd_v1, inode->i_uid);
1201 set_sd_v1_gid(sd_v1, inode->i_gid ); 1279 set_sd_v1_gid(sd_v1, inode->i_gid);
1202 set_sd_v1_nlink(sd_v1, inode->i_nlink ); 1280 set_sd_v1_nlink(sd_v1, inode->i_nlink);
1203 set_sd_v1_size(sd_v1, size ); 1281 set_sd_v1_size(sd_v1, size);
1204 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec ); 1282 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
1205 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec ); 1283 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
1206 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec ); 1284 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
1207 1285
1208 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1286 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1209 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); 1287 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
1210 else 1288 else
1211 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); 1289 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1212
1213 // Sigh. i_first_direct_byte is back
1214 set_sd_v1_first_direct_byte(sd_v1, REISERFS_I(inode)->i_first_direct_byte);
1215}
1216 1290
1291 // Sigh. i_first_direct_byte is back
1292 set_sd_v1_first_direct_byte(sd_v1,
1293 REISERFS_I(inode)->i_first_direct_byte);
1294}
1217 1295
1218/* NOTE, you must prepare the buffer head before sending it here, 1296/* NOTE, you must prepare the buffer head before sending it here,
1219** and then log it after the call 1297** and then log it after the call
1220*/ 1298*/
1221static void update_stat_data (struct path * path, struct inode * inode, 1299static void update_stat_data(struct path *path, struct inode *inode,
1222 loff_t size) 1300 loff_t size)
1223{ 1301{
1224 struct buffer_head * bh; 1302 struct buffer_head *bh;
1225 struct item_head * ih; 1303 struct item_head *ih;
1226 1304
1227 bh = PATH_PLAST_BUFFER (path); 1305 bh = PATH_PLAST_BUFFER(path);
1228 ih = PATH_PITEM_HEAD (path); 1306 ih = PATH_PITEM_HEAD(path);
1229 1307
1230 if (!is_statdata_le_ih (ih)) 1308 if (!is_statdata_le_ih(ih))
1231 reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h", 1309 reiserfs_panic(inode->i_sb,
1232 INODE_PKEY (inode), ih); 1310 "vs-13065: update_stat_data: key %k, found item %h",
1233 1311 INODE_PKEY(inode), ih);
1234 if (stat_data_v1 (ih)) { 1312
1235 // path points to old stat data 1313 if (stat_data_v1(ih)) {
1236 inode2sd_v1 (B_I_PITEM (bh, ih), inode, size); 1314 // path points to old stat data
1237 } else { 1315 inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
1238 inode2sd (B_I_PITEM (bh, ih), inode, size); 1316 } else {
1239 } 1317 inode2sd(B_I_PITEM(bh, ih), inode, size);
1240 1318 }
1241 return;
1242}
1243 1319
1320 return;
1321}
1244 1322
1245void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th, 1323void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1246 struct inode * inode, loff_t size) 1324 struct inode *inode, loff_t size)
1247{ 1325{
1248 struct cpu_key key; 1326 struct cpu_key key;
1249 INITIALIZE_PATH(path); 1327 INITIALIZE_PATH(path);
1250 struct buffer_head *bh ; 1328 struct buffer_head *bh;
1251 int fs_gen ; 1329 int fs_gen;
1252 struct item_head *ih, tmp_ih ; 1330 struct item_head *ih, tmp_ih;
1253 int retval; 1331 int retval;
1254 1332
1255 BUG_ON (!th->t_trans_id); 1333 BUG_ON(!th->t_trans_id);
1256 1334
1257 make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant 1335 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant
1258 1336
1259 for(;;) { 1337 for (;;) {
1260 int pos; 1338 int pos;
1261 /* look for the object's stat data */ 1339 /* look for the object's stat data */
1262 retval = search_item (inode->i_sb, &key, &path); 1340 retval = search_item(inode->i_sb, &key, &path);
1263 if (retval == IO_ERROR) { 1341 if (retval == IO_ERROR) {
1264 reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: " 1342 reiserfs_warning(inode->i_sb,
1265 "i/o failure occurred trying to update %K stat data", 1343 "vs-13050: reiserfs_update_sd: "
1266 &key); 1344 "i/o failure occurred trying to update %K stat data",
1267 return; 1345 &key);
1268 } 1346 return;
1269 if (retval == ITEM_NOT_FOUND) { 1347 }
1270 pos = PATH_LAST_POSITION (&path); 1348 if (retval == ITEM_NOT_FOUND) {
1271 pathrelse(&path) ; 1349 pos = PATH_LAST_POSITION(&path);
1272 if (inode->i_nlink == 0) { 1350 pathrelse(&path);
1273 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found");*/ 1351 if (inode->i_nlink == 0) {
1274 return; 1352 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
1275 } 1353 return;
1276 reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: " 1354 }
1277 "stat data of object %k (nlink == %d) not found (pos %d)", 1355 reiserfs_warning(inode->i_sb,
1278 INODE_PKEY (inode), inode->i_nlink, pos); 1356 "vs-13060: reiserfs_update_sd: "
1279 reiserfs_check_path(&path) ; 1357 "stat data of object %k (nlink == %d) not found (pos %d)",
1280 return; 1358 INODE_PKEY(inode), inode->i_nlink,
1281 } 1359 pos);
1282 1360 reiserfs_check_path(&path);
1283 /* sigh, prepare_for_journal might schedule. When it schedules the 1361 return;
1284 ** FS might change. We have to detect that, and loop back to the 1362 }
1285 ** search if the stat data item has moved 1363
1286 */ 1364 /* sigh, prepare_for_journal might schedule. When it schedules the
1287 bh = get_last_bh(&path) ; 1365 ** FS might change. We have to detect that, and loop back to the
1288 ih = get_ih(&path) ; 1366 ** search if the stat data item has moved
1289 copy_item_head (&tmp_ih, ih); 1367 */
1290 fs_gen = get_generation (inode->i_sb); 1368 bh = get_last_bh(&path);
1291 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; 1369 ih = get_ih(&path);
1292 if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { 1370 copy_item_head(&tmp_ih, ih);
1293 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; 1371 fs_gen = get_generation(inode->i_sb);
1294 continue ; /* Stat_data item has been moved after scheduling. */ 1372 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1295 } 1373 if (fs_changed(fs_gen, inode->i_sb)
1296 break; 1374 && item_moved(&tmp_ih, &path)) {
1297 } 1375 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1298 update_stat_data (&path, inode, size); 1376 continue; /* Stat_data item has been moved after scheduling. */
1299 journal_mark_dirty(th, th->t_super, bh) ; 1377 }
1300 pathrelse (&path); 1378 break;
1301 return; 1379 }
1380 update_stat_data(&path, inode, size);
1381 journal_mark_dirty(th, th->t_super, bh);
1382 pathrelse(&path);
1383 return;
1302} 1384}
1303 1385
1304/* reiserfs_read_locked_inode is called to read the inode off disk, and it 1386/* reiserfs_read_locked_inode is called to read the inode off disk, and it
@@ -1307,9 +1389,10 @@ void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th,
1307** corresponding iput might try to delete whatever object the inode last 1389** corresponding iput might try to delete whatever object the inode last
1308** represented. 1390** represented.
1309*/ 1391*/
1310static void reiserfs_make_bad_inode(struct inode *inode) { 1392static void reiserfs_make_bad_inode(struct inode *inode)
1311 memset(INODE_PKEY(inode), 0, KEY_SIZE); 1393{
1312 make_bad_inode(inode); 1394 memset(INODE_PKEY(inode), 0, KEY_SIZE);
1395 make_bad_inode(inode);
1313} 1396}
1314 1397
1315// 1398//
@@ -1317,77 +1400,79 @@ static void reiserfs_make_bad_inode(struct inode *inode) {
1317// evolved as the prototype did 1400// evolved as the prototype did
1318// 1401//
1319 1402
1320int reiserfs_init_locked_inode (struct inode * inode, void *p) 1403int reiserfs_init_locked_inode(struct inode *inode, void *p)
1321{ 1404{
1322 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p ; 1405 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
1323 inode->i_ino = args->objectid; 1406 inode->i_ino = args->objectid;
1324 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); 1407 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
1325 return 0; 1408 return 0;
1326} 1409}
1327 1410
1328/* looks for stat data in the tree, and fills up the fields of in-core 1411/* looks for stat data in the tree, and fills up the fields of in-core
1329 inode stat data fields */ 1412 inode stat data fields */
1330void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args) 1413void reiserfs_read_locked_inode(struct inode *inode,
1414 struct reiserfs_iget_args *args)
1331{ 1415{
1332 INITIALIZE_PATH (path_to_sd); 1416 INITIALIZE_PATH(path_to_sd);
1333 struct cpu_key key; 1417 struct cpu_key key;
1334 unsigned long dirino; 1418 unsigned long dirino;
1335 int retval; 1419 int retval;
1336 1420
1337 dirino = args->dirid ; 1421 dirino = args->dirid;
1338 1422
1339 /* set version 1, version 2 could be used too, because stat data 1423 /* set version 1, version 2 could be used too, because stat data
1340 key is the same in both versions */ 1424 key is the same in both versions */
1341 key.version = KEY_FORMAT_3_5; 1425 key.version = KEY_FORMAT_3_5;
1342 key.on_disk_key.k_dir_id = dirino; 1426 key.on_disk_key.k_dir_id = dirino;
1343 key.on_disk_key.k_objectid = inode->i_ino; 1427 key.on_disk_key.k_objectid = inode->i_ino;
1344 key.on_disk_key.k_offset = 0; 1428 key.on_disk_key.k_offset = 0;
1345 key.on_disk_key.k_type = 0; 1429 key.on_disk_key.k_type = 0;
1346 1430
1347 /* look for the object's stat data */ 1431 /* look for the object's stat data */
1348 retval = search_item (inode->i_sb, &key, &path_to_sd); 1432 retval = search_item(inode->i_sb, &key, &path_to_sd);
1349 if (retval == IO_ERROR) { 1433 if (retval == IO_ERROR) {
1350 reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_locked_inode: " 1434 reiserfs_warning(inode->i_sb,
1351 "i/o failure occurred trying to find stat data of %K", 1435 "vs-13070: reiserfs_read_locked_inode: "
1352 &key); 1436 "i/o failure occurred trying to find stat data of %K",
1353 reiserfs_make_bad_inode(inode) ; 1437 &key);
1354 return; 1438 reiserfs_make_bad_inode(inode);
1355 } 1439 return;
1356 if (retval != ITEM_FOUND) { 1440 }
1357 /* a stale NFS handle can trigger this without it being an error */ 1441 if (retval != ITEM_FOUND) {
1358 pathrelse (&path_to_sd); 1442 /* a stale NFS handle can trigger this without it being an error */
1359 reiserfs_make_bad_inode(inode) ; 1443 pathrelse(&path_to_sd);
1360 inode->i_nlink = 0; 1444 reiserfs_make_bad_inode(inode);
1361 return; 1445 inode->i_nlink = 0;
1362 } 1446 return;
1363 1447 }
1364 init_inode (inode, &path_to_sd); 1448
1365 1449 init_inode(inode, &path_to_sd);
1366 /* It is possible that knfsd is trying to access inode of a file 1450
1367 that is being removed from the disk by some other thread. As we 1451 /* It is possible that knfsd is trying to access inode of a file
1368 update sd on unlink all that is required is to check for nlink 1452 that is being removed from the disk by some other thread. As we
1369 here. This bug was first found by Sizif when debugging 1453 update sd on unlink all that is required is to check for nlink
1370 SquidNG/Butterfly, forgotten, and found again after Philippe 1454 here. This bug was first found by Sizif when debugging
1371 Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 1455 SquidNG/Butterfly, forgotten, and found again after Philippe
1372 1456 Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1373 More logical fix would require changes in fs/inode.c:iput() to 1457
1374 remove inode from hash-table _after_ fs cleaned disk stuff up and 1458 More logical fix would require changes in fs/inode.c:iput() to
1375 in iget() to return NULL if I_FREEING inode is found in 1459 remove inode from hash-table _after_ fs cleaned disk stuff up and
1376 hash-table. */ 1460 in iget() to return NULL if I_FREEING inode is found in
1377 /* Currently there is one place where it's ok to meet inode with 1461 hash-table. */
1378 nlink==0: processing of open-unlinked and half-truncated files 1462 /* Currently there is one place where it's ok to meet inode with
1379 during mount (fs/reiserfs/super.c:finish_unfinished()). */ 1463 nlink==0: processing of open-unlinked and half-truncated files
1380 if( ( inode -> i_nlink == 0 ) && 1464 during mount (fs/reiserfs/super.c:finish_unfinished()). */
1381 ! REISERFS_SB(inode -> i_sb) -> s_is_unlinked_ok ) { 1465 if ((inode->i_nlink == 0) &&
1382 reiserfs_warning (inode->i_sb, 1466 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1383 "vs-13075: reiserfs_read_locked_inode: " 1467 reiserfs_warning(inode->i_sb,
1384 "dead inode read from disk %K. " 1468 "vs-13075: reiserfs_read_locked_inode: "
1385 "This is likely to be race with knfsd. Ignore", 1469 "dead inode read from disk %K. "
1386 &key ); 1470 "This is likely to be race with knfsd. Ignore",
1387 reiserfs_make_bad_inode( inode ); 1471 &key);
1388 } 1472 reiserfs_make_bad_inode(inode);
1389 1473 }
1390 reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */ 1474
1475 reiserfs_check_path(&path_to_sd); /* init inode should be relsing */
1391 1476
1392} 1477}
1393 1478
@@ -1403,140 +1488,148 @@ void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args
1403 * inode numbers (objectids) are distinguished by parent directory ids. 1488 * inode numbers (objectids) are distinguished by parent directory ids.
1404 * 1489 *
1405 */ 1490 */
1406int reiserfs_find_actor( struct inode *inode, void *opaque ) 1491int reiserfs_find_actor(struct inode *inode, void *opaque)
1407{ 1492{
1408 struct reiserfs_iget_args *args; 1493 struct reiserfs_iget_args *args;
1409 1494
1410 args = opaque; 1495 args = opaque;
1411 /* args is already in CPU order */ 1496 /* args is already in CPU order */
1412 return (inode->i_ino == args->objectid) && 1497 return (inode->i_ino == args->objectid) &&
1413 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); 1498 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
1414} 1499}
1415 1500
1416struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key) 1501struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
1417{ 1502{
1418 struct inode * inode; 1503 struct inode *inode;
1419 struct reiserfs_iget_args args ; 1504 struct reiserfs_iget_args args;
1420 1505
1421 args.objectid = key->on_disk_key.k_objectid ; 1506 args.objectid = key->on_disk_key.k_objectid;
1422 args.dirid = key->on_disk_key.k_dir_id ; 1507 args.dirid = key->on_disk_key.k_dir_id;
1423 inode = iget5_locked (s, key->on_disk_key.k_objectid, 1508 inode = iget5_locked(s, key->on_disk_key.k_objectid,
1424 reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); 1509 reiserfs_find_actor, reiserfs_init_locked_inode,
1425 if (!inode) 1510 (void *)(&args));
1426 return ERR_PTR(-ENOMEM) ; 1511 if (!inode)
1427 1512 return ERR_PTR(-ENOMEM);
1428 if (inode->i_state & I_NEW) { 1513
1429 reiserfs_read_locked_inode(inode, &args); 1514 if (inode->i_state & I_NEW) {
1430 unlock_new_inode(inode); 1515 reiserfs_read_locked_inode(inode, &args);
1431 } 1516 unlock_new_inode(inode);
1432 1517 }
1433 if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) { 1518
1434 /* either due to i/o error or a stale NFS handle */ 1519 if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
1435 iput (inode); 1520 /* either due to i/o error or a stale NFS handle */
1436 inode = NULL; 1521 iput(inode);
1437 } 1522 inode = NULL;
1438 return inode; 1523 }
1524 return inode;
1439} 1525}
1440 1526
1441struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) 1527struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp)
1442{ 1528{
1443 __u32 *data = vobjp; 1529 __u32 *data = vobjp;
1444 struct cpu_key key ; 1530 struct cpu_key key;
1445 struct dentry *result; 1531 struct dentry *result;
1446 struct inode *inode; 1532 struct inode *inode;
1447 1533
1448 key.on_disk_key.k_objectid = data[0] ; 1534 key.on_disk_key.k_objectid = data[0];
1449 key.on_disk_key.k_dir_id = data[1] ; 1535 key.on_disk_key.k_dir_id = data[1];
1450 reiserfs_write_lock(sb); 1536 reiserfs_write_lock(sb);
1451 inode = reiserfs_iget(sb, &key) ; 1537 inode = reiserfs_iget(sb, &key);
1452 if (inode && !IS_ERR(inode) && data[2] != 0 && 1538 if (inode && !IS_ERR(inode) && data[2] != 0 &&
1453 data[2] != inode->i_generation) { 1539 data[2] != inode->i_generation) {
1454 iput(inode) ; 1540 iput(inode);
1455 inode = NULL ; 1541 inode = NULL;
1456 } 1542 }
1457 reiserfs_write_unlock(sb); 1543 reiserfs_write_unlock(sb);
1458 if (!inode) 1544 if (!inode)
1459 inode = ERR_PTR(-ESTALE); 1545 inode = ERR_PTR(-ESTALE);
1460 if (IS_ERR(inode)) 1546 if (IS_ERR(inode))
1461 return ERR_PTR(PTR_ERR(inode)); 1547 return ERR_PTR(PTR_ERR(inode));
1462 result = d_alloc_anon(inode); 1548 result = d_alloc_anon(inode);
1463 if (!result) { 1549 if (!result) {
1464 iput(inode); 1550 iput(inode);
1465 return ERR_PTR(-ENOMEM); 1551 return ERR_PTR(-ENOMEM);
1466 } 1552 }
1467 return result; 1553 return result;
1468} 1554}
1469 1555
1470struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data, 1556struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data,
1471 int len, int fhtype, 1557 int len, int fhtype,
1472 int (*acceptable)(void *contect, struct dentry *de), 1558 int (*acceptable) (void *contect,
1473 void *context) { 1559 struct dentry * de),
1474 __u32 obj[3], parent[3]; 1560 void *context)
1475 1561{
1476 /* fhtype happens to reflect the number of u32s encoded. 1562 __u32 obj[3], parent[3];
1477 * due to a bug in earlier code, fhtype might indicate there 1563
1478 * are more u32s then actually fitted. 1564 /* fhtype happens to reflect the number of u32s encoded.
1479 * so if fhtype seems to be more than len, reduce fhtype. 1565 * due to a bug in earlier code, fhtype might indicate there
1480 * Valid types are: 1566 * are more u32s then actually fitted.
1481 * 2 - objectid + dir_id - legacy support 1567 * so if fhtype seems to be more than len, reduce fhtype.
1482 * 3 - objectid + dir_id + generation 1568 * Valid types are:
1483 * 4 - objectid + dir_id + objectid and dirid of parent - legacy 1569 * 2 - objectid + dir_id - legacy support
1484 * 5 - objectid + dir_id + generation + objectid and dirid of parent 1570 * 3 - objectid + dir_id + generation
1485 * 6 - as above plus generation of directory 1571 * 4 - objectid + dir_id + objectid and dirid of parent - legacy
1486 * 6 does not fit in NFSv2 handles 1572 * 5 - objectid + dir_id + generation + objectid and dirid of parent
1487 */ 1573 * 6 - as above plus generation of directory
1488 if (fhtype > len) { 1574 * 6 does not fit in NFSv2 handles
1489 if (fhtype != 6 || len != 5) 1575 */
1490 reiserfs_warning (sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd", 1576 if (fhtype > len) {
1491 fhtype, len); 1577 if (fhtype != 6 || len != 5)
1492 fhtype = 5; 1578 reiserfs_warning(sb,
1493 } 1579 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
1494 1580 fhtype, len);
1495 obj[0] = data[0]; 1581 fhtype = 5;
1496 obj[1] = data[1]; 1582 }
1497 if (fhtype == 3 || fhtype >= 5)
1498 obj[2] = data[2];
1499 else obj[2] = 0; /* generation number */
1500
1501 if (fhtype >= 4) {
1502 parent[0] = data[fhtype>=5?3:2] ;
1503 parent[1] = data[fhtype>=5?4:3] ;
1504 if (fhtype == 6)
1505 parent[2] = data[5];
1506 else parent[2] = 0;
1507 }
1508 return sb->s_export_op->find_exported_dentry(sb, obj, fhtype < 4 ? NULL : parent,
1509 acceptable, context);
1510}
1511 1583
1512int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) { 1584 obj[0] = data[0];
1513 struct inode *inode = dentry->d_inode ; 1585 obj[1] = data[1];
1514 int maxlen = *lenp; 1586 if (fhtype == 3 || fhtype >= 5)
1515 1587 obj[2] = data[2];
1516 if (maxlen < 3) 1588 else
1517 return 255 ; 1589 obj[2] = 0; /* generation number */
1518 1590
1519 data[0] = inode->i_ino ; 1591 if (fhtype >= 4) {
1520 data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ; 1592 parent[0] = data[fhtype >= 5 ? 3 : 2];
1521 data[2] = inode->i_generation ; 1593 parent[1] = data[fhtype >= 5 ? 4 : 3];
1522 *lenp = 3 ; 1594 if (fhtype == 6)
1523 /* no room for directory info? return what we've stored so far */ 1595 parent[2] = data[5];
1524 if (maxlen < 5 || ! need_parent) 1596 else
1525 return 3 ; 1597 parent[2] = 0;
1526 1598 }
1527 spin_lock(&dentry->d_lock); 1599 return sb->s_export_op->find_exported_dentry(sb, obj,
1528 inode = dentry->d_parent->d_inode ; 1600 fhtype < 4 ? NULL : parent,
1529 data[3] = inode->i_ino ; 1601 acceptable, context);
1530 data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
1531 *lenp = 5 ;
1532 if (maxlen >= 6) {
1533 data[5] = inode->i_generation ;
1534 *lenp = 6 ;
1535 }
1536 spin_unlock(&dentry->d_lock);
1537 return *lenp ;
1538} 1602}
1539 1603
1604int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1605 int need_parent)
1606{
1607 struct inode *inode = dentry->d_inode;
1608 int maxlen = *lenp;
1609
1610 if (maxlen < 3)
1611 return 255;
1612
1613 data[0] = inode->i_ino;
1614 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1615 data[2] = inode->i_generation;
1616 *lenp = 3;
1617 /* no room for directory info? return what we've stored so far */
1618 if (maxlen < 5 || !need_parent)
1619 return 3;
1620
1621 spin_lock(&dentry->d_lock);
1622 inode = dentry->d_parent->d_inode;
1623 data[3] = inode->i_ino;
1624 data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1625 *lenp = 5;
1626 if (maxlen >= 6) {
1627 data[5] = inode->i_generation;
1628 *lenp = 6;
1629 }
1630 spin_unlock(&dentry->d_lock);
1631 return *lenp;
1632}
1540 1633
1541/* looks for stat data, then copies fields to it, marks the buffer 1634/* looks for stat data, then copies fields to it, marks the buffer
1542 containing stat data as dirty */ 1635 containing stat data as dirty */
@@ -1545,120 +1638,127 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_p
1545** to properly mark inodes for datasync and such, but only actually 1638** to properly mark inodes for datasync and such, but only actually
1546** does something when called for a synchronous update. 1639** does something when called for a synchronous update.
1547*/ 1640*/
1548int reiserfs_write_inode (struct inode * inode, int do_sync) { 1641int reiserfs_write_inode(struct inode *inode, int do_sync)
1549 struct reiserfs_transaction_handle th ; 1642{
1550 int jbegin_count = 1 ; 1643 struct reiserfs_transaction_handle th;
1551 1644 int jbegin_count = 1;
1552 if (inode->i_sb->s_flags & MS_RDONLY) 1645
1553 return -EROFS; 1646 if (inode->i_sb->s_flags & MS_RDONLY)
1554 /* memory pressure can sometimes initiate write_inode calls with sync == 1, 1647 return -EROFS;
1555 ** these cases are just when the system needs ram, not when the 1648 /* memory pressure can sometimes initiate write_inode calls with sync == 1,
1556 ** inode needs to reach disk for safety, and they can safely be 1649 ** these cases are just when the system needs ram, not when the
1557 ** ignored because the altered inode has already been logged. 1650 ** inode needs to reach disk for safety, and they can safely be
1558 */ 1651 ** ignored because the altered inode has already been logged.
1559 if (do_sync && !(current->flags & PF_MEMALLOC)) { 1652 */
1560 reiserfs_write_lock(inode->i_sb); 1653 if (do_sync && !(current->flags & PF_MEMALLOC)) {
1561 if (!journal_begin(&th, inode->i_sb, jbegin_count)) { 1654 reiserfs_write_lock(inode->i_sb);
1562 reiserfs_update_sd (&th, inode); 1655 if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1563 journal_end_sync(&th, inode->i_sb, jbegin_count) ; 1656 reiserfs_update_sd(&th, inode);
1564 } 1657 journal_end_sync(&th, inode->i_sb, jbegin_count);
1565 reiserfs_write_unlock(inode->i_sb); 1658 }
1566 } 1659 reiserfs_write_unlock(inode->i_sb);
1567 return 0; 1660 }
1661 return 0;
1568} 1662}
1569 1663
1570/* stat data of new object is inserted already, this inserts the item 1664/* stat data of new object is inserted already, this inserts the item
1571 containing "." and ".." entries */ 1665 containing "." and ".." entries */
1572static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, 1666static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1573 struct inode *inode, 1667 struct inode *inode,
1574 struct item_head * ih, struct path * path, 1668 struct item_head *ih, struct path *path,
1575 struct inode * dir) 1669 struct inode *dir)
1576{ 1670{
1577 struct super_block * sb = th->t_super; 1671 struct super_block *sb = th->t_super;
1578 char empty_dir [EMPTY_DIR_SIZE]; 1672 char empty_dir[EMPTY_DIR_SIZE];
1579 char * body = empty_dir; 1673 char *body = empty_dir;
1580 struct cpu_key key; 1674 struct cpu_key key;
1581 int retval; 1675 int retval;
1582 1676
1583 BUG_ON (!th->t_trans_id); 1677 BUG_ON(!th->t_trans_id);
1584 1678
1585 _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id), 1679 _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1586 le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/); 1680 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1587 1681 TYPE_DIRENTRY, 3 /*key length */ );
1588 /* compose item head for new item. Directories consist of items of 1682
1589 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it 1683 /* compose item head for new item. Directories consist of items of
1590 is done by reiserfs_new_inode */ 1684 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1591 if (old_format_only (sb)) { 1685 is done by reiserfs_new_inode */
1592 make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); 1686 if (old_format_only(sb)) {
1593 1687 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1594 make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, 1688 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1595 INODE_PKEY (dir)->k_dir_id, 1689
1596 INODE_PKEY (dir)->k_objectid ); 1690 make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1597 } else { 1691 ih->ih_key.k_objectid,
1598 make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); 1692 INODE_PKEY(dir)->k_dir_id,
1599 1693 INODE_PKEY(dir)->k_objectid);
1600 make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, 1694 } else {
1601 INODE_PKEY (dir)->k_dir_id, 1695 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1602 INODE_PKEY (dir)->k_objectid ); 1696 TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1603 } 1697
1604 1698 make_empty_dir_item(body, ih->ih_key.k_dir_id,
1605 /* look for place in the tree for new item */ 1699 ih->ih_key.k_objectid,
1606 retval = search_item (sb, &key, path); 1700 INODE_PKEY(dir)->k_dir_id,
1607 if (retval == IO_ERROR) { 1701 INODE_PKEY(dir)->k_objectid);
1608 reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: " 1702 }
1609 "i/o failure occurred creating new directory"); 1703
1610 return -EIO; 1704 /* look for place in the tree for new item */
1611 } 1705 retval = search_item(sb, &key, path);
1612 if (retval == ITEM_FOUND) { 1706 if (retval == IO_ERROR) {
1613 pathrelse (path); 1707 reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: "
1614 reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: " 1708 "i/o failure occurred creating new directory");
1615 "object with this key exists (%k)", &(ih->ih_key)); 1709 return -EIO;
1616 return -EEXIST; 1710 }
1617 } 1711 if (retval == ITEM_FOUND) {
1618 1712 pathrelse(path);
1619 /* insert item, that is empty directory item */ 1713 reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: "
1620 return reiserfs_insert_item (th, path, &key, ih, inode, body); 1714 "object with this key exists (%k)",
1621} 1715 &(ih->ih_key));
1716 return -EEXIST;
1717 }
1622 1718
1719 /* insert item, that is empty directory item */
1720 return reiserfs_insert_item(th, path, &key, ih, inode, body);
1721}
1623 1722
1624/* stat data of object has been inserted, this inserts the item 1723/* stat data of object has been inserted, this inserts the item
1625 containing the body of symlink */ 1724 containing the body of symlink */
1626static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, 1725static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */
1627 struct inode *inode, /* Inode of symlink */ 1726 struct item_head *ih,
1628 struct item_head * ih, 1727 struct path *path, const char *symname,
1629 struct path * path, const char * symname, int item_len) 1728 int item_len)
1630{ 1729{
1631 struct super_block * sb = th->t_super; 1730 struct super_block *sb = th->t_super;
1632 struct cpu_key key; 1731 struct cpu_key key;
1633 int retval; 1732 int retval;
1634 1733
1635 BUG_ON (!th->t_trans_id); 1734 BUG_ON(!th->t_trans_id);
1636 1735
1637 _make_cpu_key (&key, KEY_FORMAT_3_5, 1736 _make_cpu_key(&key, KEY_FORMAT_3_5,
1638 le32_to_cpu (ih->ih_key.k_dir_id), 1737 le32_to_cpu(ih->ih_key.k_dir_id),
1639 le32_to_cpu (ih->ih_key.k_objectid), 1738 le32_to_cpu(ih->ih_key.k_objectid),
1640 1, TYPE_DIRECT, 3/*key length*/); 1739 1, TYPE_DIRECT, 3 /*key length */ );
1641 1740
1642 make_le_item_head (ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/); 1741 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1643 1742 0 /*free_space */ );
1644 /* look for place in the tree for new item */ 1743
1645 retval = search_item (sb, &key, path); 1744 /* look for place in the tree for new item */
1646 if (retval == IO_ERROR) { 1745 retval = search_item(sb, &key, path);
1647 reiserfs_warning (sb, "vs-13080: reiserfs_new_symlinik: " 1746 if (retval == IO_ERROR) {
1648 "i/o failure occurred creating new symlink"); 1747 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: "
1649 return -EIO; 1748 "i/o failure occurred creating new symlink");
1650 } 1749 return -EIO;
1651 if (retval == ITEM_FOUND) { 1750 }
1652 pathrelse (path); 1751 if (retval == ITEM_FOUND) {
1653 reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: " 1752 pathrelse(path);
1654 "object with this key exists (%k)", &(ih->ih_key)); 1753 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: "
1655 return -EEXIST; 1754 "object with this key exists (%k)",
1656 } 1755 &(ih->ih_key));
1657 1756 return -EEXIST;
1658 /* insert item, that is body of symlink */ 1757 }
1659 return reiserfs_insert_item (th, path, &key, ih, inode, symname);
1660}
1661 1758
1759 /* insert item, that is body of symlink */
1760 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1761}
1662 1762
1663/* inserts the stat data into the tree, and then calls 1763/* inserts the stat data into the tree, and then calls
1664 reiserfs_new_directory (to insert ".", ".." item if new object is 1764 reiserfs_new_directory (to insert ".", ".." item if new object is
@@ -1669,213 +1769,229 @@ static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
1669 non-zero due to an error, we have to drop the quota previously allocated 1769 non-zero due to an error, we have to drop the quota previously allocated
1670 for the fresh inode. This can only be done outside a transaction, so 1770 for the fresh inode. This can only be done outside a transaction, so
1671 if we return non-zero, we also end the transaction. */ 1771 if we return non-zero, we also end the transaction. */
1672int reiserfs_new_inode (struct reiserfs_transaction_handle *th, 1772int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1673 struct inode * dir, int mode, 1773 struct inode *dir, int mode, const char *symname,
1674 const char * symname, 1774 /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1675 /* 0 for regular, EMTRY_DIR_SIZE for dirs, 1775 strlen (symname) for symlinks) */
1676 strlen (symname) for symlinks)*/ 1776 loff_t i_size, struct dentry *dentry,
1677 loff_t i_size, struct dentry *dentry, 1777 struct inode *inode)
1678 struct inode *inode)
1679{ 1778{
1680 struct super_block * sb; 1779 struct super_block *sb;
1681 INITIALIZE_PATH (path_to_key); 1780 INITIALIZE_PATH(path_to_key);
1682 struct cpu_key key; 1781 struct cpu_key key;
1683 struct item_head ih; 1782 struct item_head ih;
1684 struct stat_data sd; 1783 struct stat_data sd;
1685 int retval; 1784 int retval;
1686 int err; 1785 int err;
1687 1786
1688 BUG_ON (!th->t_trans_id); 1787 BUG_ON(!th->t_trans_id);
1689 1788
1690 if (DQUOT_ALLOC_INODE(inode)) { 1789 if (DQUOT_ALLOC_INODE(inode)) {
1691 err = -EDQUOT; 1790 err = -EDQUOT;
1692 goto out_end_trans; 1791 goto out_end_trans;
1693 } 1792 }
1694 if (!dir || !dir->i_nlink) { 1793 if (!dir || !dir->i_nlink) {
1695 err = -EPERM; 1794 err = -EPERM;
1696 goto out_bad_inode; 1795 goto out_bad_inode;
1697 } 1796 }
1698 1797
1699 sb = dir->i_sb; 1798 sb = dir->i_sb;
1700 1799
1701 /* item head of new item */ 1800 /* item head of new item */
1702 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); 1801 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1703 ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th)); 1802 ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1704 if (!ih.ih_key.k_objectid) { 1803 if (!ih.ih_key.k_objectid) {
1705 err = -ENOMEM; 1804 err = -ENOMEM;
1706 goto out_bad_inode ; 1805 goto out_bad_inode;
1707 } 1806 }
1708 if (old_format_only (sb)) 1807 if (old_format_only(sb))
1709 /* not a perfect generation count, as object ids can be reused, but 1808 /* not a perfect generation count, as object ids can be reused, but
1710 ** this is as good as reiserfs can do right now. 1809 ** this is as good as reiserfs can do right now.
1711 ** note that the private part of inode isn't filled in yet, we have 1810 ** note that the private part of inode isn't filled in yet, we have
1712 ** to use the directory. 1811 ** to use the directory.
1713 */ 1812 */
1714 inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid); 1813 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1715 else 1814 else
1716#if defined( USE_INODE_GENERATION_COUNTER ) 1815#if defined( USE_INODE_GENERATION_COUNTER )
1717 inode->i_generation = le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); 1816 inode->i_generation =
1817 le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1718#else 1818#else
1719 inode->i_generation = ++event; 1819 inode->i_generation = ++event;
1720#endif 1820#endif
1721 1821
1722 /* fill stat data */ 1822 /* fill stat data */
1723 inode->i_nlink = (S_ISDIR (mode) ? 2 : 1); 1823 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
1724 1824
1725 /* uid and gid must already be set by the caller for quota init */ 1825 /* uid and gid must already be set by the caller for quota init */
1726 1826
1727 /* symlink cannot be immutable or append only, right? */ 1827 /* symlink cannot be immutable or append only, right? */
1728 if( S_ISLNK( inode -> i_mode ) ) 1828 if (S_ISLNK(inode->i_mode))
1729 inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND ); 1829 inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1730 1830
1731 inode->i_mtime = inode->i_atime = inode->i_ctime = 1831 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1732 CURRENT_TIME_SEC; 1832 inode->i_size = i_size;
1733 inode->i_size = i_size; 1833 inode->i_blocks = 0;
1734 inode->i_blocks = 0; 1834 inode->i_bytes = 0;
1735 inode->i_bytes = 0; 1835 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1736 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : 1836 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1737 U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/; 1837
1738 1838 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1739 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); 1839 REISERFS_I(inode)->i_flags = 0;
1740 REISERFS_I(inode)->i_flags = 0; 1840 REISERFS_I(inode)->i_prealloc_block = 0;
1741 REISERFS_I(inode)->i_prealloc_block = 0; 1841 REISERFS_I(inode)->i_prealloc_count = 0;
1742 REISERFS_I(inode)->i_prealloc_count = 0; 1842 REISERFS_I(inode)->i_trans_id = 0;
1743 REISERFS_I(inode)->i_trans_id = 0; 1843 REISERFS_I(inode)->i_jl = NULL;
1744 REISERFS_I(inode)->i_jl = NULL; 1844 REISERFS_I(inode)->i_attrs =
1745 REISERFS_I(inode)->i_attrs = 1845 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1746 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1846 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1747 sd_attrs_to_i_attrs( REISERFS_I(inode) -> i_attrs, inode ); 1847 REISERFS_I(inode)->i_acl_access = NULL;
1748 REISERFS_I(inode)->i_acl_access = NULL; 1848 REISERFS_I(inode)->i_acl_default = NULL;
1749 REISERFS_I(inode)->i_acl_default = NULL; 1849 init_rwsem(&REISERFS_I(inode)->xattr_sem);
1750 init_rwsem (&REISERFS_I(inode)->xattr_sem); 1850
1751 1851 if (old_format_only(sb))
1752 if (old_format_only (sb)) 1852 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1753 make_le_item_head (&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); 1853 TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1754 else 1854 else
1755 make_le_item_head (&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); 1855 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1756 1856 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1757 /* key to search for correct place for new stat data */ 1857
1758 _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id), 1858 /* key to search for correct place for new stat data */
1759 le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/); 1859 _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1760 1860 le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1761 /* find proper place for inserting of stat data */ 1861 TYPE_STAT_DATA, 3 /*key length */ );
1762 retval = search_item (sb, &key, &path_to_key); 1862
1763 if (retval == IO_ERROR) { 1863 /* find proper place for inserting of stat data */
1764 err = -EIO; 1864 retval = search_item(sb, &key, &path_to_key);
1765 goto out_bad_inode; 1865 if (retval == IO_ERROR) {
1766 } 1866 err = -EIO;
1767 if (retval == ITEM_FOUND) { 1867 goto out_bad_inode;
1768 pathrelse (&path_to_key); 1868 }
1769 err = -EEXIST; 1869 if (retval == ITEM_FOUND) {
1770 goto out_bad_inode; 1870 pathrelse(&path_to_key);
1771 } 1871 err = -EEXIST;
1772 if (old_format_only (sb)) { 1872 goto out_bad_inode;
1773 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { 1873 }
1774 pathrelse (&path_to_key); 1874 if (old_format_only(sb)) {
1775 /* i_uid or i_gid is too big to be stored in stat data v3.5 */ 1875 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1776 err = -EINVAL; 1876 pathrelse(&path_to_key);
1777 goto out_bad_inode; 1877 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1778 } 1878 err = -EINVAL;
1779 inode2sd_v1 (&sd, inode, inode->i_size); 1879 goto out_bad_inode;
1780 } else { 1880 }
1781 inode2sd (&sd, inode, inode->i_size); 1881 inode2sd_v1(&sd, inode, inode->i_size);
1782 } 1882 } else {
1783 // these do not go to on-disk stat data 1883 inode2sd(&sd, inode, inode->i_size);
1784 inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); 1884 }
1785 inode->i_blksize = reiserfs_default_io_size; 1885 // these do not go to on-disk stat data
1786 1886 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1787 // store in in-core inode the key of stat data and version all 1887 inode->i_blksize = reiserfs_default_io_size;
1788 // object items will have (directory items will have old offset 1888
1789 // format, other new objects will consist of new items) 1889 // store in in-core inode the key of stat data and version all
1790 memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE); 1890 // object items will have (directory items will have old offset
1791 if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode)) 1891 // format, other new objects will consist of new items)
1792 set_inode_item_key_version (inode, KEY_FORMAT_3_5); 1892 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1793 else 1893 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1794 set_inode_item_key_version (inode, KEY_FORMAT_3_6); 1894 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1795 if (old_format_only (sb)) 1895 else
1796 set_inode_sd_version (inode, STAT_DATA_V1); 1896 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1797 else 1897 if (old_format_only(sb))
1798 set_inode_sd_version (inode, STAT_DATA_V2); 1898 set_inode_sd_version(inode, STAT_DATA_V1);
1799 1899 else
1800 /* insert the stat data into the tree */ 1900 set_inode_sd_version(inode, STAT_DATA_V2);
1901
1902 /* insert the stat data into the tree */
1801#ifdef DISPLACE_NEW_PACKING_LOCALITIES 1903#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1802 if (REISERFS_I(dir)->new_packing_locality) 1904 if (REISERFS_I(dir)->new_packing_locality)
1803 th->displace_new_blocks = 1; 1905 th->displace_new_blocks = 1;
1804#endif 1906#endif
1805 retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, inode, (char *)(&sd)); 1907 retval =
1806 if (retval) { 1908 reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1807 err = retval; 1909 (char *)(&sd));
1808 reiserfs_check_path(&path_to_key) ; 1910 if (retval) {
1809 goto out_bad_inode; 1911 err = retval;
1810 } 1912 reiserfs_check_path(&path_to_key);
1811 1913 goto out_bad_inode;
1914 }
1812#ifdef DISPLACE_NEW_PACKING_LOCALITIES 1915#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1813 if (!th->displace_new_blocks) 1916 if (!th->displace_new_blocks)
1814 REISERFS_I(dir)->new_packing_locality = 0; 1917 REISERFS_I(dir)->new_packing_locality = 0;
1815#endif 1918#endif
1816 if (S_ISDIR(mode)) { 1919 if (S_ISDIR(mode)) {
1817 /* insert item with "." and ".." */ 1920 /* insert item with "." and ".." */
1818 retval = reiserfs_new_directory (th, inode, &ih, &path_to_key, dir); 1921 retval =
1819 } 1922 reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1820 1923 }
1821 if (S_ISLNK(mode)) { 1924
1822 /* insert body of symlink */ 1925 if (S_ISLNK(mode)) {
1823 if (!old_format_only (sb)) 1926 /* insert body of symlink */
1824 i_size = ROUND_UP(i_size); 1927 if (!old_format_only(sb))
1825 retval = reiserfs_new_symlink (th, inode, &ih, &path_to_key, symname, i_size); 1928 i_size = ROUND_UP(i_size);
1826 } 1929 retval =
1827 if (retval) { 1930 reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1828 err = retval; 1931 i_size);
1829 reiserfs_check_path(&path_to_key) ; 1932 }
1830 journal_end(th, th->t_super, th->t_blocks_allocated); 1933 if (retval) {
1831 goto out_inserted_sd; 1934 err = retval;
1832 } 1935 reiserfs_check_path(&path_to_key);
1833 1936 journal_end(th, th->t_super, th->t_blocks_allocated);
1834 /* XXX CHECK THIS */ 1937 goto out_inserted_sd;
1835 if (reiserfs_posixacl (inode->i_sb)) { 1938 }
1836 retval = reiserfs_inherit_default_acl (dir, dentry, inode); 1939
1837 if (retval) { 1940 /* XXX CHECK THIS */
1838 err = retval; 1941 if (reiserfs_posixacl(inode->i_sb)) {
1839 reiserfs_check_path(&path_to_key) ; 1942 retval = reiserfs_inherit_default_acl(dir, dentry, inode);
1840 journal_end(th, th->t_super, th->t_blocks_allocated); 1943 if (retval) {
1841 goto out_inserted_sd; 1944 err = retval;
1842 } 1945 reiserfs_check_path(&path_to_key);
1843 } else if (inode->i_sb->s_flags & MS_POSIXACL) { 1946 journal_end(th, th->t_super, th->t_blocks_allocated);
1844 reiserfs_warning (inode->i_sb, "ACLs aren't enabled in the fs, " 1947 goto out_inserted_sd;
1845 "but vfs thinks they are!"); 1948 }
1846 } else if (is_reiserfs_priv_object (dir)) { 1949 } else if (inode->i_sb->s_flags & MS_POSIXACL) {
1847 reiserfs_mark_inode_private (inode); 1950 reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
1848 } 1951 "but vfs thinks they are!");
1849 1952 } else if (is_reiserfs_priv_object(dir)) {
1850 insert_inode_hash (inode); 1953 reiserfs_mark_inode_private(inode);
1851 reiserfs_update_sd(th, inode); 1954 }
1852 reiserfs_check_path(&path_to_key) ; 1955
1853 1956 insert_inode_hash(inode);
1854 return 0; 1957 reiserfs_update_sd(th, inode);
1958 reiserfs_check_path(&path_to_key);
1959
1960 return 0;
1855 1961
1856/* it looks like you can easily compress these two goto targets into 1962/* it looks like you can easily compress these two goto targets into
1857 * one. Keeping it like this doesn't actually hurt anything, and they 1963 * one. Keeping it like this doesn't actually hurt anything, and they
1858 * are place holders for what the quota code actually needs. 1964 * are place holders for what the quota code actually needs.
1859 */ 1965 */
1860out_bad_inode: 1966 out_bad_inode:
1861 /* Invalidate the object, nothing was inserted yet */ 1967 /* Invalidate the object, nothing was inserted yet */
1862 INODE_PKEY(inode)->k_objectid = 0; 1968 INODE_PKEY(inode)->k_objectid = 0;
1863 1969
1864 /* Quota change must be inside a transaction for journaling */ 1970 /* Quota change must be inside a transaction for journaling */
1865 DQUOT_FREE_INODE(inode); 1971 DQUOT_FREE_INODE(inode);
1866 1972
1867out_end_trans: 1973 out_end_trans:
1868 journal_end(th, th->t_super, th->t_blocks_allocated) ; 1974 journal_end(th, th->t_super, th->t_blocks_allocated);
1869 /* Drop can be outside and it needs more credits so it's better to have it outside */ 1975 /* Drop can be outside and it needs more credits so it's better to have it outside */
1870 DQUOT_DROP(inode); 1976 DQUOT_DROP(inode);
1871 inode->i_flags |= S_NOQUOTA; 1977 inode->i_flags |= S_NOQUOTA;
1872 make_bad_inode(inode); 1978 make_bad_inode(inode);
1873 1979
1874out_inserted_sd: 1980 out_inserted_sd:
1875 inode->i_nlink = 0; 1981 inode->i_nlink = 0;
1876 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1982 th->t_trans_id = 0; /* so the caller can't use this handle later */
1877 iput(inode); 1983
1878 return err; 1984 /* If we were inheriting an ACL, we need to release the lock so that
1985 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
1986 * code really needs to be reworked, but this will take care of it
1987 * for now. -jeffm */
1988 if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
1989 reiserfs_write_unlock_xattrs(dir->i_sb);
1990 iput(inode);
1991 reiserfs_write_lock_xattrs(dir->i_sb);
1992 } else
1993 iput(inode);
1994 return err;
1879} 1995}
1880 1996
1881/* 1997/*
@@ -1891,77 +2007,78 @@ out_inserted_sd:
1891** 2007**
1892** on failure, nonzero is returned, page_result and bh_result are untouched. 2008** on failure, nonzero is returned, page_result and bh_result are untouched.
1893*/ 2009*/
1894static int grab_tail_page(struct inode *p_s_inode, 2010static int grab_tail_page(struct inode *p_s_inode,
1895 struct page **page_result, 2011 struct page **page_result,
1896 struct buffer_head **bh_result) { 2012 struct buffer_head **bh_result)
1897 2013{
1898 /* we want the page with the last byte in the file, 2014
1899 ** not the page that will hold the next byte for appending 2015 /* we want the page with the last byte in the file,
1900 */ 2016 ** not the page that will hold the next byte for appending
1901 unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ; 2017 */
1902 unsigned long pos = 0 ; 2018 unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
1903 unsigned long start = 0 ; 2019 unsigned long pos = 0;
1904 unsigned long blocksize = p_s_inode->i_sb->s_blocksize ; 2020 unsigned long start = 0;
1905 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ; 2021 unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
1906 struct buffer_head *bh ; 2022 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
1907 struct buffer_head *head ; 2023 struct buffer_head *bh;
1908 struct page * page ; 2024 struct buffer_head *head;
1909 int error ; 2025 struct page *page;
1910 2026 int error;
1911 /* we know that we are only called with inode->i_size > 0. 2027
1912 ** we also know that a file tail can never be as big as a block 2028 /* we know that we are only called with inode->i_size > 0.
1913 ** If i_size % blocksize == 0, our file is currently block aligned 2029 ** we also know that a file tail can never be as big as a block
1914 ** and it won't need converting or zeroing after a truncate. 2030 ** If i_size % blocksize == 0, our file is currently block aligned
1915 */ 2031 ** and it won't need converting or zeroing after a truncate.
1916 if ((offset & (blocksize - 1)) == 0) { 2032 */
1917 return -ENOENT ; 2033 if ((offset & (blocksize - 1)) == 0) {
1918 } 2034 return -ENOENT;
1919 page = grab_cache_page(p_s_inode->i_mapping, index) ; 2035 }
1920 error = -ENOMEM ; 2036 page = grab_cache_page(p_s_inode->i_mapping, index);
1921 if (!page) { 2037 error = -ENOMEM;
1922 goto out ; 2038 if (!page) {
1923 } 2039 goto out;
1924 /* start within the page of the last block in the file */ 2040 }
1925 start = (offset / blocksize) * blocksize ; 2041 /* start within the page of the last block in the file */
1926 2042 start = (offset / blocksize) * blocksize;
1927 error = block_prepare_write(page, start, offset, 2043
1928 reiserfs_get_block_create_0) ; 2044 error = block_prepare_write(page, start, offset,
1929 if (error) 2045 reiserfs_get_block_create_0);
1930 goto unlock ; 2046 if (error)
1931 2047 goto unlock;
1932 head = page_buffers(page) ; 2048
1933 bh = head; 2049 head = page_buffers(page);
1934 do { 2050 bh = head;
1935 if (pos >= start) { 2051 do {
1936 break ; 2052 if (pos >= start) {
1937 } 2053 break;
1938 bh = bh->b_this_page ; 2054 }
1939 pos += blocksize ; 2055 bh = bh->b_this_page;
1940 } while(bh != head) ; 2056 pos += blocksize;
1941 2057 } while (bh != head);
1942 if (!buffer_uptodate(bh)) { 2058
1943 /* note, this should never happen, prepare_write should 2059 if (!buffer_uptodate(bh)) {
1944 ** be taking care of this for us. If the buffer isn't up to date, 2060 /* note, this should never happen, prepare_write should
1945 ** I've screwed up the code to find the buffer, or the code to 2061 ** be taking care of this for us. If the buffer isn't up to date,
1946 ** call prepare_write 2062 ** I've screwed up the code to find the buffer, or the code to
1947 */ 2063 ** call prepare_write
1948 reiserfs_warning (p_s_inode->i_sb, 2064 */
1949 "clm-6000: error reading block %lu on dev %s", 2065 reiserfs_warning(p_s_inode->i_sb,
1950 bh->b_blocknr, 2066 "clm-6000: error reading block %lu on dev %s",
1951 reiserfs_bdevname (p_s_inode->i_sb)) ; 2067 bh->b_blocknr,
1952 error = -EIO ; 2068 reiserfs_bdevname(p_s_inode->i_sb));
1953 goto unlock ; 2069 error = -EIO;
1954 } 2070 goto unlock;
1955 *bh_result = bh ; 2071 }
1956 *page_result = page ; 2072 *bh_result = bh;
1957 2073 *page_result = page;
1958out: 2074
1959 return error ; 2075 out:
1960 2076 return error;
1961unlock: 2077
1962 unlock_page(page) ; 2078 unlock:
1963 page_cache_release(page) ; 2079 unlock_page(page);
1964 return error ; 2080 page_cache_release(page);
2081 return error;
1965} 2082}
1966 2083
1967/* 2084/*
@@ -1970,235 +2087,247 @@ unlock:
1970** 2087**
1971** some code taken from block_truncate_page 2088** some code taken from block_truncate_page
1972*/ 2089*/
1973int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) { 2090int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
1974 struct reiserfs_transaction_handle th ; 2091{
1975 /* we want the offset for the first byte after the end of the file */ 2092 struct reiserfs_transaction_handle th;
1976 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ; 2093 /* we want the offset for the first byte after the end of the file */
1977 unsigned blocksize = p_s_inode->i_sb->s_blocksize ; 2094 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
1978 unsigned length ; 2095 unsigned blocksize = p_s_inode->i_sb->s_blocksize;
1979 struct page *page = NULL ; 2096 unsigned length;
1980 int error ; 2097 struct page *page = NULL;
1981 struct buffer_head *bh = NULL ; 2098 int error;
1982 2099 struct buffer_head *bh = NULL;
1983 reiserfs_write_lock(p_s_inode->i_sb); 2100
1984 2101 reiserfs_write_lock(p_s_inode->i_sb);
1985 if (p_s_inode->i_size > 0) { 2102
1986 if ((error = grab_tail_page(p_s_inode, &page, &bh))) { 2103 if (p_s_inode->i_size > 0) {
1987 // -ENOENT means we truncated past the end of the file, 2104 if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
1988 // and get_block_create_0 could not find a block to read in, 2105 // -ENOENT means we truncated past the end of the file,
1989 // which is ok. 2106 // and get_block_create_0 could not find a block to read in,
1990 if (error != -ENOENT) 2107 // which is ok.
1991 reiserfs_warning (p_s_inode->i_sb, 2108 if (error != -ENOENT)
1992 "clm-6001: grab_tail_page failed %d", 2109 reiserfs_warning(p_s_inode->i_sb,
1993 error); 2110 "clm-6001: grab_tail_page failed %d",
1994 page = NULL ; 2111 error);
1995 bh = NULL ; 2112 page = NULL;
1996 } 2113 bh = NULL;
1997 } 2114 }
1998 2115 }
1999 /* so, if page != NULL, we have a buffer head for the offset at
2000 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2001 ** then we have an unformatted node. Otherwise, we have a direct item,
2002 ** and no zeroing is required on disk. We zero after the truncate,
2003 ** because the truncate might pack the item anyway
2004 ** (it will unmap bh if it packs).
2005 */
2006 /* it is enough to reserve space in transaction for 2 balancings:
2007 one for "save" link adding and another for the first
2008 cut_from_item. 1 is for update_sd */
2009 error = journal_begin (&th, p_s_inode->i_sb,
2010 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2011 if (error)
2012 goto out;
2013 reiserfs_update_inode_transaction(p_s_inode) ;
2014 if (update_timestamps)
2015 /* we are doing real truncate: if the system crashes before the last
2016 transaction of truncating gets committed - on reboot the file
2017 either appears truncated properly or not truncated at all */
2018 add_save_link (&th, p_s_inode, 1);
2019 error = reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
2020 if (error)
2021 goto out;
2022 error = journal_end (&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2023 if (error)
2024 goto out;
2025
2026 if (update_timestamps) {
2027 error = remove_save_link (p_s_inode, 1/* truncate */);
2028 if (error)
2029 goto out;
2030 }
2031
2032 if (page) {
2033 length = offset & (blocksize - 1) ;
2034 /* if we are not on a block boundary */
2035 if (length) {
2036 char *kaddr;
2037
2038 length = blocksize - length ;
2039 kaddr = kmap_atomic(page, KM_USER0) ;
2040 memset(kaddr + offset, 0, length) ;
2041 flush_dcache_page(page) ;
2042 kunmap_atomic(kaddr, KM_USER0) ;
2043 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2044 mark_buffer_dirty(bh) ;
2045 }
2046 }
2047 unlock_page(page) ;
2048 page_cache_release(page) ;
2049 }
2050
2051 reiserfs_write_unlock(p_s_inode->i_sb);
2052 return 0;
2053out:
2054 if (page) {
2055 unlock_page (page);
2056 page_cache_release (page);
2057 }
2058 reiserfs_write_unlock(p_s_inode->i_sb);
2059 return error;
2060}
2061 2116
2062static int map_block_for_writepage(struct inode *inode, 2117 /* so, if page != NULL, we have a buffer head for the offset at
2063 struct buffer_head *bh_result, 2118 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2064 unsigned long block) { 2119 ** then we have an unformatted node. Otherwise, we have a direct item,
2065 struct reiserfs_transaction_handle th ; 2120 ** and no zeroing is required on disk. We zero after the truncate,
2066 int fs_gen ; 2121 ** because the truncate might pack the item anyway
2067 struct item_head tmp_ih ; 2122 ** (it will unmap bh if it packs).
2068 struct item_head *ih ;
2069 struct buffer_head *bh ;
2070 __le32 *item ;
2071 struct cpu_key key ;
2072 INITIALIZE_PATH(path) ;
2073 int pos_in_item ;
2074 int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
2075 loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
2076 int retval ;
2077 int use_get_block = 0 ;
2078 int bytes_copied = 0 ;
2079 int copy_size ;
2080 int trans_running = 0;
2081
2082 /* catch places below that try to log something without starting a trans */
2083 th.t_trans_id = 0;
2084
2085 if (!buffer_uptodate(bh_result)) {
2086 return -EIO;
2087 }
2088
2089 kmap(bh_result->b_page) ;
2090start_over:
2091 reiserfs_write_lock(inode->i_sb);
2092 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
2093
2094research:
2095 retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
2096 if (retval != POSITION_FOUND) {
2097 use_get_block = 1;
2098 goto out ;
2099 }
2100
2101 bh = get_last_bh(&path) ;
2102 ih = get_ih(&path) ;
2103 item = get_item(&path) ;
2104 pos_in_item = path.pos_in_item ;
2105
2106 /* we've found an unformatted node */
2107 if (indirect_item_found(retval, ih)) {
2108 if (bytes_copied > 0) {
2109 reiserfs_warning (inode->i_sb, "clm-6002: bytes_copied %d",
2110 bytes_copied) ;
2111 }
2112 if (!get_block_num(item, pos_in_item)) {
2113 /* crap, we are writing to a hole */
2114 use_get_block = 1;
2115 goto out ;
2116 }
2117 set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
2118 } else if (is_direct_le_ih(ih)) {
2119 char *p ;
2120 p = page_address(bh_result->b_page) ;
2121 p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
2122 copy_size = ih_item_len(ih) - pos_in_item;
2123
2124 fs_gen = get_generation(inode->i_sb) ;
2125 copy_item_head(&tmp_ih, ih) ;
2126
2127 if (!trans_running) {
2128 /* vs-3050 is gone, no need to drop the path */
2129 retval = journal_begin(&th, inode->i_sb, jbegin_count) ;
2130 if (retval)
2131 goto out;
2132 reiserfs_update_inode_transaction(inode) ;
2133 trans_running = 1;
2134 if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
2135 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
2136 goto research;
2137 }
2138 }
2139
2140 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
2141
2142 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
2143 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
2144 goto research;
2145 }
2146
2147 memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
2148
2149 journal_mark_dirty(&th, inode->i_sb, bh) ;
2150 bytes_copied += copy_size ;
2151 set_block_dev_mapped(bh_result, 0, inode);
2152
2153 /* are there still bytes left? */
2154 if (bytes_copied < bh_result->b_size &&
2155 (byte_offset + bytes_copied) < inode->i_size) {
2156 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
2157 goto research ;
2158 }
2159 } else {
2160 reiserfs_warning (inode->i_sb,
2161 "clm-6003: bad item inode %lu, device %s",
2162 inode->i_ino, reiserfs_bdevname (inode->i_sb)) ;
2163 retval = -EIO ;
2164 goto out ;
2165 }
2166 retval = 0 ;
2167
2168out:
2169 pathrelse(&path) ;
2170 if (trans_running) {
2171 int err = journal_end(&th, inode->i_sb, jbegin_count) ;
2172 if (err)
2173 retval = err;
2174 trans_running = 0;
2175 }
2176 reiserfs_write_unlock(inode->i_sb);
2177
2178 /* this is where we fill in holes in the file. */
2179 if (use_get_block) {
2180 retval = reiserfs_get_block(inode, block, bh_result,
2181 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM |
2182 GET_BLOCK_NO_DANGLE);
2183 if (!retval) {
2184 if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
2185 /* get_block failed to find a mapped unformatted node. */
2186 use_get_block = 0 ;
2187 goto start_over ;
2188 }
2189 }
2190 }
2191 kunmap(bh_result->b_page) ;
2192
2193 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2194 /* we've copied data from the page into the direct item, so the
2195 * buffer in the page is now clean, mark it to reflect that.
2196 */ 2123 */
2197 lock_buffer(bh_result); 2124 /* it is enough to reserve space in transaction for 2 balancings:
2198 clear_buffer_dirty(bh_result); 2125 one for "save" link adding and another for the first
2199 unlock_buffer(bh_result); 2126 cut_from_item. 1 is for update_sd */
2200 } 2127 error = journal_begin(&th, p_s_inode->i_sb,
2201 return retval ; 2128 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2129 if (error)
2130 goto out;
2131 reiserfs_update_inode_transaction(p_s_inode);
2132 if (update_timestamps)
2133 /* we are doing real truncate: if the system crashes before the last
2134 transaction of truncating gets committed - on reboot the file
2135 either appears truncated properly or not truncated at all */
2136 add_save_link(&th, p_s_inode, 1);
2137 error = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
2138 if (error)
2139 goto out;
2140 error =
2141 journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2142 if (error)
2143 goto out;
2144
2145 if (update_timestamps) {
2146 error = remove_save_link(p_s_inode, 1 /* truncate */ );
2147 if (error)
2148 goto out;
2149 }
2150
2151 if (page) {
2152 length = offset & (blocksize - 1);
2153 /* if we are not on a block boundary */
2154 if (length) {
2155 char *kaddr;
2156
2157 length = blocksize - length;
2158 kaddr = kmap_atomic(page, KM_USER0);
2159 memset(kaddr + offset, 0, length);
2160 flush_dcache_page(page);
2161 kunmap_atomic(kaddr, KM_USER0);
2162 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2163 mark_buffer_dirty(bh);
2164 }
2165 }
2166 unlock_page(page);
2167 page_cache_release(page);
2168 }
2169
2170 reiserfs_write_unlock(p_s_inode->i_sb);
2171 return 0;
2172 out:
2173 if (page) {
2174 unlock_page(page);
2175 page_cache_release(page);
2176 }
2177 reiserfs_write_unlock(p_s_inode->i_sb);
2178 return error;
2179}
2180
2181static int map_block_for_writepage(struct inode *inode,
2182 struct buffer_head *bh_result,
2183 unsigned long block)
2184{
2185 struct reiserfs_transaction_handle th;
2186 int fs_gen;
2187 struct item_head tmp_ih;
2188 struct item_head *ih;
2189 struct buffer_head *bh;
2190 __le32 *item;
2191 struct cpu_key key;
2192 INITIALIZE_PATH(path);
2193 int pos_in_item;
2194 int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2195 loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1;
2196 int retval;
2197 int use_get_block = 0;
2198 int bytes_copied = 0;
2199 int copy_size;
2200 int trans_running = 0;
2201
2202 /* catch places below that try to log something without starting a trans */
2203 th.t_trans_id = 0;
2204
2205 if (!buffer_uptodate(bh_result)) {
2206 return -EIO;
2207 }
2208
2209 kmap(bh_result->b_page);
2210 start_over:
2211 reiserfs_write_lock(inode->i_sb);
2212 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2213
2214 research:
2215 retval = search_for_position_by_key(inode->i_sb, &key, &path);
2216 if (retval != POSITION_FOUND) {
2217 use_get_block = 1;
2218 goto out;
2219 }
2220
2221 bh = get_last_bh(&path);
2222 ih = get_ih(&path);
2223 item = get_item(&path);
2224 pos_in_item = path.pos_in_item;
2225
2226 /* we've found an unformatted node */
2227 if (indirect_item_found(retval, ih)) {
2228 if (bytes_copied > 0) {
2229 reiserfs_warning(inode->i_sb,
2230 "clm-6002: bytes_copied %d",
2231 bytes_copied);
2232 }
2233 if (!get_block_num(item, pos_in_item)) {
2234 /* crap, we are writing to a hole */
2235 use_get_block = 1;
2236 goto out;
2237 }
2238 set_block_dev_mapped(bh_result,
2239 get_block_num(item, pos_in_item), inode);
2240 } else if (is_direct_le_ih(ih)) {
2241 char *p;
2242 p = page_address(bh_result->b_page);
2243 p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2244 copy_size = ih_item_len(ih) - pos_in_item;
2245
2246 fs_gen = get_generation(inode->i_sb);
2247 copy_item_head(&tmp_ih, ih);
2248
2249 if (!trans_running) {
2250 /* vs-3050 is gone, no need to drop the path */
2251 retval = journal_begin(&th, inode->i_sb, jbegin_count);
2252 if (retval)
2253 goto out;
2254 reiserfs_update_inode_transaction(inode);
2255 trans_running = 1;
2256 if (fs_changed(fs_gen, inode->i_sb)
2257 && item_moved(&tmp_ih, &path)) {
2258 reiserfs_restore_prepared_buffer(inode->i_sb,
2259 bh);
2260 goto research;
2261 }
2262 }
2263
2264 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2265
2266 if (fs_changed(fs_gen, inode->i_sb)
2267 && item_moved(&tmp_ih, &path)) {
2268 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2269 goto research;
2270 }
2271
2272 memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2273 copy_size);
2274
2275 journal_mark_dirty(&th, inode->i_sb, bh);
2276 bytes_copied += copy_size;
2277 set_block_dev_mapped(bh_result, 0, inode);
2278
2279 /* are there still bytes left? */
2280 if (bytes_copied < bh_result->b_size &&
2281 (byte_offset + bytes_copied) < inode->i_size) {
2282 set_cpu_key_k_offset(&key,
2283 cpu_key_k_offset(&key) +
2284 copy_size);
2285 goto research;
2286 }
2287 } else {
2288 reiserfs_warning(inode->i_sb,
2289 "clm-6003: bad item inode %lu, device %s",
2290 inode->i_ino, reiserfs_bdevname(inode->i_sb));
2291 retval = -EIO;
2292 goto out;
2293 }
2294 retval = 0;
2295
2296 out:
2297 pathrelse(&path);
2298 if (trans_running) {
2299 int err = journal_end(&th, inode->i_sb, jbegin_count);
2300 if (err)
2301 retval = err;
2302 trans_running = 0;
2303 }
2304 reiserfs_write_unlock(inode->i_sb);
2305
2306 /* this is where we fill in holes in the file. */
2307 if (use_get_block) {
2308 retval = reiserfs_get_block(inode, block, bh_result,
2309 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM
2310 | GET_BLOCK_NO_DANGLE);
2311 if (!retval) {
2312 if (!buffer_mapped(bh_result)
2313 || bh_result->b_blocknr == 0) {
2314 /* get_block failed to find a mapped unformatted node. */
2315 use_get_block = 0;
2316 goto start_over;
2317 }
2318 }
2319 }
2320 kunmap(bh_result->b_page);
2321
2322 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2323 /* we've copied data from the page into the direct item, so the
2324 * buffer in the page is now clean, mark it to reflect that.
2325 */
2326 lock_buffer(bh_result);
2327 clear_buffer_dirty(bh_result);
2328 unlock_buffer(bh_result);
2329 }
2330 return retval;
2202} 2331}
2203 2332
2204/* 2333/*
@@ -2206,383 +2335,390 @@ out:
2206 * start/recovery path as __block_write_full_page, along with special 2335 * start/recovery path as __block_write_full_page, along with special
2207 * code to handle reiserfs tails. 2336 * code to handle reiserfs tails.
2208 */ 2337 */
2209static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { 2338static int reiserfs_write_full_page(struct page *page,
2210 struct inode *inode = page->mapping->host ; 2339 struct writeback_control *wbc)
2211 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; 2340{
2212 int error = 0; 2341 struct inode *inode = page->mapping->host;
2213 unsigned long block ; 2342 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2214 struct buffer_head *head, *bh; 2343 int error = 0;
2215 int partial = 0 ; 2344 unsigned long block;
2216 int nr = 0; 2345 struct buffer_head *head, *bh;
2217 int checked = PageChecked(page); 2346 int partial = 0;
2218 struct reiserfs_transaction_handle th; 2347 int nr = 0;
2219 struct super_block *s = inode->i_sb; 2348 int checked = PageChecked(page);
2220 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 2349 struct reiserfs_transaction_handle th;
2221 th.t_trans_id = 0; 2350 struct super_block *s = inode->i_sb;
2222 2351 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2223 /* The page dirty bit is cleared before writepage is called, which 2352 th.t_trans_id = 0;
2224 * means we have to tell create_empty_buffers to make dirty buffers 2353
2225 * The page really should be up to date at this point, so tossing 2354 /* The page dirty bit is cleared before writepage is called, which
2226 * in the BH_Uptodate is just a sanity check. 2355 * means we have to tell create_empty_buffers to make dirty buffers
2227 */ 2356 * The page really should be up to date at this point, so tossing
2228 if (!page_has_buffers(page)) { 2357 * in the BH_Uptodate is just a sanity check.
2229 create_empty_buffers(page, s->s_blocksize, 2358 */
2230 (1 << BH_Dirty) | (1 << BH_Uptodate)); 2359 if (!page_has_buffers(page)) {
2231 } 2360 create_empty_buffers(page, s->s_blocksize,
2232 head = page_buffers(page) ; 2361 (1 << BH_Dirty) | (1 << BH_Uptodate));
2233 2362 }
2234 /* last page in the file, zero out any contents past the 2363 head = page_buffers(page);
2235 ** last byte in the file
2236 */
2237 if (page->index >= end_index) {
2238 char *kaddr;
2239 unsigned last_offset;
2240
2241 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
2242 /* no file contents in this page */
2243 if (page->index >= end_index + 1 || !last_offset) {
2244 unlock_page(page);
2245 return 0;
2246 }
2247 kaddr = kmap_atomic(page, KM_USER0);
2248 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
2249 flush_dcache_page(page) ;
2250 kunmap_atomic(kaddr, KM_USER0) ;
2251 }
2252 bh = head ;
2253 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits) ;
2254 /* first map all the buffers, logging any direct items we find */
2255 do {
2256 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) ||
2257 (buffer_mapped(bh) && bh->b_blocknr == 0))) {
2258 /* not mapped yet, or it points to a direct item, search
2259 * the btree for the mapping info, and log any direct
2260 * items found
2261 */
2262 if ((error = map_block_for_writepage(inode, bh, block))) {
2263 goto fail ;
2264 }
2265 }
2266 bh = bh->b_this_page;
2267 block++;
2268 } while(bh != head) ;
2269
2270 /*
2271 * we start the transaction after map_block_for_writepage,
2272 * because it can create holes in the file (an unbounded operation).
2273 * starting it here, we can make a reliable estimate for how many
2274 * blocks we're going to log
2275 */
2276 if (checked) {
2277 ClearPageChecked(page);
2278 reiserfs_write_lock(s);
2279 error = journal_begin(&th, s, bh_per_page + 1);
2280 if (error) {
2281 reiserfs_write_unlock(s);
2282 goto fail;
2283 }
2284 reiserfs_update_inode_transaction(inode);
2285 }
2286 /* now go through and lock any dirty buffers on the page */
2287 do {
2288 get_bh(bh);
2289 if (!buffer_mapped(bh))
2290 continue;
2291 if (buffer_mapped(bh) && bh->b_blocknr == 0)
2292 continue;
2293 2364
2294 if (checked) { 2365 /* last page in the file, zero out any contents past the
2295 reiserfs_prepare_for_journal(s, bh, 1); 2366 ** last byte in the file
2296 journal_mark_dirty(&th, s, bh); 2367 */
2297 continue; 2368 if (page->index >= end_index) {
2369 char *kaddr;
2370 unsigned last_offset;
2371
2372 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2373 /* no file contents in this page */
2374 if (page->index >= end_index + 1 || !last_offset) {
2375 unlock_page(page);
2376 return 0;
2377 }
2378 kaddr = kmap_atomic(page, KM_USER0);
2379 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset);
2380 flush_dcache_page(page);
2381 kunmap_atomic(kaddr, KM_USER0);
2298 } 2382 }
2299 /* from this point on, we know the buffer is mapped to a 2383 bh = head;
2300 * real block and not a direct item 2384 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2385 /* first map all the buffers, logging any direct items we find */
2386 do {
2387 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) ||
2388 (buffer_mapped(bh)
2389 && bh->b_blocknr ==
2390 0))) {
2391 /* not mapped yet, or it points to a direct item, search
2392 * the btree for the mapping info, and log any direct
2393 * items found
2394 */
2395 if ((error = map_block_for_writepage(inode, bh, block))) {
2396 goto fail;
2397 }
2398 }
2399 bh = bh->b_this_page;
2400 block++;
2401 } while (bh != head);
2402
2403 /*
2404 * we start the transaction after map_block_for_writepage,
2405 * because it can create holes in the file (an unbounded operation).
2406 * starting it here, we can make a reliable estimate for how many
2407 * blocks we're going to log
2301 */ 2408 */
2302 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2409 if (checked) {
2303 lock_buffer(bh); 2410 ClearPageChecked(page);
2304 } else { 2411 reiserfs_write_lock(s);
2305 if (test_set_buffer_locked(bh)) { 2412 error = journal_begin(&th, s, bh_per_page + 1);
2306 redirty_page_for_writepage(wbc, page); 2413 if (error) {
2307 continue; 2414 reiserfs_write_unlock(s);
2308 } 2415 goto fail;
2416 }
2417 reiserfs_update_inode_transaction(inode);
2309 } 2418 }
2310 if (test_clear_buffer_dirty(bh)) { 2419 /* now go through and lock any dirty buffers on the page */
2311 mark_buffer_async_write(bh); 2420 do {
2312 } else { 2421 get_bh(bh);
2313 unlock_buffer(bh); 2422 if (!buffer_mapped(bh))
2423 continue;
2424 if (buffer_mapped(bh) && bh->b_blocknr == 0)
2425 continue;
2426
2427 if (checked) {
2428 reiserfs_prepare_for_journal(s, bh, 1);
2429 journal_mark_dirty(&th, s, bh);
2430 continue;
2431 }
2432 /* from this point on, we know the buffer is mapped to a
2433 * real block and not a direct item
2434 */
2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2436 lock_buffer(bh);
2437 } else {
2438 if (test_set_buffer_locked(bh)) {
2439 redirty_page_for_writepage(wbc, page);
2440 continue;
2441 }
2442 }
2443 if (test_clear_buffer_dirty(bh)) {
2444 mark_buffer_async_write(bh);
2445 } else {
2446 unlock_buffer(bh);
2447 }
2448 } while ((bh = bh->b_this_page) != head);
2449
2450 if (checked) {
2451 error = journal_end(&th, s, bh_per_page + 1);
2452 reiserfs_write_unlock(s);
2453 if (error)
2454 goto fail;
2314 } 2455 }
2315 } while((bh = bh->b_this_page) != head); 2456 BUG_ON(PageWriteback(page));
2457 set_page_writeback(page);
2458 unlock_page(page);
2316 2459
2317 if (checked) { 2460 /*
2318 error = journal_end(&th, s, bh_per_page + 1); 2461 * since any buffer might be the only dirty buffer on the page,
2319 reiserfs_write_unlock(s); 2462 * the first submit_bh can bring the page out of writeback.
2320 if (error) 2463 * be careful with the buffers.
2321 goto fail;
2322 }
2323 BUG_ON(PageWriteback(page));
2324 set_page_writeback(page);
2325 unlock_page(page);
2326
2327 /*
2328 * since any buffer might be the only dirty buffer on the page,
2329 * the first submit_bh can bring the page out of writeback.
2330 * be careful with the buffers.
2331 */
2332 do {
2333 struct buffer_head *next = bh->b_this_page;
2334 if (buffer_async_write(bh)) {
2335 submit_bh(WRITE, bh);
2336 nr++;
2337 }
2338 put_bh(bh);
2339 bh = next;
2340 } while(bh != head);
2341
2342 error = 0;
2343done:
2344 if (nr == 0) {
2345 /*
2346 * if this page only had a direct item, it is very possible for
2347 * no io to be required without there being an error. Or,
2348 * someone else could have locked them and sent them down the
2349 * pipe without locking the page
2350 */ 2464 */
2351 bh = head ;
2352 do { 2465 do {
2353 if (!buffer_uptodate(bh)) { 2466 struct buffer_head *next = bh->b_this_page;
2354 partial = 1; 2467 if (buffer_async_write(bh)) {
2355 break; 2468 submit_bh(WRITE, bh);
2356 } 2469 nr++;
2357 bh = bh->b_this_page; 2470 }
2358 } while(bh != head); 2471 put_bh(bh);
2359 if (!partial) 2472 bh = next;
2360 SetPageUptodate(page); 2473 } while (bh != head);
2361 end_page_writeback(page);
2362 }
2363 return error;
2364
2365fail:
2366 /* catches various errors, we need to make sure any valid dirty blocks
2367 * get to the media. The page is currently locked and not marked for
2368 * writeback
2369 */
2370 ClearPageUptodate(page);
2371 bh = head;
2372 do {
2373 get_bh(bh);
2374 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2375 lock_buffer(bh);
2376 mark_buffer_async_write(bh);
2377 } else {
2378 /*
2379 * clear any dirty bits that might have come from getting
2380 * attached to a dirty page
2381 */
2382 clear_buffer_dirty(bh);
2383 }
2384 bh = bh->b_this_page;
2385 } while(bh != head);
2386 SetPageError(page);
2387 BUG_ON(PageWriteback(page));
2388 set_page_writeback(page);
2389 unlock_page(page);
2390 do {
2391 struct buffer_head *next = bh->b_this_page;
2392 if (buffer_async_write(bh)) {
2393 clear_buffer_dirty(bh);
2394 submit_bh(WRITE, bh);
2395 nr++;
2396 }
2397 put_bh(bh);
2398 bh = next;
2399 } while(bh != head);
2400 goto done;
2401}
2402 2474
2475 error = 0;
2476 done:
2477 if (nr == 0) {
2478 /*
2479 * if this page only had a direct item, it is very possible for
2480 * no io to be required without there being an error. Or,
2481 * someone else could have locked them and sent them down the
2482 * pipe without locking the page
2483 */
2484 bh = head;
2485 do {
2486 if (!buffer_uptodate(bh)) {
2487 partial = 1;
2488 break;
2489 }
2490 bh = bh->b_this_page;
2491 } while (bh != head);
2492 if (!partial)
2493 SetPageUptodate(page);
2494 end_page_writeback(page);
2495 }
2496 return error;
2403 2497
2404static int reiserfs_readpage (struct file *f, struct page * page) 2498 fail:
2405{ 2499 /* catches various errors, we need to make sure any valid dirty blocks
2406 return block_read_full_page (page, reiserfs_get_block); 2500 * get to the media. The page is currently locked and not marked for
2501 * writeback
2502 */
2503 ClearPageUptodate(page);
2504 bh = head;
2505 do {
2506 get_bh(bh);
2507 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2508 lock_buffer(bh);
2509 mark_buffer_async_write(bh);
2510 } else {
2511 /*
2512 * clear any dirty bits that might have come from getting
2513 * attached to a dirty page
2514 */
2515 clear_buffer_dirty(bh);
2516 }
2517 bh = bh->b_this_page;
2518 } while (bh != head);
2519 SetPageError(page);
2520 BUG_ON(PageWriteback(page));
2521 set_page_writeback(page);
2522 unlock_page(page);
2523 do {
2524 struct buffer_head *next = bh->b_this_page;
2525 if (buffer_async_write(bh)) {
2526 clear_buffer_dirty(bh);
2527 submit_bh(WRITE, bh);
2528 nr++;
2529 }
2530 put_bh(bh);
2531 bh = next;
2532 } while (bh != head);
2533 goto done;
2407} 2534}
2408 2535
2536static int reiserfs_readpage(struct file *f, struct page *page)
2537{
2538 return block_read_full_page(page, reiserfs_get_block);
2539}
2409 2540
2410static int reiserfs_writepage (struct page * page, struct writeback_control *wbc) 2541static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2411{ 2542{
2412 struct inode *inode = page->mapping->host ; 2543 struct inode *inode = page->mapping->host;
2413 reiserfs_wait_on_write_block(inode->i_sb) ; 2544 reiserfs_wait_on_write_block(inode->i_sb);
2414 return reiserfs_write_full_page(page, wbc) ; 2545 return reiserfs_write_full_page(page, wbc);
2415} 2546}
2416 2547
2417static int reiserfs_prepare_write(struct file *f, struct page *page, 2548static int reiserfs_prepare_write(struct file *f, struct page *page,
2418 unsigned from, unsigned to) { 2549 unsigned from, unsigned to)
2419 struct inode *inode = page->mapping->host ; 2550{
2420 int ret; 2551 struct inode *inode = page->mapping->host;
2421 int old_ref = 0; 2552 int ret;
2422 2553 int old_ref = 0;
2423 reiserfs_wait_on_write_block(inode->i_sb) ; 2554
2424 fix_tail_page_for_writing(page) ; 2555 reiserfs_wait_on_write_block(inode->i_sb);
2425 if (reiserfs_transaction_running(inode->i_sb)) { 2556 fix_tail_page_for_writing(page);
2426 struct reiserfs_transaction_handle *th; 2557 if (reiserfs_transaction_running(inode->i_sb)) {
2427 th = (struct reiserfs_transaction_handle *)current->journal_info; 2558 struct reiserfs_transaction_handle *th;
2428 BUG_ON (!th->t_refcount); 2559 th = (struct reiserfs_transaction_handle *)current->
2429 BUG_ON (!th->t_trans_id); 2560 journal_info;
2430 old_ref = th->t_refcount; 2561 BUG_ON(!th->t_refcount);
2431 th->t_refcount++; 2562 BUG_ON(!th->t_trans_id);
2432 } 2563 old_ref = th->t_refcount;
2433 2564 th->t_refcount++;
2434 ret = block_prepare_write(page, from, to, reiserfs_get_block) ;
2435 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2436 struct reiserfs_transaction_handle *th = current->journal_info;
2437 /* this gets a little ugly. If reiserfs_get_block returned an
2438 * error and left a transacstion running, we've got to close it,
2439 * and we've got to free handle if it was a persistent transaction.
2440 *
2441 * But, if we had nested into an existing transaction, we need
2442 * to just drop the ref count on the handle.
2443 *
2444 * If old_ref == 0, the transaction is from reiserfs_get_block,
2445 * and it was a persistent trans. Otherwise, it was nested above.
2446 */
2447 if (th->t_refcount > old_ref) {
2448 if (old_ref)
2449 th->t_refcount--;
2450 else {
2451 int err;
2452 reiserfs_write_lock(inode->i_sb);
2453 err = reiserfs_end_persistent_transaction(th);
2454 reiserfs_write_unlock(inode->i_sb);
2455 if (err)
2456 ret = err;
2457 }
2458 } 2565 }
2459 }
2460 return ret;
2461 2566
2462} 2567 ret = block_prepare_write(page, from, to, reiserfs_get_block);
2568 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2569 struct reiserfs_transaction_handle *th = current->journal_info;
2570 /* this gets a little ugly. If reiserfs_get_block returned an
2571 * error and left a transacstion running, we've got to close it,
2572 * and we've got to free handle if it was a persistent transaction.
2573 *
2574 * But, if we had nested into an existing transaction, we need
2575 * to just drop the ref count on the handle.
2576 *
2577 * If old_ref == 0, the transaction is from reiserfs_get_block,
2578 * and it was a persistent trans. Otherwise, it was nested above.
2579 */
2580 if (th->t_refcount > old_ref) {
2581 if (old_ref)
2582 th->t_refcount--;
2583 else {
2584 int err;
2585 reiserfs_write_lock(inode->i_sb);
2586 err = reiserfs_end_persistent_transaction(th);
2587 reiserfs_write_unlock(inode->i_sb);
2588 if (err)
2589 ret = err;
2590 }
2591 }
2592 }
2593 return ret;
2463 2594
2595}
2464 2596
2465static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) { 2597static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2466 return generic_block_bmap(as, block, reiserfs_bmap) ; 2598{
2599 return generic_block_bmap(as, block, reiserfs_bmap);
2467} 2600}
2468 2601
2469static int reiserfs_commit_write(struct file *f, struct page *page, 2602static int reiserfs_commit_write(struct file *f, struct page *page,
2470 unsigned from, unsigned to) { 2603 unsigned from, unsigned to)
2471 struct inode *inode = page->mapping->host ; 2604{
2472 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2605 struct inode *inode = page->mapping->host;
2473 int ret = 0; 2606 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2474 int update_sd = 0; 2607 int ret = 0;
2475 struct reiserfs_transaction_handle *th = NULL; 2608 int update_sd = 0;
2476 2609 struct reiserfs_transaction_handle *th = NULL;
2477 reiserfs_wait_on_write_block(inode->i_sb) ; 2610
2478 if (reiserfs_transaction_running(inode->i_sb)) { 2611 reiserfs_wait_on_write_block(inode->i_sb);
2479 th = current->journal_info; 2612 if (reiserfs_transaction_running(inode->i_sb)) {
2480 } 2613 th = current->journal_info;
2481 reiserfs_commit_page(inode, page, from, to); 2614 }
2482 2615 reiserfs_commit_page(inode, page, from, to);
2483 /* generic_commit_write does this for us, but does not update the
2484 ** transaction tracking stuff when the size changes. So, we have
2485 ** to do the i_size updates here.
2486 */
2487 if (pos > inode->i_size) {
2488 struct reiserfs_transaction_handle myth ;
2489 reiserfs_write_lock(inode->i_sb);
2490 /* If the file have grown beyond the border where it
2491 can have a tail, unmark it as needing a tail
2492 packing */
2493 if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) ||
2494 (have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) )
2495 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
2496
2497 ret = journal_begin(&myth, inode->i_sb, 1) ;
2498 if (ret) {
2499 reiserfs_write_unlock(inode->i_sb);
2500 goto journal_error;
2501 }
2502 reiserfs_update_inode_transaction(inode) ;
2503 inode->i_size = pos ;
2504 reiserfs_update_sd(&myth, inode) ;
2505 update_sd = 1;
2506 ret = journal_end(&myth, inode->i_sb, 1) ;
2507 reiserfs_write_unlock(inode->i_sb);
2508 if (ret)
2509 goto journal_error;
2510 }
2511 if (th) {
2512 reiserfs_write_lock(inode->i_sb);
2513 if (!update_sd)
2514 reiserfs_update_sd(th, inode) ;
2515 ret = reiserfs_end_persistent_transaction(th);
2516 reiserfs_write_unlock(inode->i_sb);
2517 if (ret)
2518 goto out;
2519 }
2520
2521 /* we test for O_SYNC here so we can commit the transaction
2522 ** for any packed tails the file might have had
2523 */
2524 if (f && (f->f_flags & O_SYNC)) {
2525 reiserfs_write_lock(inode->i_sb);
2526 ret = reiserfs_commit_for_inode(inode) ;
2527 reiserfs_write_unlock(inode->i_sb);
2528 }
2529out:
2530 return ret ;
2531 2616
2532journal_error: 2617 /* generic_commit_write does this for us, but does not update the
2533 if (th) { 2618 ** transaction tracking stuff when the size changes. So, we have
2534 reiserfs_write_lock(inode->i_sb); 2619 ** to do the i_size updates here.
2535 if (!update_sd) 2620 */
2536 reiserfs_update_sd(th, inode) ; 2621 if (pos > inode->i_size) {
2537 ret = reiserfs_end_persistent_transaction(th); 2622 struct reiserfs_transaction_handle myth;
2538 reiserfs_write_unlock(inode->i_sb); 2623 reiserfs_write_lock(inode->i_sb);
2539 } 2624 /* If the file have grown beyond the border where it
2625 can have a tail, unmark it as needing a tail
2626 packing */
2627 if ((have_large_tails(inode->i_sb)
2628 && inode->i_size > i_block_size(inode) * 4)
2629 || (have_small_tails(inode->i_sb)
2630 && inode->i_size > i_block_size(inode)))
2631 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2632
2633 ret = journal_begin(&myth, inode->i_sb, 1);
2634 if (ret) {
2635 reiserfs_write_unlock(inode->i_sb);
2636 goto journal_error;
2637 }
2638 reiserfs_update_inode_transaction(inode);
2639 inode->i_size = pos;
2640 reiserfs_update_sd(&myth, inode);
2641 update_sd = 1;
2642 ret = journal_end(&myth, inode->i_sb, 1);
2643 reiserfs_write_unlock(inode->i_sb);
2644 if (ret)
2645 goto journal_error;
2646 }
2647 if (th) {
2648 reiserfs_write_lock(inode->i_sb);
2649 if (!update_sd)
2650 reiserfs_update_sd(th, inode);
2651 ret = reiserfs_end_persistent_transaction(th);
2652 reiserfs_write_unlock(inode->i_sb);
2653 if (ret)
2654 goto out;
2655 }
2656
2657 /* we test for O_SYNC here so we can commit the transaction
2658 ** for any packed tails the file might have had
2659 */
2660 if (f && (f->f_flags & O_SYNC)) {
2661 reiserfs_write_lock(inode->i_sb);
2662 ret = reiserfs_commit_for_inode(inode);
2663 reiserfs_write_unlock(inode->i_sb);
2664 }
2665 out:
2666 return ret;
2540 2667
2541 return ret; 2668 journal_error:
2669 if (th) {
2670 reiserfs_write_lock(inode->i_sb);
2671 if (!update_sd)
2672 reiserfs_update_sd(th, inode);
2673 ret = reiserfs_end_persistent_transaction(th);
2674 reiserfs_write_unlock(inode->i_sb);
2675 }
2676
2677 return ret;
2542} 2678}
2543 2679
2544void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ) 2680void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2545{ 2681{
2546 if( reiserfs_attrs( inode -> i_sb ) ) { 2682 if (reiserfs_attrs(inode->i_sb)) {
2547 if( sd_attrs & REISERFS_SYNC_FL ) 2683 if (sd_attrs & REISERFS_SYNC_FL)
2548 inode -> i_flags |= S_SYNC; 2684 inode->i_flags |= S_SYNC;
2549 else 2685 else
2550 inode -> i_flags &= ~S_SYNC; 2686 inode->i_flags &= ~S_SYNC;
2551 if( sd_attrs & REISERFS_IMMUTABLE_FL ) 2687 if (sd_attrs & REISERFS_IMMUTABLE_FL)
2552 inode -> i_flags |= S_IMMUTABLE; 2688 inode->i_flags |= S_IMMUTABLE;
2553 else 2689 else
2554 inode -> i_flags &= ~S_IMMUTABLE; 2690 inode->i_flags &= ~S_IMMUTABLE;
2555 if( sd_attrs & REISERFS_APPEND_FL ) 2691 if (sd_attrs & REISERFS_APPEND_FL)
2556 inode -> i_flags |= S_APPEND; 2692 inode->i_flags |= S_APPEND;
2557 else 2693 else
2558 inode -> i_flags &= ~S_APPEND; 2694 inode->i_flags &= ~S_APPEND;
2559 if( sd_attrs & REISERFS_NOATIME_FL ) 2695 if (sd_attrs & REISERFS_NOATIME_FL)
2560 inode -> i_flags |= S_NOATIME; 2696 inode->i_flags |= S_NOATIME;
2561 else 2697 else
2562 inode -> i_flags &= ~S_NOATIME; 2698 inode->i_flags &= ~S_NOATIME;
2563 if( sd_attrs & REISERFS_NOTAIL_FL ) 2699 if (sd_attrs & REISERFS_NOTAIL_FL)
2564 REISERFS_I(inode)->i_flags |= i_nopack_mask; 2700 REISERFS_I(inode)->i_flags |= i_nopack_mask;
2565 else 2701 else
2566 REISERFS_I(inode)->i_flags &= ~i_nopack_mask; 2702 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2567 } 2703 }
2568} 2704}
2569 2705
2570void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ) 2706void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2571{ 2707{
2572 if( reiserfs_attrs( inode -> i_sb ) ) { 2708 if (reiserfs_attrs(inode->i_sb)) {
2573 if( inode -> i_flags & S_IMMUTABLE ) 2709 if (inode->i_flags & S_IMMUTABLE)
2574 *sd_attrs |= REISERFS_IMMUTABLE_FL; 2710 *sd_attrs |= REISERFS_IMMUTABLE_FL;
2575 else 2711 else
2576 *sd_attrs &= ~REISERFS_IMMUTABLE_FL; 2712 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2577 if( inode -> i_flags & S_SYNC ) 2713 if (inode->i_flags & S_SYNC)
2578 *sd_attrs |= REISERFS_SYNC_FL; 2714 *sd_attrs |= REISERFS_SYNC_FL;
2579 else 2715 else
2580 *sd_attrs &= ~REISERFS_SYNC_FL; 2716 *sd_attrs &= ~REISERFS_SYNC_FL;
2581 if( inode -> i_flags & S_NOATIME ) 2717 if (inode->i_flags & S_NOATIME)
2582 *sd_attrs |= REISERFS_NOATIME_FL; 2718 *sd_attrs |= REISERFS_NOATIME_FL;
2583 else 2719 else
2584 *sd_attrs &= ~REISERFS_NOATIME_FL; 2720 *sd_attrs &= ~REISERFS_NOATIME_FL;
2585 if( REISERFS_I(inode)->i_flags & i_nopack_mask ) 2721 if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2586 *sd_attrs |= REISERFS_NOTAIL_FL; 2722 *sd_attrs |= REISERFS_NOTAIL_FL;
2587 else 2723 else
2588 *sd_attrs &= ~REISERFS_NOTAIL_FL; 2724 *sd_attrs &= ~REISERFS_NOTAIL_FL;
@@ -2594,106 +2730,107 @@ void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
2594*/ 2730*/
2595static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) 2731static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2596{ 2732{
2597 int ret = 1 ; 2733 int ret = 1;
2598 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; 2734 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2599 2735
2600 spin_lock(&j->j_dirty_buffers_lock) ; 2736 spin_lock(&j->j_dirty_buffers_lock);
2601 if (!buffer_mapped(bh)) { 2737 if (!buffer_mapped(bh)) {
2602 goto free_jh; 2738 goto free_jh;
2603 } 2739 }
2604 /* the page is locked, and the only places that log a data buffer 2740 /* the page is locked, and the only places that log a data buffer
2605 * also lock the page. 2741 * also lock the page.
2606 */
2607 if (reiserfs_file_data_log(inode)) {
2608 /*
2609 * very conservative, leave the buffer pinned if
2610 * anyone might need it.
2611 */
2612 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2613 ret = 0 ;
2614 }
2615 } else
2616 if (buffer_dirty(bh) || buffer_locked(bh)) {
2617 struct reiserfs_journal_list *jl;
2618 struct reiserfs_jh *jh = bh->b_private;
2619
2620 /* why is this safe?
2621 * reiserfs_setattr updates i_size in the on disk
2622 * stat data before allowing vmtruncate to be called.
2623 *
2624 * If buffer was put onto the ordered list for this
2625 * transaction, we know for sure either this transaction
2626 * or an older one already has updated i_size on disk,
2627 * and this ordered data won't be referenced in the file
2628 * if we crash.
2629 *
2630 * if the buffer was put onto the ordered list for an older
2631 * transaction, we need to leave it around
2632 */ 2742 */
2633 if (jh && (jl = jh->jl) && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) 2743 if (reiserfs_file_data_log(inode)) {
2634 ret = 0; 2744 /*
2635 } 2745 * very conservative, leave the buffer pinned if
2636free_jh: 2746 * anyone might need it.
2637 if (ret && bh->b_private) { 2747 */
2638 reiserfs_free_jh(bh); 2748 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2639 } 2749 ret = 0;
2640 spin_unlock(&j->j_dirty_buffers_lock) ; 2750 }
2641 return ret ; 2751 } else if (buffer_dirty(bh) || buffer_locked(bh)) {
2752 struct reiserfs_journal_list *jl;
2753 struct reiserfs_jh *jh = bh->b_private;
2754
2755 /* why is this safe?
2756 * reiserfs_setattr updates i_size in the on disk
2757 * stat data before allowing vmtruncate to be called.
2758 *
2759 * If buffer was put onto the ordered list for this
2760 * transaction, we know for sure either this transaction
2761 * or an older one already has updated i_size on disk,
2762 * and this ordered data won't be referenced in the file
2763 * if we crash.
2764 *
2765 * if the buffer was put onto the ordered list for an older
2766 * transaction, we need to leave it around
2767 */
2768 if (jh && (jl = jh->jl)
2769 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2770 ret = 0;
2771 }
2772 free_jh:
2773 if (ret && bh->b_private) {
2774 reiserfs_free_jh(bh);
2775 }
2776 spin_unlock(&j->j_dirty_buffers_lock);
2777 return ret;
2642} 2778}
2643 2779
2644/* clm -- taken from fs/buffer.c:block_invalidate_page */ 2780/* clm -- taken from fs/buffer.c:block_invalidate_page */
2645static int reiserfs_invalidatepage(struct page *page, unsigned long offset) 2781static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
2646{ 2782{
2647 struct buffer_head *head, *bh, *next; 2783 struct buffer_head *head, *bh, *next;
2648 struct inode *inode = page->mapping->host; 2784 struct inode *inode = page->mapping->host;
2649 unsigned int curr_off = 0; 2785 unsigned int curr_off = 0;
2650 int ret = 1; 2786 int ret = 1;
2651 2787
2652 BUG_ON(!PageLocked(page)); 2788 BUG_ON(!PageLocked(page));
2653 2789
2654 if (offset == 0) 2790 if (offset == 0)
2655 ClearPageChecked(page); 2791 ClearPageChecked(page);
2656 2792
2657 if (!page_has_buffers(page)) 2793 if (!page_has_buffers(page))
2658 goto out; 2794 goto out;
2795
2796 head = page_buffers(page);
2797 bh = head;
2798 do {
2799 unsigned int next_off = curr_off + bh->b_size;
2800 next = bh->b_this_page;
2659 2801
2660 head = page_buffers(page); 2802 /*
2661 bh = head; 2803 * is this block fully invalidated?
2662 do { 2804 */
2663 unsigned int next_off = curr_off + bh->b_size; 2805 if (offset <= curr_off) {
2664 next = bh->b_this_page; 2806 if (invalidatepage_can_drop(inode, bh))
2807 reiserfs_unmap_buffer(bh);
2808 else
2809 ret = 0;
2810 }
2811 curr_off = next_off;
2812 bh = next;
2813 } while (bh != head);
2665 2814
2666 /* 2815 /*
2667 * is this block fully invalidated? 2816 * We release buffers only if the entire page is being invalidated.
2817 * The get_block cached value has been unconditionally invalidated,
2818 * so real IO is not possible anymore.
2668 */ 2819 */
2669 if (offset <= curr_off) { 2820 if (!offset && ret)
2670 if (invalidatepage_can_drop(inode, bh)) 2821 ret = try_to_release_page(page, 0);
2671 reiserfs_unmap_buffer(bh); 2822 out:
2672 else 2823 return ret;
2673 ret = 0;
2674 }
2675 curr_off = next_off;
2676 bh = next;
2677 } while (bh != head);
2678
2679 /*
2680 * We release buffers only if the entire page is being invalidated.
2681 * The get_block cached value has been unconditionally invalidated,
2682 * so real IO is not possible anymore.
2683 */
2684 if (!offset && ret)
2685 ret = try_to_release_page(page, 0);
2686out:
2687 return ret;
2688} 2824}
2689 2825
2690static int reiserfs_set_page_dirty(struct page *page) { 2826static int reiserfs_set_page_dirty(struct page *page)
2691 struct inode *inode = page->mapping->host; 2827{
2692 if (reiserfs_file_data_log(inode)) { 2828 struct inode *inode = page->mapping->host;
2693 SetPageChecked(page); 2829 if (reiserfs_file_data_log(inode)) {
2694 return __set_page_dirty_nobuffers(page); 2830 SetPageChecked(page);
2695 } 2831 return __set_page_dirty_nobuffers(page);
2696 return __set_page_dirty_buffers(page); 2832 }
2833 return __set_page_dirty_buffers(page);
2697} 2834}
2698 2835
2699/* 2836/*
@@ -2707,140 +2844,152 @@ static int reiserfs_set_page_dirty(struct page *page) {
2707 */ 2844 */
2708static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) 2845static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
2709{ 2846{
2710 struct inode *inode = page->mapping->host ; 2847 struct inode *inode = page->mapping->host;
2711 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; 2848 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2712 struct buffer_head *head ; 2849 struct buffer_head *head;
2713 struct buffer_head *bh ; 2850 struct buffer_head *bh;
2714 int ret = 1 ; 2851 int ret = 1;
2715 2852
2716 WARN_ON(PageChecked(page)); 2853 WARN_ON(PageChecked(page));
2717 spin_lock(&j->j_dirty_buffers_lock) ; 2854 spin_lock(&j->j_dirty_buffers_lock);
2718 head = page_buffers(page) ; 2855 head = page_buffers(page);
2719 bh = head ; 2856 bh = head;
2720 do { 2857 do {
2721 if (bh->b_private) { 2858 if (bh->b_private) {
2722 if (!buffer_dirty(bh) && !buffer_locked(bh)) { 2859 if (!buffer_dirty(bh) && !buffer_locked(bh)) {
2723 reiserfs_free_jh(bh); 2860 reiserfs_free_jh(bh);
2724 } else { 2861 } else {
2725 ret = 0 ; 2862 ret = 0;
2726 break ; 2863 break;
2727 } 2864 }
2728 } 2865 }
2729 bh = bh->b_this_page ; 2866 bh = bh->b_this_page;
2730 } while (bh != head) ; 2867 } while (bh != head);
2731 if (ret) 2868 if (ret)
2732 ret = try_to_free_buffers(page) ; 2869 ret = try_to_free_buffers(page);
2733 spin_unlock(&j->j_dirty_buffers_lock) ; 2870 spin_unlock(&j->j_dirty_buffers_lock);
2734 return ret ; 2871 return ret;
2735} 2872}
2736 2873
2737/* We thank Mingming Cao for helping us understand in great detail what 2874/* We thank Mingming Cao for helping us understand in great detail what
2738 to do in this section of the code. */ 2875 to do in this section of the code. */
2739static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 2876static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
2740 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 2877 const struct iovec *iov, loff_t offset,
2878 unsigned long nr_segs)
2741{ 2879{
2742 struct file *file = iocb->ki_filp; 2880 struct file *file = iocb->ki_filp;
2743 struct inode *inode = file->f_mapping->host; 2881 struct inode *inode = file->f_mapping->host;
2744 2882
2745 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 2883 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2746 offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); 2884 offset, nr_segs,
2885 reiserfs_get_blocks_direct_io, NULL);
2747} 2886}
2748 2887
2749int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { 2888int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
2750 struct inode *inode = dentry->d_inode ; 2889{
2751 int error ; 2890 struct inode *inode = dentry->d_inode;
2752 unsigned int ia_valid = attr->ia_valid; 2891 int error;
2753 reiserfs_write_lock(inode->i_sb); 2892 unsigned int ia_valid = attr->ia_valid;
2754 if (attr->ia_valid & ATTR_SIZE) { 2893 reiserfs_write_lock(inode->i_sb);
2755 /* version 2 items will be caught by the s_maxbytes check 2894 if (attr->ia_valid & ATTR_SIZE) {
2756 ** done for us in vmtruncate 2895 /* version 2 items will be caught by the s_maxbytes check
2757 */ 2896 ** done for us in vmtruncate
2758 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && 2897 */
2759 attr->ia_size > MAX_NON_LFS) { 2898 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
2760 error = -EFBIG ; 2899 attr->ia_size > MAX_NON_LFS) {
2761 goto out; 2900 error = -EFBIG;
2762 } 2901 goto out;
2763 /* fill in hole pointers in the expanding truncate case. */ 2902 }
2764 if (attr->ia_size > inode->i_size) { 2903 /* fill in hole pointers in the expanding truncate case. */
2765 error = generic_cont_expand(inode, attr->ia_size) ; 2904 if (attr->ia_size > inode->i_size) {
2766 if (REISERFS_I(inode)->i_prealloc_count > 0) { 2905 error = generic_cont_expand(inode, attr->ia_size);
2767 int err; 2906 if (REISERFS_I(inode)->i_prealloc_count > 0) {
2768 struct reiserfs_transaction_handle th ; 2907 int err;
2769 /* we're changing at most 2 bitmaps, inode + super */ 2908 struct reiserfs_transaction_handle th;
2770 err = journal_begin(&th, inode->i_sb, 4) ; 2909 /* we're changing at most 2 bitmaps, inode + super */
2771 if (!err) { 2910 err = journal_begin(&th, inode->i_sb, 4);
2772 reiserfs_discard_prealloc (&th, inode); 2911 if (!err) {
2773 err = journal_end(&th, inode->i_sb, 4) ; 2912 reiserfs_discard_prealloc(&th, inode);
2913 err = journal_end(&th, inode->i_sb, 4);
2914 }
2915 if (err)
2916 error = err;
2917 }
2918 if (error)
2919 goto out;
2774 } 2920 }
2775 if (err)
2776 error = err;
2777 }
2778 if (error)
2779 goto out;
2780 } 2921 }
2781 }
2782 2922
2783 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || 2923 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
2784 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && 2924 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
2785 (get_inode_sd_version (inode) == STAT_DATA_V1)) { 2925 (get_inode_sd_version(inode) == STAT_DATA_V1)) {
2786 /* stat data of format v3.5 has 16 bit uid and gid */ 2926 /* stat data of format v3.5 has 16 bit uid and gid */
2787 error = -EINVAL; 2927 error = -EINVAL;
2788 goto out; 2928 goto out;
2789 } 2929 }
2790 2930
2791 error = inode_change_ok(inode, attr) ; 2931 error = inode_change_ok(inode, attr);
2792 if (!error) { 2932 if (!error) {
2793 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 2933 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2794 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 2934 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
2795 error = reiserfs_chown_xattrs (inode, attr); 2935 error = reiserfs_chown_xattrs(inode, attr);
2796 2936
2797 if (!error) { 2937 if (!error) {
2798 struct reiserfs_transaction_handle th; 2938 struct reiserfs_transaction_handle th;
2799 2939 int jbegin_count =
2800 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ 2940 2 *
2801 journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); 2941 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
2802 error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; 2942 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
2803 if (error) { 2943 2;
2804 journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); 2944
2805 goto out; 2945 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
2806 } 2946 error =
2807 /* Update corresponding info in inode so that everything is in 2947 journal_begin(&th, inode->i_sb,
2808 * one transaction */ 2948 jbegin_count);
2809 if (attr->ia_valid & ATTR_UID) 2949 if (error)
2810 inode->i_uid = attr->ia_uid; 2950 goto out;
2811 if (attr->ia_valid & ATTR_GID) 2951 error =
2812 inode->i_gid = attr->ia_gid; 2952 DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
2813 mark_inode_dirty(inode); 2953 if (error) {
2814 journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); 2954 journal_end(&th, inode->i_sb,
2955 jbegin_count);
2956 goto out;
2957 }
2958 /* Update corresponding info in inode so that everything is in
2959 * one transaction */
2960 if (attr->ia_valid & ATTR_UID)
2961 inode->i_uid = attr->ia_uid;
2962 if (attr->ia_valid & ATTR_GID)
2963 inode->i_gid = attr->ia_gid;
2964 mark_inode_dirty(inode);
2965 error =
2966 journal_end(&th, inode->i_sb, jbegin_count);
2967 }
2815 } 2968 }
2816 } 2969 if (!error)
2817 if (!error) 2970 error = inode_setattr(inode, attr);
2818 error = inode_setattr(inode, attr) ; 2971 }
2819 }
2820
2821 2972
2822 if (!error && reiserfs_posixacl (inode->i_sb)) { 2973 if (!error && reiserfs_posixacl(inode->i_sb)) {
2823 if (attr->ia_valid & ATTR_MODE) 2974 if (attr->ia_valid & ATTR_MODE)
2824 error = reiserfs_acl_chmod (inode); 2975 error = reiserfs_acl_chmod(inode);
2825 } 2976 }
2826 2977
2827out: 2978 out:
2828 reiserfs_write_unlock(inode->i_sb); 2979 reiserfs_write_unlock(inode->i_sb);
2829 return error ; 2980 return error;
2830} 2981}
2831 2982
2832
2833
2834struct address_space_operations reiserfs_address_space_operations = { 2983struct address_space_operations reiserfs_address_space_operations = {
2835 .writepage = reiserfs_writepage, 2984 .writepage = reiserfs_writepage,
2836 .readpage = reiserfs_readpage, 2985 .readpage = reiserfs_readpage,
2837 .readpages = reiserfs_readpages, 2986 .readpages = reiserfs_readpages,
2838 .releasepage = reiserfs_releasepage, 2987 .releasepage = reiserfs_releasepage,
2839 .invalidatepage = reiserfs_invalidatepage, 2988 .invalidatepage = reiserfs_invalidatepage,
2840 .sync_page = block_sync_page, 2989 .sync_page = block_sync_page,
2841 .prepare_write = reiserfs_prepare_write, 2990 .prepare_write = reiserfs_prepare_write,
2842 .commit_write = reiserfs_commit_write, 2991 .commit_write = reiserfs_commit_write,
2843 .bmap = reiserfs_aop_bmap, 2992 .bmap = reiserfs_aop_bmap,
2844 .direct_IO = reiserfs_direct_IO, 2993 .direct_IO = reiserfs_direct_IO,
2845 .set_page_dirty = reiserfs_set_page_dirty, 2994 .set_page_dirty = reiserfs_set_page_dirty,
2846} ; 2995};
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 94dc42475a04..81fc00285f60 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,7 @@
9#include <linux/pagemap.h> 9#include <linux/pagemap.h>
10#include <linux/smp_lock.h> 10#include <linux/smp_lock.h>
11 11
12static int reiserfs_unpack (struct inode * inode, struct file * filp); 12static int reiserfs_unpack(struct inode *inode, struct file *filp);
13 13
14/* 14/*
15** reiserfs_ioctl - handler for ioctl for inode 15** reiserfs_ioctl - handler for ioctl for inode
@@ -19,63 +19,72 @@ static int reiserfs_unpack (struct inode * inode, struct file * filp);
19** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION 19** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
20** 3) That's all for a while ... 20** 3) That's all for a while ...
21*/ 21*/
22int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 22int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
23 unsigned long arg) 23 unsigned long arg)
24{ 24{
25 unsigned int flags; 25 unsigned int flags;
26 26
27 switch (cmd) { 27 switch (cmd) {
28 case REISERFS_IOC_UNPACK: 28 case REISERFS_IOC_UNPACK:
29 if( S_ISREG( inode -> i_mode ) ) { 29 if (S_ISREG(inode->i_mode)) {
30 if (arg) 30 if (arg)
31 return reiserfs_unpack (inode, filp); 31 return reiserfs_unpack(inode, filp);
32 else 32 else
33 return 0; 33 return 0;
34 } else 34 } else
35 return -ENOTTY; 35 return -ENOTTY;
36 /* following two cases are taken from fs/ext2/ioctl.c by Remy 36 /* following two cases are taken from fs/ext2/ioctl.c by Remy
37 Card (card@masi.ibp.fr) */ 37 Card (card@masi.ibp.fr) */
38 case REISERFS_IOC_GETFLAGS: 38 case REISERFS_IOC_GETFLAGS:
39 flags = REISERFS_I(inode) -> i_attrs; 39 if (!reiserfs_attrs(inode->i_sb))
40 i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags ); 40 return -ENOTTY;
41 return put_user(flags, (int __user *) arg);
42 case REISERFS_IOC_SETFLAGS: {
43 if (IS_RDONLY(inode))
44 return -EROFS;
45 41
46 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) 42 flags = REISERFS_I(inode)->i_attrs;
47 return -EPERM; 43 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
44 return put_user(flags, (int __user *)arg);
45 case REISERFS_IOC_SETFLAGS:{
46 if (!reiserfs_attrs(inode->i_sb))
47 return -ENOTTY;
48 48
49 if (get_user(flags, (int __user *) arg)) 49 if (IS_RDONLY(inode))
50 return -EFAULT; 50 return -EROFS;
51 51
52 if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) && 52 if ((current->fsuid != inode->i_uid)
53 !capable( CAP_LINUX_IMMUTABLE ) ) 53 && !capable(CAP_FOWNER))
54 return -EPERM; 54 return -EPERM;
55 55
56 if( ( flags & REISERFS_NOTAIL_FL ) && 56 if (get_user(flags, (int __user *)arg))
57 S_ISREG( inode -> i_mode ) ) { 57 return -EFAULT;
58
59 if (((flags ^ REISERFS_I(inode)->
60 i_attrs) & (REISERFS_IMMUTABLE_FL |
61 REISERFS_APPEND_FL))
62 && !capable(CAP_LINUX_IMMUTABLE))
63 return -EPERM;
64
65 if ((flags & REISERFS_NOTAIL_FL) &&
66 S_ISREG(inode->i_mode)) {
58 int result; 67 int result;
59 68
60 result = reiserfs_unpack( inode, filp ); 69 result = reiserfs_unpack(inode, filp);
61 if( result ) 70 if (result)
62 return result; 71 return result;
72 }
73 sd_attrs_to_i_attrs(flags, inode);
74 REISERFS_I(inode)->i_attrs = flags;
75 inode->i_ctime = CURRENT_TIME_SEC;
76 mark_inode_dirty(inode);
77 return 0;
63 } 78 }
64 sd_attrs_to_i_attrs( flags, inode );
65 REISERFS_I(inode) -> i_attrs = flags;
66 inode->i_ctime = CURRENT_TIME_SEC;
67 mark_inode_dirty(inode);
68 return 0;
69 }
70 case REISERFS_IOC_GETVERSION: 79 case REISERFS_IOC_GETVERSION:
71 return put_user(inode->i_generation, (int __user *) arg); 80 return put_user(inode->i_generation, (int __user *)arg);
72 case REISERFS_IOC_SETVERSION: 81 case REISERFS_IOC_SETVERSION:
73 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) 82 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
74 return -EPERM; 83 return -EPERM;
75 if (IS_RDONLY(inode)) 84 if (IS_RDONLY(inode))
76 return -EROFS; 85 return -EROFS;
77 if (get_user(inode->i_generation, (int __user *) arg)) 86 if (get_user(inode->i_generation, (int __user *)arg))
78 return -EFAULT; 87 return -EFAULT;
79 inode->i_ctime = CURRENT_TIME_SEC; 88 inode->i_ctime = CURRENT_TIME_SEC;
80 mark_inode_dirty(inode); 89 mark_inode_dirty(inode);
81 return 0; 90 return 0;
@@ -89,63 +98,65 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
89** Function try to convert tail from direct item into indirect. 98** Function try to convert tail from direct item into indirect.
90** It set up nopack attribute in the REISERFS_I(inode)->nopack 99** It set up nopack attribute in the REISERFS_I(inode)->nopack
91*/ 100*/
92static int reiserfs_unpack (struct inode * inode, struct file * filp) 101static int reiserfs_unpack(struct inode *inode, struct file *filp)
93{ 102{
94 int retval = 0; 103 int retval = 0;
95 int index ; 104 int index;
96 struct page *page ; 105 struct page *page;
97 struct address_space *mapping ; 106 struct address_space *mapping;
98 unsigned long write_from ; 107 unsigned long write_from;
99 unsigned long blocksize = inode->i_sb->s_blocksize ; 108 unsigned long blocksize = inode->i_sb->s_blocksize;
100 109
101 if (inode->i_size == 0) { 110 if (inode->i_size == 0) {
102 REISERFS_I(inode)->i_flags |= i_nopack_mask; 111 REISERFS_I(inode)->i_flags |= i_nopack_mask;
103 return 0 ; 112 return 0;
104 } 113 }
105 /* ioctl already done */ 114 /* ioctl already done */
106 if (REISERFS_I(inode)->i_flags & i_nopack_mask) { 115 if (REISERFS_I(inode)->i_flags & i_nopack_mask) {
107 return 0 ; 116 return 0;
108 } 117 }
109 reiserfs_write_lock(inode->i_sb); 118 reiserfs_write_lock(inode->i_sb);
110 119
111 /* we need to make sure nobody is changing the file size beneath 120 /* we need to make sure nobody is changing the file size beneath
112 ** us 121 ** us
113 */ 122 */
114 down(&inode->i_sem) ; 123 down(&inode->i_sem);
115 124
116 write_from = inode->i_size & (blocksize - 1) ; 125 write_from = inode->i_size & (blocksize - 1);
117 /* if we are on a block boundary, we are already unpacked. */ 126 /* if we are on a block boundary, we are already unpacked. */
118 if ( write_from == 0) { 127 if (write_from == 0) {
128 REISERFS_I(inode)->i_flags |= i_nopack_mask;
129 goto out;
130 }
131
132 /* we unpack by finding the page with the tail, and calling
133 ** reiserfs_prepare_write on that page. This will force a
134 ** reiserfs_get_block to unpack the tail for us.
135 */
136 index = inode->i_size >> PAGE_CACHE_SHIFT;
137 mapping = inode->i_mapping;
138 page = grab_cache_page(mapping, index);
139 retval = -ENOMEM;
140 if (!page) {
141 goto out;
142 }
143 retval =
144 mapping->a_ops->prepare_write(NULL, page, write_from, write_from);
145 if (retval)
146 goto out_unlock;
147
148 /* conversion can change page contents, must flush */
149 flush_dcache_page(page);
150 retval =
151 mapping->a_ops->commit_write(NULL, page, write_from, write_from);
119 REISERFS_I(inode)->i_flags |= i_nopack_mask; 152 REISERFS_I(inode)->i_flags |= i_nopack_mask;
120 goto out ; 153
121 } 154 out_unlock:
122 155 unlock_page(page);
123 /* we unpack by finding the page with the tail, and calling 156 page_cache_release(page);
124 ** reiserfs_prepare_write on that page. This will force a 157
125 ** reiserfs_get_block to unpack the tail for us. 158 out:
126 */ 159 up(&inode->i_sem);
127 index = inode->i_size >> PAGE_CACHE_SHIFT ; 160 reiserfs_write_unlock(inode->i_sb);
128 mapping = inode->i_mapping ; 161 return retval;
129 page = grab_cache_page(mapping, index) ;
130 retval = -ENOMEM;
131 if (!page) {
132 goto out ;
133 }
134 retval = mapping->a_ops->prepare_write(NULL, page, write_from, write_from) ;
135 if (retval)
136 goto out_unlock ;
137
138 /* conversion can change page contents, must flush */
139 flush_dcache_page(page) ;
140 retval = mapping->a_ops->commit_write(NULL, page, write_from, write_from) ;
141 REISERFS_I(inode)->i_flags |= i_nopack_mask;
142
143out_unlock:
144 unlock_page(page) ;
145 page_cache_release(page) ;
146
147out:
148 up(&inode->i_sem) ;
149 reiserfs_write_unlock(inode->i_sb);
150 return retval;
151} 162}
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 0ce33db1acdf..e237cd668e5b 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -14,776 +14,741 @@
14////////////////////////////////////////////////////////////////////////////// 14//////////////////////////////////////////////////////////////////////////////
15// stat data functions 15// stat data functions
16// 16//
17static int sd_bytes_number (struct item_head * ih, int block_size) 17static int sd_bytes_number(struct item_head *ih, int block_size)
18{ 18{
19 return 0; 19 return 0;
20} 20}
21 21
22static void sd_decrement_key (struct cpu_key * key) 22static void sd_decrement_key(struct cpu_key *key)
23{ 23{
24 key->on_disk_key.k_objectid --; 24 key->on_disk_key.k_objectid--;
25 set_cpu_key_k_type (key, TYPE_ANY); 25 set_cpu_key_k_type(key, TYPE_ANY);
26 set_cpu_key_k_offset(key, (loff_t)(-1)); 26 set_cpu_key_k_offset(key, (loff_t) (-1));
27} 27}
28 28
29static int sd_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 29static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
30{ 30{
31 return 0; 31 return 0;
32} 32}
33 33
34 34static char *print_time(time_t t)
35
36static char * print_time (time_t t)
37{ 35{
38 static char timebuf[256]; 36 static char timebuf[256];
39 37
40 sprintf (timebuf, "%ld", t); 38 sprintf(timebuf, "%ld", t);
41 return timebuf; 39 return timebuf;
42} 40}
43 41
44 42static void sd_print_item(struct item_head *ih, char *item)
45static void sd_print_item (struct item_head * ih, char * item)
46{ 43{
47 printk ("\tmode | size | nlinks | first direct | mtime\n"); 44 printk("\tmode | size | nlinks | first direct | mtime\n");
48 if (stat_data_v1 (ih)) { 45 if (stat_data_v1(ih)) {
49 struct stat_data_v1 * sd = (struct stat_data_v1 *)item; 46 struct stat_data_v1 *sd = (struct stat_data_v1 *)item;
50 47
51 printk ("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd), 48 printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd),
52 sd_v1_size(sd), sd_v1_nlink(sd), sd_v1_first_direct_byte(sd), 49 sd_v1_size(sd), sd_v1_nlink(sd),
53 print_time( sd_v1_mtime(sd) ) ); 50 sd_v1_first_direct_byte(sd),
54 } else { 51 print_time(sd_v1_mtime(sd)));
55 struct stat_data * sd = (struct stat_data *)item; 52 } else {
53 struct stat_data *sd = (struct stat_data *)item;
56 54
57 printk ("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), 55 printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
58 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), 56 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
59 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); 57 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
60 } 58 }
61} 59}
62 60
63static void sd_check_item (struct item_head * ih, char * item) 61static void sd_check_item(struct item_head *ih, char *item)
64{ 62{
65 // FIXME: type something here! 63 // FIXME: type something here!
66} 64}
67 65
68 66static int sd_create_vi(struct virtual_node *vn,
69static int sd_create_vi (struct virtual_node * vn, 67 struct virtual_item *vi,
70 struct virtual_item * vi, 68 int is_affected, int insert_size)
71 int is_affected,
72 int insert_size)
73{ 69{
74 vi->vi_index = TYPE_STAT_DATA; 70 vi->vi_index = TYPE_STAT_DATA;
75 //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? 71 //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed?
76 return 0; 72 return 0;
77} 73}
78 74
79 75static int sd_check_left(struct virtual_item *vi, int free,
80static int sd_check_left (struct virtual_item * vi, int free, 76 int start_skip, int end_skip)
81 int start_skip, int end_skip)
82{ 77{
83 if (start_skip || end_skip) 78 if (start_skip || end_skip)
84 BUG (); 79 BUG();
85 return -1; 80 return -1;
86} 81}
87 82
88 83static int sd_check_right(struct virtual_item *vi, int free)
89static int sd_check_right (struct virtual_item * vi, int free)
90{ 84{
91 return -1; 85 return -1;
92} 86}
93 87
94static int sd_part_size (struct virtual_item * vi, int first, int count) 88static int sd_part_size(struct virtual_item *vi, int first, int count)
95{ 89{
96 if (count) 90 if (count)
97 BUG (); 91 BUG();
98 return 0; 92 return 0;
99} 93}
100 94
101static int sd_unit_num (struct virtual_item * vi) 95static int sd_unit_num(struct virtual_item *vi)
102{ 96{
103 return vi->vi_item_len - IH_SIZE; 97 return vi->vi_item_len - IH_SIZE;
104} 98}
105 99
106 100static void sd_print_vi(struct virtual_item *vi)
107static void sd_print_vi (struct virtual_item * vi)
108{ 101{
109 reiserfs_warning (NULL, "STATDATA, index %d, type 0x%x, %h", 102 reiserfs_warning(NULL, "STATDATA, index %d, type 0x%x, %h",
110 vi->vi_index, vi->vi_type, vi->vi_ih); 103 vi->vi_index, vi->vi_type, vi->vi_ih);
111} 104}
112 105
113static struct item_operations stat_data_ops = { 106static struct item_operations stat_data_ops = {
114 .bytes_number = sd_bytes_number, 107 .bytes_number = sd_bytes_number,
115 .decrement_key = sd_decrement_key, 108 .decrement_key = sd_decrement_key,
116 .is_left_mergeable = sd_is_left_mergeable, 109 .is_left_mergeable = sd_is_left_mergeable,
117 .print_item = sd_print_item, 110 .print_item = sd_print_item,
118 .check_item = sd_check_item, 111 .check_item = sd_check_item,
119 112
120 .create_vi = sd_create_vi, 113 .create_vi = sd_create_vi,
121 .check_left = sd_check_left, 114 .check_left = sd_check_left,
122 .check_right = sd_check_right, 115 .check_right = sd_check_right,
123 .part_size = sd_part_size, 116 .part_size = sd_part_size,
124 .unit_num = sd_unit_num, 117 .unit_num = sd_unit_num,
125 .print_vi = sd_print_vi 118 .print_vi = sd_print_vi
126}; 119};
127 120
128
129
130////////////////////////////////////////////////////////////////////////////// 121//////////////////////////////////////////////////////////////////////////////
131// direct item functions 122// direct item functions
132// 123//
133static int direct_bytes_number (struct item_head * ih, int block_size) 124static int direct_bytes_number(struct item_head *ih, int block_size)
134{ 125{
135 return ih_item_len(ih); 126 return ih_item_len(ih);
136} 127}
137 128
138
139// FIXME: this should probably switch to indirect as well 129// FIXME: this should probably switch to indirect as well
140static void direct_decrement_key (struct cpu_key * key) 130static void direct_decrement_key(struct cpu_key *key)
141{ 131{
142 cpu_key_k_offset_dec (key); 132 cpu_key_k_offset_dec(key);
143 if (cpu_key_k_offset (key) == 0) 133 if (cpu_key_k_offset(key) == 0)
144 set_cpu_key_k_type (key, TYPE_STAT_DATA); 134 set_cpu_key_k_type(key, TYPE_STAT_DATA);
145} 135}
146 136
147 137static int direct_is_left_mergeable(struct reiserfs_key *key,
148static int direct_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 138 unsigned long bsize)
149{ 139{
150 int version = le_key_version (key); 140 int version = le_key_version(key);
151 return ((le_key_k_offset (version, key) & (bsize - 1)) != 1); 141 return ((le_key_k_offset(version, key) & (bsize - 1)) != 1);
152} 142}
153 143
154 144static void direct_print_item(struct item_head *ih, char *item)
155static void direct_print_item (struct item_head * ih, char * item)
156{ 145{
157 int j = 0; 146 int j = 0;
158 147
159// return; 148// return;
160 printk ("\""); 149 printk("\"");
161 while (j < ih_item_len(ih)) 150 while (j < ih_item_len(ih))
162 printk ("%c", item[j++]); 151 printk("%c", item[j++]);
163 printk ("\"\n"); 152 printk("\"\n");
164} 153}
165 154
166 155static void direct_check_item(struct item_head *ih, char *item)
167static void direct_check_item (struct item_head * ih, char * item)
168{ 156{
169 // FIXME: type something here! 157 // FIXME: type something here!
170} 158}
171 159
172 160static int direct_create_vi(struct virtual_node *vn,
173static int direct_create_vi (struct virtual_node * vn, 161 struct virtual_item *vi,
174 struct virtual_item * vi, 162 int is_affected, int insert_size)
175 int is_affected,
176 int insert_size)
177{ 163{
178 vi->vi_index = TYPE_DIRECT; 164 vi->vi_index = TYPE_DIRECT;
179 //vi->vi_type |= VI_TYPE_DIRECT; 165 //vi->vi_type |= VI_TYPE_DIRECT;
180 return 0; 166 return 0;
181} 167}
182 168
183static int direct_check_left (struct virtual_item * vi, int free, 169static int direct_check_left(struct virtual_item *vi, int free,
184 int start_skip, int end_skip) 170 int start_skip, int end_skip)
185{ 171{
186 int bytes; 172 int bytes;
187 173
188 bytes = free - free % 8; 174 bytes = free - free % 8;
189 return bytes ?: -1; 175 return bytes ? : -1;
190} 176}
191 177
192 178static int direct_check_right(struct virtual_item *vi, int free)
193static int direct_check_right (struct virtual_item * vi, int free)
194{ 179{
195 return direct_check_left (vi, free, 0, 0); 180 return direct_check_left(vi, free, 0, 0);
196} 181}
197 182
198static int direct_part_size (struct virtual_item * vi, int first, int count) 183static int direct_part_size(struct virtual_item *vi, int first, int count)
199{ 184{
200 return count; 185 return count;
201} 186}
202 187
203 188static int direct_unit_num(struct virtual_item *vi)
204static int direct_unit_num (struct virtual_item * vi)
205{ 189{
206 return vi->vi_item_len - IH_SIZE; 190 return vi->vi_item_len - IH_SIZE;
207} 191}
208 192
209 193static void direct_print_vi(struct virtual_item *vi)
210static void direct_print_vi (struct virtual_item * vi)
211{ 194{
212 reiserfs_warning (NULL, "DIRECT, index %d, type 0x%x, %h", 195 reiserfs_warning(NULL, "DIRECT, index %d, type 0x%x, %h",
213 vi->vi_index, vi->vi_type, vi->vi_ih); 196 vi->vi_index, vi->vi_type, vi->vi_ih);
214} 197}
215 198
216static struct item_operations direct_ops = { 199static struct item_operations direct_ops = {
217 .bytes_number = direct_bytes_number, 200 .bytes_number = direct_bytes_number,
218 .decrement_key = direct_decrement_key, 201 .decrement_key = direct_decrement_key,
219 .is_left_mergeable = direct_is_left_mergeable, 202 .is_left_mergeable = direct_is_left_mergeable,
220 .print_item = direct_print_item, 203 .print_item = direct_print_item,
221 .check_item = direct_check_item, 204 .check_item = direct_check_item,
222 205
223 .create_vi = direct_create_vi, 206 .create_vi = direct_create_vi,
224 .check_left = direct_check_left, 207 .check_left = direct_check_left,
225 .check_right = direct_check_right, 208 .check_right = direct_check_right,
226 .part_size = direct_part_size, 209 .part_size = direct_part_size,
227 .unit_num = direct_unit_num, 210 .unit_num = direct_unit_num,
228 .print_vi = direct_print_vi 211 .print_vi = direct_print_vi
229}; 212};
230 213
231
232
233////////////////////////////////////////////////////////////////////////////// 214//////////////////////////////////////////////////////////////////////////////
234// indirect item functions 215// indirect item functions
235// 216//
236 217
237static int indirect_bytes_number (struct item_head * ih, int block_size) 218static int indirect_bytes_number(struct item_head *ih, int block_size)
238{ 219{
239 return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); 220 return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih);
240} 221}
241 222
242
243// decrease offset, if it becomes 0, change type to stat data 223// decrease offset, if it becomes 0, change type to stat data
244static void indirect_decrement_key (struct cpu_key * key) 224static void indirect_decrement_key(struct cpu_key *key)
245{ 225{
246 cpu_key_k_offset_dec (key); 226 cpu_key_k_offset_dec(key);
247 if (cpu_key_k_offset (key) == 0) 227 if (cpu_key_k_offset(key) == 0)
248 set_cpu_key_k_type (key, TYPE_STAT_DATA); 228 set_cpu_key_k_type(key, TYPE_STAT_DATA);
249} 229}
250 230
251
252// if it is not first item of the body, then it is mergeable 231// if it is not first item of the body, then it is mergeable
253static int indirect_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 232static int indirect_is_left_mergeable(struct reiserfs_key *key,
233 unsigned long bsize)
254{ 234{
255 int version = le_key_version (key); 235 int version = le_key_version(key);
256 return (le_key_k_offset (version, key) != 1); 236 return (le_key_k_offset(version, key) != 1);
257} 237}
258 238
259
260// printing of indirect item 239// printing of indirect item
261static void start_new_sequence (__u32 * start, int * len, __u32 new) 240static void start_new_sequence(__u32 * start, int *len, __u32 new)
262{ 241{
263 *start = new; 242 *start = new;
264 *len = 1; 243 *len = 1;
265} 244}
266 245
267 246static int sequence_finished(__u32 start, int *len, __u32 new)
268static int sequence_finished (__u32 start, int * len, __u32 new)
269{ 247{
270 if (start == INT_MAX) 248 if (start == INT_MAX)
271 return 1; 249 return 1;
272 250
273 if (start == 0 && new == 0) { 251 if (start == 0 && new == 0) {
274 (*len) ++; 252 (*len)++;
275 return 0; 253 return 0;
276 } 254 }
277 if (start != 0 && (start + *len) == new) { 255 if (start != 0 && (start + *len) == new) {
278 (*len) ++; 256 (*len)++;
279 return 0; 257 return 0;
280 } 258 }
281 return 1; 259 return 1;
282} 260}
283 261
284static void print_sequence (__u32 start, int len) 262static void print_sequence(__u32 start, int len)
285{ 263{
286 if (start == INT_MAX) 264 if (start == INT_MAX)
287 return; 265 return;
288 266
289 if (len == 1) 267 if (len == 1)
290 printk (" %d", start); 268 printk(" %d", start);
291 else 269 else
292 printk (" %d(%d)", start, len); 270 printk(" %d(%d)", start, len);
293} 271}
294 272
295 273static void indirect_print_item(struct item_head *ih, char *item)
296static void indirect_print_item (struct item_head * ih, char * item)
297{ 274{
298 int j; 275 int j;
299 __le32 * unp; 276 __le32 *unp;
300 __u32 prev = INT_MAX; 277 __u32 prev = INT_MAX;
301 int num; 278 int num;
302 279
303 unp = (__le32 *)item; 280 unp = (__le32 *) item;
304 281
305 if (ih_item_len(ih) % UNFM_P_SIZE) 282 if (ih_item_len(ih) % UNFM_P_SIZE)
306 reiserfs_warning (NULL, "indirect_print_item: invalid item len"); 283 reiserfs_warning(NULL, "indirect_print_item: invalid item len");
307 284
308 printk ("%d pointers\n[ ", (int)I_UNFM_NUM (ih)); 285 printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih));
309 for (j = 0; j < I_UNFM_NUM (ih); j ++) { 286 for (j = 0; j < I_UNFM_NUM(ih); j++) {
310 if (sequence_finished (prev, &num, get_block_num(unp, j))) { 287 if (sequence_finished(prev, &num, get_block_num(unp, j))) {
311 print_sequence (prev, num); 288 print_sequence(prev, num);
312 start_new_sequence (&prev, &num, get_block_num(unp, j)); 289 start_new_sequence(&prev, &num, get_block_num(unp, j));
290 }
313 } 291 }
314 } 292 print_sequence(prev, num);
315 print_sequence (prev, num); 293 printk("]\n");
316 printk ("]\n");
317} 294}
318 295
319static void indirect_check_item (struct item_head * ih, char * item) 296static void indirect_check_item(struct item_head *ih, char *item)
320{ 297{
321 // FIXME: type something here! 298 // FIXME: type something here!
322} 299}
323 300
324 301static int indirect_create_vi(struct virtual_node *vn,
325static int indirect_create_vi (struct virtual_node * vn, 302 struct virtual_item *vi,
326 struct virtual_item * vi, 303 int is_affected, int insert_size)
327 int is_affected,
328 int insert_size)
329{ 304{
330 vi->vi_index = TYPE_INDIRECT; 305 vi->vi_index = TYPE_INDIRECT;
331 //vi->vi_type |= VI_TYPE_INDIRECT; 306 //vi->vi_type |= VI_TYPE_INDIRECT;
332 return 0; 307 return 0;
333} 308}
334 309
335static int indirect_check_left (struct virtual_item * vi, int free, 310static int indirect_check_left(struct virtual_item *vi, int free,
336 int start_skip, int end_skip) 311 int start_skip, int end_skip)
337{ 312{
338 int bytes; 313 int bytes;
339 314
340 bytes = free - free % UNFM_P_SIZE; 315 bytes = free - free % UNFM_P_SIZE;
341 return bytes ?: -1; 316 return bytes ? : -1;
342} 317}
343 318
344 319static int indirect_check_right(struct virtual_item *vi, int free)
345static int indirect_check_right (struct virtual_item * vi, int free)
346{ 320{
347 return indirect_check_left (vi, free, 0, 0); 321 return indirect_check_left(vi, free, 0, 0);
348} 322}
349 323
350
351
352// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) 324// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right)
353static int indirect_part_size (struct virtual_item * vi, int first, int units) 325static int indirect_part_size(struct virtual_item *vi, int first, int units)
354{ 326{
355 // unit of indirect item is byte (yet) 327 // unit of indirect item is byte (yet)
356 return units; 328 return units;
357} 329}
358 330
359static int indirect_unit_num (struct virtual_item * vi) 331static int indirect_unit_num(struct virtual_item *vi)
360{ 332{
361 // unit of indirect item is byte (yet) 333 // unit of indirect item is byte (yet)
362 return vi->vi_item_len - IH_SIZE; 334 return vi->vi_item_len - IH_SIZE;
363} 335}
364 336
365static void indirect_print_vi (struct virtual_item * vi) 337static void indirect_print_vi(struct virtual_item *vi)
366{ 338{
367 reiserfs_warning (NULL, "INDIRECT, index %d, type 0x%x, %h", 339 reiserfs_warning(NULL, "INDIRECT, index %d, type 0x%x, %h",
368 vi->vi_index, vi->vi_type, vi->vi_ih); 340 vi->vi_index, vi->vi_type, vi->vi_ih);
369} 341}
370 342
371static struct item_operations indirect_ops = { 343static struct item_operations indirect_ops = {
372 .bytes_number = indirect_bytes_number, 344 .bytes_number = indirect_bytes_number,
373 .decrement_key = indirect_decrement_key, 345 .decrement_key = indirect_decrement_key,
374 .is_left_mergeable = indirect_is_left_mergeable, 346 .is_left_mergeable = indirect_is_left_mergeable,
375 .print_item = indirect_print_item, 347 .print_item = indirect_print_item,
376 .check_item = indirect_check_item, 348 .check_item = indirect_check_item,
377 349
378 .create_vi = indirect_create_vi, 350 .create_vi = indirect_create_vi,
379 .check_left = indirect_check_left, 351 .check_left = indirect_check_left,
380 .check_right = indirect_check_right, 352 .check_right = indirect_check_right,
381 .part_size = indirect_part_size, 353 .part_size = indirect_part_size,
382 .unit_num = indirect_unit_num, 354 .unit_num = indirect_unit_num,
383 .print_vi = indirect_print_vi 355 .print_vi = indirect_print_vi
384}; 356};
385 357
386
387////////////////////////////////////////////////////////////////////////////// 358//////////////////////////////////////////////////////////////////////////////
388// direntry functions 359// direntry functions
389// 360//
390 361
391 362static int direntry_bytes_number(struct item_head *ih, int block_size)
392static int direntry_bytes_number (struct item_head * ih, int block_size)
393{ 363{
394 reiserfs_warning (NULL, "vs-16090: direntry_bytes_number: " 364 reiserfs_warning(NULL, "vs-16090: direntry_bytes_number: "
395 "bytes number is asked for direntry"); 365 "bytes number is asked for direntry");
396 return 0; 366 return 0;
397}
398
399static void direntry_decrement_key (struct cpu_key * key)
400{
401 cpu_key_k_offset_dec (key);
402 if (cpu_key_k_offset (key) == 0)
403 set_cpu_key_k_type (key, TYPE_STAT_DATA);
404} 367}
405 368
406 369static void direntry_decrement_key(struct cpu_key *key)
407static int direntry_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize)
408{ 370{
409 if (le32_to_cpu (key->u.k_offset_v1.k_offset) == DOT_OFFSET) 371 cpu_key_k_offset_dec(key);
410 return 0; 372 if (cpu_key_k_offset(key) == 0)
411 return 1; 373 set_cpu_key_k_type(key, TYPE_STAT_DATA);
412
413} 374}
414 375
415 376static int direntry_is_left_mergeable(struct reiserfs_key *key,
416static void direntry_print_item (struct item_head * ih, char * item) 377 unsigned long bsize)
417{ 378{
418 int i; 379 if (le32_to_cpu(key->u.k_offset_v1.k_offset) == DOT_OFFSET)
419 int namelen; 380 return 0;
420 struct reiserfs_de_head * deh; 381 return 1;
421 char * name;
422 static char namebuf [80];
423
424
425 printk ("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", "Key of pointed object", "Hash", "Gen number", "Status");
426 382
427 deh = (struct reiserfs_de_head *)item; 383}
428 384
429 for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { 385static void direntry_print_item(struct item_head *ih, char *item)
430 namelen = (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - deh_location(deh); 386{
431 name = item + deh_location(deh); 387 int i;
432 if (name[namelen-1] == 0) 388 int namelen;
433 namelen = strlen (name); 389 struct reiserfs_de_head *deh;
434 namebuf[0] = '"'; 390 char *name;
435 if (namelen > sizeof (namebuf) - 3) { 391 static char namebuf[80];
436 strncpy (namebuf + 1, name, sizeof (namebuf) - 3); 392
437 namebuf[sizeof (namebuf) - 2] = '"'; 393 printk("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name",
438 namebuf[sizeof (namebuf) - 1] = 0; 394 "Key of pointed object", "Hash", "Gen number", "Status");
439 } else { 395
440 memcpy (namebuf + 1, name, namelen); 396 deh = (struct reiserfs_de_head *)item;
441 namebuf[namelen + 1] = '"'; 397
442 namebuf[namelen + 2] = 0; 398 for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
399 namelen =
400 (i ? (deh_location(deh - 1)) : ih_item_len(ih)) -
401 deh_location(deh);
402 name = item + deh_location(deh);
403 if (name[namelen - 1] == 0)
404 namelen = strlen(name);
405 namebuf[0] = '"';
406 if (namelen > sizeof(namebuf) - 3) {
407 strncpy(namebuf + 1, name, sizeof(namebuf) - 3);
408 namebuf[sizeof(namebuf) - 2] = '"';
409 namebuf[sizeof(namebuf) - 1] = 0;
410 } else {
411 memcpy(namebuf + 1, name, namelen);
412 namebuf[namelen + 1] = '"';
413 namebuf[namelen + 2] = 0;
414 }
415
416 printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
417 i, namebuf,
418 deh_dir_id(deh), deh_objectid(deh),
419 GET_HASH_VALUE(deh_offset(deh)),
420 GET_GENERATION_NUMBER((deh_offset(deh))),
421 (de_hidden(deh)) ? "HIDDEN" : "VISIBLE");
443 } 422 }
444
445 printk ("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
446 i, namebuf,
447 deh_dir_id(deh), deh_objectid(deh),
448 GET_HASH_VALUE (deh_offset (deh)), GET_GENERATION_NUMBER ((deh_offset (deh))),
449 (de_hidden (deh)) ? "HIDDEN" : "VISIBLE");
450 }
451} 423}
452 424
453 425static void direntry_check_item(struct item_head *ih, char *item)
454static void direntry_check_item (struct item_head * ih, char * item)
455{ 426{
456 int i; 427 int i;
457 struct reiserfs_de_head * deh; 428 struct reiserfs_de_head *deh;
458 429
459 // FIXME: type something here! 430 // FIXME: type something here!
460 deh = (struct reiserfs_de_head *)item; 431 deh = (struct reiserfs_de_head *)item;
461 for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { 432 for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
462 ; 433 ;
463 } 434 }
464} 435}
465 436
466
467
468#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 437#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1
469 438
470/* 439/*
471 * function returns old entry number in directory item in real node 440 * function returns old entry number in directory item in real node
472 * using new entry number in virtual item in virtual node */ 441 * using new entry number in virtual item in virtual node */
473static inline int old_entry_num (int is_affected, int virtual_entry_num, int pos_in_item, int mode) 442static inline int old_entry_num(int is_affected, int virtual_entry_num,
443 int pos_in_item, int mode)
474{ 444{
475 if ( mode == M_INSERT || mode == M_DELETE) 445 if (mode == M_INSERT || mode == M_DELETE)
476 return virtual_entry_num; 446 return virtual_entry_num;
477
478 if (!is_affected)
479 /* cut or paste is applied to another item */
480 return virtual_entry_num;
481
482 if (virtual_entry_num < pos_in_item)
483 return virtual_entry_num;
484 447
485 if (mode == M_CUT) 448 if (!is_affected)
486 return virtual_entry_num + 1; 449 /* cut or paste is applied to another item */
450 return virtual_entry_num;
487 451
488 RFALSE( mode != M_PASTE || virtual_entry_num == 0, 452 if (virtual_entry_num < pos_in_item)
489 "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", mode); 453 return virtual_entry_num;
490
491 return virtual_entry_num - 1;
492}
493 454
455 if (mode == M_CUT)
456 return virtual_entry_num + 1;
494 457
458 RFALSE(mode != M_PASTE || virtual_entry_num == 0,
459 "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'",
460 mode);
495 461
462 return virtual_entry_num - 1;
463}
496 464
497/* Create an array of sizes of directory entries for virtual 465/* Create an array of sizes of directory entries for virtual
498 item. Return space used by an item. FIXME: no control over 466 item. Return space used by an item. FIXME: no control over
499 consuming of space used by this item handler */ 467 consuming of space used by this item handler */
500static int direntry_create_vi (struct virtual_node * vn, 468static int direntry_create_vi(struct virtual_node *vn,
501 struct virtual_item * vi, 469 struct virtual_item *vi,
502 int is_affected, 470 int is_affected, int insert_size)
503 int insert_size) 471{
504{ 472 struct direntry_uarea *dir_u = vi->vi_uarea;
505 struct direntry_uarea * dir_u = vi->vi_uarea; 473 int i, j;
506 int i, j; 474 int size = sizeof(struct direntry_uarea);
507 int size = sizeof (struct direntry_uarea); 475 struct reiserfs_de_head *deh;
508 struct reiserfs_de_head * deh;
509
510 vi->vi_index = TYPE_DIRENTRY;
511
512 if (!(vi->vi_ih) || !vi->vi_item)
513 BUG ();
514
515
516 dir_u->flags = 0;
517 if (le_ih_k_offset (vi->vi_ih) == DOT_OFFSET)
518 dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM;
519
520 deh = (struct reiserfs_de_head *)(vi->vi_item);
521
522
523 /* virtual directory item have this amount of entry after */
524 dir_u->entry_count = ih_entry_count (vi->vi_ih) +
525 ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 :
526 (vn->vn_mode == M_PASTE ? 1 : 0)) : 0);
527
528 for (i = 0; i < dir_u->entry_count; i ++) {
529 j = old_entry_num (is_affected, i, vn->vn_pos_in_item, vn->vn_mode);
530 dir_u->entry_sizes[i] = (j ? deh_location( &(deh[j - 1]) ) :
531 ih_item_len (vi->vi_ih)) -
532 deh_location( &(deh[j])) + DEH_SIZE;
533 }
534
535 size += (dir_u->entry_count * sizeof (short));
536
537 /* set size of pasted entry */
538 if (is_affected && vn->vn_mode == M_PASTE)
539 dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size;
540 476
477 vi->vi_index = TYPE_DIRENTRY;
478
479 if (!(vi->vi_ih) || !vi->vi_item)
480 BUG();
481
482 dir_u->flags = 0;
483 if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET)
484 dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM;
485
486 deh = (struct reiserfs_de_head *)(vi->vi_item);
487
488 /* virtual directory item have this amount of entry after */
489 dir_u->entry_count = ih_entry_count(vi->vi_ih) +
490 ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 :
491 (vn->vn_mode == M_PASTE ? 1 : 0)) : 0);
492
493 for (i = 0; i < dir_u->entry_count; i++) {
494 j = old_entry_num(is_affected, i, vn->vn_pos_in_item,
495 vn->vn_mode);
496 dir_u->entry_sizes[i] =
497 (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) -
498 deh_location(&(deh[j])) + DEH_SIZE;
499 }
500
501 size += (dir_u->entry_count * sizeof(short));
502
503 /* set size of pasted entry */
504 if (is_affected && vn->vn_mode == M_PASTE)
505 dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size;
541 506
542#ifdef CONFIG_REISERFS_CHECK 507#ifdef CONFIG_REISERFS_CHECK
543 /* compare total size of entries with item length */ 508 /* compare total size of entries with item length */
544 { 509 {
545 int k, l; 510 int k, l;
546 511
547 l = 0; 512 l = 0;
548 for (k = 0; k < dir_u->entry_count; k ++) 513 for (k = 0; k < dir_u->entry_count; k++)
549 l += dir_u->entry_sizes[k]; 514 l += dir_u->entry_sizes[k];
550 515
551 if (l + IH_SIZE != vi->vi_item_len + 516 if (l + IH_SIZE != vi->vi_item_len +
552 ((is_affected && (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT)) ? insert_size : 0) ) { 517 ((is_affected
553 reiserfs_panic (NULL, "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", 518 && (vn->vn_mode == M_PASTE
554 vn->vn_mode, insert_size); 519 || vn->vn_mode == M_CUT)) ? insert_size : 0)) {
520 reiserfs_panic(NULL,
521 "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item",
522 vn->vn_mode, insert_size);
523 }
555 } 524 }
556 }
557#endif 525#endif
558 526
559 return size; 527 return size;
560
561 528
562} 529}
563 530
564
565// 531//
566// return number of entries which may fit into specified amount of 532// return number of entries which may fit into specified amount of
567// free space, or -1 if free space is not enough even for 1 entry 533// free space, or -1 if free space is not enough even for 1 entry
568// 534//
569static int direntry_check_left (struct virtual_item * vi, int free, 535static int direntry_check_left(struct virtual_item *vi, int free,
570 int start_skip, int end_skip) 536 int start_skip, int end_skip)
571{ 537{
572 int i; 538 int i;
573 int entries = 0; 539 int entries = 0;
574 struct direntry_uarea * dir_u = vi->vi_uarea; 540 struct direntry_uarea *dir_u = vi->vi_uarea;
575 541
576 for (i = start_skip; i < dir_u->entry_count - end_skip; i ++) { 542 for (i = start_skip; i < dir_u->entry_count - end_skip; i++) {
577 if (dir_u->entry_sizes[i] > free) 543 if (dir_u->entry_sizes[i] > free)
578 /* i-th entry doesn't fit into the remaining free space */ 544 /* i-th entry doesn't fit into the remaining free space */
579 break; 545 break;
580
581 free -= dir_u->entry_sizes[i];
582 entries ++;
583 }
584 546
585 if (entries == dir_u->entry_count) { 547 free -= dir_u->entry_sizes[i];
586 reiserfs_panic (NULL, "free space %d, entry_count %d\n", free, dir_u->entry_count); 548 entries++;
587 } 549 }
588 550
589 /* "." and ".." can not be separated from each other */ 551 if (entries == dir_u->entry_count) {
590 if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries < 2) 552 reiserfs_panic(NULL, "free space %d, entry_count %d\n", free,
591 entries = 0; 553 dir_u->entry_count);
592 554 }
593 return entries ?: -1;
594}
595 555
556 /* "." and ".." can not be separated from each other */
557 if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
558 && entries < 2)
559 entries = 0;
596 560
597static int direntry_check_right (struct virtual_item * vi, int free) 561 return entries ? : -1;
562}
563
564static int direntry_check_right(struct virtual_item *vi, int free)
598{ 565{
599 int i; 566 int i;
600 int entries = 0; 567 int entries = 0;
601 struct direntry_uarea * dir_u = vi->vi_uarea; 568 struct direntry_uarea *dir_u = vi->vi_uarea;
602
603 for (i = dir_u->entry_count - 1; i >= 0; i --) {
604 if (dir_u->entry_sizes[i] > free)
605 /* i-th entry doesn't fit into the remaining free space */
606 break;
607
608 free -= dir_u->entry_sizes[i];
609 entries ++;
610 }
611 if (entries == dir_u->entry_count)
612 BUG ();
613 569
614 /* "." and ".." can not be separated from each other */ 570 for (i = dir_u->entry_count - 1; i >= 0; i--) {
615 if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries > dir_u->entry_count - 2) 571 if (dir_u->entry_sizes[i] > free)
616 entries = dir_u->entry_count - 2; 572 /* i-th entry doesn't fit into the remaining free space */
573 break;
617 574
618 return entries ?: -1; 575 free -= dir_u->entry_sizes[i];
619} 576 entries++;
577 }
578 if (entries == dir_u->entry_count)
579 BUG();
620 580
581 /* "." and ".." can not be separated from each other */
582 if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
583 && entries > dir_u->entry_count - 2)
584 entries = dir_u->entry_count - 2;
585
586 return entries ? : -1;
587}
621 588
622/* sum of entry sizes between from-th and to-th entries including both edges */ 589/* sum of entry sizes between from-th and to-th entries including both edges */
623static int direntry_part_size (struct virtual_item * vi, int first, int count) 590static int direntry_part_size(struct virtual_item *vi, int first, int count)
624{ 591{
625 int i, retval; 592 int i, retval;
626 int from, to; 593 int from, to;
627 struct direntry_uarea * dir_u = vi->vi_uarea; 594 struct direntry_uarea *dir_u = vi->vi_uarea;
628
629 retval = 0;
630 if (first == 0)
631 from = 0;
632 else
633 from = dir_u->entry_count - count;
634 to = from + count - 1;
635 595
636 for (i = from; i <= to; i ++) 596 retval = 0;
637 retval += dir_u->entry_sizes[i]; 597 if (first == 0)
598 from = 0;
599 else
600 from = dir_u->entry_count - count;
601 to = from + count - 1;
638 602
639 return retval; 603 for (i = from; i <= to; i++)
640} 604 retval += dir_u->entry_sizes[i];
641 605
642static int direntry_unit_num (struct virtual_item * vi) 606 return retval;
643{
644 struct direntry_uarea * dir_u = vi->vi_uarea;
645
646 return dir_u->entry_count;
647} 607}
648 608
609static int direntry_unit_num(struct virtual_item *vi)
610{
611 struct direntry_uarea *dir_u = vi->vi_uarea;
649 612
613 return dir_u->entry_count;
614}
650 615
651static void direntry_print_vi (struct virtual_item * vi) 616static void direntry_print_vi(struct virtual_item *vi)
652{ 617{
653 int i; 618 int i;
654 struct direntry_uarea * dir_u = vi->vi_uarea; 619 struct direntry_uarea *dir_u = vi->vi_uarea;
655 620
656 reiserfs_warning (NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", 621 reiserfs_warning(NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
657 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); 622 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags);
658 printk ("%d entries: ", dir_u->entry_count); 623 printk("%d entries: ", dir_u->entry_count);
659 for (i = 0; i < dir_u->entry_count; i ++) 624 for (i = 0; i < dir_u->entry_count; i++)
660 printk ("%d ", dir_u->entry_sizes[i]); 625 printk("%d ", dir_u->entry_sizes[i]);
661 printk ("\n"); 626 printk("\n");
662} 627}
663 628
664static struct item_operations direntry_ops = { 629static struct item_operations direntry_ops = {
665 .bytes_number = direntry_bytes_number, 630 .bytes_number = direntry_bytes_number,
666 .decrement_key = direntry_decrement_key, 631 .decrement_key = direntry_decrement_key,
667 .is_left_mergeable = direntry_is_left_mergeable, 632 .is_left_mergeable = direntry_is_left_mergeable,
668 .print_item = direntry_print_item, 633 .print_item = direntry_print_item,
669 .check_item = direntry_check_item, 634 .check_item = direntry_check_item,
670 635
671 .create_vi = direntry_create_vi, 636 .create_vi = direntry_create_vi,
672 .check_left = direntry_check_left, 637 .check_left = direntry_check_left,
673 .check_right = direntry_check_right, 638 .check_right = direntry_check_right,
674 .part_size = direntry_part_size, 639 .part_size = direntry_part_size,
675 .unit_num = direntry_unit_num, 640 .unit_num = direntry_unit_num,
676 .print_vi = direntry_print_vi 641 .print_vi = direntry_print_vi
677}; 642};
678 643
679
680////////////////////////////////////////////////////////////////////////////// 644//////////////////////////////////////////////////////////////////////////////
681// Error catching functions to catch errors caused by incorrect item types. 645// Error catching functions to catch errors caused by incorrect item types.
682// 646//
683static int errcatch_bytes_number (struct item_head * ih, int block_size) 647static int errcatch_bytes_number(struct item_head *ih, int block_size)
684{ 648{
685 reiserfs_warning (NULL, "green-16001: Invalid item type observed, run fsck ASAP"); 649 reiserfs_warning(NULL,
686 return 0; 650 "green-16001: Invalid item type observed, run fsck ASAP");
651 return 0;
687} 652}
688 653
689static void errcatch_decrement_key (struct cpu_key * key) 654static void errcatch_decrement_key(struct cpu_key *key)
690{ 655{
691 reiserfs_warning (NULL, "green-16002: Invalid item type observed, run fsck ASAP"); 656 reiserfs_warning(NULL,
657 "green-16002: Invalid item type observed, run fsck ASAP");
692} 658}
693 659
694 660static int errcatch_is_left_mergeable(struct reiserfs_key *key,
695static int errcatch_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 661 unsigned long bsize)
696{ 662{
697 reiserfs_warning (NULL, "green-16003: Invalid item type observed, run fsck ASAP"); 663 reiserfs_warning(NULL,
698 return 0; 664 "green-16003: Invalid item type observed, run fsck ASAP");
665 return 0;
699} 666}
700 667
701 668static void errcatch_print_item(struct item_head *ih, char *item)
702static void errcatch_print_item (struct item_head * ih, char * item)
703{ 669{
704 reiserfs_warning (NULL, "green-16004: Invalid item type observed, run fsck ASAP"); 670 reiserfs_warning(NULL,
671 "green-16004: Invalid item type observed, run fsck ASAP");
705} 672}
706 673
707 674static void errcatch_check_item(struct item_head *ih, char *item)
708static void errcatch_check_item (struct item_head * ih, char * item)
709{ 675{
710 reiserfs_warning (NULL, "green-16005: Invalid item type observed, run fsck ASAP"); 676 reiserfs_warning(NULL,
677 "green-16005: Invalid item type observed, run fsck ASAP");
711} 678}
712 679
713static int errcatch_create_vi (struct virtual_node * vn, 680static int errcatch_create_vi(struct virtual_node *vn,
714 struct virtual_item * vi, 681 struct virtual_item *vi,
715 int is_affected, 682 int is_affected, int insert_size)
716 int insert_size)
717{ 683{
718 reiserfs_warning (NULL, "green-16006: Invalid item type observed, run fsck ASAP"); 684 reiserfs_warning(NULL,
719 return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where 685 "green-16006: Invalid item type observed, run fsck ASAP");
720 // this operation is called from is of return type void. 686 return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where
687 // this operation is called from is of return type void.
721} 688}
722 689
723static int errcatch_check_left (struct virtual_item * vi, int free, 690static int errcatch_check_left(struct virtual_item *vi, int free,
724 int start_skip, int end_skip) 691 int start_skip, int end_skip)
725{ 692{
726 reiserfs_warning (NULL, "green-16007: Invalid item type observed, run fsck ASAP"); 693 reiserfs_warning(NULL,
727 return -1; 694 "green-16007: Invalid item type observed, run fsck ASAP");
695 return -1;
728} 696}
729 697
730 698static int errcatch_check_right(struct virtual_item *vi, int free)
731static int errcatch_check_right (struct virtual_item * vi, int free)
732{ 699{
733 reiserfs_warning (NULL, "green-16008: Invalid item type observed, run fsck ASAP"); 700 reiserfs_warning(NULL,
734 return -1; 701 "green-16008: Invalid item type observed, run fsck ASAP");
702 return -1;
735} 703}
736 704
737static int errcatch_part_size (struct virtual_item * vi, int first, int count) 705static int errcatch_part_size(struct virtual_item *vi, int first, int count)
738{ 706{
739 reiserfs_warning (NULL, "green-16009: Invalid item type observed, run fsck ASAP"); 707 reiserfs_warning(NULL,
740 return 0; 708 "green-16009: Invalid item type observed, run fsck ASAP");
709 return 0;
741} 710}
742 711
743static int errcatch_unit_num (struct virtual_item * vi) 712static int errcatch_unit_num(struct virtual_item *vi)
744{ 713{
745 reiserfs_warning (NULL, "green-16010: Invalid item type observed, run fsck ASAP"); 714 reiserfs_warning(NULL,
746 return 0; 715 "green-16010: Invalid item type observed, run fsck ASAP");
716 return 0;
747} 717}
748 718
749static void errcatch_print_vi (struct virtual_item * vi) 719static void errcatch_print_vi(struct virtual_item *vi)
750{ 720{
751 reiserfs_warning (NULL, "green-16011: Invalid item type observed, run fsck ASAP"); 721 reiserfs_warning(NULL,
722 "green-16011: Invalid item type observed, run fsck ASAP");
752} 723}
753 724
754static struct item_operations errcatch_ops = { 725static struct item_operations errcatch_ops = {
755 errcatch_bytes_number, 726 errcatch_bytes_number,
756 errcatch_decrement_key, 727 errcatch_decrement_key,
757 errcatch_is_left_mergeable, 728 errcatch_is_left_mergeable,
758 errcatch_print_item, 729 errcatch_print_item,
759 errcatch_check_item, 730 errcatch_check_item,
760 731
761 errcatch_create_vi, 732 errcatch_create_vi,
762 errcatch_check_left, 733 errcatch_check_left,
763 errcatch_check_right, 734 errcatch_check_right,
764 errcatch_part_size, 735 errcatch_part_size,
765 errcatch_unit_num, 736 errcatch_unit_num,
766 errcatch_print_vi 737 errcatch_print_vi
767}; 738};
768 739
769
770
771////////////////////////////////////////////////////////////////////////////// 740//////////////////////////////////////////////////////////////////////////////
772// 741//
773// 742//
774#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) 743#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
775 do not compile 744#error Item types must use disk-format assigned values.
776#endif 745#endif
777 746
778struct item_operations * item_ops [TYPE_ANY + 1] = { 747struct item_operations *item_ops[TYPE_ANY + 1] = {
779 &stat_data_ops, 748 &stat_data_ops,
780 &indirect_ops, 749 &indirect_ops,
781 &direct_ops, 750 &direct_ops,
782 &direntry_ops, 751 &direntry_ops,
783 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 752 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
784 &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */ 753 &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */
785}; 754};
786
787
788
789
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3072cfdee959..ca7989b04be3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -55,7 +55,6 @@
55#include <linux/writeback.h> 55#include <linux/writeback.h>
56#include <linux/blkdev.h> 56#include <linux/blkdev.h>
57 57
58
59/* gets a struct reiserfs_journal_list * from a list head */ 58/* gets a struct reiserfs_journal_list * from a list head */
60#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 59#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
61 j_list)) 60 j_list))
@@ -69,55 +68,61 @@ static int reiserfs_mounted_fs_count;
69 68
70static struct workqueue_struct *commit_wq; 69static struct workqueue_struct *commit_wq;
71 70
72#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 71#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
73 structs at 4k */ 72 structs at 4k */
74#define BUFNR 64 /*read ahead */ 73#define BUFNR 64 /*read ahead */
75 74
76/* cnode stat bits. Move these into reiserfs_fs.h */ 75/* cnode stat bits. Move these into reiserfs_fs.h */
77 76
78#define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 77#define BLOCK_FREED 2 /* this block was freed, and can't be written. */
79#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 78#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */
80 79
81#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 80#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */
82#define BLOCK_DIRTIED 5 81#define BLOCK_DIRTIED 5
83 82
84
85/* journal list state bits */ 83/* journal list state bits */
86#define LIST_TOUCHED 1 84#define LIST_TOUCHED 1
87#define LIST_DIRTY 2 85#define LIST_DIRTY 2
88#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ 86#define LIST_COMMIT_PENDING 4 /* someone will commit this list */
89 87
90/* flags for do_journal_end */ 88/* flags for do_journal_end */
91#define FLUSH_ALL 1 /* flush commit and real blocks */ 89#define FLUSH_ALL 1 /* flush commit and real blocks */
92#define COMMIT_NOW 2 /* end and commit this transaction */ 90#define COMMIT_NOW 2 /* end and commit this transaction */
93#define WAIT 4 /* wait for the log blocks to hit the disk*/ 91#define WAIT 4 /* wait for the log blocks to hit the disk */
94 92
95static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; 93static int do_journal_end(struct reiserfs_transaction_handle *,
96static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; 94 struct super_block *, unsigned long nblocks,
97static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; 95 int flags);
98static int can_dirty(struct reiserfs_journal_cnode *cn) ; 96static int flush_journal_list(struct super_block *s,
99static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks); 97 struct reiserfs_journal_list *jl, int flushall);
100static int release_journal_dev( struct super_block *super, 98static int flush_commit_list(struct super_block *s,
101 struct reiserfs_journal *journal ); 99 struct reiserfs_journal_list *jl, int flushall);
100static int can_dirty(struct reiserfs_journal_cnode *cn);
101static int journal_join(struct reiserfs_transaction_handle *th,
102 struct super_block *p_s_sb, unsigned long nblocks);
103static int release_journal_dev(struct super_block *super,
104 struct reiserfs_journal *journal);
102static int dirty_one_transaction(struct super_block *s, 105static int dirty_one_transaction(struct super_block *s,
103 struct reiserfs_journal_list *jl); 106 struct reiserfs_journal_list *jl);
104static void flush_async_commits(void *p); 107static void flush_async_commits(void *p);
105static void queue_log_writer(struct super_block *s); 108static void queue_log_writer(struct super_block *s);
106 109
107/* values for join in do_journal_begin_r */ 110/* values for join in do_journal_begin_r */
108enum { 111enum {
109 JBEGIN_REG = 0, /* regular journal begin */ 112 JBEGIN_REG = 0, /* regular journal begin */
110 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 113 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */
111 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 114 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */
112}; 115};
113 116
114static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 117static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
115 struct super_block * p_s_sb, 118 struct super_block *p_s_sb,
116 unsigned long nblocks,int join); 119 unsigned long nblocks, int join);
117 120
118static void init_journal_hash(struct super_block *p_s_sb) { 121static void init_journal_hash(struct super_block *p_s_sb)
119 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 122{
120 memset(journal->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; 123 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
124 memset(journal->j_hash_table, 0,
125 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
121} 126}
122 127
123/* 128/*
@@ -125,149 +130,159 @@ static void init_journal_hash(struct super_block *p_s_sb) {
125** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 130** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for
126** more details. 131** more details.
127*/ 132*/
128static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { 133static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
129 if (bh) { 134{
130 clear_buffer_dirty(bh); 135 if (bh) {
131 clear_buffer_journal_test(bh); 136 clear_buffer_dirty(bh);
132 } 137 clear_buffer_journal_test(bh);
133 return 0 ; 138 }
139 return 0;
134} 140}
135 141
136static void disable_barrier(struct super_block *s) 142static void disable_barrier(struct super_block *s)
137{ 143{
138 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); 144 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
139 printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s)); 145 printk("reiserfs: disabling flush barriers on %s\n",
140} 146 reiserfs_bdevname(s));
141 147}
142static struct reiserfs_bitmap_node * 148
143allocate_bitmap_node(struct super_block *p_s_sb) { 149static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
144 struct reiserfs_bitmap_node *bn ; 150 *p_s_sb)
145 static int id; 151{
146 152 struct reiserfs_bitmap_node *bn;
147 bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb) ; 153 static int id;
148 if (!bn) { 154
149 return NULL ; 155 bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS,
150 } 156 p_s_sb);
151 bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb) ; 157 if (!bn) {
152 if (!bn->data) { 158 return NULL;
153 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; 159 }
154 return NULL ; 160 bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb);
155 } 161 if (!bn->data) {
156 bn->id = id++ ; 162 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
157 memset(bn->data, 0, p_s_sb->s_blocksize) ; 163 return NULL;
158 INIT_LIST_HEAD(&bn->list) ; 164 }
159 return bn ; 165 bn->id = id++;
160} 166 memset(bn->data, 0, p_s_sb->s_blocksize);
161 167 INIT_LIST_HEAD(&bn->list);
162static struct reiserfs_bitmap_node * 168 return bn;
163get_bitmap_node(struct super_block *p_s_sb) { 169}
164 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 170
165 struct reiserfs_bitmap_node *bn = NULL; 171static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
166 struct list_head *entry = journal->j_bitmap_nodes.next ; 172{
167 173 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
168 journal->j_used_bitmap_nodes++ ; 174 struct reiserfs_bitmap_node *bn = NULL;
169repeat: 175 struct list_head *entry = journal->j_bitmap_nodes.next;
170 176
171 if(entry != &journal->j_bitmap_nodes) { 177 journal->j_used_bitmap_nodes++;
172 bn = list_entry(entry, struct reiserfs_bitmap_node, list) ; 178 repeat:
173 list_del(entry) ; 179
174 memset(bn->data, 0, p_s_sb->s_blocksize) ; 180 if (entry != &journal->j_bitmap_nodes) {
175 journal->j_free_bitmap_nodes-- ; 181 bn = list_entry(entry, struct reiserfs_bitmap_node, list);
176 return bn ; 182 list_del(entry);
177 } 183 memset(bn->data, 0, p_s_sb->s_blocksize);
178 bn = allocate_bitmap_node(p_s_sb) ; 184 journal->j_free_bitmap_nodes--;
179 if (!bn) { 185 return bn;
180 yield(); 186 }
181 goto repeat ; 187 bn = allocate_bitmap_node(p_s_sb);
182 } 188 if (!bn) {
183 return bn ; 189 yield();
190 goto repeat;
191 }
192 return bn;
184} 193}
185static inline void free_bitmap_node(struct super_block *p_s_sb, 194static inline void free_bitmap_node(struct super_block *p_s_sb,
186 struct reiserfs_bitmap_node *bn) { 195 struct reiserfs_bitmap_node *bn)
187 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 196{
188 journal->j_used_bitmap_nodes-- ; 197 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
189 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 198 journal->j_used_bitmap_nodes--;
190 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; 199 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
191 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; 200 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
192 } else { 201 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
193 list_add(&bn->list, &journal->j_bitmap_nodes) ; 202 } else {
194 journal->j_free_bitmap_nodes++ ; 203 list_add(&bn->list, &journal->j_bitmap_nodes);
195 } 204 journal->j_free_bitmap_nodes++;
196} 205 }
197 206}
198static void allocate_bitmap_nodes(struct super_block *p_s_sb) { 207
199 int i ; 208static void allocate_bitmap_nodes(struct super_block *p_s_sb)
200 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 209{
201 struct reiserfs_bitmap_node *bn = NULL ; 210 int i;
202 for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) { 211 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
203 bn = allocate_bitmap_node(p_s_sb) ; 212 struct reiserfs_bitmap_node *bn = NULL;
204 if (bn) { 213 for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
205 list_add(&bn->list, &journal->j_bitmap_nodes) ; 214 bn = allocate_bitmap_node(p_s_sb);
206 journal->j_free_bitmap_nodes++ ; 215 if (bn) {
207 } else { 216 list_add(&bn->list, &journal->j_bitmap_nodes);
208 break ; // this is ok, we'll try again when more are needed 217 journal->j_free_bitmap_nodes++;
209 } 218 } else {
210 } 219 break; // this is ok, we'll try again when more are needed
220 }
221 }
211} 222}
212 223
213static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, 224static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
214 struct reiserfs_list_bitmap *jb) { 225 struct reiserfs_list_bitmap *jb)
215 int bmap_nr = block / (p_s_sb->s_blocksize << 3) ; 226{
216 int bit_nr = block % (p_s_sb->s_blocksize << 3) ; 227 int bmap_nr = block / (p_s_sb->s_blocksize << 3);
228 int bit_nr = block % (p_s_sb->s_blocksize << 3);
217 229
218 if (!jb->bitmaps[bmap_nr]) { 230 if (!jb->bitmaps[bmap_nr]) {
219 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; 231 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
220 } 232 }
221 set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ; 233 set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
222 return 0 ; 234 return 0;
223} 235}
224 236
225static void cleanup_bitmap_list(struct super_block *p_s_sb, 237static void cleanup_bitmap_list(struct super_block *p_s_sb,
226 struct reiserfs_list_bitmap *jb) { 238 struct reiserfs_list_bitmap *jb)
227 int i; 239{
228 if (jb->bitmaps == NULL) 240 int i;
229 return; 241 if (jb->bitmaps == NULL)
230 242 return;
231 for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) { 243
232 if (jb->bitmaps[i]) { 244 for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
233 free_bitmap_node(p_s_sb, jb->bitmaps[i]) ; 245 if (jb->bitmaps[i]) {
234 jb->bitmaps[i] = NULL ; 246 free_bitmap_node(p_s_sb, jb->bitmaps[i]);
235 } 247 jb->bitmaps[i] = NULL;
236 } 248 }
249 }
237} 250}
238 251
239/* 252/*
240** only call this on FS unmount. 253** only call this on FS unmount.
241*/ 254*/
242static int free_list_bitmaps(struct super_block *p_s_sb, 255static int free_list_bitmaps(struct super_block *p_s_sb,
243 struct reiserfs_list_bitmap *jb_array) { 256 struct reiserfs_list_bitmap *jb_array)
244 int i ; 257{
245 struct reiserfs_list_bitmap *jb ; 258 int i;
246 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 259 struct reiserfs_list_bitmap *jb;
247 jb = jb_array + i ; 260 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
248 jb->journal_list = NULL ; 261 jb = jb_array + i;
249 cleanup_bitmap_list(p_s_sb, jb) ; 262 jb->journal_list = NULL;
250 vfree(jb->bitmaps) ; 263 cleanup_bitmap_list(p_s_sb, jb);
251 jb->bitmaps = NULL ; 264 vfree(jb->bitmaps);
252 } 265 jb->bitmaps = NULL;
253 return 0; 266 }
254} 267 return 0;
255 268}
256static int free_bitmap_nodes(struct super_block *p_s_sb) { 269
257 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 270static int free_bitmap_nodes(struct super_block *p_s_sb)
258 struct list_head *next = journal->j_bitmap_nodes.next ; 271{
259 struct reiserfs_bitmap_node *bn ; 272 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
260 273 struct list_head *next = journal->j_bitmap_nodes.next;
261 while(next != &journal->j_bitmap_nodes) { 274 struct reiserfs_bitmap_node *bn;
262 bn = list_entry(next, struct reiserfs_bitmap_node, list) ; 275
263 list_del(next) ; 276 while (next != &journal->j_bitmap_nodes) {
264 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; 277 bn = list_entry(next, struct reiserfs_bitmap_node, list);
265 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; 278 list_del(next);
266 next = journal->j_bitmap_nodes.next ; 279 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
267 journal->j_free_bitmap_nodes-- ; 280 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
268 } 281 next = journal->j_bitmap_nodes.next;
269 282 journal->j_free_bitmap_nodes--;
270 return 0 ; 283 }
284
285 return 0;
271} 286}
272 287
273/* 288/*
@@ -275,59 +290,65 @@ static int free_bitmap_nodes(struct super_block *p_s_sb) {
275** jb_array is the array to be filled in. 290** jb_array is the array to be filled in.
276*/ 291*/
277int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 292int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
278 struct reiserfs_list_bitmap *jb_array, 293 struct reiserfs_list_bitmap *jb_array,
279 int bmap_nr) { 294 int bmap_nr)
280 int i ; 295{
281 int failed = 0 ; 296 int i;
282 struct reiserfs_list_bitmap *jb ; 297 int failed = 0;
283 int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ; 298 struct reiserfs_list_bitmap *jb;
284 299 int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
285 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 300
286 jb = jb_array + i ; 301 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
287 jb->journal_list = NULL ; 302 jb = jb_array + i;
288 jb->bitmaps = vmalloc( mem ) ; 303 jb->journal_list = NULL;
289 if (!jb->bitmaps) { 304 jb->bitmaps = vmalloc(mem);
290 reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists") ; 305 if (!jb->bitmaps) {
291 failed = 1; 306 reiserfs_warning(p_s_sb,
292 break ; 307 "clm-2000, unable to allocate bitmaps for journal lists");
293 } 308 failed = 1;
294 memset(jb->bitmaps, 0, mem) ; 309 break;
295 } 310 }
296 if (failed) { 311 memset(jb->bitmaps, 0, mem);
297 free_list_bitmaps(p_s_sb, jb_array) ; 312 }
298 return -1 ; 313 if (failed) {
299 } 314 free_list_bitmaps(p_s_sb, jb_array);
300 return 0 ; 315 return -1;
316 }
317 return 0;
301} 318}
302 319
303/* 320/*
304** find an available list bitmap. If you can't find one, flush a commit list 321** find an available list bitmap. If you can't find one, flush a commit list
305** and try again 322** and try again
306*/ 323*/
307static struct reiserfs_list_bitmap * 324static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
308get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { 325 struct reiserfs_journal_list
309 int i,j ; 326 *jl)
310 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 327{
311 struct reiserfs_list_bitmap *jb = NULL ; 328 int i, j;
312 329 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
313 for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) { 330 struct reiserfs_list_bitmap *jb = NULL;
314 i = journal->j_list_bitmap_index ; 331
315 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ; 332 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
316 jb = journal->j_list_bitmap + i ; 333 i = journal->j_list_bitmap_index;
317 if (journal->j_list_bitmap[i].journal_list) { 334 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
318 flush_commit_list(p_s_sb, journal->j_list_bitmap[i].journal_list, 1) ; 335 jb = journal->j_list_bitmap + i;
319 if (!journal->j_list_bitmap[i].journal_list) { 336 if (journal->j_list_bitmap[i].journal_list) {
320 break ; 337 flush_commit_list(p_s_sb,
321 } 338 journal->j_list_bitmap[i].
322 } else { 339 journal_list, 1);
323 break ; 340 if (!journal->j_list_bitmap[i].journal_list) {
324 } 341 break;
325 } 342 }
326 if (jb->journal_list) { /* double check to make sure if flushed correctly */ 343 } else {
327 return NULL ; 344 break;
328 } 345 }
329 jb->journal_list = jl ; 346 }
330 return jb ; 347 if (jb->journal_list) { /* double check to make sure if flushed correctly */
348 return NULL;
349 }
350 jb->journal_list = jl;
351 return jb;
331} 352}
332 353
333/* 354/*
@@ -335,104 +356,114 @@ get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) {
335** Uses the cnode->next and cnode->prev pointers 356** Uses the cnode->next and cnode->prev pointers
336** returns NULL on failure 357** returns NULL on failure
337*/ 358*/
338static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { 359static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
339 struct reiserfs_journal_cnode *head ; 360{
340 int i ; 361 struct reiserfs_journal_cnode *head;
341 if (num_cnodes <= 0) { 362 int i;
342 return NULL ; 363 if (num_cnodes <= 0) {
343 } 364 return NULL;
344 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; 365 }
345 if (!head) { 366 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
346 return NULL ; 367 if (!head) {
347 } 368 return NULL;
348 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; 369 }
349 head[0].prev = NULL ; 370 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
350 head[0].next = head + 1 ; 371 head[0].prev = NULL;
351 for (i = 1 ; i < num_cnodes; i++) { 372 head[0].next = head + 1;
352 head[i].prev = head + (i - 1) ; 373 for (i = 1; i < num_cnodes; i++) {
353 head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */ 374 head[i].prev = head + (i - 1);
354 } 375 head[i].next = head + (i + 1); /* if last one, overwrite it after the if */
355 head[num_cnodes -1].next = NULL ; 376 }
356 return head ; 377 head[num_cnodes - 1].next = NULL;
378 return head;
357} 379}
358 380
359/* 381/*
360** pulls a cnode off the free list, or returns NULL on failure 382** pulls a cnode off the free list, or returns NULL on failure
361*/ 383*/
362static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) { 384static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb)
363 struct reiserfs_journal_cnode *cn ; 385{
364 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 386 struct reiserfs_journal_cnode *cn;
365 387 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
366 reiserfs_check_lock_depth(p_s_sb, "get_cnode") ; 388
367 389 reiserfs_check_lock_depth(p_s_sb, "get_cnode");
368 if (journal->j_cnode_free <= 0) { 390
369 return NULL ; 391 if (journal->j_cnode_free <= 0) {
370 } 392 return NULL;
371 journal->j_cnode_used++ ; 393 }
372 journal->j_cnode_free-- ; 394 journal->j_cnode_used++;
373 cn = journal->j_cnode_free_list ; 395 journal->j_cnode_free--;
374 if (!cn) { 396 cn = journal->j_cnode_free_list;
375 return cn ; 397 if (!cn) {
376 } 398 return cn;
377 if (cn->next) { 399 }
378 cn->next->prev = NULL ; 400 if (cn->next) {
379 } 401 cn->next->prev = NULL;
380 journal->j_cnode_free_list = cn->next ; 402 }
381 memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; 403 journal->j_cnode_free_list = cn->next;
382 return cn ; 404 memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
405 return cn;
383} 406}
384 407
385/* 408/*
386** returns a cnode to the free list 409** returns a cnode to the free list
387*/ 410*/
388static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) { 411static void free_cnode(struct super_block *p_s_sb,
389 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 412 struct reiserfs_journal_cnode *cn)
413{
414 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
390 415
391 reiserfs_check_lock_depth(p_s_sb, "free_cnode") ; 416 reiserfs_check_lock_depth(p_s_sb, "free_cnode");
392 417
393 journal->j_cnode_used-- ; 418 journal->j_cnode_used--;
394 journal->j_cnode_free++ ; 419 journal->j_cnode_free++;
395 /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ 420 /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
396 cn->next = journal->j_cnode_free_list ; 421 cn->next = journal->j_cnode_free_list;
397 if (journal->j_cnode_free_list) { 422 if (journal->j_cnode_free_list) {
398 journal->j_cnode_free_list->prev = cn ; 423 journal->j_cnode_free_list->prev = cn;
399 } 424 }
400 cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */ 425 cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */
401 journal->j_cnode_free_list = cn ; 426 journal->j_cnode_free_list = cn;
402} 427}
403 428
404static void clear_prepared_bits(struct buffer_head *bh) { 429static void clear_prepared_bits(struct buffer_head *bh)
405 clear_buffer_journal_prepared (bh); 430{
406 clear_buffer_journal_restore_dirty (bh); 431 clear_buffer_journal_prepared(bh);
432 clear_buffer_journal_restore_dirty(bh);
407} 433}
408 434
409/* utility function to force a BUG if it is called without the big 435/* utility function to force a BUG if it is called without the big
410** kernel lock held. caller is the string printed just before calling BUG() 436** kernel lock held. caller is the string printed just before calling BUG()
411*/ 437*/
412void reiserfs_check_lock_depth(struct super_block *sb, char *caller) { 438void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
439{
413#ifdef CONFIG_SMP 440#ifdef CONFIG_SMP
414 if (current->lock_depth < 0) { 441 if (current->lock_depth < 0) {
415 reiserfs_panic (sb, "%s called without kernel lock held", caller) ; 442 reiserfs_panic(sb, "%s called without kernel lock held",
416 } 443 caller);
444 }
417#else 445#else
418 ; 446 ;
419#endif 447#endif
420} 448}
421 449
422/* return a cnode with same dev, block number and size in table, or null if not found */ 450/* return a cnode with same dev, block number and size in table, or null if not found */
423static inline struct reiserfs_journal_cnode * 451static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
424get_journal_hash_dev(struct super_block *sb, 452 super_block
425 struct reiserfs_journal_cnode **table, 453 *sb,
426 long bl) 454 struct
455 reiserfs_journal_cnode
456 **table,
457 long bl)
427{ 458{
428 struct reiserfs_journal_cnode *cn ; 459 struct reiserfs_journal_cnode *cn;
429 cn = journal_hash(table, sb, bl) ; 460 cn = journal_hash(table, sb, bl);
430 while(cn) { 461 while (cn) {
431 if (cn->blocknr == bl && cn->sb == sb) 462 if (cn->blocknr == bl && cn->sb == sb)
432 return cn ; 463 return cn;
433 cn = cn->hnext ; 464 cn = cn->hnext;
434 } 465 }
435 return (struct reiserfs_journal_cnode *)0 ; 466 return (struct reiserfs_journal_cnode *)0;
436} 467}
437 468
438/* 469/*
@@ -454,91 +485,103 @@ get_journal_hash_dev(struct super_block *sb,
454** 485**
455*/ 486*/
456int reiserfs_in_journal(struct super_block *p_s_sb, 487int reiserfs_in_journal(struct super_block *p_s_sb,
457 int bmap_nr, int bit_nr, int search_all, 488 int bmap_nr, int bit_nr, int search_all,
458 b_blocknr_t *next_zero_bit) { 489 b_blocknr_t * next_zero_bit)
459 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 490{
460 struct reiserfs_journal_cnode *cn ; 491 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
461 struct reiserfs_list_bitmap *jb ; 492 struct reiserfs_journal_cnode *cn;
462 int i ; 493 struct reiserfs_list_bitmap *jb;
463 unsigned long bl; 494 int i;
464 495 unsigned long bl;
465 *next_zero_bit = 0 ; /* always start this at zero. */ 496
466 497 *next_zero_bit = 0; /* always start this at zero. */
467 PROC_INFO_INC( p_s_sb, journal.in_journal ); 498
468 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 499 PROC_INFO_INC(p_s_sb, journal.in_journal);
469 ** if we crash before the transaction that freed it commits, this transaction won't 500 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
470 ** have committed either, and the block will never be written 501 ** if we crash before the transaction that freed it commits, this transaction won't
471 */ 502 ** have committed either, and the block will never be written
472 if (search_all) { 503 */
473 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 504 if (search_all) {
474 PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap ); 505 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
475 jb = journal->j_list_bitmap + i ; 506 PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap);
476 if (jb->journal_list && jb->bitmaps[bmap_nr] && 507 jb = journal->j_list_bitmap + i;
477 test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) { 508 if (jb->journal_list && jb->bitmaps[bmap_nr] &&
478 *next_zero_bit = find_next_zero_bit((unsigned long *) 509 test_bit(bit_nr,
479 (jb->bitmaps[bmap_nr]->data), 510 (unsigned long *)jb->bitmaps[bmap_nr]->
480 p_s_sb->s_blocksize << 3, bit_nr+1) ; 511 data)) {
481 return 1 ; 512 *next_zero_bit =
482 } 513 find_next_zero_bit((unsigned long *)
483 } 514 (jb->bitmaps[bmap_nr]->
484 } 515 data),
485 516 p_s_sb->s_blocksize << 3,
486 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; 517 bit_nr + 1);
487 /* is it in any old transactions? */ 518 return 1;
488 if (search_all && (cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { 519 }
489 return 1; 520 }
490 } 521 }
491 522
492 /* is it in the current transaction. This should never happen */ 523 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
493 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { 524 /* is it in any old transactions? */
494 BUG(); 525 if (search_all
495 return 1; 526 && (cn =
496 } 527 get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
497 528 return 1;
498 PROC_INFO_INC( p_s_sb, journal.in_journal_reusable ); 529 }
499 /* safe for reuse */ 530
500 return 0 ; 531 /* is it in the current transaction. This should never happen */
532 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
533 BUG();
534 return 1;
535 }
536
537 PROC_INFO_INC(p_s_sb, journal.in_journal_reusable);
538 /* safe for reuse */
539 return 0;
501} 540}
502 541
503/* insert cn into table 542/* insert cn into table
504*/ 543*/
505static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { 544static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
506 struct reiserfs_journal_cnode *cn_orig ; 545 struct reiserfs_journal_cnode *cn)
546{
547 struct reiserfs_journal_cnode *cn_orig;
507 548
508 cn_orig = journal_hash(table, cn->sb, cn->blocknr) ; 549 cn_orig = journal_hash(table, cn->sb, cn->blocknr);
509 cn->hnext = cn_orig ; 550 cn->hnext = cn_orig;
510 cn->hprev = NULL ; 551 cn->hprev = NULL;
511 if (cn_orig) { 552 if (cn_orig) {
512 cn_orig->hprev = cn ; 553 cn_orig->hprev = cn;
513 } 554 }
514 journal_hash(table, cn->sb, cn->blocknr) = cn ; 555 journal_hash(table, cn->sb, cn->blocknr) = cn;
515} 556}
516 557
517/* lock the current transaction */ 558/* lock the current transaction */
518inline static void lock_journal(struct super_block *p_s_sb) { 559static inline void lock_journal(struct super_block *p_s_sb)
519 PROC_INFO_INC( p_s_sb, journal.lock_journal ); 560{
520 down(&SB_JOURNAL(p_s_sb)->j_lock); 561 PROC_INFO_INC(p_s_sb, journal.lock_journal);
562 down(&SB_JOURNAL(p_s_sb)->j_lock);
521} 563}
522 564
523/* unlock the current transaction */ 565/* unlock the current transaction */
524inline static void unlock_journal(struct super_block *p_s_sb) { 566static inline void unlock_journal(struct super_block *p_s_sb)
525 up(&SB_JOURNAL(p_s_sb)->j_lock); 567{
568 up(&SB_JOURNAL(p_s_sb)->j_lock);
526} 569}
527 570
528static inline void get_journal_list(struct reiserfs_journal_list *jl) 571static inline void get_journal_list(struct reiserfs_journal_list *jl)
529{ 572{
530 jl->j_refcount++; 573 jl->j_refcount++;
531} 574}
532 575
533static inline void put_journal_list(struct super_block *s, 576static inline void put_journal_list(struct super_block *s,
534 struct reiserfs_journal_list *jl) 577 struct reiserfs_journal_list *jl)
535{ 578{
536 if (jl->j_refcount < 1) { 579 if (jl->j_refcount < 1) {
537 reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id, 580 reiserfs_panic(s, "trans id %lu, refcount at %d",
538 jl->j_refcount); 581 jl->j_trans_id, jl->j_refcount);
539 } 582 }
540 if (--jl->j_refcount == 0) 583 if (--jl->j_refcount == 0)
541 reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); 584 reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
542} 585}
543 586
544/* 587/*
@@ -546,354 +589,375 @@ static inline void put_journal_list(struct super_block *s,
546** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 589** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
547** transaction. 590** transaction.
548*/ 591*/
549static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { 592static void cleanup_freed_for_journal_list(struct super_block *p_s_sb,
593 struct reiserfs_journal_list *jl)
594{
550 595
551 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ; 596 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
552 if (jb) { 597 if (jb) {
553 cleanup_bitmap_list(p_s_sb, jb) ; 598 cleanup_bitmap_list(p_s_sb, jb);
554 } 599 }
555 jl->j_list_bitmap->journal_list = NULL ; 600 jl->j_list_bitmap->journal_list = NULL;
556 jl->j_list_bitmap = NULL ; 601 jl->j_list_bitmap = NULL;
557} 602}
558 603
559static int journal_list_still_alive(struct super_block *s, 604static int journal_list_still_alive(struct super_block *s,
560 unsigned long trans_id) 605 unsigned long trans_id)
561{ 606{
562 struct reiserfs_journal *journal = SB_JOURNAL (s); 607 struct reiserfs_journal *journal = SB_JOURNAL(s);
563 struct list_head *entry = &journal->j_journal_list; 608 struct list_head *entry = &journal->j_journal_list;
564 struct reiserfs_journal_list *jl; 609 struct reiserfs_journal_list *jl;
565 610
566 if (!list_empty(entry)) { 611 if (!list_empty(entry)) {
567 jl = JOURNAL_LIST_ENTRY(entry->next); 612 jl = JOURNAL_LIST_ENTRY(entry->next);
568 if (jl->j_trans_id <= trans_id) { 613 if (jl->j_trans_id <= trans_id) {
569 return 1; 614 return 1;
570 } 615 }
571 } 616 }
572 return 0; 617 return 0;
573} 618}
574 619
575static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { 620static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
576 char b[BDEVNAME_SIZE]; 621{
577 622 char b[BDEVNAME_SIZE];
578 if (buffer_journaled(bh)) { 623
579 reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk", 624 if (buffer_journaled(bh)) {
580 bh->b_blocknr, bdevname(bh->b_bdev, b)) ; 625 reiserfs_warning(NULL,
581 } 626 "clm-2084: pinned buffer %lu:%s sent to disk",
582 if (uptodate) 627 bh->b_blocknr, bdevname(bh->b_bdev, b));
583 set_buffer_uptodate(bh) ; 628 }
584 else 629 if (uptodate)
585 clear_buffer_uptodate(bh) ; 630 set_buffer_uptodate(bh);
586 unlock_buffer(bh) ; 631 else
587 put_bh(bh) ; 632 clear_buffer_uptodate(bh);
588} 633 unlock_buffer(bh);
589 634 put_bh(bh);
590static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) { 635}
591 if (uptodate) 636
592 set_buffer_uptodate(bh) ; 637static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
593 else 638{
594 clear_buffer_uptodate(bh) ; 639 if (uptodate)
595 unlock_buffer(bh) ; 640 set_buffer_uptodate(bh);
596 put_bh(bh) ; 641 else
597} 642 clear_buffer_uptodate(bh);
598 643 unlock_buffer(bh);
599static void submit_logged_buffer(struct buffer_head *bh) { 644 put_bh(bh);
600 get_bh(bh) ; 645}
601 bh->b_end_io = reiserfs_end_buffer_io_sync ; 646
602 clear_buffer_journal_new (bh); 647static void submit_logged_buffer(struct buffer_head *bh)
603 clear_buffer_dirty(bh) ; 648{
604 if (!test_clear_buffer_journal_test (bh)) 649 get_bh(bh);
605 BUG(); 650 bh->b_end_io = reiserfs_end_buffer_io_sync;
606 if (!buffer_uptodate(bh)) 651 clear_buffer_journal_new(bh);
607 BUG(); 652 clear_buffer_dirty(bh);
608 submit_bh(WRITE, bh) ; 653 if (!test_clear_buffer_journal_test(bh))
609} 654 BUG();
610 655 if (!buffer_uptodate(bh))
611static void submit_ordered_buffer(struct buffer_head *bh) { 656 BUG();
612 get_bh(bh) ; 657 submit_bh(WRITE, bh);
613 bh->b_end_io = reiserfs_end_ordered_io; 658}
614 clear_buffer_dirty(bh) ; 659
615 if (!buffer_uptodate(bh)) 660static void submit_ordered_buffer(struct buffer_head *bh)
616 BUG(); 661{
617 submit_bh(WRITE, bh) ; 662 get_bh(bh);
618} 663 bh->b_end_io = reiserfs_end_ordered_io;
619 664 clear_buffer_dirty(bh);
620static int submit_barrier_buffer(struct buffer_head *bh) { 665 if (!buffer_uptodate(bh))
621 get_bh(bh) ; 666 BUG();
622 bh->b_end_io = reiserfs_end_ordered_io; 667 submit_bh(WRITE, bh);
623 clear_buffer_dirty(bh) ; 668}
624 if (!buffer_uptodate(bh)) 669
625 BUG(); 670static int submit_barrier_buffer(struct buffer_head *bh)
626 return submit_bh(WRITE_BARRIER, bh) ; 671{
672 get_bh(bh);
673 bh->b_end_io = reiserfs_end_ordered_io;
674 clear_buffer_dirty(bh);
675 if (!buffer_uptodate(bh))
676 BUG();
677 return submit_bh(WRITE_BARRIER, bh);
627} 678}
628 679
629static void check_barrier_completion(struct super_block *s, 680static void check_barrier_completion(struct super_block *s,
630 struct buffer_head *bh) { 681 struct buffer_head *bh)
631 if (buffer_eopnotsupp(bh)) { 682{
632 clear_buffer_eopnotsupp(bh); 683 if (buffer_eopnotsupp(bh)) {
633 disable_barrier(s); 684 clear_buffer_eopnotsupp(bh);
634 set_buffer_uptodate(bh); 685 disable_barrier(s);
635 set_buffer_dirty(bh); 686 set_buffer_uptodate(bh);
636 sync_dirty_buffer(bh); 687 set_buffer_dirty(bh);
637 } 688 sync_dirty_buffer(bh);
689 }
638} 690}
639 691
640#define CHUNK_SIZE 32 692#define CHUNK_SIZE 32
641struct buffer_chunk { 693struct buffer_chunk {
642 struct buffer_head *bh[CHUNK_SIZE]; 694 struct buffer_head *bh[CHUNK_SIZE];
643 int nr; 695 int nr;
644}; 696};
645 697
646static void write_chunk(struct buffer_chunk *chunk) { 698static void write_chunk(struct buffer_chunk *chunk)
647 int i; 699{
648 for (i = 0; i < chunk->nr ; i++) { 700 int i;
649 submit_logged_buffer(chunk->bh[i]) ; 701 get_fs_excl();
650 } 702 for (i = 0; i < chunk->nr; i++) {
651 chunk->nr = 0; 703 submit_logged_buffer(chunk->bh[i]);
704 }
705 chunk->nr = 0;
706 put_fs_excl();
652} 707}
653 708
654static void write_ordered_chunk(struct buffer_chunk *chunk) { 709static void write_ordered_chunk(struct buffer_chunk *chunk)
655 int i; 710{
656 for (i = 0; i < chunk->nr ; i++) { 711 int i;
657 submit_ordered_buffer(chunk->bh[i]) ; 712 get_fs_excl();
658 } 713 for (i = 0; i < chunk->nr; i++) {
659 chunk->nr = 0; 714 submit_ordered_buffer(chunk->bh[i]);
715 }
716 chunk->nr = 0;
717 put_fs_excl();
660} 718}
661 719
662static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 720static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
663 spinlock_t *lock, 721 spinlock_t * lock, void (fn) (struct buffer_chunk *))
664 void (fn)(struct buffer_chunk *))
665{ 722{
666 int ret = 0; 723 int ret = 0;
667 if (chunk->nr >= CHUNK_SIZE) 724 if (chunk->nr >= CHUNK_SIZE)
668 BUG(); 725 BUG();
669 chunk->bh[chunk->nr++] = bh; 726 chunk->bh[chunk->nr++] = bh;
670 if (chunk->nr >= CHUNK_SIZE) { 727 if (chunk->nr >= CHUNK_SIZE) {
671 ret = 1; 728 ret = 1;
672 if (lock) 729 if (lock)
673 spin_unlock(lock); 730 spin_unlock(lock);
674 fn(chunk); 731 fn(chunk);
675 if (lock) 732 if (lock)
676 spin_lock(lock); 733 spin_lock(lock);
677 } 734 }
678 return ret; 735 return ret;
679} 736}
680 737
681
682static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); 738static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
683static struct reiserfs_jh *alloc_jh(void) { 739static struct reiserfs_jh *alloc_jh(void)
684 struct reiserfs_jh *jh; 740{
685 while(1) { 741 struct reiserfs_jh *jh;
686 jh = kmalloc(sizeof(*jh), GFP_NOFS); 742 while (1) {
687 if (jh) { 743 jh = kmalloc(sizeof(*jh), GFP_NOFS);
688 atomic_inc(&nr_reiserfs_jh); 744 if (jh) {
689 return jh; 745 atomic_inc(&nr_reiserfs_jh);
746 return jh;
747 }
748 yield();
690 } 749 }
691 yield();
692 }
693} 750}
694 751
695/* 752/*
696 * we want to free the jh when the buffer has been written 753 * we want to free the jh when the buffer has been written
697 * and waited on 754 * and waited on
698 */ 755 */
699void reiserfs_free_jh(struct buffer_head *bh) { 756void reiserfs_free_jh(struct buffer_head *bh)
700 struct reiserfs_jh *jh; 757{
701 758 struct reiserfs_jh *jh;
702 jh = bh->b_private; 759
703 if (jh) { 760 jh = bh->b_private;
704 bh->b_private = NULL; 761 if (jh) {
705 jh->bh = NULL; 762 bh->b_private = NULL;
706 list_del_init(&jh->list); 763 jh->bh = NULL;
707 kfree(jh); 764 list_del_init(&jh->list);
708 if (atomic_read(&nr_reiserfs_jh) <= 0) 765 kfree(jh);
709 BUG(); 766 if (atomic_read(&nr_reiserfs_jh) <= 0)
710 atomic_dec(&nr_reiserfs_jh); 767 BUG();
711 put_bh(bh); 768 atomic_dec(&nr_reiserfs_jh);
712 } 769 put_bh(bh);
770 }
713} 771}
714 772
715static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, 773static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
716 int tail) 774 int tail)
717{ 775{
718 struct reiserfs_jh *jh; 776 struct reiserfs_jh *jh;
719 777
720 if (bh->b_private) { 778 if (bh->b_private) {
721 spin_lock(&j->j_dirty_buffers_lock); 779 spin_lock(&j->j_dirty_buffers_lock);
722 if (!bh->b_private) { 780 if (!bh->b_private) {
723 spin_unlock(&j->j_dirty_buffers_lock); 781 spin_unlock(&j->j_dirty_buffers_lock);
724 goto no_jh; 782 goto no_jh;
783 }
784 jh = bh->b_private;
785 list_del_init(&jh->list);
786 } else {
787 no_jh:
788 get_bh(bh);
789 jh = alloc_jh();
790 spin_lock(&j->j_dirty_buffers_lock);
791 /* buffer must be locked for __add_jh, should be able to have
792 * two adds at the same time
793 */
794 if (bh->b_private)
795 BUG();
796 jh->bh = bh;
797 bh->b_private = jh;
725 } 798 }
726 jh = bh->b_private; 799 jh->jl = j->j_current_jl;
727 list_del_init(&jh->list); 800 if (tail)
728 } else { 801 list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
729no_jh: 802 else {
730 get_bh(bh); 803 list_add_tail(&jh->list, &jh->jl->j_bh_list);
731 jh = alloc_jh(); 804 }
732 spin_lock(&j->j_dirty_buffers_lock); 805 spin_unlock(&j->j_dirty_buffers_lock);
733 /* buffer must be locked for __add_jh, should be able to have 806 return 0;
734 * two adds at the same time
735 */
736 if (bh->b_private)
737 BUG();
738 jh->bh = bh;
739 bh->b_private = jh;
740 }
741 jh->jl = j->j_current_jl;
742 if (tail)
743 list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
744 else {
745 list_add_tail(&jh->list, &jh->jl->j_bh_list);
746 }
747 spin_unlock(&j->j_dirty_buffers_lock);
748 return 0;
749} 807}
750 808
751int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) { 809int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
752 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); 810{
811 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
753} 812}
754int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) { 813int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
755 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); 814{
815 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
756} 816}
757 817
758#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) 818#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
759static int write_ordered_buffers(spinlock_t *lock, 819static int write_ordered_buffers(spinlock_t * lock,
760 struct reiserfs_journal *j, 820 struct reiserfs_journal *j,
761 struct reiserfs_journal_list *jl, 821 struct reiserfs_journal_list *jl,
762 struct list_head *list) 822 struct list_head *list)
763{ 823{
764 struct buffer_head *bh; 824 struct buffer_head *bh;
765 struct reiserfs_jh *jh; 825 struct reiserfs_jh *jh;
766 int ret = j->j_errno; 826 int ret = j->j_errno;
767 struct buffer_chunk chunk; 827 struct buffer_chunk chunk;
768 struct list_head tmp; 828 struct list_head tmp;
769 INIT_LIST_HEAD(&tmp); 829 INIT_LIST_HEAD(&tmp);
770 830
771 chunk.nr = 0; 831 chunk.nr = 0;
772 spin_lock(lock); 832 spin_lock(lock);
773 while(!list_empty(list)) { 833 while (!list_empty(list)) {
774 jh = JH_ENTRY(list->next); 834 jh = JH_ENTRY(list->next);
775 bh = jh->bh; 835 bh = jh->bh;
776 get_bh(bh); 836 get_bh(bh);
777 if (test_set_buffer_locked(bh)) { 837 if (test_set_buffer_locked(bh)) {
778 if (!buffer_dirty(bh)) { 838 if (!buffer_dirty(bh)) {
779 list_del_init(&jh->list); 839 list_del_init(&jh->list);
780 list_add(&jh->list, &tmp); 840 list_add(&jh->list, &tmp);
781 goto loop_next; 841 goto loop_next;
782 } 842 }
783 spin_unlock(lock); 843 spin_unlock(lock);
784 if (chunk.nr) 844 if (chunk.nr)
845 write_ordered_chunk(&chunk);
846 wait_on_buffer(bh);
847 cond_resched();
848 spin_lock(lock);
849 goto loop_next;
850 }
851 if (buffer_dirty(bh)) {
852 list_del_init(&jh->list);
853 list_add(&jh->list, &tmp);
854 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
855 } else {
856 reiserfs_free_jh(bh);
857 unlock_buffer(bh);
858 }
859 loop_next:
860 put_bh(bh);
861 cond_resched_lock(lock);
862 }
863 if (chunk.nr) {
864 spin_unlock(lock);
785 write_ordered_chunk(&chunk); 865 write_ordered_chunk(&chunk);
786 wait_on_buffer(bh); 866 spin_lock(lock);
787 cond_resched();
788 spin_lock(lock);
789 goto loop_next;
790 }
791 if (buffer_dirty(bh)) {
792 list_del_init(&jh->list);
793 list_add(&jh->list, &tmp);
794 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
795 } else {
796 reiserfs_free_jh(bh);
797 unlock_buffer(bh);
798 } 867 }
799loop_next: 868 while (!list_empty(&tmp)) {
800 put_bh(bh); 869 jh = JH_ENTRY(tmp.prev);
801 cond_resched_lock(lock); 870 bh = jh->bh;
802 } 871 get_bh(bh);
803 if (chunk.nr) { 872 reiserfs_free_jh(bh);
804 spin_unlock(lock); 873
805 write_ordered_chunk(&chunk); 874 if (buffer_locked(bh)) {
806 spin_lock(lock); 875 spin_unlock(lock);
807 } 876 wait_on_buffer(bh);
808 while(!list_empty(&tmp)) { 877 spin_lock(lock);
809 jh = JH_ENTRY(tmp.prev); 878 }
810 bh = jh->bh; 879 if (!buffer_uptodate(bh)) {
811 get_bh(bh); 880 ret = -EIO;
812 reiserfs_free_jh(bh); 881 }
813 882 put_bh(bh);
814 if (buffer_locked(bh)) { 883 cond_resched_lock(lock);
815 spin_unlock(lock);
816 wait_on_buffer(bh);
817 spin_lock(lock);
818 } 884 }
819 if (!buffer_uptodate(bh)) { 885 spin_unlock(lock);
820 ret = -EIO; 886 return ret;
821 } 887}
822 put_bh(bh);
823 cond_resched_lock(lock);
824 }
825 spin_unlock(lock);
826 return ret;
827}
828
829static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) {
830 struct reiserfs_journal *journal = SB_JOURNAL (s);
831 struct reiserfs_journal_list *other_jl;
832 struct reiserfs_journal_list *first_jl;
833 struct list_head *entry;
834 unsigned long trans_id = jl->j_trans_id;
835 unsigned long other_trans_id;
836 unsigned long first_trans_id;
837
838find_first:
839 /*
840 * first we walk backwards to find the oldest uncommitted transation
841 */
842 first_jl = jl;
843 entry = jl->j_list.prev;
844 while(1) {
845 other_jl = JOURNAL_LIST_ENTRY(entry);
846 if (entry == &journal->j_journal_list ||
847 atomic_read(&other_jl->j_older_commits_done))
848 break;
849
850 first_jl = other_jl;
851 entry = other_jl->j_list.prev;
852 }
853
854 /* if we didn't find any older uncommitted transactions, return now */
855 if (first_jl == jl) {
856 return 0;
857 }
858
859 first_trans_id = first_jl->j_trans_id;
860 888
861 entry = &first_jl->j_list; 889static int flush_older_commits(struct super_block *s,
862 while(1) { 890 struct reiserfs_journal_list *jl)
863 other_jl = JOURNAL_LIST_ENTRY(entry); 891{
864 other_trans_id = other_jl->j_trans_id; 892 struct reiserfs_journal *journal = SB_JOURNAL(s);
893 struct reiserfs_journal_list *other_jl;
894 struct reiserfs_journal_list *first_jl;
895 struct list_head *entry;
896 unsigned long trans_id = jl->j_trans_id;
897 unsigned long other_trans_id;
898 unsigned long first_trans_id;
899
900 find_first:
901 /*
902 * first we walk backwards to find the oldest uncommitted transation
903 */
904 first_jl = jl;
905 entry = jl->j_list.prev;
906 while (1) {
907 other_jl = JOURNAL_LIST_ENTRY(entry);
908 if (entry == &journal->j_journal_list ||
909 atomic_read(&other_jl->j_older_commits_done))
910 break;
865 911
866 if (other_trans_id < trans_id) { 912 first_jl = other_jl;
867 if (atomic_read(&other_jl->j_commit_left) != 0) { 913 entry = other_jl->j_list.prev;
868 flush_commit_list(s, other_jl, 0); 914 }
869 915
870 /* list we were called with is gone, return */ 916 /* if we didn't find any older uncommitted transactions, return now */
871 if (!journal_list_still_alive(s, trans_id)) 917 if (first_jl == jl) {
872 return 1; 918 return 0;
919 }
873 920
874 /* the one we just flushed is gone, this means all 921 first_trans_id = first_jl->j_trans_id;
875 * older lists are also gone, so first_jl is no longer 922
876 * valid either. Go back to the beginning. 923 entry = &first_jl->j_list;
877 */ 924 while (1) {
878 if (!journal_list_still_alive(s, other_trans_id)) { 925 other_jl = JOURNAL_LIST_ENTRY(entry);
879 goto find_first; 926 other_trans_id = other_jl->j_trans_id;
927
928 if (other_trans_id < trans_id) {
929 if (atomic_read(&other_jl->j_commit_left) != 0) {
930 flush_commit_list(s, other_jl, 0);
931
932 /* list we were called with is gone, return */
933 if (!journal_list_still_alive(s, trans_id))
934 return 1;
935
936 /* the one we just flushed is gone, this means all
937 * older lists are also gone, so first_jl is no longer
938 * valid either. Go back to the beginning.
939 */
940 if (!journal_list_still_alive
941 (s, other_trans_id)) {
942 goto find_first;
943 }
944 }
945 entry = entry->next;
946 if (entry == &journal->j_journal_list)
947 return 0;
948 } else {
949 return 0;
880 } 950 }
881 }
882 entry = entry->next;
883 if (entry == &journal->j_journal_list)
884 return 0;
885 } else {
886 return 0;
887 } 951 }
888 } 952 return 0;
889 return 0;
890} 953}
891int reiserfs_async_progress_wait(struct super_block *s) { 954int reiserfs_async_progress_wait(struct super_block *s)
892 DEFINE_WAIT(wait); 955{
893 struct reiserfs_journal *j = SB_JOURNAL(s); 956 DEFINE_WAIT(wait);
894 if (atomic_read(&j->j_async_throttle)) 957 struct reiserfs_journal *j = SB_JOURNAL(s);
895 blk_congestion_wait(WRITE, HZ/10); 958 if (atomic_read(&j->j_async_throttle))
896 return 0; 959 blk_congestion_wait(WRITE, HZ / 10);
960 return 0;
897} 961}
898 962
899/* 963/*
@@ -903,209 +967,225 @@ int reiserfs_async_progress_wait(struct super_block *s) {
903** Before the commit block can by written, every other log block must be safely on disk 967** Before the commit block can by written, every other log block must be safely on disk
904** 968**
905*/ 969*/
906static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { 970static int flush_commit_list(struct super_block *s,
907 int i; 971 struct reiserfs_journal_list *jl, int flushall)
908 int bn ; 972{
909 struct buffer_head *tbh = NULL ; 973 int i;
910 unsigned long trans_id = jl->j_trans_id; 974 int bn;
911 struct reiserfs_journal *journal = SB_JOURNAL (s); 975 struct buffer_head *tbh = NULL;
912 int barrier = 0; 976 unsigned long trans_id = jl->j_trans_id;
913 int retval = 0; 977 struct reiserfs_journal *journal = SB_JOURNAL(s);
914 978 int barrier = 0;
915 reiserfs_check_lock_depth(s, "flush_commit_list") ; 979 int retval = 0;
916 980
917 if (atomic_read(&jl->j_older_commits_done)) { 981 reiserfs_check_lock_depth(s, "flush_commit_list");
918 return 0 ; 982
919 } 983 if (atomic_read(&jl->j_older_commits_done)) {
920 984 return 0;
921 /* before we can put our commit blocks on disk, we have to make sure everyone older than 985 }
922 ** us is on disk too 986
923 */ 987 get_fs_excl();
924 BUG_ON (jl->j_len <= 0); 988
925 BUG_ON (trans_id == journal->j_trans_id); 989 /* before we can put our commit blocks on disk, we have to make sure everyone older than
926 990 ** us is on disk too
927 get_journal_list(jl); 991 */
928 if (flushall) { 992 BUG_ON(jl->j_len <= 0);
929 if (flush_older_commits(s, jl) == 1) { 993 BUG_ON(trans_id == journal->j_trans_id);
930 /* list disappeared during flush_older_commits. return */ 994
931 goto put_jl; 995 get_journal_list(jl);
932 } 996 if (flushall) {
933 } 997 if (flush_older_commits(s, jl) == 1) {
934 998 /* list disappeared during flush_older_commits. return */
935 /* make sure nobody is trying to flush this one at the same time */ 999 goto put_jl;
936 down(&jl->j_commit_lock); 1000 }
937 if (!journal_list_still_alive(s, trans_id)) { 1001 }
938 up(&jl->j_commit_lock); 1002
939 goto put_jl; 1003 /* make sure nobody is trying to flush this one at the same time */
940 } 1004 down(&jl->j_commit_lock);
941 BUG_ON (jl->j_trans_id == 0); 1005 if (!journal_list_still_alive(s, trans_id)) {
942 1006 up(&jl->j_commit_lock);
943 /* this commit is done, exit */ 1007 goto put_jl;
944 if (atomic_read(&(jl->j_commit_left)) <= 0) { 1008 }
945 if (flushall) { 1009 BUG_ON(jl->j_trans_id == 0);
946 atomic_set(&(jl->j_older_commits_done), 1) ; 1010
947 } 1011 /* this commit is done, exit */
948 up(&jl->j_commit_lock); 1012 if (atomic_read(&(jl->j_commit_left)) <= 0) {
949 goto put_jl; 1013 if (flushall) {
950 } 1014 atomic_set(&(jl->j_older_commits_done), 1);
951 1015 }
952 if (!list_empty(&jl->j_bh_list)) { 1016 up(&jl->j_commit_lock);
953 unlock_kernel(); 1017 goto put_jl;
954 write_ordered_buffers(&journal->j_dirty_buffers_lock, 1018 }
955 journal, jl, &jl->j_bh_list); 1019
956 lock_kernel(); 1020 if (!list_empty(&jl->j_bh_list)) {
957 } 1021 unlock_kernel();
958 BUG_ON (!list_empty(&jl->j_bh_list)); 1022 write_ordered_buffers(&journal->j_dirty_buffers_lock,
959 /* 1023 journal, jl, &jl->j_bh_list);
960 * for the description block and all the log blocks, submit any buffers 1024 lock_kernel();
961 * that haven't already reached the disk 1025 }
962 */ 1026 BUG_ON(!list_empty(&jl->j_bh_list));
963 atomic_inc(&journal->j_async_throttle); 1027 /*
964 for (i = 0 ; i < (jl->j_len + 1) ; i++) { 1028 * for the description block and all the log blocks, submit any buffers
965 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) % 1029 * that haven't already reached the disk
966 SB_ONDISK_JOURNAL_SIZE(s); 1030 */
967 tbh = journal_find_get_block(s, bn) ; 1031 atomic_inc(&journal->j_async_throttle);
968 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ 1032 for (i = 0; i < (jl->j_len + 1); i++) {
969 ll_rw_block(WRITE, 1, &tbh) ; 1033 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
970 put_bh(tbh) ; 1034 SB_ONDISK_JOURNAL_SIZE(s);
971 } 1035 tbh = journal_find_get_block(s, bn);
972 atomic_dec(&journal->j_async_throttle); 1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
973 1037 ll_rw_block(WRITE, 1, &tbh);
974 /* wait on everything written so far before writing the commit 1038 put_bh(tbh);
975 * if we are in barrier mode, send the commit down now 1039 }
976 */ 1040 atomic_dec(&journal->j_async_throttle);
977 barrier = reiserfs_barrier_flush(s); 1041
978 if (barrier) { 1042 /* wait on everything written so far before writing the commit
979 int ret; 1043 * if we are in barrier mode, send the commit down now
980 lock_buffer(jl->j_commit_bh); 1044 */
981 ret = submit_barrier_buffer(jl->j_commit_bh); 1045 barrier = reiserfs_barrier_flush(s);
982 if (ret == -EOPNOTSUPP) { 1046 if (barrier) {
983 set_buffer_uptodate(jl->j_commit_bh); 1047 int ret;
984 disable_barrier(s); 1048 lock_buffer(jl->j_commit_bh);
985 barrier = 0; 1049 ret = submit_barrier_buffer(jl->j_commit_bh);
986 } 1050 if (ret == -EOPNOTSUPP) {
987 } 1051 set_buffer_uptodate(jl->j_commit_bh);
988 for (i = 0 ; i < (jl->j_len + 1) ; i++) { 1052 disable_barrier(s);
989 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 1053 barrier = 0;
990 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ; 1054 }
991 tbh = journal_find_get_block(s, bn) ; 1055 }
992 wait_on_buffer(tbh) ; 1056 for (i = 0; i < (jl->j_len + 1); i++) {
993 // since we're using ll_rw_blk above, it might have skipped over 1057 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
994 // a locked buffer. Double check here 1058 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
995 // 1059 tbh = journal_find_get_block(s, bn);
996 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 1060 wait_on_buffer(tbh);
997 sync_dirty_buffer(tbh); 1061 // since we're using ll_rw_blk above, it might have skipped over
998 if (unlikely (!buffer_uptodate(tbh))) { 1062 // a locked buffer. Double check here
1063 //
1064 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
1065 sync_dirty_buffer(tbh);
1066 if (unlikely(!buffer_uptodate(tbh))) {
999#ifdef CONFIG_REISERFS_CHECK 1067#ifdef CONFIG_REISERFS_CHECK
1000 reiserfs_warning(s, "journal-601, buffer write failed") ; 1068 reiserfs_warning(s, "journal-601, buffer write failed");
1001#endif 1069#endif
1002 retval = -EIO; 1070 retval = -EIO;
1003 } 1071 }
1004 put_bh(tbh) ; /* once for journal_find_get_block */ 1072 put_bh(tbh); /* once for journal_find_get_block */
1005 put_bh(tbh) ; /* once due to original getblk in do_journal_end */ 1073 put_bh(tbh); /* once due to original getblk in do_journal_end */
1006 atomic_dec(&(jl->j_commit_left)) ; 1074 atomic_dec(&(jl->j_commit_left));
1007 } 1075 }
1008 1076
1009 BUG_ON (atomic_read(&(jl->j_commit_left)) != 1); 1077 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
1010 1078
1011 if (!barrier) { 1079 if (!barrier) {
1012 if (buffer_dirty(jl->j_commit_bh)) 1080 if (buffer_dirty(jl->j_commit_bh))
1013 BUG(); 1081 BUG();
1014 mark_buffer_dirty(jl->j_commit_bh) ; 1082 mark_buffer_dirty(jl->j_commit_bh);
1015 sync_dirty_buffer(jl->j_commit_bh) ; 1083 sync_dirty_buffer(jl->j_commit_bh);
1016 } else 1084 } else
1017 wait_on_buffer(jl->j_commit_bh); 1085 wait_on_buffer(jl->j_commit_bh);
1018 1086
1019 check_barrier_completion(s, jl->j_commit_bh); 1087 check_barrier_completion(s, jl->j_commit_bh);
1020 1088
1021 /* If there was a write error in the journal - we can't commit this 1089 /* If there was a write error in the journal - we can't commit this
1022 * transaction - it will be invalid and, if successful, will just end 1090 * transaction - it will be invalid and, if successful, will just end
1023 * up propogating the write error out to the filesystem. */ 1091 * up propogating the write error out to the filesystem. */
1024 if (unlikely (!buffer_uptodate(jl->j_commit_bh))) { 1092 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1025#ifdef CONFIG_REISERFS_CHECK 1093#ifdef CONFIG_REISERFS_CHECK
1026 reiserfs_warning(s, "journal-615: buffer write failed") ; 1094 reiserfs_warning(s, "journal-615: buffer write failed");
1027#endif 1095#endif
1028 retval = -EIO; 1096 retval = -EIO;
1029 } 1097 }
1030 bforget(jl->j_commit_bh) ; 1098 bforget(jl->j_commit_bh);
1031 if (journal->j_last_commit_id != 0 && 1099 if (journal->j_last_commit_id != 0 &&
1032 (jl->j_trans_id - journal->j_last_commit_id) != 1) { 1100 (jl->j_trans_id - journal->j_last_commit_id) != 1) {
1033 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", 1101 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
1034 journal->j_last_commit_id, 1102 journal->j_last_commit_id, jl->j_trans_id);
1035 jl->j_trans_id); 1103 }
1036 } 1104 journal->j_last_commit_id = jl->j_trans_id;
1037 journal->j_last_commit_id = jl->j_trans_id; 1105
1038 1106 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */
1039 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 1107 cleanup_freed_for_journal_list(s, jl);
1040 cleanup_freed_for_journal_list(s, jl) ; 1108
1041 1109 retval = retval ? retval : journal->j_errno;
1042 retval = retval ? retval : journal->j_errno; 1110
1043 1111 /* mark the metadata dirty */
1044 /* mark the metadata dirty */ 1112 if (!retval)
1045 if (!retval) 1113 dirty_one_transaction(s, jl);
1046 dirty_one_transaction(s, jl); 1114 atomic_dec(&(jl->j_commit_left));
1047 atomic_dec(&(jl->j_commit_left)) ; 1115
1048 1116 if (flushall) {
1049 if (flushall) { 1117 atomic_set(&(jl->j_older_commits_done), 1);
1050 atomic_set(&(jl->j_older_commits_done), 1) ; 1118 }
1051 } 1119 up(&jl->j_commit_lock);
1052 up(&jl->j_commit_lock); 1120 put_jl:
1053put_jl: 1121 put_journal_list(s, jl);
1054 put_journal_list(s, jl); 1122
1055 1123 if (retval)
1056 if (retval) 1124 reiserfs_abort(s, retval, "Journal write error in %s",
1057 reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); 1125 __FUNCTION__);
1058 return retval; 1126 put_fs_excl();
1127 return retval;
1059} 1128}
1060 1129
1061/* 1130/*
1062** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1131** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or
1063** returns NULL if it can't find anything 1132** returns NULL if it can't find anything
1064*/ 1133*/
1065static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { 1134static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1066 struct super_block *sb = cn->sb; 1135 reiserfs_journal_cnode
1067 b_blocknr_t blocknr = cn->blocknr ; 1136 *cn)
1137{
1138 struct super_block *sb = cn->sb;
1139 b_blocknr_t blocknr = cn->blocknr;
1068 1140
1069 cn = cn->hprev ; 1141 cn = cn->hprev;
1070 while(cn) { 1142 while (cn) {
1071 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { 1143 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
1072 return cn->jlist ; 1144 return cn->jlist;
1073 } 1145 }
1074 cn = cn->hprev ; 1146 cn = cn->hprev;
1075 } 1147 }
1076 return NULL ; 1148 return NULL;
1077} 1149}
1078 1150
1079static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, 1151static void remove_journal_hash(struct super_block *,
1080struct reiserfs_journal_list *, unsigned long, int); 1152 struct reiserfs_journal_cnode **,
1153 struct reiserfs_journal_list *, unsigned long,
1154 int);
1081 1155
1082/* 1156/*
1083** once all the real blocks have been flushed, it is safe to remove them from the 1157** once all the real blocks have been flushed, it is safe to remove them from the
1084** journal list for this transaction. Aside from freeing the cnode, this also allows the 1158** journal list for this transaction. Aside from freeing the cnode, this also allows the
1085** block to be reallocated for data blocks if it had been deleted. 1159** block to be reallocated for data blocks if it had been deleted.
1086*/ 1160*/
1087static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, int debug) { 1161static void remove_all_from_journal_list(struct super_block *p_s_sb,
1088 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 1162 struct reiserfs_journal_list *jl,
1089 struct reiserfs_journal_cnode *cn, *last ; 1163 int debug)
1090 cn = jl->j_realblock ; 1164{
1091 1165 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1092 /* which is better, to lock once around the whole loop, or 1166 struct reiserfs_journal_cnode *cn, *last;
1093 ** to lock for each call to remove_journal_hash? 1167 cn = jl->j_realblock;
1094 */ 1168
1095 while(cn) { 1169 /* which is better, to lock once around the whole loop, or
1096 if (cn->blocknr != 0) { 1170 ** to lock for each call to remove_journal_hash?
1097 if (debug) { 1171 */
1098 reiserfs_warning (p_s_sb, "block %u, bh is %d, state %ld", cn->blocknr, 1172 while (cn) {
1099 cn->bh ? 1: 0, cn->state) ; 1173 if (cn->blocknr != 0) {
1100 } 1174 if (debug) {
1101 cn->state = 0 ; 1175 reiserfs_warning(p_s_sb,
1102 remove_journal_hash(p_s_sb, journal->j_list_hash_table, jl, cn->blocknr, 1) ; 1176 "block %u, bh is %d, state %ld",
1103 } 1177 cn->blocknr, cn->bh ? 1 : 0,
1104 last = cn ; 1178 cn->state);
1105 cn = cn->next ; 1179 }
1106 free_cnode(p_s_sb, last) ; 1180 cn->state = 0;
1107 } 1181 remove_journal_hash(p_s_sb, journal->j_list_hash_table,
1108 jl->j_realblock = NULL ; 1182 jl, cn->blocknr, 1);
1183 }
1184 last = cn;
1185 cn = cn->next;
1186 free_cnode(p_s_sb, last);
1187 }
1188 jl->j_realblock = NULL;
1109} 1189}
1110 1190
1111/* 1191/*
@@ -1115,98 +1195,107 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reis
1115** called by flush_journal_list, before it calls remove_all_from_journal_list 1195** called by flush_journal_list, before it calls remove_all_from_journal_list
1116** 1196**
1117*/ 1197*/
1118static int _update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) { 1198static int _update_journal_header_block(struct super_block *p_s_sb,
1119 struct reiserfs_journal_header *jh ; 1199 unsigned long offset,
1120 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 1200 unsigned long trans_id)
1201{
1202 struct reiserfs_journal_header *jh;
1203 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1121 1204
1122 if (reiserfs_is_journal_aborted (journal)) 1205 if (reiserfs_is_journal_aborted(journal))
1123 return -EIO; 1206 return -EIO;
1124 1207
1125 if (trans_id >= journal->j_last_flush_trans_id) { 1208 if (trans_id >= journal->j_last_flush_trans_id) {
1126 if (buffer_locked((journal->j_header_bh))) { 1209 if (buffer_locked((journal->j_header_bh))) {
1127 wait_on_buffer((journal->j_header_bh)) ; 1210 wait_on_buffer((journal->j_header_bh));
1128 if (unlikely (!buffer_uptodate(journal->j_header_bh))) { 1211 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1129#ifdef CONFIG_REISERFS_CHECK 1212#ifdef CONFIG_REISERFS_CHECK
1130 reiserfs_warning (p_s_sb, "journal-699: buffer write failed") ; 1213 reiserfs_warning(p_s_sb,
1214 "journal-699: buffer write failed");
1131#endif 1215#endif
1132 return -EIO; 1216 return -EIO;
1133 } 1217 }
1134 } 1218 }
1135 journal->j_last_flush_trans_id = trans_id ; 1219 journal->j_last_flush_trans_id = trans_id;
1136 journal->j_first_unflushed_offset = offset ; 1220 journal->j_first_unflushed_offset = offset;
1137 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ; 1221 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1138 jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ; 1222 b_data);
1139 jh->j_first_unflushed_offset = cpu_to_le32(offset) ; 1223 jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
1140 jh->j_mount_id = cpu_to_le32(journal->j_mount_id) ; 1224 jh->j_first_unflushed_offset = cpu_to_le32(offset);
1141 1225 jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
1142 if (reiserfs_barrier_flush(p_s_sb)) { 1226
1143 int ret; 1227 if (reiserfs_barrier_flush(p_s_sb)) {
1144 lock_buffer(journal->j_header_bh); 1228 int ret;
1145 ret = submit_barrier_buffer(journal->j_header_bh); 1229 lock_buffer(journal->j_header_bh);
1146 if (ret == -EOPNOTSUPP) { 1230 ret = submit_barrier_buffer(journal->j_header_bh);
1147 set_buffer_uptodate(journal->j_header_bh); 1231 if (ret == -EOPNOTSUPP) {
1148 disable_barrier(p_s_sb); 1232 set_buffer_uptodate(journal->j_header_bh);
1149 goto sync; 1233 disable_barrier(p_s_sb);
1150 } 1234 goto sync;
1151 wait_on_buffer(journal->j_header_bh); 1235 }
1152 check_barrier_completion(p_s_sb, journal->j_header_bh); 1236 wait_on_buffer(journal->j_header_bh);
1153 } else { 1237 check_barrier_completion(p_s_sb, journal->j_header_bh);
1154sync: 1238 } else {
1155 set_buffer_dirty(journal->j_header_bh) ; 1239 sync:
1156 sync_dirty_buffer(journal->j_header_bh) ; 1240 set_buffer_dirty(journal->j_header_bh);
1157 } 1241 sync_dirty_buffer(journal->j_header_bh);
1158 if (!buffer_uptodate(journal->j_header_bh)) { 1242 }
1159 reiserfs_warning (p_s_sb, "journal-837: IO error during journal replay"); 1243 if (!buffer_uptodate(journal->j_header_bh)) {
1160 return -EIO ; 1244 reiserfs_warning(p_s_sb,
1161 } 1245 "journal-837: IO error during journal replay");
1162 } 1246 return -EIO;
1163 return 0 ; 1247 }
1164} 1248 }
1165 1249 return 0;
1166static int update_journal_header_block(struct super_block *p_s_sb,
1167 unsigned long offset,
1168 unsigned long trans_id) {
1169 return _update_journal_header_block(p_s_sb, offset, trans_id);
1170} 1250}
1251
1252static int update_journal_header_block(struct super_block *p_s_sb,
1253 unsigned long offset,
1254 unsigned long trans_id)
1255{
1256 return _update_journal_header_block(p_s_sb, offset, trans_id);
1257}
1258
1171/* 1259/*
1172** flush any and all journal lists older than you are 1260** flush any and all journal lists older than you are
1173** can only be called from flush_journal_list 1261** can only be called from flush_journal_list
1174*/ 1262*/
1175static int flush_older_journal_lists(struct super_block *p_s_sb, 1263static int flush_older_journal_lists(struct super_block *p_s_sb,
1176 struct reiserfs_journal_list *jl) 1264 struct reiserfs_journal_list *jl)
1177{ 1265{
1178 struct list_head *entry; 1266 struct list_head *entry;
1179 struct reiserfs_journal_list *other_jl ; 1267 struct reiserfs_journal_list *other_jl;
1180 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 1268 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1181 unsigned long trans_id = jl->j_trans_id; 1269 unsigned long trans_id = jl->j_trans_id;
1182 1270
1183 /* we know we are the only ones flushing things, no extra race 1271 /* we know we are the only ones flushing things, no extra race
1184 * protection is required. 1272 * protection is required.
1185 */ 1273 */
1186restart: 1274 restart:
1187 entry = journal->j_journal_list.next; 1275 entry = journal->j_journal_list.next;
1188 /* Did we wrap? */ 1276 /* Did we wrap? */
1189 if (entry == &journal->j_journal_list) 1277 if (entry == &journal->j_journal_list)
1190 return 0; 1278 return 0;
1191 other_jl = JOURNAL_LIST_ENTRY(entry); 1279 other_jl = JOURNAL_LIST_ENTRY(entry);
1192 if (other_jl->j_trans_id < trans_id) { 1280 if (other_jl->j_trans_id < trans_id) {
1193 BUG_ON (other_jl->j_refcount <= 0); 1281 BUG_ON(other_jl->j_refcount <= 0);
1194 /* do not flush all */ 1282 /* do not flush all */
1195 flush_journal_list(p_s_sb, other_jl, 0) ; 1283 flush_journal_list(p_s_sb, other_jl, 0);
1196 1284
1197 /* other_jl is now deleted from the list */ 1285 /* other_jl is now deleted from the list */
1198 goto restart; 1286 goto restart;
1199 } 1287 }
1200 return 0 ; 1288 return 0;
1201} 1289}
1202 1290
1203static void del_from_work_list(struct super_block *s, 1291static void del_from_work_list(struct super_block *s,
1204 struct reiserfs_journal_list *jl) { 1292 struct reiserfs_journal_list *jl)
1205 struct reiserfs_journal *journal = SB_JOURNAL (s); 1293{
1206 if (!list_empty(&jl->j_working_list)) { 1294 struct reiserfs_journal *journal = SB_JOURNAL(s);
1207 list_del_init(&jl->j_working_list); 1295 if (!list_empty(&jl->j_working_list)) {
1208 journal->j_num_work_lists--; 1296 list_del_init(&jl->j_working_list);
1209 } 1297 journal->j_num_work_lists--;
1298 }
1210} 1299}
1211 1300
1212/* flush a journal list, both commit and real blocks 1301/* flush a journal list, both commit and real blocks
@@ -1218,383 +1307,407 @@ static void del_from_work_list(struct super_block *s,
1218** and the journal is locked. That means it can only be called from 1307** and the journal is locked. That means it can only be called from
1219** do_journal_end, or by journal_release 1308** do_journal_end, or by journal_release
1220*/ 1309*/
1221static int flush_journal_list(struct super_block *s, 1310static int flush_journal_list(struct super_block *s,
1222 struct reiserfs_journal_list *jl, int flushall) { 1311 struct reiserfs_journal_list *jl, int flushall)
1223 struct reiserfs_journal_list *pjl ; 1312{
1224 struct reiserfs_journal_cnode *cn, *last ; 1313 struct reiserfs_journal_list *pjl;
1225 int count ; 1314 struct reiserfs_journal_cnode *cn, *last;
1226 int was_jwait = 0 ; 1315 int count;
1227 int was_dirty = 0 ; 1316 int was_jwait = 0;
1228 struct buffer_head *saved_bh ; 1317 int was_dirty = 0;
1229 unsigned long j_len_saved = jl->j_len ; 1318 struct buffer_head *saved_bh;
1230 struct reiserfs_journal *journal = SB_JOURNAL (s); 1319 unsigned long j_len_saved = jl->j_len;
1231 int err = 0; 1320 struct reiserfs_journal *journal = SB_JOURNAL(s);
1232 1321 int err = 0;
1233 BUG_ON (j_len_saved <= 0); 1322
1234 1323 BUG_ON(j_len_saved <= 0);
1235 if (atomic_read(&journal->j_wcount) != 0) { 1324
1236 reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d", 1325 if (atomic_read(&journal->j_wcount) != 0) {
1237 atomic_read(&journal->j_wcount)) ; 1326 reiserfs_warning(s,
1238 } 1327 "clm-2048: flush_journal_list called with wcount %d",
1239 BUG_ON (jl->j_trans_id == 0); 1328 atomic_read(&journal->j_wcount));
1240 1329 }
1241 /* if flushall == 0, the lock is already held */ 1330 BUG_ON(jl->j_trans_id == 0);
1242 if (flushall) { 1331
1243 down(&journal->j_flush_sem); 1332 /* if flushall == 0, the lock is already held */
1244 } else if (!down_trylock(&journal->j_flush_sem)) { 1333 if (flushall) {
1245 BUG(); 1334 down(&journal->j_flush_sem);
1246 } 1335 } else if (!down_trylock(&journal->j_flush_sem)) {
1247 1336 BUG();
1248 count = 0 ; 1337 }
1249 if (j_len_saved > journal->j_trans_max) { 1338
1250 reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, trans id %lu\n", j_len_saved, jl->j_trans_id); 1339 count = 0;
1251 return 0 ; 1340 if (j_len_saved > journal->j_trans_max) {
1252 } 1341 reiserfs_panic(s,
1253 1342 "journal-715: flush_journal_list, length is %lu, trans id %lu\n",
1254 /* if all the work is already done, get out of here */ 1343 j_len_saved, jl->j_trans_id);
1255 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1344 return 0;
1256 atomic_read(&(jl->j_commit_left)) <= 0) { 1345 }
1257 goto flush_older_and_return ; 1346
1258 } 1347 get_fs_excl();
1259 1348
1260 /* start by putting the commit list on disk. This will also flush 1349 /* if all the work is already done, get out of here */
1261 ** the commit lists of any olders transactions 1350 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1262 */ 1351 atomic_read(&(jl->j_commit_left)) <= 0) {
1263 flush_commit_list(s, jl, 1) ; 1352 goto flush_older_and_return;
1264 1353 }
1265 if (!(jl->j_state & LIST_DIRTY) && !reiserfs_is_journal_aborted (journal)) 1354
1266 BUG(); 1355 /* start by putting the commit list on disk. This will also flush
1267 1356 ** the commit lists of any olders transactions
1268 /* are we done now? */ 1357 */
1269 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1358 flush_commit_list(s, jl, 1);
1270 atomic_read(&(jl->j_commit_left)) <= 0) { 1359
1271 goto flush_older_and_return ; 1360 if (!(jl->j_state & LIST_DIRTY)
1272 } 1361 && !reiserfs_is_journal_aborted(journal))
1273 1362 BUG();
1274 /* loop through each cnode, see if we need to write it, 1363
1275 ** or wait on a more recent transaction, or just ignore it 1364 /* are we done now? */
1276 */ 1365 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1277 if (atomic_read(&(journal->j_wcount)) != 0) { 1366 atomic_read(&(jl->j_commit_left)) <= 0) {
1278 reiserfs_panic(s, "journal-844: panic journal list is flushing, wcount is not 0\n") ; 1367 goto flush_older_and_return;
1279 } 1368 }
1280 cn = jl->j_realblock ; 1369
1281 while(cn) { 1370 /* loop through each cnode, see if we need to write it,
1282 was_jwait = 0 ; 1371 ** or wait on a more recent transaction, or just ignore it
1283 was_dirty = 0 ; 1372 */
1284 saved_bh = NULL ; 1373 if (atomic_read(&(journal->j_wcount)) != 0) {
1285 /* blocknr of 0 is no longer in the hash, ignore it */ 1374 reiserfs_panic(s,
1286 if (cn->blocknr == 0) { 1375 "journal-844: panic journal list is flushing, wcount is not 0\n");
1287 goto free_cnode ; 1376 }
1288 } 1377 cn = jl->j_realblock;
1289 1378 while (cn) {
1290 /* This transaction failed commit. Don't write out to the disk */ 1379 was_jwait = 0;
1291 if (!(jl->j_state & LIST_DIRTY)) 1380 was_dirty = 0;
1292 goto free_cnode; 1381 saved_bh = NULL;
1293 1382 /* blocknr of 0 is no longer in the hash, ignore it */
1294 pjl = find_newer_jl_for_cn(cn) ; 1383 if (cn->blocknr == 0) {
1295 /* the order is important here. We check pjl to make sure we 1384 goto free_cnode;
1296 ** don't clear BH_JDirty_wait if we aren't the one writing this 1385 }
1297 ** block to disk 1386
1298 */ 1387 /* This transaction failed commit. Don't write out to the disk */
1299 if (!pjl && cn->bh) { 1388 if (!(jl->j_state & LIST_DIRTY))
1300 saved_bh = cn->bh ; 1389 goto free_cnode;
1301 1390
1302 /* we do this to make sure nobody releases the buffer while 1391 pjl = find_newer_jl_for_cn(cn);
1303 ** we are working with it 1392 /* the order is important here. We check pjl to make sure we
1304 */ 1393 ** don't clear BH_JDirty_wait if we aren't the one writing this
1305 get_bh(saved_bh) ; 1394 ** block to disk
1306 1395 */
1307 if (buffer_journal_dirty(saved_bh)) { 1396 if (!pjl && cn->bh) {
1308 BUG_ON (!can_dirty (cn)); 1397 saved_bh = cn->bh;
1309 was_jwait = 1 ; 1398
1310 was_dirty = 1 ; 1399 /* we do this to make sure nobody releases the buffer while
1311 } else if (can_dirty(cn)) { 1400 ** we are working with it
1312 /* everything with !pjl && jwait should be writable */ 1401 */
1313 BUG(); 1402 get_bh(saved_bh);
1314 } 1403
1315 } 1404 if (buffer_journal_dirty(saved_bh)) {
1316 1405 BUG_ON(!can_dirty(cn));
1317 /* if someone has this block in a newer transaction, just make 1406 was_jwait = 1;
1318 ** sure they are commited, and don't try writing it to disk 1407 was_dirty = 1;
1319 */ 1408 } else if (can_dirty(cn)) {
1320 if (pjl) { 1409 /* everything with !pjl && jwait should be writable */
1321 if (atomic_read(&pjl->j_commit_left)) 1410 BUG();
1322 flush_commit_list(s, pjl, 1) ; 1411 }
1323 goto free_cnode ; 1412 }
1324 } 1413
1325 1414 /* if someone has this block in a newer transaction, just make
1326 /* bh == NULL when the block got to disk on its own, OR, 1415 ** sure they are commited, and don't try writing it to disk
1327 ** the block got freed in a future transaction 1416 */
1328 */ 1417 if (pjl) {
1329 if (saved_bh == NULL) { 1418 if (atomic_read(&pjl->j_commit_left))
1330 goto free_cnode ; 1419 flush_commit_list(s, pjl, 1);
1331 } 1420 goto free_cnode;
1332 1421 }
1333 /* this should never happen. kupdate_one_transaction has this list 1422
1334 ** locked while it works, so we should never see a buffer here that 1423 /* bh == NULL when the block got to disk on its own, OR,
1335 ** is not marked JDirty_wait 1424 ** the block got freed in a future transaction
1336 */ 1425 */
1337 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1426 if (saved_bh == NULL) {
1338 reiserfs_warning (s, "journal-813: BAD! buffer %llu %cdirty %cjwait, " 1427 goto free_cnode;
1339 "not in a newer tranasction", 1428 }
1340 (unsigned long long)saved_bh->b_blocknr, 1429
1341 was_dirty ? ' ' : '!', was_jwait ? ' ' : '!') ; 1430 /* this should never happen. kupdate_one_transaction has this list
1342 } 1431 ** locked while it works, so we should never see a buffer here that
1343 if (was_dirty) { 1432 ** is not marked JDirty_wait
1344 /* we inc again because saved_bh gets decremented at free_cnode */ 1433 */
1345 get_bh(saved_bh) ; 1434 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1346 set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; 1435 reiserfs_warning(s,
1347 lock_buffer(saved_bh); 1436 "journal-813: BAD! buffer %llu %cdirty %cjwait, "
1348 BUG_ON (cn->blocknr != saved_bh->b_blocknr); 1437 "not in a newer tranasction",
1349 if (buffer_dirty(saved_bh)) 1438 (unsigned long long)saved_bh->
1350 submit_logged_buffer(saved_bh) ; 1439 b_blocknr, was_dirty ? ' ' : '!',
1351 else 1440 was_jwait ? ' ' : '!');
1352 unlock_buffer(saved_bh); 1441 }
1353 count++ ; 1442 if (was_dirty) {
1354 } else { 1443 /* we inc again because saved_bh gets decremented at free_cnode */
1355 reiserfs_warning (s, "clm-2082: Unable to flush buffer %llu in %s", 1444 get_bh(saved_bh);
1356 (unsigned long long)saved_bh->b_blocknr, __FUNCTION__); 1445 set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
1357 } 1446 lock_buffer(saved_bh);
1358free_cnode: 1447 BUG_ON(cn->blocknr != saved_bh->b_blocknr);
1359 last = cn ; 1448 if (buffer_dirty(saved_bh))
1360 cn = cn->next ; 1449 submit_logged_buffer(saved_bh);
1361 if (saved_bh) { 1450 else
1362 /* we incremented this to keep others from taking the buffer head away */ 1451 unlock_buffer(saved_bh);
1363 put_bh(saved_bh) ; 1452 count++;
1364 if (atomic_read(&(saved_bh->b_count)) < 0) { 1453 } else {
1365 reiserfs_warning (s, "journal-945: saved_bh->b_count < 0"); 1454 reiserfs_warning(s,
1366 } 1455 "clm-2082: Unable to flush buffer %llu in %s",
1367 } 1456 (unsigned long long)saved_bh->
1368 } 1457 b_blocknr, __FUNCTION__);
1369 if (count > 0) { 1458 }
1370 cn = jl->j_realblock ; 1459 free_cnode:
1371 while(cn) { 1460 last = cn;
1372 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 1461 cn = cn->next;
1373 if (!cn->bh) { 1462 if (saved_bh) {
1374 reiserfs_panic(s, "journal-1011: cn->bh is NULL\n") ; 1463 /* we incremented this to keep others from taking the buffer head away */
1375 } 1464 put_bh(saved_bh);
1376 wait_on_buffer(cn->bh) ; 1465 if (atomic_read(&(saved_bh->b_count)) < 0) {
1377 if (!cn->bh) { 1466 reiserfs_warning(s,
1378 reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ; 1467 "journal-945: saved_bh->b_count < 0");
1379 } 1468 }
1380 if (unlikely (!buffer_uptodate(cn->bh))) { 1469 }
1470 }
1471 if (count > 0) {
1472 cn = jl->j_realblock;
1473 while (cn) {
1474 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
1475 if (!cn->bh) {
1476 reiserfs_panic(s,
1477 "journal-1011: cn->bh is NULL\n");
1478 }
1479 wait_on_buffer(cn->bh);
1480 if (!cn->bh) {
1481 reiserfs_panic(s,
1482 "journal-1012: cn->bh is NULL\n");
1483 }
1484 if (unlikely(!buffer_uptodate(cn->bh))) {
1381#ifdef CONFIG_REISERFS_CHECK 1485#ifdef CONFIG_REISERFS_CHECK
1382 reiserfs_warning(s, "journal-949: buffer write failed\n") ; 1486 reiserfs_warning(s,
1487 "journal-949: buffer write failed\n");
1383#endif 1488#endif
1384 err = -EIO; 1489 err = -EIO;
1385 } 1490 }
1386 /* note, we must clear the JDirty_wait bit after the up to date 1491 /* note, we must clear the JDirty_wait bit after the up to date
1387 ** check, otherwise we race against our flushpage routine 1492 ** check, otherwise we race against our flushpage routine
1388 */ 1493 */
1389 BUG_ON (!test_clear_buffer_journal_dirty (cn->bh)); 1494 BUG_ON(!test_clear_buffer_journal_dirty
1390 1495 (cn->bh));
1391 /* undo the inc from journal_mark_dirty */ 1496
1392 put_bh(cn->bh) ; 1497 /* undo the inc from journal_mark_dirty */
1393 brelse(cn->bh) ; 1498 put_bh(cn->bh);
1394 } 1499 brelse(cn->bh);
1395 cn = cn->next ; 1500 }
1396 } 1501 cn = cn->next;
1397 } 1502 }
1398 1503 }
1399 if (err) 1504
1400 reiserfs_abort (s, -EIO, "Write error while pushing transaction to disk in %s", __FUNCTION__); 1505 if (err)
1401flush_older_and_return: 1506 reiserfs_abort(s, -EIO,
1402 1507 "Write error while pushing transaction to disk in %s",
1403 1508 __FUNCTION__);
1404 /* before we can update the journal header block, we _must_ flush all 1509 flush_older_and_return:
1405 ** real blocks from all older transactions to disk. This is because 1510
1406 ** once the header block is updated, this transaction will not be 1511 /* before we can update the journal header block, we _must_ flush all
1407 ** replayed after a crash 1512 ** real blocks from all older transactions to disk. This is because
1408 */ 1513 ** once the header block is updated, this transaction will not be
1409 if (flushall) { 1514 ** replayed after a crash
1410 flush_older_journal_lists(s, jl); 1515 */
1411 } 1516 if (flushall) {
1412 1517 flush_older_journal_lists(s, jl);
1413 err = journal->j_errno; 1518 }
1414 /* before we can remove everything from the hash tables for this 1519
1415 ** transaction, we must make sure it can never be replayed 1520 err = journal->j_errno;
1416 ** 1521 /* before we can remove everything from the hash tables for this
1417 ** since we are only called from do_journal_end, we know for sure there 1522 ** transaction, we must make sure it can never be replayed
1418 ** are no allocations going on while we are flushing journal lists. So, 1523 **
1419 ** we only need to update the journal header block for the last list 1524 ** since we are only called from do_journal_end, we know for sure there
1420 ** being flushed 1525 ** are no allocations going on while we are flushing journal lists. So,
1421 */ 1526 ** we only need to update the journal header block for the last list
1422 if (!err && flushall) { 1527 ** being flushed
1423 err = update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ; 1528 */
1424 if (err) 1529 if (!err && flushall) {
1425 reiserfs_abort (s, -EIO, "Write error while updating journal header in %s", __FUNCTION__); 1530 err =
1426 } 1531 update_journal_header_block(s,
1427 remove_all_from_journal_list(s, jl, 0) ; 1532 (jl->j_start + jl->j_len +
1428 list_del_init(&jl->j_list); 1533 2) % SB_ONDISK_JOURNAL_SIZE(s),
1429 journal->j_num_lists--; 1534 jl->j_trans_id);
1430 del_from_work_list(s, jl); 1535 if (err)
1431 1536 reiserfs_abort(s, -EIO,
1432 if (journal->j_last_flush_id != 0 && 1537 "Write error while updating journal header in %s",
1433 (jl->j_trans_id - journal->j_last_flush_id) != 1) { 1538 __FUNCTION__);
1434 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", 1539 }
1435 journal->j_last_flush_id, 1540 remove_all_from_journal_list(s, jl, 0);
1436 jl->j_trans_id); 1541 list_del_init(&jl->j_list);
1437 } 1542 journal->j_num_lists--;
1438 journal->j_last_flush_id = jl->j_trans_id; 1543 del_from_work_list(s, jl);
1439 1544
1440 /* not strictly required since we are freeing the list, but it should 1545 if (journal->j_last_flush_id != 0 &&
1441 * help find code using dead lists later on 1546 (jl->j_trans_id - journal->j_last_flush_id) != 1) {
1442 */ 1547 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu",
1443 jl->j_len = 0 ; 1548 journal->j_last_flush_id, jl->j_trans_id);
1444 atomic_set(&(jl->j_nonzerolen), 0) ; 1549 }
1445 jl->j_start = 0 ; 1550 journal->j_last_flush_id = jl->j_trans_id;
1446 jl->j_realblock = NULL ; 1551
1447 jl->j_commit_bh = NULL ; 1552 /* not strictly required since we are freeing the list, but it should
1448 jl->j_trans_id = 0 ; 1553 * help find code using dead lists later on
1449 jl->j_state = 0; 1554 */
1450 put_journal_list(s, jl); 1555 jl->j_len = 0;
1451 if (flushall) 1556 atomic_set(&(jl->j_nonzerolen), 0);
1452 up(&journal->j_flush_sem); 1557 jl->j_start = 0;
1453 return err ; 1558 jl->j_realblock = NULL;
1454} 1559 jl->j_commit_bh = NULL;
1560 jl->j_trans_id = 0;
1561 jl->j_state = 0;
1562 put_journal_list(s, jl);
1563 if (flushall)
1564 up(&journal->j_flush_sem);
1565 put_fs_excl();
1566 return err;
1567}
1455 1568
1456static int write_one_transaction(struct super_block *s, 1569static int write_one_transaction(struct super_block *s,
1457 struct reiserfs_journal_list *jl, 1570 struct reiserfs_journal_list *jl,
1458 struct buffer_chunk *chunk) 1571 struct buffer_chunk *chunk)
1459{ 1572{
1460 struct reiserfs_journal_cnode *cn; 1573 struct reiserfs_journal_cnode *cn;
1461 int ret = 0 ; 1574 int ret = 0;
1462 1575
1463 jl->j_state |= LIST_TOUCHED; 1576 jl->j_state |= LIST_TOUCHED;
1464 del_from_work_list(s, jl); 1577 del_from_work_list(s, jl);
1465 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { 1578 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
1466 return 0; 1579 return 0;
1467 } 1580 }
1468 1581
1469 cn = jl->j_realblock ; 1582 cn = jl->j_realblock;
1470 while(cn) { 1583 while (cn) {
1471 /* if the blocknr == 0, this has been cleared from the hash, 1584 /* if the blocknr == 0, this has been cleared from the hash,
1472 ** skip it 1585 ** skip it
1473 */ 1586 */
1474 if (cn->blocknr == 0) { 1587 if (cn->blocknr == 0) {
1475 goto next ; 1588 goto next;
1476 } 1589 }
1477 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 1590 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
1478 struct buffer_head *tmp_bh; 1591 struct buffer_head *tmp_bh;
1479 /* we can race against journal_mark_freed when we try 1592 /* we can race against journal_mark_freed when we try
1480 * to lock_buffer(cn->bh), so we have to inc the buffer 1593 * to lock_buffer(cn->bh), so we have to inc the buffer
1481 * count, and recheck things after locking 1594 * count, and recheck things after locking
1482 */ 1595 */
1483 tmp_bh = cn->bh; 1596 tmp_bh = cn->bh;
1484 get_bh(tmp_bh); 1597 get_bh(tmp_bh);
1485 lock_buffer(tmp_bh); 1598 lock_buffer(tmp_bh);
1486 if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { 1599 if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
1487 if (!buffer_journal_dirty(tmp_bh) || 1600 if (!buffer_journal_dirty(tmp_bh) ||
1488 buffer_journal_prepared(tmp_bh)) 1601 buffer_journal_prepared(tmp_bh))
1489 BUG(); 1602 BUG();
1490 add_to_chunk(chunk, tmp_bh, NULL, write_chunk); 1603 add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
1491 ret++; 1604 ret++;
1492 } else { 1605 } else {
1493 /* note, cn->bh might be null now */ 1606 /* note, cn->bh might be null now */
1494 unlock_buffer(tmp_bh); 1607 unlock_buffer(tmp_bh);
1495 } 1608 }
1496 put_bh(tmp_bh); 1609 put_bh(tmp_bh);
1497 } 1610 }
1498next: 1611 next:
1499 cn = cn->next ; 1612 cn = cn->next;
1500 cond_resched(); 1613 cond_resched();
1501 } 1614 }
1502 return ret ; 1615 return ret;
1503} 1616}
1504 1617
1505/* used by flush_commit_list */ 1618/* used by flush_commit_list */
1506static int dirty_one_transaction(struct super_block *s, 1619static int dirty_one_transaction(struct super_block *s,
1507 struct reiserfs_journal_list *jl) 1620 struct reiserfs_journal_list *jl)
1508{ 1621{
1509 struct reiserfs_journal_cnode *cn; 1622 struct reiserfs_journal_cnode *cn;
1510 struct reiserfs_journal_list *pjl; 1623 struct reiserfs_journal_list *pjl;
1511 int ret = 0 ; 1624 int ret = 0;
1512 1625
1513 jl->j_state |= LIST_DIRTY; 1626 jl->j_state |= LIST_DIRTY;
1514 cn = jl->j_realblock ; 1627 cn = jl->j_realblock;
1515 while(cn) { 1628 while (cn) {
1516 /* look for a more recent transaction that logged this 1629 /* look for a more recent transaction that logged this
1517 ** buffer. Only the most recent transaction with a buffer in 1630 ** buffer. Only the most recent transaction with a buffer in
1518 ** it is allowed to send that buffer to disk 1631 ** it is allowed to send that buffer to disk
1519 */ 1632 */
1520 pjl = find_newer_jl_for_cn(cn) ; 1633 pjl = find_newer_jl_for_cn(cn);
1521 if (!pjl && cn->blocknr && cn->bh && buffer_journal_dirty(cn->bh)) 1634 if (!pjl && cn->blocknr && cn->bh
1522 { 1635 && buffer_journal_dirty(cn->bh)) {
1523 BUG_ON (!can_dirty(cn)); 1636 BUG_ON(!can_dirty(cn));
1524 /* if the buffer is prepared, it will either be logged 1637 /* if the buffer is prepared, it will either be logged
1525 * or restored. If restored, we need to make sure 1638 * or restored. If restored, we need to make sure
1526 * it actually gets marked dirty 1639 * it actually gets marked dirty
1527 */ 1640 */
1528 clear_buffer_journal_new (cn->bh); 1641 clear_buffer_journal_new(cn->bh);
1529 if (buffer_journal_prepared (cn->bh)) { 1642 if (buffer_journal_prepared(cn->bh)) {
1530 set_buffer_journal_restore_dirty (cn->bh); 1643 set_buffer_journal_restore_dirty(cn->bh);
1531 } else { 1644 } else {
1532 set_buffer_journal_test (cn->bh); 1645 set_buffer_journal_test(cn->bh);
1533 mark_buffer_dirty(cn->bh); 1646 mark_buffer_dirty(cn->bh);
1534 } 1647 }
1535 } 1648 }
1536 cn = cn->next ; 1649 cn = cn->next;
1537 } 1650 }
1538 return ret ; 1651 return ret;
1539} 1652}
1540 1653
1541static int kupdate_transactions(struct super_block *s, 1654static int kupdate_transactions(struct super_block *s,
1542 struct reiserfs_journal_list *jl, 1655 struct reiserfs_journal_list *jl,
1543 struct reiserfs_journal_list **next_jl, 1656 struct reiserfs_journal_list **next_jl,
1544 unsigned long *next_trans_id, 1657 unsigned long *next_trans_id,
1545 int num_blocks, 1658 int num_blocks, int num_trans)
1546 int num_trans) { 1659{
1547 int ret = 0; 1660 int ret = 0;
1548 int written = 0 ; 1661 int written = 0;
1549 int transactions_flushed = 0; 1662 int transactions_flushed = 0;
1550 unsigned long orig_trans_id = jl->j_trans_id; 1663 unsigned long orig_trans_id = jl->j_trans_id;
1551 struct buffer_chunk chunk; 1664 struct buffer_chunk chunk;
1552 struct list_head *entry; 1665 struct list_head *entry;
1553 struct reiserfs_journal *journal = SB_JOURNAL (s); 1666 struct reiserfs_journal *journal = SB_JOURNAL(s);
1554 chunk.nr = 0; 1667 chunk.nr = 0;
1555 1668
1556 down(&journal->j_flush_sem); 1669 down(&journal->j_flush_sem);
1557 if (!journal_list_still_alive(s, orig_trans_id)) { 1670 if (!journal_list_still_alive(s, orig_trans_id)) {
1558 goto done; 1671 goto done;
1559 } 1672 }
1560 1673
1561 /* we've got j_flush_sem held, nobody is going to delete any 1674 /* we've got j_flush_sem held, nobody is going to delete any
1562 * of these lists out from underneath us 1675 * of these lists out from underneath us
1563 */ 1676 */
1564 while((num_trans && transactions_flushed < num_trans) || 1677 while ((num_trans && transactions_flushed < num_trans) ||
1565 (!num_trans && written < num_blocks)) { 1678 (!num_trans && written < num_blocks)) {
1566 1679
1567 if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || 1680 if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1568 atomic_read(&jl->j_commit_left) || !(jl->j_state & LIST_DIRTY)) 1681 atomic_read(&jl->j_commit_left)
1569 { 1682 || !(jl->j_state & LIST_DIRTY)) {
1570 del_from_work_list(s, jl); 1683 del_from_work_list(s, jl);
1571 break; 1684 break;
1572 } 1685 }
1573 ret = write_one_transaction(s, jl, &chunk); 1686 ret = write_one_transaction(s, jl, &chunk);
1574 1687
1575 if (ret < 0) 1688 if (ret < 0)
1576 goto done; 1689 goto done;
1577 transactions_flushed++; 1690 transactions_flushed++;
1578 written += ret; 1691 written += ret;
1579 entry = jl->j_list.next; 1692 entry = jl->j_list.next;
1580 1693
1581 /* did we wrap? */ 1694 /* did we wrap? */
1582 if (entry == &journal->j_journal_list) { 1695 if (entry == &journal->j_journal_list) {
1583 break; 1696 break;
1584 } 1697 }
1585 jl = JOURNAL_LIST_ENTRY(entry); 1698 jl = JOURNAL_LIST_ENTRY(entry);
1586 1699
1587 /* don't bother with older transactions */ 1700 /* don't bother with older transactions */
1588 if (jl->j_trans_id <= orig_trans_id) 1701 if (jl->j_trans_id <= orig_trans_id)
1589 break; 1702 break;
1590 } 1703 }
1591 if (chunk.nr) { 1704 if (chunk.nr) {
1592 write_chunk(&chunk); 1705 write_chunk(&chunk);
1593 } 1706 }
1594 1707
1595done: 1708 done:
1596 up(&journal->j_flush_sem); 1709 up(&journal->j_flush_sem);
1597 return ret; 1710 return ret;
1598} 1711}
1599 1712
1600/* for o_sync and fsync heavy applications, they tend to use 1713/* for o_sync and fsync heavy applications, they tend to use
@@ -1607,47 +1720,48 @@ done:
1607** list updates the header block 1720** list updates the header block
1608*/ 1721*/
1609static int flush_used_journal_lists(struct super_block *s, 1722static int flush_used_journal_lists(struct super_block *s,
1610 struct reiserfs_journal_list *jl) { 1723 struct reiserfs_journal_list *jl)
1611 unsigned long len = 0; 1724{
1612 unsigned long cur_len; 1725 unsigned long len = 0;
1613 int ret; 1726 unsigned long cur_len;
1614 int i; 1727 int ret;
1615 int limit = 256; 1728 int i;
1616 struct reiserfs_journal_list *tjl; 1729 int limit = 256;
1617 struct reiserfs_journal_list *flush_jl; 1730 struct reiserfs_journal_list *tjl;
1618 unsigned long trans_id; 1731 struct reiserfs_journal_list *flush_jl;
1619 struct reiserfs_journal *journal = SB_JOURNAL (s); 1732 unsigned long trans_id;
1620 1733 struct reiserfs_journal *journal = SB_JOURNAL(s);
1621 flush_jl = tjl = jl; 1734
1622 1735 flush_jl = tjl = jl;
1623 /* in data logging mode, try harder to flush a lot of blocks */ 1736
1624 if (reiserfs_data_log(s)) 1737 /* in data logging mode, try harder to flush a lot of blocks */
1625 limit = 1024; 1738 if (reiserfs_data_log(s))
1626 /* flush for 256 transactions or limit blocks, whichever comes first */ 1739 limit = 1024;
1627 for(i = 0 ; i < 256 && len < limit ; i++) { 1740 /* flush for 256 transactions or limit blocks, whichever comes first */
1628 if (atomic_read(&tjl->j_commit_left) || 1741 for (i = 0; i < 256 && len < limit; i++) {
1629 tjl->j_trans_id < jl->j_trans_id) { 1742 if (atomic_read(&tjl->j_commit_left) ||
1630 break; 1743 tjl->j_trans_id < jl->j_trans_id) {
1631 } 1744 break;
1632 cur_len = atomic_read(&tjl->j_nonzerolen); 1745 }
1633 if (cur_len > 0) { 1746 cur_len = atomic_read(&tjl->j_nonzerolen);
1634 tjl->j_state &= ~LIST_TOUCHED; 1747 if (cur_len > 0) {
1635 } 1748 tjl->j_state &= ~LIST_TOUCHED;
1636 len += cur_len; 1749 }
1637 flush_jl = tjl; 1750 len += cur_len;
1638 if (tjl->j_list.next == &journal->j_journal_list) 1751 flush_jl = tjl;
1639 break; 1752 if (tjl->j_list.next == &journal->j_journal_list)
1640 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 1753 break;
1641 } 1754 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
1642 /* try to find a group of blocks we can flush across all the 1755 }
1643 ** transactions, but only bother if we've actually spanned 1756 /* try to find a group of blocks we can flush across all the
1644 ** across multiple lists 1757 ** transactions, but only bother if we've actually spanned
1645 */ 1758 ** across multiple lists
1646 if (flush_jl != jl) { 1759 */
1647 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 1760 if (flush_jl != jl) {
1648 } 1761 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
1649 flush_journal_list(s, flush_jl, 1); 1762 }
1650 return 0; 1763 flush_journal_list(s, flush_jl, 1);
1764 return 0;
1651} 1765}
1652 1766
1653/* 1767/*
@@ -1655,207 +1769,248 @@ static int flush_used_journal_lists(struct super_block *s,
1655** only touchs the hnext and hprev pointers. 1769** only touchs the hnext and hprev pointers.
1656*/ 1770*/
1657void remove_journal_hash(struct super_block *sb, 1771void remove_journal_hash(struct super_block *sb,
1658 struct reiserfs_journal_cnode **table, 1772 struct reiserfs_journal_cnode **table,
1659 struct reiserfs_journal_list *jl, 1773 struct reiserfs_journal_list *jl,
1660 unsigned long block, int remove_freed) 1774 unsigned long block, int remove_freed)
1661{ 1775{
1662 struct reiserfs_journal_cnode *cur ; 1776 struct reiserfs_journal_cnode *cur;
1663 struct reiserfs_journal_cnode **head ; 1777 struct reiserfs_journal_cnode **head;
1664 1778
1665 head= &(journal_hash(table, sb, block)) ; 1779 head = &(journal_hash(table, sb, block));
1666 if (!head) { 1780 if (!head) {
1667 return ; 1781 return;
1668 } 1782 }
1669 cur = *head ; 1783 cur = *head;
1670 while(cur) { 1784 while (cur) {
1671 if (cur->blocknr == block && cur->sb == sb && (jl == NULL || jl == cur->jlist) && 1785 if (cur->blocknr == block && cur->sb == sb
1672 (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { 1786 && (jl == NULL || jl == cur->jlist)
1673 if (cur->hnext) { 1787 && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
1674 cur->hnext->hprev = cur->hprev ; 1788 if (cur->hnext) {
1675 } 1789 cur->hnext->hprev = cur->hprev;
1676 if (cur->hprev) { 1790 }
1677 cur->hprev->hnext = cur->hnext ; 1791 if (cur->hprev) {
1678 } else { 1792 cur->hprev->hnext = cur->hnext;
1679 *head = cur->hnext ; 1793 } else {
1680 } 1794 *head = cur->hnext;
1681 cur->blocknr = 0 ; 1795 }
1682 cur->sb = NULL ; 1796 cur->blocknr = 0;
1683 cur->state = 0 ; 1797 cur->sb = NULL;
1684 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 1798 cur->state = 0;
1685 atomic_dec(&(cur->jlist->j_nonzerolen)) ; 1799 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */
1686 cur->bh = NULL ; 1800 atomic_dec(&(cur->jlist->j_nonzerolen));
1687 cur->jlist = NULL ; 1801 cur->bh = NULL;
1688 } 1802 cur->jlist = NULL;
1689 cur = cur->hnext ; 1803 }
1690 } 1804 cur = cur->hnext;
1691} 1805 }
1692 1806}
1693static void free_journal_ram(struct super_block *p_s_sb) { 1807
1694 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1808static void free_journal_ram(struct super_block *p_s_sb)
1695 reiserfs_kfree(journal->j_current_jl, 1809{
1696 sizeof(struct reiserfs_journal_list), p_s_sb); 1810 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1697 journal->j_num_lists--; 1811 reiserfs_kfree(journal->j_current_jl,
1698 1812 sizeof(struct reiserfs_journal_list), p_s_sb);
1699 vfree(journal->j_cnode_free_orig) ; 1813 journal->j_num_lists--;
1700 free_list_bitmaps(p_s_sb, journal->j_list_bitmap) ; 1814
1701 free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */ 1815 vfree(journal->j_cnode_free_orig);
1702 if (journal->j_header_bh) { 1816 free_list_bitmaps(p_s_sb, journal->j_list_bitmap);
1703 brelse(journal->j_header_bh) ; 1817 free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */
1704 } 1818 if (journal->j_header_bh) {
1705 /* j_header_bh is on the journal dev, make sure not to release the journal 1819 brelse(journal->j_header_bh);
1706 * dev until we brelse j_header_bh 1820 }
1707 */ 1821 /* j_header_bh is on the journal dev, make sure not to release the journal
1708 release_journal_dev(p_s_sb, journal); 1822 * dev until we brelse j_header_bh
1709 vfree(journal) ; 1823 */
1824 release_journal_dev(p_s_sb, journal);
1825 vfree(journal);
1710} 1826}
1711 1827
1712/* 1828/*
1713** call on unmount. Only set error to 1 if you haven't made your way out 1829** call on unmount. Only set error to 1 if you haven't made your way out
1714** of read_super() yet. Any other caller must keep error at 0. 1830** of read_super() yet. Any other caller must keep error at 0.
1715*/ 1831*/
1716static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { 1832static int do_journal_release(struct reiserfs_transaction_handle *th,
1717 struct reiserfs_transaction_handle myth ; 1833 struct super_block *p_s_sb, int error)
1718 int flushed = 0; 1834{
1719 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1835 struct reiserfs_transaction_handle myth;
1720 1836 int flushed = 0;
1721 /* we only want to flush out transactions if we were called with error == 0 1837 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1722 */ 1838
1723 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { 1839 /* we only want to flush out transactions if we were called with error == 0
1724 /* end the current trans */ 1840 */
1725 BUG_ON (!th->t_trans_id); 1841 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
1726 do_journal_end(th, p_s_sb,10, FLUSH_ALL) ; 1842 /* end the current trans */
1727 1843 BUG_ON(!th->t_trans_id);
1728 /* make sure something gets logged to force our way into the flush code */ 1844 do_journal_end(th, p_s_sb, 10, FLUSH_ALL);
1729 if (!journal_join(&myth, p_s_sb, 1)) { 1845
1730 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 1846 /* make sure something gets logged to force our way into the flush code */
1731 journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 1847 if (!journal_join(&myth, p_s_sb, 1)) {
1732 do_journal_end(&myth, p_s_sb,1, FLUSH_ALL) ; 1848 reiserfs_prepare_for_journal(p_s_sb,
1733 flushed = 1; 1849 SB_BUFFER_WITH_SB(p_s_sb),
1734 } 1850 1);
1735 } 1851 journal_mark_dirty(&myth, p_s_sb,
1736 1852 SB_BUFFER_WITH_SB(p_s_sb));
1737 /* this also catches errors during the do_journal_end above */ 1853 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
1738 if (!error && reiserfs_is_journal_aborted(journal)) { 1854 flushed = 1;
1739 memset(&myth, 0, sizeof(myth)); 1855 }
1740 if (!journal_join_abort(&myth, p_s_sb, 1)) { 1856 }
1741 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 1857
1742 journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 1858 /* this also catches errors during the do_journal_end above */
1743 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL) ; 1859 if (!error && reiserfs_is_journal_aborted(journal)) {
1744 } 1860 memset(&myth, 0, sizeof(myth));
1745 } 1861 if (!journal_join_abort(&myth, p_s_sb, 1)) {
1746 1862 reiserfs_prepare_for_journal(p_s_sb,
1747 reiserfs_mounted_fs_count-- ; 1863 SB_BUFFER_WITH_SB(p_s_sb),
1748 /* wait for all commits to finish */ 1864 1);
1749 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); 1865 journal_mark_dirty(&myth, p_s_sb,
1750 flush_workqueue(commit_wq); 1866 SB_BUFFER_WITH_SB(p_s_sb));
1751 if (!reiserfs_mounted_fs_count) { 1867 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
1752 destroy_workqueue(commit_wq); 1868 }
1753 commit_wq = NULL; 1869 }
1754 } 1870
1755 1871 reiserfs_mounted_fs_count--;
1756 free_journal_ram(p_s_sb) ; 1872 /* wait for all commits to finish */
1757 1873 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work);
1758 return 0 ; 1874 flush_workqueue(commit_wq);
1875 if (!reiserfs_mounted_fs_count) {
1876 destroy_workqueue(commit_wq);
1877 commit_wq = NULL;
1878 }
1879
1880 free_journal_ram(p_s_sb);
1881
1882 return 0;
1759} 1883}
1760 1884
1761/* 1885/*
1762** call on unmount. flush all journal trans, release all alloc'd ram 1886** call on unmount. flush all journal trans, release all alloc'd ram
1763*/ 1887*/
1764int journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { 1888int journal_release(struct reiserfs_transaction_handle *th,
1765 return do_journal_release(th, p_s_sb, 0) ; 1889 struct super_block *p_s_sb)
1890{
1891 return do_journal_release(th, p_s_sb, 0);
1766} 1892}
1893
1767/* 1894/*
1768** only call from an error condition inside reiserfs_read_super! 1895** only call from an error condition inside reiserfs_read_super!
1769*/ 1896*/
1770int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { 1897int journal_release_error(struct reiserfs_transaction_handle *th,
1771 return do_journal_release(th, p_s_sb, 1) ; 1898 struct super_block *p_s_sb)
1899{
1900 return do_journal_release(th, p_s_sb, 1);
1772} 1901}
1773 1902
1774/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 1903/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */
1775static int journal_compare_desc_commit(struct super_block *p_s_sb, struct reiserfs_journal_desc *desc, 1904static int journal_compare_desc_commit(struct super_block *p_s_sb,
1776 struct reiserfs_journal_commit *commit) { 1905 struct reiserfs_journal_desc *desc,
1777 if (get_commit_trans_id (commit) != get_desc_trans_id (desc) || 1906 struct reiserfs_journal_commit *commit)
1778 get_commit_trans_len (commit) != get_desc_trans_len (desc) || 1907{
1779 get_commit_trans_len (commit) > SB_JOURNAL(p_s_sb)->j_trans_max || 1908 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
1780 get_commit_trans_len (commit) <= 0 1909 get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
1781 ) { 1910 get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max ||
1782 return 1 ; 1911 get_commit_trans_len(commit) <= 0) {
1783 } 1912 return 1;
1784 return 0 ; 1913 }
1914 return 0;
1785} 1915}
1916
1786/* returns 0 if it did not find a description block 1917/* returns 0 if it did not find a description block
1787** returns -1 if it found a corrupt commit block 1918** returns -1 if it found a corrupt commit block
1788** returns 1 if both desc and commit were valid 1919** returns 1 if both desc and commit were valid
1789*/ 1920*/
1790static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffer_head *d_bh, unsigned long *oldest_invalid_trans_id, unsigned long *newest_mount_id) { 1921static int journal_transaction_is_valid(struct super_block *p_s_sb,
1791 struct reiserfs_journal_desc *desc ; 1922 struct buffer_head *d_bh,
1792 struct reiserfs_journal_commit *commit ; 1923 unsigned long *oldest_invalid_trans_id,
1793 struct buffer_head *c_bh ; 1924 unsigned long *newest_mount_id)
1794 unsigned long offset ; 1925{
1795 1926 struct reiserfs_journal_desc *desc;
1796 if (!d_bh) 1927 struct reiserfs_journal_commit *commit;
1797 return 0 ; 1928 struct buffer_head *c_bh;
1798 1929 unsigned long offset;
1799 desc = (struct reiserfs_journal_desc *)d_bh->b_data ; 1930
1800 if (get_desc_trans_len(desc) > 0 && !memcmp(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8)) { 1931 if (!d_bh)
1801 if (oldest_invalid_trans_id && *oldest_invalid_trans_id && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 1932 return 0;
1802 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-986: transaction " 1933
1803 "is valid returning because trans_id %d is greater than " 1934 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1804 "oldest_invalid %lu", get_desc_trans_id(desc), 1935 if (get_desc_trans_len(desc) > 0
1805 *oldest_invalid_trans_id); 1936 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
1806 return 0 ; 1937 if (oldest_invalid_trans_id && *oldest_invalid_trans_id
1807 } 1938 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
1808 if (newest_mount_id && *newest_mount_id > get_desc_mount_id (desc)) { 1939 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1809 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1087: transaction " 1940 "journal-986: transaction "
1810 "is valid returning because mount_id %d is less than " 1941 "is valid returning because trans_id %d is greater than "
1811 "newest_mount_id %lu", get_desc_mount_id (desc), 1942 "oldest_invalid %lu",
1812 *newest_mount_id) ; 1943 get_desc_trans_id(desc),
1813 return -1 ; 1944 *oldest_invalid_trans_id);
1814 } 1945 return 0;
1815 if ( get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max ) { 1946 }
1816 reiserfs_warning(p_s_sb, "journal-2018: Bad transaction length %d encountered, ignoring transaction", get_desc_trans_len(desc)); 1947 if (newest_mount_id
1817 return -1 ; 1948 && *newest_mount_id > get_desc_mount_id(desc)) {
1818 } 1949 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1819 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 1950 "journal-1087: transaction "
1820 1951 "is valid returning because mount_id %d is less than "
1821 /* ok, we have a journal description block, lets see if the transaction was valid */ 1952 "newest_mount_id %lu",
1822 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 1953 get_desc_mount_id(desc),
1823 ((offset + get_desc_trans_len(desc) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 1954 *newest_mount_id);
1824 if (!c_bh) 1955 return -1;
1825 return 0 ; 1956 }
1826 commit = (struct reiserfs_journal_commit *)c_bh->b_data ; 1957 if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) {
1827 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 1958 reiserfs_warning(p_s_sb,
1828 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 1959 "journal-2018: Bad transaction length %d encountered, ignoring transaction",
1829 "journal_transaction_is_valid, commit offset %ld had bad " 1960 get_desc_trans_len(desc));
1830 "time %d or length %d", 1961 return -1;
1831 c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 1962 }
1832 get_commit_trans_id (commit), 1963 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
1833 get_commit_trans_len(commit)); 1964
1834 brelse(c_bh) ; 1965 /* ok, we have a journal description block, lets see if the transaction was valid */
1835 if (oldest_invalid_trans_id) { 1966 c_bh =
1836 *oldest_invalid_trans_id = get_desc_trans_id(desc) ; 1967 journal_bread(p_s_sb,
1837 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1004: " 1968 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1838 "transaction_is_valid setting oldest invalid trans_id " 1969 ((offset + get_desc_trans_len(desc) +
1839 "to %d", get_desc_trans_id(desc)) ; 1970 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
1840 } 1971 if (!c_bh)
1841 return -1; 1972 return 0;
1842 } 1973 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
1843 brelse(c_bh) ; 1974 if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
1844 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1006: found valid " 1975 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1845 "transaction start offset %llu, len %d id %d", 1976 "journal_transaction_is_valid, commit offset %ld had bad "
1846 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 1977 "time %d or length %d",
1847 get_desc_trans_len(desc), get_desc_trans_id(desc)) ; 1978 c_bh->b_blocknr -
1848 return 1 ; 1979 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
1849 } else { 1980 get_commit_trans_id(commit),
1850 return 0 ; 1981 get_commit_trans_len(commit));
1851 } 1982 brelse(c_bh);
1852} 1983 if (oldest_invalid_trans_id) {
1853 1984 *oldest_invalid_trans_id =
1854static void brelse_array(struct buffer_head **heads, int num) { 1985 get_desc_trans_id(desc);
1855 int i ; 1986 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1856 for (i = 0 ; i < num ; i++) { 1987 "journal-1004: "
1857 brelse(heads[i]) ; 1988 "transaction_is_valid setting oldest invalid trans_id "
1858 } 1989 "to %d",
1990 get_desc_trans_id(desc));
1991 }
1992 return -1;
1993 }
1994 brelse(c_bh);
1995 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1996 "journal-1006: found valid "
1997 "transaction start offset %llu, len %d id %d",
1998 d_bh->b_blocknr -
1999 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2000 get_desc_trans_len(desc),
2001 get_desc_trans_id(desc));
2002 return 1;
2003 } else {
2004 return 0;
2005 }
2006}
2007
2008static void brelse_array(struct buffer_head **heads, int num)
2009{
2010 int i;
2011 for (i = 0; i < num; i++) {
2012 brelse(heads[i]);
2013 }
1859} 2014}
1860 2015
1861/* 2016/*
@@ -1863,149 +2018,202 @@ static void brelse_array(struct buffer_head **heads, int num) {
1863** this either reads in a replays a transaction, or returns because the transaction 2018** this either reads in a replays a transaction, or returns because the transaction
1864** is invalid, or too old. 2019** is invalid, or too old.
1865*/ 2020*/
1866static int journal_read_transaction(struct super_block *p_s_sb, unsigned long cur_dblock, unsigned long oldest_start, 2021static int journal_read_transaction(struct super_block *p_s_sb,
1867 unsigned long oldest_trans_id, unsigned long newest_mount_id) { 2022 unsigned long cur_dblock,
1868 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 2023 unsigned long oldest_start,
1869 struct reiserfs_journal_desc *desc ; 2024 unsigned long oldest_trans_id,
1870 struct reiserfs_journal_commit *commit ; 2025 unsigned long newest_mount_id)
1871 unsigned long trans_id = 0 ; 2026{
1872 struct buffer_head *c_bh ; 2027 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1873 struct buffer_head *d_bh ; 2028 struct reiserfs_journal_desc *desc;
1874 struct buffer_head **log_blocks = NULL ; 2029 struct reiserfs_journal_commit *commit;
1875 struct buffer_head **real_blocks = NULL ; 2030 unsigned long trans_id = 0;
1876 unsigned long trans_offset ; 2031 struct buffer_head *c_bh;
1877 int i; 2032 struct buffer_head *d_bh;
1878 int trans_half; 2033 struct buffer_head **log_blocks = NULL;
1879 2034 struct buffer_head **real_blocks = NULL;
1880 d_bh = journal_bread(p_s_sb, cur_dblock) ; 2035 unsigned long trans_offset;
1881 if (!d_bh) 2036 int i;
1882 return 1 ; 2037 int trans_half;
1883 desc = (struct reiserfs_journal_desc *)d_bh->b_data ; 2038
1884 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 2039 d_bh = journal_bread(p_s_sb, cur_dblock);
1885 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " 2040 if (!d_bh)
1886 "journal_read_transaction, offset %llu, len %d mount_id %d", 2041 return 1;
1887 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2042 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1888 get_desc_trans_len(desc), get_desc_mount_id(desc)) ; 2043 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
1889 if (get_desc_trans_id(desc) < oldest_trans_id) { 2044 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: "
1890 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " 2045 "journal_read_transaction, offset %llu, len %d mount_id %d",
1891 "journal_read_trans skipping because %lu is too old", 2046 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
1892 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ; 2047 get_desc_trans_len(desc), get_desc_mount_id(desc));
1893 brelse(d_bh) ; 2048 if (get_desc_trans_id(desc) < oldest_trans_id) {
1894 return 1 ; 2049 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: "
1895 } 2050 "journal_read_trans skipping because %lu is too old",
1896 if (get_desc_mount_id(desc) != newest_mount_id) { 2051 cur_dblock -
1897 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " 2052 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
1898 "journal_read_trans skipping because %d is != " 2053 brelse(d_bh);
1899 "newest_mount_id %lu", get_desc_mount_id(desc), 2054 return 1;
1900 newest_mount_id) ; 2055 }
1901 brelse(d_bh) ; 2056 if (get_desc_mount_id(desc) != newest_mount_id) {
1902 return 1 ; 2057 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: "
1903 } 2058 "journal_read_trans skipping because %d is != "
1904 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2059 "newest_mount_id %lu", get_desc_mount_id(desc),
1905 ((trans_offset + get_desc_trans_len(desc) + 1) % 2060 newest_mount_id);
1906 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 2061 brelse(d_bh);
1907 if (!c_bh) { 2062 return 1;
1908 brelse(d_bh) ; 2063 }
1909 return 1 ; 2064 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1910 } 2065 ((trans_offset + get_desc_trans_len(desc) + 1) %
1911 commit = (struct reiserfs_journal_commit *)c_bh->b_data ; 2066 SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
1912 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2067 if (!c_bh) {
1913 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal_read_transaction, " 2068 brelse(d_bh);
1914 "commit offset %llu had bad time %d or length %d", 2069 return 1;
1915 c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2070 }
1916 get_commit_trans_id(commit), get_commit_trans_len(commit)); 2071 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
1917 brelse(c_bh) ; 2072 if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
1918 brelse(d_bh) ; 2073 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1919 return 1; 2074 "journal_read_transaction, "
1920 } 2075 "commit offset %llu had bad time %d or length %d",
1921 trans_id = get_desc_trans_id(desc) ; 2076 c_bh->b_blocknr -
1922 /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2077 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
1923 log_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ; 2078 get_commit_trans_id(commit),
1924 real_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ; 2079 get_commit_trans_len(commit));
1925 if (!log_blocks || !real_blocks) { 2080 brelse(c_bh);
1926 brelse(c_bh) ; 2081 brelse(d_bh);
1927 brelse(d_bh) ; 2082 return 1;
1928 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2083 }
1929 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2084 trans_id = get_desc_trans_id(desc);
1930 reiserfs_warning(p_s_sb, "journal-1169: kmalloc failed, unable to mount FS") ; 2085 /* now we know we've got a good transaction, and it was inside the valid time ranges */
1931 return -1 ; 2086 log_blocks =
1932 } 2087 reiserfs_kmalloc(get_desc_trans_len(desc) *
1933 /* get all the buffer heads */ 2088 sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
1934 trans_half = journal_trans_half (p_s_sb->s_blocksize) ; 2089 real_blocks =
1935 for(i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2090 reiserfs_kmalloc(get_desc_trans_len(desc) *
1936 log_blocks[i] = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + (trans_offset + 1 + i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2091 sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
1937 if (i < trans_half) { 2092 if (!log_blocks || !real_blocks) {
1938 real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(desc->j_realblock[i])) ; 2093 brelse(c_bh);
1939 } else { 2094 brelse(d_bh);
1940 real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(commit->j_realblock[i - trans_half])) ; 2095 reiserfs_kfree(log_blocks,
1941 } 2096 get_desc_trans_len(desc) *
1942 if ( real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb) ) { 2097 sizeof(struct buffer_head *), p_s_sb);
1943 reiserfs_warning(p_s_sb, "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); 2098 reiserfs_kfree(real_blocks,
1944 goto abort_replay; 2099 get_desc_trans_len(desc) *
1945 } 2100 sizeof(struct buffer_head *), p_s_sb);
1946 /* make sure we don't try to replay onto log or reserved area */ 2101 reiserfs_warning(p_s_sb,
1947 if (is_block_in_log_or_reserved_area(p_s_sb, real_blocks[i]->b_blocknr)) { 2102 "journal-1169: kmalloc failed, unable to mount FS");
1948 reiserfs_warning(p_s_sb, "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block") ; 2103 return -1;
1949abort_replay: 2104 }
1950 brelse_array(log_blocks, i) ; 2105 /* get all the buffer heads */
1951 brelse_array(real_blocks, i) ; 2106 trans_half = journal_trans_half(p_s_sb->s_blocksize);
1952 brelse(c_bh) ; 2107 for (i = 0; i < get_desc_trans_len(desc); i++) {
1953 brelse(d_bh) ; 2108 log_blocks[i] =
1954 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2109 journal_getblk(p_s_sb,
1955 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2110 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1956 return -1 ; 2111 (trans_offset + 1 +
1957 } 2112 i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
1958 } 2113 if (i < trans_half) {
1959 /* read in the log blocks, memcpy to the corresponding real block */ 2114 real_blocks[i] =
1960 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks) ; 2115 sb_getblk(p_s_sb,
1961 for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2116 le32_to_cpu(desc->j_realblock[i]));
1962 wait_on_buffer(log_blocks[i]) ; 2117 } else {
1963 if (!buffer_uptodate(log_blocks[i])) { 2118 real_blocks[i] =
1964 reiserfs_warning(p_s_sb, "journal-1212: REPLAY FAILURE fsck required! buffer write failed") ; 2119 sb_getblk(p_s_sb,
1965 brelse_array(log_blocks + i, get_desc_trans_len(desc) - i) ; 2120 le32_to_cpu(commit->
1966 brelse_array(real_blocks, get_desc_trans_len(desc)) ; 2121 j_realblock[i - trans_half]));
1967 brelse(c_bh) ; 2122 }
1968 brelse(d_bh) ; 2123 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
1969 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2124 reiserfs_warning(p_s_sb,
1970 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2125 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
1971 return -1 ; 2126 goto abort_replay;
1972 } 2127 }
1973 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, real_blocks[i]->b_size) ; 2128 /* make sure we don't try to replay onto log or reserved area */
1974 set_buffer_uptodate(real_blocks[i]) ; 2129 if (is_block_in_log_or_reserved_area
1975 brelse(log_blocks[i]) ; 2130 (p_s_sb, real_blocks[i]->b_blocknr)) {
1976 } 2131 reiserfs_warning(p_s_sb,
1977 /* flush out the real blocks */ 2132 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block");
1978 for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2133 abort_replay:
1979 set_buffer_dirty(real_blocks[i]) ; 2134 brelse_array(log_blocks, i);
1980 ll_rw_block(WRITE, 1, real_blocks + i) ; 2135 brelse_array(real_blocks, i);
1981 } 2136 brelse(c_bh);
1982 for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2137 brelse(d_bh);
1983 wait_on_buffer(real_blocks[i]) ; 2138 reiserfs_kfree(log_blocks,
1984 if (!buffer_uptodate(real_blocks[i])) { 2139 get_desc_trans_len(desc) *
1985 reiserfs_warning(p_s_sb, "journal-1226: REPLAY FAILURE, fsck required! buffer write failed") ; 2140 sizeof(struct buffer_head *), p_s_sb);
1986 brelse_array(real_blocks + i, get_desc_trans_len(desc) - i) ; 2141 reiserfs_kfree(real_blocks,
1987 brelse(c_bh) ; 2142 get_desc_trans_len(desc) *
1988 brelse(d_bh) ; 2143 sizeof(struct buffer_head *), p_s_sb);
1989 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2144 return -1;
1990 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2145 }
1991 return -1 ; 2146 }
1992 } 2147 /* read in the log blocks, memcpy to the corresponding real block */
1993 brelse(real_blocks[i]) ; 2148 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
1994 } 2149 for (i = 0; i < get_desc_trans_len(desc); i++) {
1995 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ((trans_offset + get_desc_trans_len(desc) + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ; 2150 wait_on_buffer(log_blocks[i]);
1996 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1095: setting journal " 2151 if (!buffer_uptodate(log_blocks[i])) {
1997 "start to offset %ld", 2152 reiserfs_warning(p_s_sb,
1998 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ; 2153 "journal-1212: REPLAY FAILURE fsck required! buffer write failed");
1999 2154 brelse_array(log_blocks + i,
2000 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2155 get_desc_trans_len(desc) - i);
2001 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 2156 brelse_array(real_blocks, get_desc_trans_len(desc));
2002 journal->j_last_flush_trans_id = trans_id ; 2157 brelse(c_bh);
2003 journal->j_trans_id = trans_id + 1; 2158 brelse(d_bh);
2004 brelse(c_bh) ; 2159 reiserfs_kfree(log_blocks,
2005 brelse(d_bh) ; 2160 get_desc_trans_len(desc) *
2006 reiserfs_kfree(log_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ; 2161 sizeof(struct buffer_head *), p_s_sb);
2007 reiserfs_kfree(real_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ; 2162 reiserfs_kfree(real_blocks,
2008 return 0 ; 2163 get_desc_trans_len(desc) *
2164 sizeof(struct buffer_head *), p_s_sb);
2165 return -1;
2166 }
2167 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2168 real_blocks[i]->b_size);
2169 set_buffer_uptodate(real_blocks[i]);
2170 brelse(log_blocks[i]);
2171 }
2172 /* flush out the real blocks */
2173 for (i = 0; i < get_desc_trans_len(desc); i++) {
2174 set_buffer_dirty(real_blocks[i]);
2175 ll_rw_block(WRITE, 1, real_blocks + i);
2176 }
2177 for (i = 0; i < get_desc_trans_len(desc); i++) {
2178 wait_on_buffer(real_blocks[i]);
2179 if (!buffer_uptodate(real_blocks[i])) {
2180 reiserfs_warning(p_s_sb,
2181 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed");
2182 brelse_array(real_blocks + i,
2183 get_desc_trans_len(desc) - i);
2184 brelse(c_bh);
2185 brelse(d_bh);
2186 reiserfs_kfree(log_blocks,
2187 get_desc_trans_len(desc) *
2188 sizeof(struct buffer_head *), p_s_sb);
2189 reiserfs_kfree(real_blocks,
2190 get_desc_trans_len(desc) *
2191 sizeof(struct buffer_head *), p_s_sb);
2192 return -1;
2193 }
2194 brelse(real_blocks[i]);
2195 }
2196 cur_dblock =
2197 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2198 ((trans_offset + get_desc_trans_len(desc) +
2199 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2200 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2201 "journal-1095: setting journal " "start to offset %ld",
2202 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
2203
2204 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2205 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2206 journal->j_last_flush_trans_id = trans_id;
2207 journal->j_trans_id = trans_id + 1;
2208 brelse(c_bh);
2209 brelse(d_bh);
2210 reiserfs_kfree(log_blocks,
2211 le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
2212 p_s_sb);
2213 reiserfs_kfree(real_blocks,
2214 le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
2215 p_s_sb);
2216 return 0;
2009} 2217}
2010 2218
2011/* This function reads blocks starting from block and to max_block of bufsize 2219/* This function reads blocks starting from block and to max_block of bufsize
@@ -2014,39 +2222,39 @@ abort_replay:
2014 Right now it is only used from journal code. But later we might use it 2222 Right now it is only used from journal code. But later we might use it
2015 from other places. 2223 from other places.
2016 Note: Do not use journal_getblk/sb_getblk functions here! */ 2224 Note: Do not use journal_getblk/sb_getblk functions here! */
2017static struct buffer_head * reiserfs_breada (struct block_device *dev, int block, int bufsize, 2225static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
2018 unsigned int max_block) 2226 int bufsize, unsigned int max_block)
2019{ 2227{
2020 struct buffer_head * bhlist[BUFNR]; 2228 struct buffer_head *bhlist[BUFNR];
2021 unsigned int blocks = BUFNR; 2229 unsigned int blocks = BUFNR;
2022 struct buffer_head * bh; 2230 struct buffer_head *bh;
2023 int i, j; 2231 int i, j;
2024 2232
2025 bh = __getblk (dev, block, bufsize ); 2233 bh = __getblk(dev, block, bufsize);
2026 if (buffer_uptodate (bh)) 2234 if (buffer_uptodate(bh))
2027 return (bh); 2235 return (bh);
2028 2236
2029 if (block + BUFNR > max_block) { 2237 if (block + BUFNR > max_block) {
2030 blocks = max_block - block; 2238 blocks = max_block - block;
2031 } 2239 }
2032 bhlist[0] = bh; 2240 bhlist[0] = bh;
2033 j = 1; 2241 j = 1;
2034 for (i = 1; i < blocks; i++) { 2242 for (i = 1; i < blocks; i++) {
2035 bh = __getblk (dev, block + i, bufsize); 2243 bh = __getblk(dev, block + i, bufsize);
2036 if (buffer_uptodate (bh)) { 2244 if (buffer_uptodate(bh)) {
2037 brelse (bh); 2245 brelse(bh);
2038 break; 2246 break;
2039 } 2247 } else
2040 else bhlist[j++] = bh; 2248 bhlist[j++] = bh;
2041 } 2249 }
2042 ll_rw_block (READ, j, bhlist); 2250 ll_rw_block(READ, j, bhlist);
2043 for(i = 1; i < j; i++) 2251 for (i = 1; i < j; i++)
2044 brelse (bhlist[i]); 2252 brelse(bhlist[i]);
2045 bh = bhlist[0]; 2253 bh = bhlist[0];
2046 wait_on_buffer (bh); 2254 wait_on_buffer(bh);
2047 if (buffer_uptodate (bh)) 2255 if (buffer_uptodate(bh))
2048 return bh; 2256 return bh;
2049 brelse (bh); 2257 brelse(bh);
2050 return NULL; 2258 return NULL;
2051} 2259}
2052 2260
@@ -2059,218 +2267,250 @@ static struct buffer_head * reiserfs_breada (struct block_device *dev, int block
2059** 2267**
2060** On exit, it sets things up so the first transaction will work correctly. 2268** On exit, it sets things up so the first transaction will work correctly.
2061*/ 2269*/
2062static int journal_read(struct super_block *p_s_sb) { 2270static int journal_read(struct super_block *p_s_sb)
2063 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 2271{
2064 struct reiserfs_journal_desc *desc ; 2272 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2065 unsigned long oldest_trans_id = 0; 2273 struct reiserfs_journal_desc *desc;
2066 unsigned long oldest_invalid_trans_id = 0 ; 2274 unsigned long oldest_trans_id = 0;
2067 time_t start ; 2275 unsigned long oldest_invalid_trans_id = 0;
2068 unsigned long oldest_start = 0; 2276 time_t start;
2069 unsigned long cur_dblock = 0 ; 2277 unsigned long oldest_start = 0;
2070 unsigned long newest_mount_id = 9 ; 2278 unsigned long cur_dblock = 0;
2071 struct buffer_head *d_bh ; 2279 unsigned long newest_mount_id = 9;
2072 struct reiserfs_journal_header *jh ; 2280 struct buffer_head *d_bh;
2073 int valid_journal_header = 0 ; 2281 struct reiserfs_journal_header *jh;
2074 int replay_count = 0 ; 2282 int valid_journal_header = 0;
2075 int continue_replay = 1 ; 2283 int replay_count = 0;
2076 int ret ; 2284 int continue_replay = 1;
2077 char b[BDEVNAME_SIZE]; 2285 int ret;
2078 2286 char b[BDEVNAME_SIZE];
2079 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 2287
2080 reiserfs_info (p_s_sb, "checking transaction log (%s)\n", 2288 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2081 bdevname(journal->j_dev_bd, b)); 2289 reiserfs_info(p_s_sb, "checking transaction log (%s)\n",
2082 start = get_seconds(); 2290 bdevname(journal->j_dev_bd, b));
2083 2291 start = get_seconds();
2084 /* step 1, read in the journal header block. Check the transaction it says 2292
2085 ** is the first unflushed, and if that transaction is not valid, 2293 /* step 1, read in the journal header block. Check the transaction it says
2086 ** replay is done 2294 ** is the first unflushed, and if that transaction is not valid,
2087 */ 2295 ** replay is done
2088 journal->j_header_bh = journal_bread(p_s_sb, 2296 */
2089 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2297 journal->j_header_bh = journal_bread(p_s_sb,
2090 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2298 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)
2091 if (!journal->j_header_bh) { 2299 + SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2092 return 1 ; 2300 if (!journal->j_header_bh) {
2093 } 2301 return 1;
2094 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ; 2302 }
2095 if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && 2303 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2096 le32_to_cpu(jh->j_first_unflushed_offset) < SB_ONDISK_JOURNAL_SIZE(p_s_sb) && 2304 if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
2097 le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2305 le32_to_cpu(jh->j_first_unflushed_offset) <
2098 oldest_start = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2306 SB_ONDISK_JOURNAL_SIZE(p_s_sb)
2099 le32_to_cpu(jh->j_first_unflushed_offset) ; 2307 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2100 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2308 oldest_start =
2101 newest_mount_id = le32_to_cpu(jh->j_mount_id); 2309 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2102 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1153: found in " 2310 le32_to_cpu(jh->j_first_unflushed_offset);
2103 "header: first_unflushed_offset %d, last_flushed_trans_id " 2311 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2104 "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 2312 newest_mount_id = le32_to_cpu(jh->j_mount_id);
2105 le32_to_cpu(jh->j_last_flush_trans_id)) ; 2313 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2106 valid_journal_header = 1 ; 2314 "journal-1153: found in "
2107 2315 "header: first_unflushed_offset %d, last_flushed_trans_id "
2108 /* now, we try to read the first unflushed offset. If it is not valid, 2316 "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
2109 ** there is nothing more we can do, and it makes no sense to read 2317 le32_to_cpu(jh->j_last_flush_trans_id));
2110 ** through the whole log. 2318 valid_journal_header = 1;
2111 */ 2319
2112 d_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + le32_to_cpu(jh->j_first_unflushed_offset)) ; 2320 /* now, we try to read the first unflushed offset. If it is not valid,
2113 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL) ; 2321 ** there is nothing more we can do, and it makes no sense to read
2114 if (!ret) { 2322 ** through the whole log.
2115 continue_replay = 0 ; 2323 */
2116 } 2324 d_bh =
2117 brelse(d_bh) ; 2325 journal_bread(p_s_sb,
2118 goto start_log_replay; 2326 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2119 } 2327 le32_to_cpu(jh->j_first_unflushed_offset));
2120 2328 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL);
2121 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { 2329 if (!ret) {
2122 reiserfs_warning (p_s_sb, 2330 continue_replay = 0;
2123 "clm-2076: device is readonly, unable to replay log") ; 2331 }
2124 return -1 ; 2332 brelse(d_bh);
2125 } 2333 goto start_log_replay;
2126 2334 }
2127 /* ok, there are transactions that need to be replayed. start with the first log block, find 2335
2128 ** all the valid transactions, and pick out the oldest. 2336 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) {
2129 */ 2337 reiserfs_warning(p_s_sb,
2130 while(continue_replay && cur_dblock < (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { 2338 "clm-2076: device is readonly, unable to replay log");
2131 /* Note that it is required for blocksize of primary fs device and journal 2339 return -1;
2132 device to be the same */ 2340 }
2133 d_bh = reiserfs_breada(journal->j_dev_bd, cur_dblock, p_s_sb->s_blocksize, 2341
2134 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ; 2342 /* ok, there are transactions that need to be replayed. start with the first log block, find
2135 ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ; 2343 ** all the valid transactions, and pick out the oldest.
2136 if (ret == 1) { 2344 */
2137 desc = (struct reiserfs_journal_desc *)d_bh->b_data ; 2345 while (continue_replay
2138 if (oldest_start == 0) { /* init all oldest_ values */ 2346 && cur_dblock <
2139 oldest_trans_id = get_desc_trans_id(desc) ; 2347 (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2140 oldest_start = d_bh->b_blocknr ; 2348 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) {
2141 newest_mount_id = get_desc_mount_id(desc) ; 2349 /* Note that it is required for blocksize of primary fs device and journal
2142 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1179: Setting " 2350 device to be the same */
2143 "oldest_start to offset %llu, trans_id %lu", 2351 d_bh =
2144 oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2352 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2145 oldest_trans_id) ; 2353 p_s_sb->s_blocksize,
2146 } else if (oldest_trans_id > get_desc_trans_id(desc)) { 2354 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2147 /* one we just read was older */ 2355 SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2148 oldest_trans_id = get_desc_trans_id(desc) ; 2356 ret =
2149 oldest_start = d_bh->b_blocknr ; 2357 journal_transaction_is_valid(p_s_sb, d_bh,
2150 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1180: Resetting " 2358 &oldest_invalid_trans_id,
2151 "oldest_start to offset %lu, trans_id %lu", 2359 &newest_mount_id);
2152 oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2360 if (ret == 1) {
2153 oldest_trans_id) ; 2361 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2154 } 2362 if (oldest_start == 0) { /* init all oldest_ values */
2155 if (newest_mount_id < get_desc_mount_id(desc)) { 2363 oldest_trans_id = get_desc_trans_id(desc);
2156 newest_mount_id = get_desc_mount_id(desc) ; 2364 oldest_start = d_bh->b_blocknr;
2365 newest_mount_id = get_desc_mount_id(desc);
2366 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2367 "journal-1179: Setting "
2368 "oldest_start to offset %llu, trans_id %lu",
2369 oldest_start -
2370 SB_ONDISK_JOURNAL_1st_BLOCK
2371 (p_s_sb), oldest_trans_id);
2372 } else if (oldest_trans_id > get_desc_trans_id(desc)) {
2373 /* one we just read was older */
2374 oldest_trans_id = get_desc_trans_id(desc);
2375 oldest_start = d_bh->b_blocknr;
2376 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2377 "journal-1180: Resetting "
2378 "oldest_start to offset %lu, trans_id %lu",
2379 oldest_start -
2380 SB_ONDISK_JOURNAL_1st_BLOCK
2381 (p_s_sb), oldest_trans_id);
2382 }
2383 if (newest_mount_id < get_desc_mount_id(desc)) {
2384 newest_mount_id = get_desc_mount_id(desc);
2385 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2386 "journal-1299: Setting "
2387 "newest_mount_id to %d",
2388 get_desc_mount_id(desc));
2389 }
2390 cur_dblock += get_desc_trans_len(desc) + 2;
2391 } else {
2392 cur_dblock++;
2393 }
2394 brelse(d_bh);
2395 }
2396
2397 start_log_replay:
2398 cur_dblock = oldest_start;
2399 if (oldest_trans_id) {
2400 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2401 "journal-1206: Starting replay "
2402 "from offset %llu, trans_id %lu",
2403 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2404 oldest_trans_id);
2405
2406 }
2407 replay_count = 0;
2408 while (continue_replay && oldest_trans_id > 0) {
2409 ret =
2410 journal_read_transaction(p_s_sb, cur_dblock, oldest_start,
2411 oldest_trans_id, newest_mount_id);
2412 if (ret < 0) {
2413 return ret;
2414 } else if (ret != 0) {
2415 break;
2416 }
2417 cur_dblock =
2418 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start;
2419 replay_count++;
2420 if (cur_dblock == oldest_start)
2421 break;
2422 }
2423
2424 if (oldest_trans_id == 0) {
2425 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2426 "journal-1225: No valid " "transactions found");
2427 }
2428 /* j_start does not get set correctly if we don't replay any transactions.
2429 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
2430 ** copy the trans_id from the header
2431 */
2432 if (valid_journal_header && replay_count == 0) {
2433 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2434 journal->j_trans_id =
2435 le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2436 journal->j_last_flush_trans_id =
2437 le32_to_cpu(jh->j_last_flush_trans_id);
2438 journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
2439 } else {
2440 journal->j_mount_id = newest_mount_id + 1;
2441 }
2157 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 2442 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2158 "newest_mount_id to %d", get_desc_mount_id(desc)); 2443 "newest_mount_id to %lu", journal->j_mount_id);
2159 } 2444 journal->j_first_unflushed_offset = journal->j_start;
2160 cur_dblock += get_desc_trans_len(desc) + 2 ; 2445 if (replay_count > 0) {
2161 } else { 2446 reiserfs_info(p_s_sb,
2162 cur_dblock++ ; 2447 "replayed %d transactions in %lu seconds\n",
2163 } 2448 replay_count, get_seconds() - start);
2164 brelse(d_bh) ; 2449 }
2165 } 2450 if (!bdev_read_only(p_s_sb->s_bdev) &&
2166 2451 _update_journal_header_block(p_s_sb, journal->j_start,
2167start_log_replay: 2452 journal->j_last_flush_trans_id)) {
2168 cur_dblock = oldest_start ; 2453 /* replay failed, caller must call free_journal_ram and abort
2169 if (oldest_trans_id) { 2454 ** the mount
2170 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1206: Starting replay " 2455 */
2171 "from offset %llu, trans_id %lu", 2456 return -1;
2172 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2457 }
2173 oldest_trans_id) ; 2458 return 0;
2174
2175 }
2176 replay_count = 0 ;
2177 while(continue_replay && oldest_trans_id > 0) {
2178 ret = journal_read_transaction(p_s_sb, cur_dblock, oldest_start, oldest_trans_id, newest_mount_id) ;
2179 if (ret < 0) {
2180 return ret ;
2181 } else if (ret != 0) {
2182 break ;
2183 }
2184 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start ;
2185 replay_count++ ;
2186 if (cur_dblock == oldest_start)
2187 break;
2188 }
2189
2190 if (oldest_trans_id == 0) {
2191 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1225: No valid "
2192 "transactions found") ;
2193 }
2194 /* j_start does not get set correctly if we don't replay any transactions.
2195 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
2196 ** copy the trans_id from the header
2197 */
2198 if (valid_journal_header && replay_count == 0) {
2199 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset) ;
2200 journal->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2201 journal->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ;
2202 journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
2203 } else {
2204 journal->j_mount_id = newest_mount_id + 1 ;
2205 }
2206 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2207 "newest_mount_id to %lu", journal->j_mount_id) ;
2208 journal->j_first_unflushed_offset = journal->j_start ;
2209 if (replay_count > 0) {
2210 reiserfs_info (p_s_sb, "replayed %d transactions in %lu seconds\n",
2211 replay_count, get_seconds() - start) ;
2212 }
2213 if (!bdev_read_only(p_s_sb->s_bdev) &&
2214 _update_journal_header_block(p_s_sb, journal->j_start,
2215 journal->j_last_flush_trans_id))
2216 {
2217 /* replay failed, caller must call free_journal_ram and abort
2218 ** the mount
2219 */
2220 return -1 ;
2221 }
2222 return 0 ;
2223} 2459}
2224 2460
2225static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) 2461static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2226{ 2462{
2227 struct reiserfs_journal_list *jl; 2463 struct reiserfs_journal_list *jl;
2228retry: 2464 retry:
2229 jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s); 2465 jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS,
2230 if (!jl) { 2466 s);
2231 yield(); 2467 if (!jl) {
2232 goto retry; 2468 yield();
2233 } 2469 goto retry;
2234 memset(jl, 0, sizeof(*jl)); 2470 }
2235 INIT_LIST_HEAD(&jl->j_list); 2471 memset(jl, 0, sizeof(*jl));
2236 INIT_LIST_HEAD(&jl->j_working_list); 2472 INIT_LIST_HEAD(&jl->j_list);
2237 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2473 INIT_LIST_HEAD(&jl->j_working_list);
2238 INIT_LIST_HEAD(&jl->j_bh_list); 2474 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2239 sema_init(&jl->j_commit_lock, 1); 2475 INIT_LIST_HEAD(&jl->j_bh_list);
2240 SB_JOURNAL(s)->j_num_lists++; 2476 sema_init(&jl->j_commit_lock, 1);
2241 get_journal_list(jl); 2477 SB_JOURNAL(s)->j_num_lists++;
2242 return jl; 2478 get_journal_list(jl);
2243} 2479 return jl;
2244 2480}
2245static void journal_list_init(struct super_block *p_s_sb) { 2481
2246 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); 2482static void journal_list_init(struct super_block *p_s_sb)
2247} 2483{
2248 2484 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
2249static int release_journal_dev( struct super_block *super, 2485}
2250 struct reiserfs_journal *journal ) 2486
2251{ 2487static int release_journal_dev(struct super_block *super,
2252 int result; 2488 struct reiserfs_journal *journal)
2253 2489{
2254 result = 0; 2490 int result;
2255 2491
2256 if( journal -> j_dev_file != NULL ) { 2492 result = 0;
2257 result = filp_close( journal -> j_dev_file, NULL ); 2493
2258 journal -> j_dev_file = NULL; 2494 if (journal->j_dev_file != NULL) {
2259 journal -> j_dev_bd = NULL; 2495 result = filp_close(journal->j_dev_file, NULL);
2260 } else if( journal -> j_dev_bd != NULL ) { 2496 journal->j_dev_file = NULL;
2261 result = blkdev_put( journal -> j_dev_bd ); 2497 journal->j_dev_bd = NULL;
2262 journal -> j_dev_bd = NULL; 2498 } else if (journal->j_dev_bd != NULL) {
2263 } 2499 result = blkdev_put(journal->j_dev_bd);
2264 2500 journal->j_dev_bd = NULL;
2265 if( result != 0 ) { 2501 }
2266 reiserfs_warning(super, "sh-457: release_journal_dev: Cannot release journal device: %i", result ); 2502
2267 } 2503 if (result != 0) {
2268 return result; 2504 reiserfs_warning(super,
2269} 2505 "sh-457: release_journal_dev: Cannot release journal device: %i",
2270 2506 result);
2271static int journal_init_dev( struct super_block *super, 2507 }
2272 struct reiserfs_journal *journal, 2508 return result;
2273 const char *jdev_name ) 2509}
2510
2511static int journal_init_dev(struct super_block *super,
2512 struct reiserfs_journal *journal,
2513 const char *jdev_name)
2274{ 2514{
2275 int result; 2515 int result;
2276 dev_t jdev; 2516 dev_t jdev;
@@ -2279,50 +2519,51 @@ static int journal_init_dev( struct super_block *super,
2279 2519
2280 result = 0; 2520 result = 0;
2281 2521
2282 journal -> j_dev_bd = NULL; 2522 journal->j_dev_bd = NULL;
2283 journal -> j_dev_file = NULL; 2523 journal->j_dev_file = NULL;
2284 jdev = SB_ONDISK_JOURNAL_DEVICE( super ) ? 2524 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2285 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2525 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2286 2526
2287 if (bdev_read_only(super->s_bdev)) 2527 if (bdev_read_only(super->s_bdev))
2288 blkdev_mode = FMODE_READ; 2528 blkdev_mode = FMODE_READ;
2289 2529
2290 /* there is no "jdev" option and journal is on separate device */ 2530 /* there is no "jdev" option and journal is on separate device */
2291 if( ( !jdev_name || !jdev_name[ 0 ] ) ) { 2531 if ((!jdev_name || !jdev_name[0])) {
2292 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2532 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2293 if (IS_ERR(journal->j_dev_bd)) { 2533 if (IS_ERR(journal->j_dev_bd)) {
2294 result = PTR_ERR(journal->j_dev_bd); 2534 result = PTR_ERR(journal->j_dev_bd);
2295 journal->j_dev_bd = NULL; 2535 journal->j_dev_bd = NULL;
2296 reiserfs_warning (super, "sh-458: journal_init_dev: " 2536 reiserfs_warning(super, "sh-458: journal_init_dev: "
2297 "cannot init journal device '%s': %i", 2537 "cannot init journal device '%s': %i",
2298 __bdevname(jdev, b), result ); 2538 __bdevname(jdev, b), result);
2299 return result; 2539 return result;
2300 } else if (jdev != super->s_dev) 2540 } else if (jdev != super->s_dev)
2301 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2541 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2302 return 0; 2542 return 0;
2303 } 2543 }
2304 2544
2305 journal -> j_dev_file = filp_open( jdev_name, 0, 0 ); 2545 journal->j_dev_file = filp_open(jdev_name, 0, 0);
2306 if( !IS_ERR( journal -> j_dev_file ) ) { 2546 if (!IS_ERR(journal->j_dev_file)) {
2307 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2547 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
2308 if( !S_ISBLK( jdev_inode -> i_mode ) ) { 2548 if (!S_ISBLK(jdev_inode->i_mode)) {
2309 reiserfs_warning(super, "journal_init_dev: '%s' is " 2549 reiserfs_warning(super, "journal_init_dev: '%s' is "
2310 "not a block device", jdev_name ); 2550 "not a block device", jdev_name);
2311 result = -ENOTBLK; 2551 result = -ENOTBLK;
2312 release_journal_dev( super, journal ); 2552 release_journal_dev(super, journal);
2313 } else { 2553 } else {
2314 /* ok */ 2554 /* ok */
2315 journal->j_dev_bd = I_BDEV(jdev_inode); 2555 journal->j_dev_bd = I_BDEV(jdev_inode);
2316 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2556 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2317 reiserfs_info(super, "journal_init_dev: journal device: %s\n", 2557 reiserfs_info(super,
2558 "journal_init_dev: journal device: %s\n",
2318 bdevname(journal->j_dev_bd, b)); 2559 bdevname(journal->j_dev_bd, b));
2319 } 2560 }
2320 } else { 2561 } else {
2321 result = PTR_ERR( journal -> j_dev_file ); 2562 result = PTR_ERR(journal->j_dev_file);
2322 journal -> j_dev_file = NULL; 2563 journal->j_dev_file = NULL;
2323 reiserfs_warning (super, 2564 reiserfs_warning(super,
2324 "journal_init_dev: Cannot open '%s': %i", 2565 "journal_init_dev: Cannot open '%s': %i",
2325 jdev_name, result ); 2566 jdev_name, result);
2326 } 2567 }
2327 return result; 2568 return result;
2328} 2569}
@@ -2330,193 +2571,214 @@ static int journal_init_dev( struct super_block *super,
2330/* 2571/*
2331** must be called once on fs mount. calls journal_read for you 2572** must be called once on fs mount. calls journal_read for you
2332*/ 2573*/
2333int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_format, unsigned int commit_max_age) { 2574int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2334 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2 ; 2575 int old_format, unsigned int commit_max_age)
2335 struct buffer_head *bhjh; 2576{
2336 struct reiserfs_super_block * rs; 2577 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2;
2337 struct reiserfs_journal_header *jh; 2578 struct buffer_head *bhjh;
2338 struct reiserfs_journal *journal; 2579 struct reiserfs_super_block *rs;
2339 struct reiserfs_journal_list *jl; 2580 struct reiserfs_journal_header *jh;
2340 char b[BDEVNAME_SIZE]; 2581 struct reiserfs_journal *journal;
2341 2582 struct reiserfs_journal_list *jl;
2342 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ; 2583 char b[BDEVNAME_SIZE];
2343 if (!journal) { 2584
2344 reiserfs_warning (p_s_sb, "journal-1256: unable to get memory for journal structure") ; 2585 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal));
2345 return 1 ; 2586 if (!journal) {
2346 } 2587 reiserfs_warning(p_s_sb,
2347 memset(journal, 0, sizeof(struct reiserfs_journal)) ; 2588 "journal-1256: unable to get memory for journal structure");
2348 INIT_LIST_HEAD(&journal->j_bitmap_nodes) ; 2589 return 1;
2349 INIT_LIST_HEAD (&journal->j_prealloc_list); 2590 }
2350 INIT_LIST_HEAD(&journal->j_working_list); 2591 memset(journal, 0, sizeof(struct reiserfs_journal));
2351 INIT_LIST_HEAD(&journal->j_journal_list); 2592 INIT_LIST_HEAD(&journal->j_bitmap_nodes);
2352 journal->j_persistent_trans = 0; 2593 INIT_LIST_HEAD(&journal->j_prealloc_list);
2353 if (reiserfs_allocate_list_bitmaps(p_s_sb, 2594 INIT_LIST_HEAD(&journal->j_working_list);
2354 journal->j_list_bitmap, 2595 INIT_LIST_HEAD(&journal->j_journal_list);
2355 SB_BMAP_NR(p_s_sb))) 2596 journal->j_persistent_trans = 0;
2356 goto free_and_return ; 2597 if (reiserfs_allocate_list_bitmaps(p_s_sb,
2357 allocate_bitmap_nodes(p_s_sb) ; 2598 journal->j_list_bitmap,
2358 2599 SB_BMAP_NR(p_s_sb)))
2359 /* reserved for journal area support */ 2600 goto free_and_return;
2360 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2601 allocate_bitmap_nodes(p_s_sb);
2361 REISERFS_OLD_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2602
2362 SB_BMAP_NR(p_s_sb) + 1 : 2603 /* reserved for journal area support */
2363 REISERFS_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2); 2604 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
2364 2605 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2365 /* Sanity check to see is the standard journal fitting withing first bitmap 2606 / p_s_sb->s_blocksize +
2366 (actual for small blocksizes) */ 2607 SB_BMAP_NR(p_s_sb) +
2367 if ( !SB_ONDISK_JOURNAL_DEVICE( p_s_sb ) && 2608 1 :
2368 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8) ) { 2609 REISERFS_DISK_OFFSET_IN_BYTES /
2369 reiserfs_warning (p_s_sb, "journal-1393: journal does not fit for area " 2610 p_s_sb->s_blocksize + 2);
2370 "addressed by first of bitmap blocks. It starts at " 2611
2371 "%u and its size is %u. Block size %ld", 2612 /* Sanity check to see is the standard journal fitting withing first bitmap
2372 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), 2613 (actual for small blocksizes) */
2373 SB_ONDISK_JOURNAL_SIZE(p_s_sb), p_s_sb->s_blocksize); 2614 if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) &&
2374 goto free_and_return; 2615 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) +
2375 } 2616 SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) {
2376 2617 reiserfs_warning(p_s_sb,
2377 if( journal_init_dev( p_s_sb, journal, j_dev_name ) != 0 ) { 2618 "journal-1393: journal does not fit for area "
2378 reiserfs_warning (p_s_sb, "sh-462: unable to initialize jornal device"); 2619 "addressed by first of bitmap blocks. It starts at "
2379 goto free_and_return; 2620 "%u and its size is %u. Block size %ld",
2380 } 2621 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb),
2381 2622 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2382 rs = SB_DISK_SUPER_BLOCK(p_s_sb); 2623 p_s_sb->s_blocksize);
2383 2624 goto free_and_return;
2384 /* read journal header */ 2625 }
2385 bhjh = journal_bread(p_s_sb, 2626
2386 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2627 if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) {
2387 if (!bhjh) { 2628 reiserfs_warning(p_s_sb,
2388 reiserfs_warning (p_s_sb, "sh-459: unable to read journal header"); 2629 "sh-462: unable to initialize jornal device");
2389 goto free_and_return; 2630 goto free_and_return;
2390 } 2631 }
2391 jh = (struct reiserfs_journal_header *)(bhjh->b_data); 2632
2392 2633 rs = SB_DISK_SUPER_BLOCK(p_s_sb);
2393 /* make sure that journal matches to the super block */ 2634
2394 if (is_reiserfs_jr(rs) && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != sb_jp_journal_magic(rs))) { 2635 /* read journal header */
2395 reiserfs_warning (p_s_sb, "sh-460: journal header magic %x " 2636 bhjh = journal_bread(p_s_sb,
2396 "(device %s) does not match to magic found in super " 2637 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2397 "block %x", 2638 SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2398 jh->jh_journal.jp_journal_magic, 2639 if (!bhjh) {
2399 bdevname( journal->j_dev_bd, b), 2640 reiserfs_warning(p_s_sb,
2400 sb_jp_journal_magic(rs)); 2641 "sh-459: unable to read journal header");
2401 brelse (bhjh); 2642 goto free_and_return;
2402 goto free_and_return; 2643 }
2403 } 2644 jh = (struct reiserfs_journal_header *)(bhjh->b_data);
2404 2645
2405 journal->j_trans_max = le32_to_cpu (jh->jh_journal.jp_journal_trans_max); 2646 /* make sure that journal matches to the super block */
2406 journal->j_max_batch = le32_to_cpu (jh->jh_journal.jp_journal_max_batch); 2647 if (is_reiserfs_jr(rs)
2407 journal->j_max_commit_age = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age); 2648 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2408 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 2649 sb_jp_journal_magic(rs))) {
2409 2650 reiserfs_warning(p_s_sb,
2410 if (journal->j_trans_max) { 2651 "sh-460: journal header magic %x "
2411 /* make sure these parameters are available, assign it if they are not */ 2652 "(device %s) does not match to magic found in super "
2412 __u32 initial = journal->j_trans_max; 2653 "block %x", jh->jh_journal.jp_journal_magic,
2413 __u32 ratio = 1; 2654 bdevname(journal->j_dev_bd, b),
2414 2655 sb_jp_journal_magic(rs));
2415 if (p_s_sb->s_blocksize < 4096) 2656 brelse(bhjh);
2416 ratio = 4096 / p_s_sb->s_blocksize; 2657 goto free_and_return;
2417 2658 }
2418 if (SB_ONDISK_JOURNAL_SIZE(p_s_sb)/journal->j_trans_max < JOURNAL_MIN_RATIO) 2659
2419 journal->j_trans_max = SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; 2660 journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
2420 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) 2661 journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2421 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT / ratio; 2662 journal->j_max_commit_age =
2422 if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) 2663 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
2423 journal->j_trans_max = JOURNAL_TRANS_MIN_DEFAULT / ratio; 2664 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
2424 2665
2425 if (journal->j_trans_max != initial) 2666 if (journal->j_trans_max) {
2426 reiserfs_warning (p_s_sb, "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", 2667 /* make sure these parameters are available, assign it if they are not */
2427 initial, journal->j_trans_max); 2668 __u32 initial = journal->j_trans_max;
2428 2669 __u32 ratio = 1;
2429 journal->j_max_batch = journal->j_trans_max* 2670
2430 JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT; 2671 if (p_s_sb->s_blocksize < 4096)
2431 } 2672 ratio = 4096 / p_s_sb->s_blocksize;
2432 2673
2433 if (!journal->j_trans_max) { 2674 if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max <
2434 /*we have the file system was created by old version of mkreiserfs 2675 JOURNAL_MIN_RATIO)
2435 so this field contains zero value */ 2676 journal->j_trans_max =
2436 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT ; 2677 SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO;
2437 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT ; 2678 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio)
2438 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE ; 2679 journal->j_trans_max =
2439 2680 JOURNAL_TRANS_MAX_DEFAULT / ratio;
2440 /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096 2681 if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio)
2441 trans max size is decreased proportionally */ 2682 journal->j_trans_max =
2442 if (p_s_sb->s_blocksize < 4096) { 2683 JOURNAL_TRANS_MIN_DEFAULT / ratio;
2443 journal->j_trans_max /= (4096 / p_s_sb->s_blocksize) ; 2684
2444 journal->j_max_batch = (journal->j_trans_max) * 9 / 10 ; 2685 if (journal->j_trans_max != initial)
2445 } 2686 reiserfs_warning(p_s_sb,
2446 } 2687 "sh-461: journal_init: wrong transaction max size (%u). Changed to %u",
2447 2688 initial, journal->j_trans_max);
2448 journal->j_default_max_commit_age = journal->j_max_commit_age; 2689
2449 2690 journal->j_max_batch = journal->j_trans_max *
2450 if (commit_max_age != 0) { 2691 JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT;
2451 journal->j_max_commit_age = commit_max_age; 2692 }
2452 journal->j_max_trans_age = commit_max_age; 2693
2453 } 2694 if (!journal->j_trans_max) {
2454 2695 /*we have the file system was created by old version of mkreiserfs
2455 reiserfs_info (p_s_sb, "journal params: device %s, size %u, " 2696 so this field contains zero value */
2456 "journal first block %u, max trans len %u, max batch %u, " 2697 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2457 "max commit age %u, max trans age %u\n", 2698 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2458 bdevname( journal->j_dev_bd, b), 2699 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2459 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2700
2460 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2701 /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096
2461 journal->j_trans_max, 2702 trans max size is decreased proportionally */
2462 journal->j_max_batch, 2703 if (p_s_sb->s_blocksize < 4096) {
2463 journal->j_max_commit_age, 2704 journal->j_trans_max /= (4096 / p_s_sb->s_blocksize);
2464 journal->j_max_trans_age); 2705 journal->j_max_batch = (journal->j_trans_max) * 9 / 10;
2465 2706 }
2466 brelse (bhjh); 2707 }
2467 2708
2468 journal->j_list_bitmap_index = 0 ; 2709 journal->j_default_max_commit_age = journal->j_max_commit_age;
2469 journal_list_init(p_s_sb) ; 2710
2470 2711 if (commit_max_age != 0) {
2471 memset(journal->j_list_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; 2712 journal->j_max_commit_age = commit_max_age;
2472 2713 journal->j_max_trans_age = commit_max_age;
2473 INIT_LIST_HEAD(&journal->j_dirty_buffers) ; 2714 }
2474 spin_lock_init(&journal->j_dirty_buffers_lock) ; 2715
2475 2716 reiserfs_info(p_s_sb, "journal params: device %s, size %u, "
2476 journal->j_start = 0 ; 2717 "journal first block %u, max trans len %u, max batch %u, "
2477 journal->j_len = 0 ; 2718 "max commit age %u, max trans age %u\n",
2478 journal->j_len_alloc = 0 ; 2719 bdevname(journal->j_dev_bd, b),
2479 atomic_set(&(journal->j_wcount), 0) ; 2720 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2480 atomic_set(&(journal->j_async_throttle), 0) ; 2721 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2481 journal->j_bcount = 0 ; 2722 journal->j_trans_max,
2482 journal->j_trans_start_time = 0 ; 2723 journal->j_max_batch,
2483 journal->j_last = NULL ; 2724 journal->j_max_commit_age, journal->j_max_trans_age);
2484 journal->j_first = NULL ; 2725
2485 init_waitqueue_head(&(journal->j_join_wait)) ; 2726 brelse(bhjh);
2486 sema_init(&journal->j_lock, 1); 2727
2487 sema_init(&journal->j_flush_sem, 1); 2728 journal->j_list_bitmap_index = 0;
2488 2729 journal_list_init(p_s_sb);
2489 journal->j_trans_id = 10 ; 2730
2490 journal->j_mount_id = 10 ; 2731 memset(journal->j_list_hash_table, 0,
2491 journal->j_state = 0 ; 2732 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
2492 atomic_set(&(journal->j_jlock), 0) ; 2733
2493 journal->j_cnode_free_list = allocate_cnodes(num_cnodes) ; 2734 INIT_LIST_HEAD(&journal->j_dirty_buffers);
2494 journal->j_cnode_free_orig = journal->j_cnode_free_list ; 2735 spin_lock_init(&journal->j_dirty_buffers_lock);
2495 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0 ; 2736
2496 journal->j_cnode_used = 0 ; 2737 journal->j_start = 0;
2497 journal->j_must_wait = 0 ; 2738 journal->j_len = 0;
2498 2739 journal->j_len_alloc = 0;
2499 init_journal_hash(p_s_sb) ; 2740 atomic_set(&(journal->j_wcount), 0);
2500 jl = journal->j_current_jl; 2741 atomic_set(&(journal->j_async_throttle), 0);
2501 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); 2742 journal->j_bcount = 0;
2502 if (!jl->j_list_bitmap) { 2743 journal->j_trans_start_time = 0;
2503 reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0") ; 2744 journal->j_last = NULL;
2504 goto free_and_return; 2745 journal->j_first = NULL;
2505 } 2746 init_waitqueue_head(&(journal->j_join_wait));
2506 if (journal_read(p_s_sb) < 0) { 2747 sema_init(&journal->j_lock, 1);
2507 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount") ; 2748 sema_init(&journal->j_flush_sem, 1);
2508 goto free_and_return; 2749
2509 } 2750 journal->j_trans_id = 10;
2510 2751 journal->j_mount_id = 10;
2511 reiserfs_mounted_fs_count++ ; 2752 journal->j_state = 0;
2512 if (reiserfs_mounted_fs_count <= 1) 2753 atomic_set(&(journal->j_jlock), 0);
2513 commit_wq = create_workqueue("reiserfs"); 2754 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2514 2755 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2515 INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); 2756 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2516 return 0 ; 2757 journal->j_cnode_used = 0;
2517free_and_return: 2758 journal->j_must_wait = 0;
2518 free_journal_ram(p_s_sb); 2759
2519 return 1; 2760 init_journal_hash(p_s_sb);
2761 jl = journal->j_current_jl;
2762 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl);
2763 if (!jl->j_list_bitmap) {
2764 reiserfs_warning(p_s_sb,
2765 "journal-2005, get_list_bitmap failed for journal list 0");
2766 goto free_and_return;
2767 }
2768 if (journal_read(p_s_sb) < 0) {
2769 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount");
2770 goto free_and_return;
2771 }
2772
2773 reiserfs_mounted_fs_count++;
2774 if (reiserfs_mounted_fs_count <= 1)
2775 commit_wq = create_workqueue("reiserfs");
2776
2777 INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
2778 return 0;
2779 free_and_return:
2780 free_journal_ram(p_s_sb);
2781 return 1;
2520} 2782}
2521 2783
2522/* 2784/*
@@ -2524,96 +2786,102 @@ free_and_return:
2524** be used by delete to make sure they don't write more than can fit inside a single 2786** be used by delete to make sure they don't write more than can fit inside a single
2525** transaction 2787** transaction
2526*/ 2788*/
2527int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { 2789int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2528 struct reiserfs_journal *journal = SB_JOURNAL (th->t_super); 2790 int new_alloc)
2529 time_t now = get_seconds() ; 2791{
2530 /* cannot restart while nested */ 2792 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
2531 BUG_ON (!th->t_trans_id); 2793 time_t now = get_seconds();
2532 if (th->t_refcount > 1) 2794 /* cannot restart while nested */
2533 return 0 ; 2795 BUG_ON(!th->t_trans_id);
2534 if ( journal->j_must_wait > 0 || 2796 if (th->t_refcount > 1)
2535 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 2797 return 0;
2536 atomic_read(&(journal->j_jlock)) || 2798 if (journal->j_must_wait > 0 ||
2537 (now - journal->j_trans_start_time) > journal->j_max_trans_age || 2799 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
2538 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2800 atomic_read(&(journal->j_jlock)) ||
2539 return 1 ; 2801 (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
2540 } 2802 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2541 return 0 ; 2803 return 1;
2804 }
2805 return 0;
2542} 2806}
2543 2807
2544/* this must be called inside a transaction, and requires the 2808/* this must be called inside a transaction, and requires the
2545** kernel_lock to be held 2809** kernel_lock to be held
2546*/ 2810*/
2547void reiserfs_block_writes(struct reiserfs_transaction_handle *th) { 2811void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2548 struct reiserfs_journal *journal = SB_JOURNAL (th->t_super); 2812{
2549 BUG_ON (!th->t_trans_id); 2813 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
2550 journal->j_must_wait = 1 ; 2814 BUG_ON(!th->t_trans_id);
2551 set_bit(J_WRITERS_BLOCKED, &journal->j_state) ; 2815 journal->j_must_wait = 1;
2552 return ; 2816 set_bit(J_WRITERS_BLOCKED, &journal->j_state);
2817 return;
2553} 2818}
2554 2819
2555/* this must be called without a transaction started, and does not 2820/* this must be called without a transaction started, and does not
2556** require BKL 2821** require BKL
2557*/ 2822*/
2558void reiserfs_allow_writes(struct super_block *s) { 2823void reiserfs_allow_writes(struct super_block *s)
2559 struct reiserfs_journal *journal = SB_JOURNAL (s); 2824{
2560 clear_bit(J_WRITERS_BLOCKED, &journal->j_state) ; 2825 struct reiserfs_journal *journal = SB_JOURNAL(s);
2561 wake_up(&journal->j_join_wait) ; 2826 clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
2827 wake_up(&journal->j_join_wait);
2562} 2828}
2563 2829
2564/* this must be called without a transaction started, and does not 2830/* this must be called without a transaction started, and does not
2565** require BKL 2831** require BKL
2566*/ 2832*/
2567void reiserfs_wait_on_write_block(struct super_block *s) { 2833void reiserfs_wait_on_write_block(struct super_block *s)
2568 struct reiserfs_journal *journal = SB_JOURNAL (s); 2834{
2569 wait_event(journal->j_join_wait, 2835 struct reiserfs_journal *journal = SB_JOURNAL(s);
2570 !test_bit(J_WRITERS_BLOCKED, &journal->j_state)) ; 2836 wait_event(journal->j_join_wait,
2571} 2837 !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
2572 2838}
2573static void queue_log_writer(struct super_block *s) { 2839
2574 wait_queue_t wait; 2840static void queue_log_writer(struct super_block *s)
2575 struct reiserfs_journal *journal = SB_JOURNAL (s); 2841{
2576 set_bit(J_WRITERS_QUEUED, &journal->j_state); 2842 wait_queue_t wait;
2577 2843 struct reiserfs_journal *journal = SB_JOURNAL(s);
2578 /* 2844 set_bit(J_WRITERS_QUEUED, &journal->j_state);
2579 * we don't want to use wait_event here because 2845
2580 * we only want to wait once. 2846 /*
2581 */ 2847 * we don't want to use wait_event here because
2582 init_waitqueue_entry(&wait, current); 2848 * we only want to wait once.
2583 add_wait_queue(&journal->j_join_wait, &wait); 2849 */
2584 set_current_state(TASK_UNINTERRUPTIBLE); 2850 init_waitqueue_entry(&wait, current);
2585 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 2851 add_wait_queue(&journal->j_join_wait, &wait);
2586 schedule();
2587 current->state = TASK_RUNNING;
2588 remove_wait_queue(&journal->j_join_wait, &wait);
2589}
2590
2591static void wake_queued_writers(struct super_block *s) {
2592 struct reiserfs_journal *journal = SB_JOURNAL (s);
2593 if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
2594 wake_up(&journal->j_join_wait);
2595}
2596
2597static void let_transaction_grow(struct super_block *sb,
2598 unsigned long trans_id)
2599{
2600 struct reiserfs_journal *journal = SB_JOURNAL (sb);
2601 unsigned long bcount = journal->j_bcount;
2602 while(1) {
2603 set_current_state(TASK_UNINTERRUPTIBLE); 2852 set_current_state(TASK_UNINTERRUPTIBLE);
2604 schedule_timeout(1); 2853 if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
2605 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 2854 schedule();
2606 while ((atomic_read(&journal->j_wcount) > 0 || 2855 current->state = TASK_RUNNING;
2607 atomic_read(&journal->j_jlock)) && 2856 remove_wait_queue(&journal->j_join_wait, &wait);
2608 journal->j_trans_id == trans_id) { 2857}
2609 queue_log_writer(sb); 2858
2859static void wake_queued_writers(struct super_block *s)
2860{
2861 struct reiserfs_journal *journal = SB_JOURNAL(s);
2862 if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
2863 wake_up(&journal->j_join_wait);
2864}
2865
2866static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
2867{
2868 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2869 unsigned long bcount = journal->j_bcount;
2870 while (1) {
2871 set_current_state(TASK_UNINTERRUPTIBLE);
2872 schedule_timeout(1);
2873 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2874 while ((atomic_read(&journal->j_wcount) > 0 ||
2875 atomic_read(&journal->j_jlock)) &&
2876 journal->j_trans_id == trans_id) {
2877 queue_log_writer(sb);
2878 }
2879 if (journal->j_trans_id != trans_id)
2880 break;
2881 if (bcount == journal->j_bcount)
2882 break;
2883 bcount = journal->j_bcount;
2610 } 2884 }
2611 if (journal->j_trans_id != trans_id)
2612 break;
2613 if (bcount == journal->j_bcount)
2614 break;
2615 bcount = journal->j_bcount;
2616 }
2617} 2885}
2618 2886
2619/* join == true if you must join an existing transaction. 2887/* join == true if you must join an existing transaction.
@@ -2622,221 +2890,244 @@ static void let_transaction_grow(struct super_block *sb,
2622** this will block until the transaction is joinable. send the number of blocks you 2890** this will block until the transaction is joinable. send the number of blocks you
2623** expect to use in nblocks. 2891** expect to use in nblocks.
2624*/ 2892*/
2625static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) { 2893static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2626 time_t now = get_seconds() ; 2894 struct super_block *p_s_sb, unsigned long nblocks,
2627 int old_trans_id ; 2895 int join)
2628 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2896{
2629 struct reiserfs_transaction_handle myth; 2897 time_t now = get_seconds();
2630 int sched_count = 0; 2898 int old_trans_id;
2631 int retval; 2899 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2632 2900 struct reiserfs_transaction_handle myth;
2633 reiserfs_check_lock_depth(p_s_sb, "journal_begin") ; 2901 int sched_count = 0;
2634 2902 int retval;
2635 PROC_INFO_INC( p_s_sb, journal.journal_being ); 2903
2636 /* set here for journal_join */ 2904 reiserfs_check_lock_depth(p_s_sb, "journal_begin");
2637 th->t_refcount = 1; 2905 if (nblocks > journal->j_trans_max)
2638 th->t_super = p_s_sb ; 2906 BUG();
2639 2907
2640relock: 2908 PROC_INFO_INC(p_s_sb, journal.journal_being);
2641 lock_journal(p_s_sb) ; 2909 /* set here for journal_join */
2642 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted (journal)) { 2910 th->t_refcount = 1;
2643 unlock_journal (p_s_sb); 2911 th->t_super = p_s_sb;
2644 retval = journal->j_errno; 2912
2645 goto out_fail; 2913 relock:
2646 } 2914 lock_journal(p_s_sb);
2647 journal->j_bcount++; 2915 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
2648 2916 unlock_journal(p_s_sb);
2649 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 2917 retval = journal->j_errno;
2650 unlock_journal(p_s_sb) ; 2918 goto out_fail;
2651 reiserfs_wait_on_write_block(p_s_sb) ; 2919 }
2652 PROC_INFO_INC( p_s_sb, journal.journal_relock_writers ); 2920 journal->j_bcount++;
2653 goto relock ; 2921
2654 } 2922 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
2655 now = get_seconds(); 2923 unlock_journal(p_s_sb);
2656 2924 reiserfs_wait_on_write_block(p_s_sb);
2657 /* if there is no room in the journal OR 2925 PROC_INFO_INC(p_s_sb, journal.journal_relock_writers);
2658 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 2926 goto relock;
2659 ** we don't sleep if there aren't other writers 2927 }
2660 */ 2928 now = get_seconds();
2661 2929
2662 if ( (!join && journal->j_must_wait > 0) || 2930 /* if there is no room in the journal OR
2663 ( !join && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) || 2931 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
2664 (!join && atomic_read(&journal->j_wcount) > 0 && journal->j_trans_start_time > 0 && 2932 ** we don't sleep if there aren't other writers
2665 (now - journal->j_trans_start_time) > journal->j_max_trans_age) || 2933 */
2666 (!join && atomic_read(&journal->j_jlock)) || 2934
2667 (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 2935 if ((!join && journal->j_must_wait > 0) ||
2668 2936 (!join
2669 old_trans_id = journal->j_trans_id; 2937 && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
2670 unlock_journal(p_s_sb) ; /* allow others to finish this transaction */ 2938 || (!join && atomic_read(&journal->j_wcount) > 0
2671 2939 && journal->j_trans_start_time > 0
2672 if (!join && (journal->j_len_alloc + nblocks + 2) >= 2940 && (now - journal->j_trans_start_time) >
2673 journal->j_max_batch && 2941 journal->j_max_trans_age) || (!join
2674 ((journal->j_len + nblocks + 2) * 100) < (journal->j_len_alloc * 75)) 2942 && atomic_read(&journal->j_jlock))
2675 { 2943 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
2676 if (atomic_read(&journal->j_wcount) > 10) { 2944
2677 sched_count++; 2945 old_trans_id = journal->j_trans_id;
2678 queue_log_writer(p_s_sb); 2946 unlock_journal(p_s_sb); /* allow others to finish this transaction */
2679 goto relock; 2947
2680 } 2948 if (!join && (journal->j_len_alloc + nblocks + 2) >=
2681 } 2949 journal->j_max_batch &&
2682 /* don't mess with joining the transaction if all we have to do is 2950 ((journal->j_len + nblocks + 2) * 100) <
2683 * wait for someone else to do a commit 2951 (journal->j_len_alloc * 75)) {
2684 */ 2952 if (atomic_read(&journal->j_wcount) > 10) {
2685 if (atomic_read(&journal->j_jlock)) { 2953 sched_count++;
2686 while (journal->j_trans_id == old_trans_id && 2954 queue_log_writer(p_s_sb);
2687 atomic_read(&journal->j_jlock)) { 2955 goto relock;
2688 queue_log_writer(p_s_sb); 2956 }
2689 } 2957 }
2690 goto relock; 2958 /* don't mess with joining the transaction if all we have to do is
2691 } 2959 * wait for someone else to do a commit
2692 retval = journal_join(&myth, p_s_sb, 1) ; 2960 */
2693 if (retval) 2961 if (atomic_read(&journal->j_jlock)) {
2694 goto out_fail; 2962 while (journal->j_trans_id == old_trans_id &&
2695 2963 atomic_read(&journal->j_jlock)) {
2696 /* someone might have ended the transaction while we joined */ 2964 queue_log_writer(p_s_sb);
2697 if (old_trans_id != journal->j_trans_id) { 2965 }
2698 retval = do_journal_end(&myth, p_s_sb, 1, 0) ; 2966 goto relock;
2699 } else { 2967 }
2700 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ; 2968 retval = journal_join(&myth, p_s_sb, 1);
2701 } 2969 if (retval)
2702 2970 goto out_fail;
2703 if (retval) 2971
2704 goto out_fail; 2972 /* someone might have ended the transaction while we joined */
2705 2973 if (old_trans_id != journal->j_trans_id) {
2706 PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount ); 2974 retval = do_journal_end(&myth, p_s_sb, 1, 0);
2707 goto relock ; 2975 } else {
2708 } 2976 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW);
2709 /* we are the first writer, set trans_id */ 2977 }
2710 if (journal->j_trans_start_time == 0) { 2978
2711 journal->j_trans_start_time = get_seconds(); 2979 if (retval)
2712 } 2980 goto out_fail;
2713 atomic_inc(&(journal->j_wcount)) ; 2981
2714 journal->j_len_alloc += nblocks ; 2982 PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount);
2715 th->t_blocks_logged = 0 ; 2983 goto relock;
2716 th->t_blocks_allocated = nblocks ; 2984 }
2717 th->t_trans_id = journal->j_trans_id ; 2985 /* we are the first writer, set trans_id */
2718 unlock_journal(p_s_sb) ; 2986 if (journal->j_trans_start_time == 0) {
2719 INIT_LIST_HEAD (&th->t_list); 2987 journal->j_trans_start_time = get_seconds();
2720 return 0 ; 2988 }
2721 2989 atomic_inc(&(journal->j_wcount));
2722out_fail: 2990 journal->j_len_alloc += nblocks;
2723 memset (th, 0, sizeof (*th)); 2991 th->t_blocks_logged = 0;
2724 /* Re-set th->t_super, so we can properly keep track of how many 2992 th->t_blocks_allocated = nblocks;
2725 * persistent transactions there are. We need to do this so if this 2993 th->t_trans_id = journal->j_trans_id;
2726 * call is part of a failed restart_transaction, we can free it later */ 2994 unlock_journal(p_s_sb);
2727 th->t_super = p_s_sb; 2995 INIT_LIST_HEAD(&th->t_list);
2728 return retval; 2996 get_fs_excl();
2729} 2997 return 0;
2730 2998
2731struct reiserfs_transaction_handle * 2999 out_fail:
2732reiserfs_persistent_transaction(struct super_block *s, int nblocks) { 3000 memset(th, 0, sizeof(*th));
2733 int ret ; 3001 /* Re-set th->t_super, so we can properly keep track of how many
2734 struct reiserfs_transaction_handle *th ; 3002 * persistent transactions there are. We need to do this so if this
2735 3003 * call is part of a failed restart_transaction, we can free it later */
2736 /* if we're nesting into an existing transaction. It will be 3004 th->t_super = p_s_sb;
2737 ** persistent on its own 3005 return retval;
2738 */ 3006}
2739 if (reiserfs_transaction_running(s)) { 3007
2740 th = current->journal_info ; 3008struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
2741 th->t_refcount++ ; 3009 super_block
2742 if (th->t_refcount < 2) { 3010 *s,
2743 BUG() ; 3011 int nblocks)
2744 } 3012{
2745 return th ; 3013 int ret;
2746 } 3014 struct reiserfs_transaction_handle *th;
2747 th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ; 3015
2748 if (!th) 3016 /* if we're nesting into an existing transaction. It will be
2749 return NULL; 3017 ** persistent on its own
2750 ret = journal_begin(th, s, nblocks) ; 3018 */
2751 if (ret) { 3019 if (reiserfs_transaction_running(s)) {
2752 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; 3020 th = current->journal_info;
2753 return NULL; 3021 th->t_refcount++;
2754 } 3022 if (th->t_refcount < 2) {
2755 3023 BUG();
2756 SB_JOURNAL(s)->j_persistent_trans++; 3024 }
2757 return th ; 3025 return th;
2758} 3026 }
2759 3027 th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle),
2760int 3028 GFP_NOFS, s);
2761reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) { 3029 if (!th)
2762 struct super_block *s = th->t_super; 3030 return NULL;
2763 int ret = 0; 3031 ret = journal_begin(th, s, nblocks);
2764 if (th->t_trans_id) 3032 if (ret) {
2765 ret = journal_end(th, th->t_super, th->t_blocks_allocated); 3033 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
2766 else 3034 s);
2767 ret = -EIO; 3035 return NULL;
2768 if (th->t_refcount == 0) { 3036 }
2769 SB_JOURNAL(s)->j_persistent_trans--; 3037
2770 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; 3038 SB_JOURNAL(s)->j_persistent_trans++;
2771 } 3039 return th;
2772 return ret; 3040}
2773} 3041
2774 3042int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
2775static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3043{
2776 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3044 struct super_block *s = th->t_super;
2777 3045 int ret = 0;
2778 /* this keeps do_journal_end from NULLing out the current->journal_info 3046 if (th->t_trans_id)
2779 ** pointer 3047 ret = journal_end(th, th->t_super, th->t_blocks_allocated);
2780 */ 3048 else
2781 th->t_handle_save = cur_th ; 3049 ret = -EIO;
2782 if (cur_th && cur_th->t_refcount > 1) { 3050 if (th->t_refcount == 0) {
2783 BUG() ; 3051 SB_JOURNAL(s)->j_persistent_trans--;
2784 } 3052 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
2785 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN) ; 3053 s);
2786} 3054 }
2787 3055 return ret;
2788int journal_join_abort(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3056}
2789 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3057
2790 3058static int journal_join(struct reiserfs_transaction_handle *th,
2791 /* this keeps do_journal_end from NULLing out the current->journal_info 3059 struct super_block *p_s_sb, unsigned long nblocks)
2792 ** pointer 3060{
2793 */ 3061 struct reiserfs_transaction_handle *cur_th = current->journal_info;
2794 th->t_handle_save = cur_th ; 3062
2795 if (cur_th && cur_th->t_refcount > 1) { 3063 /* this keeps do_journal_end from NULLing out the current->journal_info
2796 BUG() ; 3064 ** pointer
2797 } 3065 */
2798 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT) ; 3066 th->t_handle_save = cur_th;
2799} 3067 if (cur_th && cur_th->t_refcount > 1) {
2800 3068 BUG();
2801int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { 3069 }
2802 struct reiserfs_transaction_handle *cur_th = current->journal_info ; 3070 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
2803 int ret ; 3071}
2804 3072
2805 th->t_handle_save = NULL ; 3073int journal_join_abort(struct reiserfs_transaction_handle *th,
2806 if (cur_th) { 3074 struct super_block *p_s_sb, unsigned long nblocks)
2807 /* we are nesting into the current transaction */ 3075{
2808 if (cur_th->t_super == p_s_sb) { 3076 struct reiserfs_transaction_handle *cur_th = current->journal_info;
2809 BUG_ON (!cur_th->t_refcount); 3077
2810 cur_th->t_refcount++ ; 3078 /* this keeps do_journal_end from NULLing out the current->journal_info
2811 memcpy(th, cur_th, sizeof(*th)); 3079 ** pointer
2812 if (th->t_refcount <= 1) 3080 */
2813 reiserfs_warning (p_s_sb, "BAD: refcount <= 1, but journal_info != 0"); 3081 th->t_handle_save = cur_th;
2814 return 0; 3082 if (cur_th && cur_th->t_refcount > 1) {
3083 BUG();
3084 }
3085 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
3086}
3087
3088int journal_begin(struct reiserfs_transaction_handle *th,
3089 struct super_block *p_s_sb, unsigned long nblocks)
3090{
3091 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3092 int ret;
3093
3094 th->t_handle_save = NULL;
3095 if (cur_th) {
3096 /* we are nesting into the current transaction */
3097 if (cur_th->t_super == p_s_sb) {
3098 BUG_ON(!cur_th->t_refcount);
3099 cur_th->t_refcount++;
3100 memcpy(th, cur_th, sizeof(*th));
3101 if (th->t_refcount <= 1)
3102 reiserfs_warning(p_s_sb,
3103 "BAD: refcount <= 1, but journal_info != 0");
3104 return 0;
3105 } else {
3106 /* we've ended up with a handle from a different filesystem.
3107 ** save it and restore on journal_end. This should never
3108 ** really happen...
3109 */
3110 reiserfs_warning(p_s_sb,
3111 "clm-2100: nesting info a different FS");
3112 th->t_handle_save = current->journal_info;
3113 current->journal_info = th;
3114 }
2815 } else { 3115 } else {
2816 /* we've ended up with a handle from a different filesystem. 3116 current->journal_info = th;
2817 ** save it and restore on journal_end. This should never 3117 }
2818 ** really happen... 3118 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
2819 */ 3119 if (current->journal_info != th)
2820 reiserfs_warning(p_s_sb, "clm-2100: nesting info a different FS") ; 3120 BUG();
2821 th->t_handle_save = current->journal_info ;
2822 current->journal_info = th;
2823 }
2824 } else {
2825 current->journal_info = th;
2826 }
2827 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG) ;
2828 if (current->journal_info != th)
2829 BUG() ;
2830 3121
2831 /* I guess this boils down to being the reciprocal of clm-2100 above. 3122 /* I guess this boils down to being the reciprocal of clm-2100 above.
2832 * If do_journal_begin_r fails, we need to put it back, since journal_end 3123 * If do_journal_begin_r fails, we need to put it back, since journal_end
2833 * won't be called to do it. */ 3124 * won't be called to do it. */
2834 if (ret) 3125 if (ret)
2835 current->journal_info = th->t_handle_save; 3126 current->journal_info = th->t_handle_save;
2836 else 3127 else
2837 BUG_ON (!th->t_refcount); 3128 BUG_ON(!th->t_refcount);
2838 3129
2839 return ret ; 3130 return ret;
2840} 3131}
2841 3132
2842/* 3133/*
@@ -2848,129 +3139,140 @@ int journal_begin(struct reiserfs_transaction_handle *th, struct super_block *
2848** 3139**
2849** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 3140** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
2850*/ 3141*/
2851int journal_mark_dirty(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { 3142int journal_mark_dirty(struct reiserfs_transaction_handle *th,
2852 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3143 struct super_block *p_s_sb, struct buffer_head *bh)
2853 struct reiserfs_journal_cnode *cn = NULL; 3144{
2854 int count_already_incd = 0 ; 3145 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2855 int prepared = 0 ; 3146 struct reiserfs_journal_cnode *cn = NULL;
2856 BUG_ON (!th->t_trans_id); 3147 int count_already_incd = 0;
2857 3148 int prepared = 0;
2858 PROC_INFO_INC( p_s_sb, journal.mark_dirty ); 3149 BUG_ON(!th->t_trans_id);
2859 if (th->t_trans_id != journal->j_trans_id) { 3150
2860 reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", 3151 PROC_INFO_INC(p_s_sb, journal.mark_dirty);
2861 th->t_trans_id, journal->j_trans_id); 3152 if (th->t_trans_id != journal->j_trans_id) {
2862 } 3153 reiserfs_panic(th->t_super,
2863 3154 "journal-1577: handle trans id %ld != current trans id %ld\n",
2864 p_s_sb->s_dirt = 1; 3155 th->t_trans_id, journal->j_trans_id);
2865 3156 }
2866 prepared = test_clear_buffer_journal_prepared (bh); 3157
2867 clear_buffer_journal_restore_dirty (bh); 3158 p_s_sb->s_dirt = 1;
2868 /* already in this transaction, we are done */ 3159
2869 if (buffer_journaled(bh)) { 3160 prepared = test_clear_buffer_journal_prepared(bh);
2870 PROC_INFO_INC( p_s_sb, journal.mark_dirty_already ); 3161 clear_buffer_journal_restore_dirty(bh);
2871 return 0 ; 3162 /* already in this transaction, we are done */
2872 } 3163 if (buffer_journaled(bh)) {
2873 3164 PROC_INFO_INC(p_s_sb, journal.mark_dirty_already);
2874 /* this must be turned into a panic instead of a warning. We can't allow 3165 return 0;
2875 ** a dirty or journal_dirty or locked buffer to be logged, as some changes 3166 }
2876 ** could get to disk too early. NOT GOOD. 3167
2877 */ 3168 /* this must be turned into a panic instead of a warning. We can't allow
2878 if (!prepared || buffer_dirty(bh)) { 3169 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
2879 reiserfs_warning (p_s_sb, "journal-1777: buffer %llu bad state " 3170 ** could get to disk too early. NOT GOOD.
2880 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3171 */
2881 (unsigned long long)bh->b_blocknr, prepared ? ' ' : '!', 3172 if (!prepared || buffer_dirty(bh)) {
2882 buffer_locked(bh) ? ' ' : '!', 3173 reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state "
2883 buffer_dirty(bh) ? ' ' : '!', 3174 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
2884 buffer_journal_dirty(bh) ? ' ' : '!') ; 3175 (unsigned long long)bh->b_blocknr,
2885 } 3176 prepared ? ' ' : '!',
2886 3177 buffer_locked(bh) ? ' ' : '!',
2887 if (atomic_read(&(journal->j_wcount)) <= 0) { 3178 buffer_dirty(bh) ? ' ' : '!',
2888 reiserfs_warning (p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d", atomic_read(&(journal->j_wcount))) ; 3179 buffer_journal_dirty(bh) ? ' ' : '!');
2889 return 1 ; 3180 }
2890 } 3181
2891 /* this error means I've screwed up, and we've overflowed the transaction. 3182 if (atomic_read(&(journal->j_wcount)) <= 0) {
2892 ** Nothing can be done here, except make the FS readonly or panic. 3183 reiserfs_warning(p_s_sb,
2893 */ 3184 "journal-1409: journal_mark_dirty returning because j_wcount was %d",
2894 if (journal->j_len >= journal->j_trans_max) { 3185 atomic_read(&(journal->j_wcount)));
2895 reiserfs_panic(th->t_super, "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", journal->j_len) ; 3186 return 1;
2896 } 3187 }
2897 3188 /* this error means I've screwed up, and we've overflowed the transaction.
2898 if (buffer_journal_dirty(bh)) { 3189 ** Nothing can be done here, except make the FS readonly or panic.
2899 count_already_incd = 1 ; 3190 */
2900 PROC_INFO_INC( p_s_sb, journal.mark_dirty_notjournal ); 3191 if (journal->j_len >= journal->j_trans_max) {
2901 clear_buffer_journal_dirty (bh); 3192 reiserfs_panic(th->t_super,
2902 } 3193 "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n",
2903 3194 journal->j_len);
2904 if (journal->j_len > journal->j_len_alloc) { 3195 }
2905 journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT ; 3196
2906 } 3197 if (buffer_journal_dirty(bh)) {
2907 3198 count_already_incd = 1;
2908 set_buffer_journaled (bh); 3199 PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal);
2909 3200 clear_buffer_journal_dirty(bh);
2910 /* now put this guy on the end */ 3201 }
2911 if (!cn) { 3202
2912 cn = get_cnode(p_s_sb) ; 3203 if (journal->j_len > journal->j_len_alloc) {
2913 if (!cn) { 3204 journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
2914 reiserfs_panic(p_s_sb, "get_cnode failed!\n"); 3205 }
2915 } 3206
2916 3207 set_buffer_journaled(bh);
2917 if (th->t_blocks_logged == th->t_blocks_allocated) { 3208
2918 th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT ; 3209 /* now put this guy on the end */
2919 journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT ; 3210 if (!cn) {
2920 } 3211 cn = get_cnode(p_s_sb);
2921 th->t_blocks_logged++ ; 3212 if (!cn) {
2922 journal->j_len++ ; 3213 reiserfs_panic(p_s_sb, "get_cnode failed!\n");
2923 3214 }
2924 cn->bh = bh ; 3215
2925 cn->blocknr = bh->b_blocknr ; 3216 if (th->t_blocks_logged == th->t_blocks_allocated) {
2926 cn->sb = p_s_sb; 3217 th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
2927 cn->jlist = NULL ; 3218 journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
2928 insert_journal_hash(journal->j_hash_table, cn) ; 3219 }
2929 if (!count_already_incd) { 3220 th->t_blocks_logged++;
2930 get_bh(bh) ; 3221 journal->j_len++;
2931 } 3222
2932 } 3223 cn->bh = bh;
2933 cn->next = NULL ; 3224 cn->blocknr = bh->b_blocknr;
2934 cn->prev = journal->j_last ; 3225 cn->sb = p_s_sb;
2935 cn->bh = bh ; 3226 cn->jlist = NULL;
2936 if (journal->j_last) { 3227 insert_journal_hash(journal->j_hash_table, cn);
2937 journal->j_last->next = cn ; 3228 if (!count_already_incd) {
2938 journal->j_last = cn ; 3229 get_bh(bh);
2939 } else { 3230 }
2940 journal->j_first = cn ; 3231 }
2941 journal->j_last = cn ; 3232 cn->next = NULL;
2942 } 3233 cn->prev = journal->j_last;
2943 return 0 ; 3234 cn->bh = bh;
2944} 3235 if (journal->j_last) {
2945 3236 journal->j_last->next = cn;
2946int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3237 journal->j_last = cn;
2947 if (!current->journal_info && th->t_refcount > 1) 3238 } else {
2948 reiserfs_warning (p_s_sb, "REISER-NESTING: th NULL, refcount %d", 3239 journal->j_first = cn;
2949 th->t_refcount); 3240 journal->j_last = cn;
2950 3241 }
2951 if (!th->t_trans_id) { 3242 return 0;
2952 WARN_ON (1); 3243}
2953 return -EIO; 3244
2954 } 3245int journal_end(struct reiserfs_transaction_handle *th,
2955 3246 struct super_block *p_s_sb, unsigned long nblocks)
2956 th->t_refcount--; 3247{
2957 if (th->t_refcount > 0) { 3248 if (!current->journal_info && th->t_refcount > 1)
2958 struct reiserfs_transaction_handle *cur_th = current->journal_info ; 3249 reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d",
2959 3250 th->t_refcount);
2960 /* we aren't allowed to close a nested transaction on a different 3251
2961 ** filesystem from the one in the task struct 3252 if (!th->t_trans_id) {
2962 */ 3253 WARN_ON(1);
2963 if (cur_th->t_super != th->t_super) 3254 return -EIO;
2964 BUG() ; 3255 }
2965 3256
2966 if (th != cur_th) { 3257 th->t_refcount--;
2967 memcpy(current->journal_info, th, sizeof(*th)); 3258 if (th->t_refcount > 0) {
2968 th->t_trans_id = 0; 3259 struct reiserfs_transaction_handle *cur_th =
2969 } 3260 current->journal_info;
2970 return 0; 3261
2971 } else { 3262 /* we aren't allowed to close a nested transaction on a different
2972 return do_journal_end(th, p_s_sb, nblocks, 0) ; 3263 ** filesystem from the one in the task struct
2973 } 3264 */
3265 if (cur_th->t_super != th->t_super)
3266 BUG();
3267
3268 if (th != cur_th) {
3269 memcpy(current->journal_info, th, sizeof(*th));
3270 th->t_trans_id = 0;
3271 }
3272 return 0;
3273 } else {
3274 return do_journal_end(th, p_s_sb, nblocks, 0);
3275 }
2974} 3276}
2975 3277
2976/* removes from the current transaction, relsing and descrementing any counters. 3278/* removes from the current transaction, relsing and descrementing any counters.
@@ -2980,47 +3282,51 @@ int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_
2980** 3282**
2981** returns 1 if it cleaned and relsed the buffer. 0 otherwise 3283** returns 1 if it cleaned and relsed the buffer. 0 otherwise
2982*/ 3284*/
2983static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t blocknr, int already_cleaned) { 3285static int remove_from_transaction(struct super_block *p_s_sb,
2984 struct buffer_head *bh ; 3286 b_blocknr_t blocknr, int already_cleaned)
2985 struct reiserfs_journal_cnode *cn ; 3287{
2986 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3288 struct buffer_head *bh;
2987 int ret = 0; 3289 struct reiserfs_journal_cnode *cn;
2988 3290 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2989 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr) ; 3291 int ret = 0;
2990 if (!cn || !cn->bh) { 3292
2991 return ret ; 3293 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
2992 } 3294 if (!cn || !cn->bh) {
2993 bh = cn->bh ; 3295 return ret;
2994 if (cn->prev) { 3296 }
2995 cn->prev->next = cn->next ; 3297 bh = cn->bh;
2996 } 3298 if (cn->prev) {
2997 if (cn->next) { 3299 cn->prev->next = cn->next;
2998 cn->next->prev = cn->prev ; 3300 }
2999 } 3301 if (cn->next) {
3000 if (cn == journal->j_first) { 3302 cn->next->prev = cn->prev;
3001 journal->j_first = cn->next ; 3303 }
3002 } 3304 if (cn == journal->j_first) {
3003 if (cn == journal->j_last) { 3305 journal->j_first = cn->next;
3004 journal->j_last = cn->prev ; 3306 }
3005 } 3307 if (cn == journal->j_last) {
3006 if (bh) 3308 journal->j_last = cn->prev;
3007 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, bh->b_blocknr, 0) ; 3309 }
3008 clear_buffer_journaled (bh); /* don't log this one */ 3310 if (bh)
3009 3311 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL,
3010 if (!already_cleaned) { 3312 bh->b_blocknr, 0);
3011 clear_buffer_journal_dirty (bh); 3313 clear_buffer_journaled(bh); /* don't log this one */
3012 clear_buffer_dirty(bh); 3314
3013 clear_buffer_journal_test (bh); 3315 if (!already_cleaned) {
3014 put_bh(bh) ; 3316 clear_buffer_journal_dirty(bh);
3015 if (atomic_read(&(bh->b_count)) < 0) { 3317 clear_buffer_dirty(bh);
3016 reiserfs_warning (p_s_sb, "journal-1752: remove from trans, b_count < 0"); 3318 clear_buffer_journal_test(bh);
3017 } 3319 put_bh(bh);
3018 ret = 1 ; 3320 if (atomic_read(&(bh->b_count)) < 0) {
3019 } 3321 reiserfs_warning(p_s_sb,
3020 journal->j_len-- ; 3322 "journal-1752: remove from trans, b_count < 0");
3021 journal->j_len_alloc-- ; 3323 }
3022 free_cnode(p_s_sb, cn) ; 3324 ret = 1;
3023 return ret ; 3325 }
3326 journal->j_len--;
3327 journal->j_len_alloc--;
3328 free_cnode(p_s_sb, cn);
3329 return ret;
3024} 3330}
3025 3331
3026/* 3332/*
@@ -3033,120 +3339,129 @@ static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t block
3033** blocks for a given transaction on disk 3339** blocks for a given transaction on disk
3034** 3340**
3035*/ 3341*/
3036static int can_dirty(struct reiserfs_journal_cnode *cn) { 3342static int can_dirty(struct reiserfs_journal_cnode *cn)
3037 struct super_block *sb = cn->sb; 3343{
3038 b_blocknr_t blocknr = cn->blocknr ; 3344 struct super_block *sb = cn->sb;
3039 struct reiserfs_journal_cnode *cur = cn->hprev ; 3345 b_blocknr_t blocknr = cn->blocknr;
3040 int can_dirty = 1 ; 3346 struct reiserfs_journal_cnode *cur = cn->hprev;
3041 3347 int can_dirty = 1;
3042 /* first test hprev. These are all newer than cn, so any node here 3348
3043 ** with the same block number and dev means this node can't be sent 3349 /* first test hprev. These are all newer than cn, so any node here
3044 ** to disk right now. 3350 ** with the same block number and dev means this node can't be sent
3045 */ 3351 ** to disk right now.
3046 while(cur && can_dirty) { 3352 */
3047 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 3353 while (cur && can_dirty) {
3048 cur->blocknr == blocknr) { 3354 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
3049 can_dirty = 0 ; 3355 cur->blocknr == blocknr) {
3050 } 3356 can_dirty = 0;
3051 cur = cur->hprev ; 3357 }
3052 } 3358 cur = cur->hprev;
3053 /* then test hnext. These are all older than cn. As long as they 3359 }
3054 ** are committed to the log, it is safe to write cn to disk 3360 /* then test hnext. These are all older than cn. As long as they
3055 */ 3361 ** are committed to the log, it is safe to write cn to disk
3056 cur = cn->hnext ; 3362 */
3057 while(cur && can_dirty) { 3363 cur = cn->hnext;
3058 if (cur->jlist && cur->jlist->j_len > 0 && 3364 while (cur && can_dirty) {
3059 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 3365 if (cur->jlist && cur->jlist->j_len > 0 &&
3060 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 3366 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
3061 can_dirty = 0 ; 3367 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
3062 } 3368 can_dirty = 0;
3063 cur = cur->hnext ; 3369 }
3064 } 3370 cur = cur->hnext;
3065 return can_dirty ; 3371 }
3372 return can_dirty;
3066} 3373}
3067 3374
3068/* syncs the commit blocks, but does not force the real buffers to disk 3375/* syncs the commit blocks, but does not force the real buffers to disk
3069** will wait until the current transaction is done/commited before returning 3376** will wait until the current transaction is done/commited before returning
3070*/ 3377*/
3071int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3378int journal_end_sync(struct reiserfs_transaction_handle *th,
3072 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3379 struct super_block *p_s_sb, unsigned long nblocks)
3380{
3381 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3073 3382
3074 BUG_ON (!th->t_trans_id); 3383 BUG_ON(!th->t_trans_id);
3075 /* you can sync while nested, very, very bad */ 3384 /* you can sync while nested, very, very bad */
3076 if (th->t_refcount > 1) { 3385 if (th->t_refcount > 1) {
3077 BUG() ; 3386 BUG();
3078 } 3387 }
3079 if (journal->j_len == 0) { 3388 if (journal->j_len == 0) {
3080 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 3389 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3081 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 3390 1);
3082 } 3391 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
3083 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT) ; 3392 }
3393 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT);
3084} 3394}
3085 3395
3086/* 3396/*
3087** writeback the pending async commits to disk 3397** writeback the pending async commits to disk
3088*/ 3398*/
3089static void flush_async_commits(void *p) { 3399static void flush_async_commits(void *p)
3090 struct super_block *p_s_sb = p; 3400{
3091 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3401 struct super_block *p_s_sb = p;
3092 struct reiserfs_journal_list *jl; 3402 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3093 struct list_head *entry; 3403 struct reiserfs_journal_list *jl;
3094 3404 struct list_head *entry;
3095 lock_kernel(); 3405
3096 if (!list_empty(&journal->j_journal_list)) { 3406 lock_kernel();
3097 /* last entry is the youngest, commit it and you get everything */ 3407 if (!list_empty(&journal->j_journal_list)) {
3098 entry = journal->j_journal_list.prev; 3408 /* last entry is the youngest, commit it and you get everything */
3099 jl = JOURNAL_LIST_ENTRY(entry); 3409 entry = journal->j_journal_list.prev;
3100 flush_commit_list(p_s_sb, jl, 1); 3410 jl = JOURNAL_LIST_ENTRY(entry);
3101 } 3411 flush_commit_list(p_s_sb, jl, 1);
3102 unlock_kernel(); 3412 }
3103 /* 3413 unlock_kernel();
3104 * this is a little racey, but there's no harm in missing 3414 /*
3105 * the filemap_fdata_write 3415 * this is a little racey, but there's no harm in missing
3106 */ 3416 * the filemap_fdata_write
3107 if (!atomic_read(&journal->j_async_throttle) && !reiserfs_is_journal_aborted (journal)) { 3417 */
3108 atomic_inc(&journal->j_async_throttle); 3418 if (!atomic_read(&journal->j_async_throttle)
3109 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); 3419 && !reiserfs_is_journal_aborted(journal)) {
3110 atomic_dec(&journal->j_async_throttle); 3420 atomic_inc(&journal->j_async_throttle);
3111 } 3421 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
3422 atomic_dec(&journal->j_async_throttle);
3423 }
3112} 3424}
3113 3425
3114/* 3426/*
3115** flushes any old transactions to disk 3427** flushes any old transactions to disk
3116** ends the current transaction if it is too old 3428** ends the current transaction if it is too old
3117*/ 3429*/
3118int reiserfs_flush_old_commits(struct super_block *p_s_sb) { 3430int reiserfs_flush_old_commits(struct super_block *p_s_sb)
3119 time_t now ; 3431{
3120 struct reiserfs_transaction_handle th ; 3432 time_t now;
3121 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3433 struct reiserfs_transaction_handle th;
3122 3434 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3123 now = get_seconds(); 3435
3124 /* safety check so we don't flush while we are replaying the log during 3436 now = get_seconds();
3125 * mount 3437 /* safety check so we don't flush while we are replaying the log during
3126 */ 3438 * mount
3127 if (list_empty(&journal->j_journal_list)) { 3439 */
3128 return 0 ; 3440 if (list_empty(&journal->j_journal_list)) {
3129 } 3441 return 0;
3130 3442 }
3131 /* check the current transaction. If there are no writers, and it is 3443
3132 * too old, finish it, and force the commit blocks to disk 3444 /* check the current transaction. If there are no writers, and it is
3133 */ 3445 * too old, finish it, and force the commit blocks to disk
3134 if (atomic_read(&journal->j_wcount) <= 0 && 3446 */
3135 journal->j_trans_start_time > 0 && 3447 if (atomic_read(&journal->j_wcount) <= 0 &&
3136 journal->j_len > 0 && 3448 journal->j_trans_start_time > 0 &&
3137 (now - journal->j_trans_start_time) > journal->j_max_trans_age) 3449 journal->j_len > 0 &&
3138 { 3450 (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3139 if (!journal_join(&th, p_s_sb, 1)) { 3451 if (!journal_join(&th, p_s_sb, 1)) {
3140 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 3452 reiserfs_prepare_for_journal(p_s_sb,
3141 journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 3453 SB_BUFFER_WITH_SB(p_s_sb),
3142 3454 1);
3143 /* we're only being called from kreiserfsd, it makes no sense to do 3455 journal_mark_dirty(&th, p_s_sb,
3144 ** an async commit so that kreiserfsd can do it later 3456 SB_BUFFER_WITH_SB(p_s_sb));
3145 */ 3457
3146 do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; 3458 /* we're only being called from kreiserfsd, it makes no sense to do
3147 } 3459 ** an async commit so that kreiserfsd can do it later
3148 } 3460 */
3149 return p_s_sb->s_dirt; 3461 do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT);
3462 }
3463 }
3464 return p_s_sb->s_dirt;
3150} 3465}
3151 3466
3152/* 3467/*
@@ -3160,101 +3475,108 @@ int reiserfs_flush_old_commits(struct super_block *p_s_sb) {
3160** 3475**
3161** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3476** Note, we can't allow the journal_end to proceed while there are still writers in the log.
3162*/ 3477*/
3163static int check_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, 3478static int check_journal_end(struct reiserfs_transaction_handle *th,
3164 unsigned long nblocks, int flags) { 3479 struct super_block *p_s_sb, unsigned long nblocks,
3165 3480 int flags)
3166 time_t now ; 3481{
3167 int flush = flags & FLUSH_ALL ; 3482
3168 int commit_now = flags & COMMIT_NOW ; 3483 time_t now;
3169 int wait_on_commit = flags & WAIT ; 3484 int flush = flags & FLUSH_ALL;
3170 struct reiserfs_journal_list *jl; 3485 int commit_now = flags & COMMIT_NOW;
3171 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3486 int wait_on_commit = flags & WAIT;
3172 3487 struct reiserfs_journal_list *jl;
3173 BUG_ON (!th->t_trans_id); 3488 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3174 3489
3175 if (th->t_trans_id != journal->j_trans_id) { 3490 BUG_ON(!th->t_trans_id);
3176 reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", 3491
3177 th->t_trans_id, journal->j_trans_id); 3492 if (th->t_trans_id != journal->j_trans_id) {
3178 } 3493 reiserfs_panic(th->t_super,
3179 3494 "journal-1577: handle trans id %ld != current trans id %ld\n",
3180 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged) ; 3495 th->t_trans_id, journal->j_trans_id);
3181 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ 3496 }
3182 atomic_dec(&(journal->j_wcount)) ; 3497
3183 } 3498 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
3184 3499 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */
3185 /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released 3500 atomic_dec(&(journal->j_wcount));
3186 ** will be dealt with by next transaction that actually writes something, but should be taken 3501 }
3187 ** care of in this trans 3502
3188 */ 3503 /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
3189 if (journal->j_len == 0) { 3504 ** will be dealt with by next transaction that actually writes something, but should be taken
3190 BUG(); 3505 ** care of in this trans
3191 } 3506 */
3192 /* if wcount > 0, and we are called to with flush or commit_now, 3507 if (journal->j_len == 0) {
3193 ** we wait on j_join_wait. We will wake up when the last writer has 3508 BUG();
3194 ** finished the transaction, and started it on its way to the disk. 3509 }
3195 ** Then, we flush the commit or journal list, and just return 0 3510 /* if wcount > 0, and we are called to with flush or commit_now,
3196 ** because the rest of journal end was already done for this transaction. 3511 ** we wait on j_join_wait. We will wake up when the last writer has
3197 */ 3512 ** finished the transaction, and started it on its way to the disk.
3198 if (atomic_read(&(journal->j_wcount)) > 0) { 3513 ** Then, we flush the commit or journal list, and just return 0
3199 if (flush || commit_now) { 3514 ** because the rest of journal end was already done for this transaction.
3200 unsigned trans_id ; 3515 */
3201 3516 if (atomic_read(&(journal->j_wcount)) > 0) {
3202 jl = journal->j_current_jl; 3517 if (flush || commit_now) {
3203 trans_id = jl->j_trans_id; 3518 unsigned trans_id;
3204 if (wait_on_commit) 3519
3205 jl->j_state |= LIST_COMMIT_PENDING; 3520 jl = journal->j_current_jl;
3206 atomic_set(&(journal->j_jlock), 1) ; 3521 trans_id = jl->j_trans_id;
3207 if (flush) { 3522 if (wait_on_commit)
3208 journal->j_next_full_flush = 1 ; 3523 jl->j_state |= LIST_COMMIT_PENDING;
3209 } 3524 atomic_set(&(journal->j_jlock), 1);
3210 unlock_journal(p_s_sb) ; 3525 if (flush) {
3211 3526 journal->j_next_full_flush = 1;
3212 /* sleep while the current transaction is still j_jlocked */ 3527 }
3213 while(journal->j_trans_id == trans_id) { 3528 unlock_journal(p_s_sb);
3214 if (atomic_read(&journal->j_jlock)) { 3529
3215 queue_log_writer(p_s_sb); 3530 /* sleep while the current transaction is still j_jlocked */
3216 } else { 3531 while (journal->j_trans_id == trans_id) {
3217 lock_journal(p_s_sb); 3532 if (atomic_read(&journal->j_jlock)) {
3218 if (journal->j_trans_id == trans_id) { 3533 queue_log_writer(p_s_sb);
3219 atomic_set(&(journal->j_jlock), 1) ; 3534 } else {
3220 } 3535 lock_journal(p_s_sb);
3221 unlock_journal(p_s_sb); 3536 if (journal->j_trans_id == trans_id) {
3222 } 3537 atomic_set(&(journal->j_jlock),
3223 } 3538 1);
3224 if (journal->j_trans_id == trans_id) { 3539 }
3225 BUG(); 3540 unlock_journal(p_s_sb);
3226 } 3541 }
3227 if (commit_now && journal_list_still_alive(p_s_sb, trans_id) && 3542 }
3228 wait_on_commit) 3543 if (journal->j_trans_id == trans_id) {
3229 { 3544 BUG();
3230 flush_commit_list(p_s_sb, jl, 1) ; 3545 }
3231 } 3546 if (commit_now
3232 return 0 ; 3547 && journal_list_still_alive(p_s_sb, trans_id)
3233 } 3548 && wait_on_commit) {
3234 unlock_journal(p_s_sb) ; 3549 flush_commit_list(p_s_sb, jl, 1);
3235 return 0 ; 3550 }
3236 } 3551 return 0;
3237 3552 }
3238 /* deal with old transactions where we are the last writers */ 3553 unlock_journal(p_s_sb);
3239 now = get_seconds(); 3554 return 0;
3240 if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3555 }
3241 commit_now = 1 ; 3556
3242 journal->j_next_async_flush = 1 ; 3557 /* deal with old transactions where we are the last writers */
3243 } 3558 now = get_seconds();
3244 /* don't batch when someone is waiting on j_join_wait */ 3559 if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3245 /* don't batch when syncing the commit or flushing the whole trans */ 3560 commit_now = 1;
3246 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) && !flush && !commit_now && 3561 journal->j_next_async_flush = 1;
3247 (journal->j_len < journal->j_max_batch) && 3562 }
3248 journal->j_len_alloc < journal->j_max_batch && journal->j_cnode_free > (journal->j_trans_max * 3)) { 3563 /* don't batch when someone is waiting on j_join_wait */
3249 journal->j_bcount++ ; 3564 /* don't batch when syncing the commit or flushing the whole trans */
3250 unlock_journal(p_s_sb) ; 3565 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3251 return 0 ; 3566 && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3252 } 3567 && journal->j_len_alloc < journal->j_max_batch
3253 3568 && journal->j_cnode_free > (journal->j_trans_max * 3)) {
3254 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 3569 journal->j_bcount++;
3255 reiserfs_panic(p_s_sb, "journal-003: journal_end: j_start (%ld) is too high\n", journal->j_start) ; 3570 unlock_journal(p_s_sb);
3256 } 3571 return 0;
3257 return 1 ; 3572 }
3573
3574 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3575 reiserfs_panic(p_s_sb,
3576 "journal-003: journal_end: j_start (%ld) is too high\n",
3577 journal->j_start);
3578 }
3579 return 1;
3258} 3580}
3259 3581
3260/* 3582/*
@@ -3271,83 +3593,95 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, struct supe
3271** 3593**
3272** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3594** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
3273*/ 3595*/
3274int journal_mark_freed(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, b_blocknr_t blocknr) { 3596int journal_mark_freed(struct reiserfs_transaction_handle *th,
3275 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3597 struct super_block *p_s_sb, b_blocknr_t blocknr)
3276 struct reiserfs_journal_cnode *cn = NULL ; 3598{
3277 struct buffer_head *bh = NULL ; 3599 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3278 struct reiserfs_list_bitmap *jb = NULL ; 3600 struct reiserfs_journal_cnode *cn = NULL;
3279 int cleaned = 0 ; 3601 struct buffer_head *bh = NULL;
3280 BUG_ON (!th->t_trans_id); 3602 struct reiserfs_list_bitmap *jb = NULL;
3281 3603 int cleaned = 0;
3282 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3604 BUG_ON(!th->t_trans_id);
3283 if (cn && cn->bh) { 3605
3284 bh = cn->bh ; 3606 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
3285 get_bh(bh) ; 3607 if (cn && cn->bh) {
3286 } 3608 bh = cn->bh;
3287 /* if it is journal new, we just remove it from this transaction */ 3609 get_bh(bh);
3288 if (bh && buffer_journal_new(bh)) { 3610 }
3289 clear_buffer_journal_new (bh); 3611 /* if it is journal new, we just remove it from this transaction */
3290 clear_prepared_bits(bh) ; 3612 if (bh && buffer_journal_new(bh)) {
3291 reiserfs_clean_and_file_buffer(bh) ; 3613 clear_buffer_journal_new(bh);
3292 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; 3614 clear_prepared_bits(bh);
3293 } else { 3615 reiserfs_clean_and_file_buffer(bh);
3294 /* set the bit for this block in the journal bitmap for this transaction */ 3616 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
3295 jb = journal->j_current_jl->j_list_bitmap; 3617 } else {
3296 if (!jb) { 3618 /* set the bit for this block in the journal bitmap for this transaction */
3297 reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ; 3619 jb = journal->j_current_jl->j_list_bitmap;
3298 } 3620 if (!jb) {
3299 set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; 3621 reiserfs_panic(p_s_sb,
3300 3622 "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n");
3301 /* Note, the entire while loop is not allowed to schedule. */ 3623 }
3302 3624 set_bit_in_list_bitmap(p_s_sb, blocknr, jb);
3303 if (bh) { 3625
3304 clear_prepared_bits(bh) ; 3626 /* Note, the entire while loop is not allowed to schedule. */
3305 reiserfs_clean_and_file_buffer(bh) ; 3627
3306 } 3628 if (bh) {
3307 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; 3629 clear_prepared_bits(bh);
3308 3630 reiserfs_clean_and_file_buffer(bh);
3309 /* find all older transactions with this block, make sure they don't try to write it out */ 3631 }
3310 cn = get_journal_hash_dev(p_s_sb,journal->j_list_hash_table, blocknr) ; 3632 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
3311 while (cn) { 3633
3312 if (p_s_sb == cn->sb && blocknr == cn->blocknr) { 3634 /* find all older transactions with this block, make sure they don't try to write it out */
3313 set_bit(BLOCK_FREED, &cn->state) ; 3635 cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
3314 if (cn->bh) { 3636 blocknr);
3315 if (!cleaned) { 3637 while (cn) {
3316 /* remove_from_transaction will brelse the buffer if it was 3638 if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
3317 ** in the current trans 3639 set_bit(BLOCK_FREED, &cn->state);
3318 */ 3640 if (cn->bh) {
3319 clear_buffer_journal_dirty (cn->bh); 3641 if (!cleaned) {
3320 clear_buffer_dirty(cn->bh); 3642 /* remove_from_transaction will brelse the buffer if it was
3321 clear_buffer_journal_test(cn->bh); 3643 ** in the current trans
3322 cleaned = 1 ; 3644 */
3323 put_bh(cn->bh) ; 3645 clear_buffer_journal_dirty(cn->
3324 if (atomic_read(&(cn->bh->b_count)) < 0) { 3646 bh);
3325 reiserfs_warning (p_s_sb, "journal-2138: cn->bh->b_count < 0"); 3647 clear_buffer_dirty(cn->bh);
3326 } 3648 clear_buffer_journal_test(cn->
3327 } 3649 bh);
3328 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3650 cleaned = 1;
3329 atomic_dec(&(cn->jlist->j_nonzerolen)) ; 3651 put_bh(cn->bh);
3330 } 3652 if (atomic_read
3331 cn->bh = NULL ; 3653 (&(cn->bh->b_count)) < 0) {
3332 } 3654 reiserfs_warning(p_s_sb,
3333 } 3655 "journal-2138: cn->bh->b_count < 0");
3334 cn = cn->hnext ; 3656 }
3335 } 3657 }
3336 } 3658 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */
3337 3659 atomic_dec(&
3338 if (bh) { 3660 (cn->jlist->
3339 put_bh(bh) ; /* get_hash grabs the buffer */ 3661 j_nonzerolen));
3340 if (atomic_read(&(bh->b_count)) < 0) { 3662 }
3341 reiserfs_warning (p_s_sb, "journal-2165: bh->b_count < 0"); 3663 cn->bh = NULL;
3342 } 3664 }
3343 } 3665 }
3344 return 0 ; 3666 cn = cn->hnext;
3345} 3667 }
3346 3668 }
3347void reiserfs_update_inode_transaction(struct inode *inode) { 3669
3348 struct reiserfs_journal *journal = SB_JOURNAL (inode->i_sb); 3670 if (bh) {
3349 REISERFS_I(inode)->i_jl = journal->j_current_jl; 3671 put_bh(bh); /* get_hash grabs the buffer */
3350 REISERFS_I(inode)->i_trans_id = journal->j_trans_id ; 3672 if (atomic_read(&(bh->b_count)) < 0) {
3673 reiserfs_warning(p_s_sb,
3674 "journal-2165: bh->b_count < 0");
3675 }
3676 }
3677 return 0;
3678}
3679
3680void reiserfs_update_inode_transaction(struct inode *inode)
3681{
3682 struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
3683 REISERFS_I(inode)->i_jl = journal->j_current_jl;
3684 REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
3351} 3685}
3352 3686
3353/* 3687/*
@@ -3355,99 +3689,102 @@ void reiserfs_update_inode_transaction(struct inode *inode) {
3355 * if a transaction was actually committed and the barrier was done 3689 * if a transaction was actually committed and the barrier was done
3356 */ 3690 */
3357static int __commit_trans_jl(struct inode *inode, unsigned long id, 3691static int __commit_trans_jl(struct inode *inode, unsigned long id,
3358 struct reiserfs_journal_list *jl) 3692 struct reiserfs_journal_list *jl)
3359{ 3693{
3360 struct reiserfs_transaction_handle th ; 3694 struct reiserfs_transaction_handle th;
3361 struct super_block *sb = inode->i_sb ; 3695 struct super_block *sb = inode->i_sb;
3362 struct reiserfs_journal *journal = SB_JOURNAL (sb); 3696 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3363 int ret = 0; 3697 int ret = 0;
3698
3699 /* is it from the current transaction, or from an unknown transaction? */
3700 if (id == journal->j_trans_id) {
3701 jl = journal->j_current_jl;
3702 /* try to let other writers come in and grow this transaction */
3703 let_transaction_grow(sb, id);
3704 if (journal->j_trans_id != id) {
3705 goto flush_commit_only;
3706 }
3364 3707
3365 /* is it from the current transaction, or from an unknown transaction? */ 3708 ret = journal_begin(&th, sb, 1);
3366 if (id == journal->j_trans_id) { 3709 if (ret)
3367 jl = journal->j_current_jl; 3710 return ret;
3368 /* try to let other writers come in and grow this transaction */ 3711
3369 let_transaction_grow(sb, id); 3712 /* someone might have ended this transaction while we joined */
3370 if (journal->j_trans_id != id) { 3713 if (journal->j_trans_id != id) {
3371 goto flush_commit_only; 3714 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3372 } 3715 1);
3716 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
3717 ret = journal_end(&th, sb, 1);
3718 goto flush_commit_only;
3719 }
3373 3720
3374 ret = journal_begin(&th, sb, 1) ; 3721 ret = journal_end_sync(&th, sb, 1);
3375 if (ret) 3722 if (!ret)
3376 return ret; 3723 ret = 1;
3377 3724
3378 /* someone might have ended this transaction while we joined */ 3725 } else {
3379 if (journal->j_trans_id != id) { 3726 /* this gets tricky, we have to make sure the journal list in
3380 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ; 3727 * the inode still exists. We know the list is still around
3381 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ; 3728 * if we've got a larger transaction id than the oldest list
3382 ret = journal_end(&th, sb, 1) ; 3729 */
3383 goto flush_commit_only; 3730 flush_commit_only:
3731 if (journal_list_still_alive(inode->i_sb, id)) {
3732 /*
3733 * we only set ret to 1 when we know for sure
3734 * the barrier hasn't been started yet on the commit
3735 * block.
3736 */
3737 if (atomic_read(&jl->j_commit_left) > 1)
3738 ret = 1;
3739 flush_commit_list(sb, jl, 1);
3740 if (journal->j_errno)
3741 ret = journal->j_errno;
3742 }
3384 } 3743 }
3744 /* otherwise the list is gone, and long since committed */
3745 return ret;
3746}
3385 3747
3386 ret = journal_end_sync(&th, sb, 1) ; 3748int reiserfs_commit_for_inode(struct inode *inode)
3387 if (!ret) 3749{
3388 ret = 1; 3750 unsigned long id = REISERFS_I(inode)->i_trans_id;
3751 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
3389 3752
3390 } else { 3753 /* for the whole inode, assume unset id means it was
3391 /* this gets tricky, we have to make sure the journal list in 3754 * changed in the current transaction. More conservative
3392 * the inode still exists. We know the list is still around
3393 * if we've got a larger transaction id than the oldest list
3394 */ 3755 */
3395flush_commit_only: 3756 if (!id || !jl) {
3396 if (journal_list_still_alive(inode->i_sb, id)) { 3757 reiserfs_update_inode_transaction(inode);
3397 /* 3758 id = REISERFS_I(inode)->i_trans_id;
3398 * we only set ret to 1 when we know for sure 3759 /* jl will be updated in __commit_trans_jl */
3399 * the barrier hasn't been started yet on the commit 3760 }
3400 * block. 3761
3401 */ 3762 return __commit_trans_jl(inode, id, jl);
3402 if (atomic_read(&jl->j_commit_left) > 1) 3763}
3403 ret = 1; 3764
3404 flush_commit_list(sb, jl, 1) ; 3765void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3405 if (journal->j_errno) 3766 struct buffer_head *bh)
3406 ret = journal->j_errno; 3767{
3407 } 3768 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3408 } 3769 PROC_INFO_INC(p_s_sb, journal.restore_prepared);
3409 /* otherwise the list is gone, and long since committed */ 3770 if (!bh) {
3410 return ret; 3771 return;
3411} 3772 }
3412 3773 if (test_clear_buffer_journal_restore_dirty(bh) &&
3413int reiserfs_commit_for_inode(struct inode *inode) { 3774 buffer_journal_dirty(bh)) {
3414 unsigned long id = REISERFS_I(inode)->i_trans_id; 3775 struct reiserfs_journal_cnode *cn;
3415 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 3776 cn = get_journal_hash_dev(p_s_sb,
3416 3777 journal->j_list_hash_table,
3417 /* for the whole inode, assume unset id means it was 3778 bh->b_blocknr);
3418 * changed in the current transaction. More conservative 3779 if (cn && can_dirty(cn)) {
3419 */ 3780 set_buffer_journal_test(bh);
3420 if (!id || !jl) { 3781 mark_buffer_dirty(bh);
3421 reiserfs_update_inode_transaction(inode) ; 3782 }
3422 id = REISERFS_I(inode)->i_trans_id; 3783 }
3423 /* jl will be updated in __commit_trans_jl */ 3784 clear_buffer_journal_prepared(bh);
3424 } 3785}
3425 3786
3426 return __commit_trans_jl(inode, id, jl); 3787extern struct tree_balance *cur_tb;
3427}
3428
3429void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3430 struct buffer_head *bh) {
3431 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
3432 PROC_INFO_INC( p_s_sb, journal.restore_prepared );
3433 if (!bh) {
3434 return ;
3435 }
3436 if (test_clear_buffer_journal_restore_dirty (bh) &&
3437 buffer_journal_dirty(bh)) {
3438 struct reiserfs_journal_cnode *cn;
3439 cn = get_journal_hash_dev(p_s_sb,
3440 journal->j_list_hash_table,
3441 bh->b_blocknr);
3442 if (cn && can_dirty(cn)) {
3443 set_buffer_journal_test (bh);
3444 mark_buffer_dirty(bh);
3445 }
3446 }
3447 clear_buffer_journal_prepared (bh);
3448}
3449
3450extern struct tree_balance *cur_tb ;
3451/* 3788/*
3452** before we can change a metadata block, we have to make sure it won't 3789** before we can change a metadata block, we have to make sure it won't
3453** be written to disk while we are altering it. So, we must: 3790** be written to disk while we are altering it. So, we must:
@@ -3456,39 +3793,41 @@ extern struct tree_balance *cur_tb ;
3456** 3793**
3457*/ 3794*/
3458int reiserfs_prepare_for_journal(struct super_block *p_s_sb, 3795int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3459 struct buffer_head *bh, int wait) { 3796 struct buffer_head *bh, int wait)
3460 PROC_INFO_INC( p_s_sb, journal.prepare ); 3797{
3461 3798 PROC_INFO_INC(p_s_sb, journal.prepare);
3462 if (test_set_buffer_locked(bh)) { 3799
3463 if (!wait) 3800 if (test_set_buffer_locked(bh)) {
3464 return 0; 3801 if (!wait)
3465 lock_buffer(bh); 3802 return 0;
3466 } 3803 lock_buffer(bh);
3467 set_buffer_journal_prepared (bh); 3804 }
3468 if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { 3805 set_buffer_journal_prepared(bh);
3469 clear_buffer_journal_test (bh); 3806 if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
3470 set_buffer_journal_restore_dirty (bh); 3807 clear_buffer_journal_test(bh);
3471 } 3808 set_buffer_journal_restore_dirty(bh);
3472 unlock_buffer(bh); 3809 }
3473 return 1; 3810 unlock_buffer(bh);
3474} 3811 return 1;
3475 3812}
3476static void flush_old_journal_lists(struct super_block *s) { 3813
3477 struct reiserfs_journal *journal = SB_JOURNAL (s); 3814static void flush_old_journal_lists(struct super_block *s)
3478 struct reiserfs_journal_list *jl; 3815{
3479 struct list_head *entry; 3816 struct reiserfs_journal *journal = SB_JOURNAL(s);
3480 time_t now = get_seconds(); 3817 struct reiserfs_journal_list *jl;
3481 3818 struct list_head *entry;
3482 while(!list_empty(&journal->j_journal_list)) { 3819 time_t now = get_seconds();
3483 entry = journal->j_journal_list.next; 3820
3484 jl = JOURNAL_LIST_ENTRY(entry); 3821 while (!list_empty(&journal->j_journal_list)) {
3485 /* this check should always be run, to send old lists to disk */ 3822 entry = journal->j_journal_list.next;
3486 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { 3823 jl = JOURNAL_LIST_ENTRY(entry);
3487 flush_used_journal_lists(s, jl); 3824 /* this check should always be run, to send old lists to disk */
3488 } else { 3825 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
3489 break; 3826 flush_used_journal_lists(s, jl);
3827 } else {
3828 break;
3829 }
3490 } 3830 }
3491 }
3492} 3831}
3493 3832
3494/* 3833/*
@@ -3501,374 +3840,390 @@ static void flush_old_journal_lists(struct super_block *s) {
3501** If the journal is aborted, we just clean up. Things like flushing 3840** If the journal is aborted, we just clean up. Things like flushing
3502** journal lists, etc just won't happen. 3841** journal lists, etc just won't happen.
3503*/ 3842*/
3504static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks, 3843static int do_journal_end(struct reiserfs_transaction_handle *th,
3505 int flags) { 3844 struct super_block *p_s_sb, unsigned long nblocks,
3506 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3845 int flags)
3507 struct reiserfs_journal_cnode *cn, *next, *jl_cn; 3846{
3508 struct reiserfs_journal_cnode *last_cn = NULL; 3847 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3509 struct reiserfs_journal_desc *desc ; 3848 struct reiserfs_journal_cnode *cn, *next, *jl_cn;
3510 struct reiserfs_journal_commit *commit ; 3849 struct reiserfs_journal_cnode *last_cn = NULL;
3511 struct buffer_head *c_bh ; /* commit bh */ 3850 struct reiserfs_journal_desc *desc;
3512 struct buffer_head *d_bh ; /* desc bh */ 3851 struct reiserfs_journal_commit *commit;
3513 int cur_write_start = 0 ; /* start index of current log write */ 3852 struct buffer_head *c_bh; /* commit bh */
3514 int old_start ; 3853 struct buffer_head *d_bh; /* desc bh */
3515 int i ; 3854 int cur_write_start = 0; /* start index of current log write */
3516 int flush = flags & FLUSH_ALL ; 3855 int old_start;
3517 int wait_on_commit = flags & WAIT ; 3856 int i;
3518 struct reiserfs_journal_list *jl, *temp_jl; 3857 int flush = flags & FLUSH_ALL;
3519 struct list_head *entry, *safe; 3858 int wait_on_commit = flags & WAIT;
3520 unsigned long jindex; 3859 struct reiserfs_journal_list *jl, *temp_jl;
3521 unsigned long commit_trans_id; 3860 struct list_head *entry, *safe;
3522 int trans_half; 3861 unsigned long jindex;
3523 3862 unsigned long commit_trans_id;
3524 BUG_ON (th->t_refcount > 1); 3863 int trans_half;
3525 BUG_ON (!th->t_trans_id); 3864
3526 3865 BUG_ON(th->t_refcount > 1);
3527 current->journal_info = th->t_handle_save; 3866 BUG_ON(!th->t_trans_id);
3528 reiserfs_check_lock_depth(p_s_sb, "journal end"); 3867
3529 if (journal->j_len == 0) { 3868 put_fs_excl();
3530 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 3869 current->journal_info = th->t_handle_save;
3531 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 3870 reiserfs_check_lock_depth(p_s_sb, "journal end");
3532 } 3871 if (journal->j_len == 0) {
3533 3872 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3534 lock_journal(p_s_sb) ; 3873 1);
3535 if (journal->j_next_full_flush) { 3874 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
3536 flags |= FLUSH_ALL ; 3875 }
3537 flush = 1 ;
3538 }
3539 if (journal->j_next_async_flush) {
3540 flags |= COMMIT_NOW | WAIT;
3541 wait_on_commit = 1;
3542 }
3543
3544 /* check_journal_end locks the journal, and unlocks if it does not return 1
3545 ** it tells us if we should continue with the journal_end, or just return
3546 */
3547 if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
3548 p_s_sb->s_dirt = 1;
3549 wake_queued_writers(p_s_sb);
3550 reiserfs_async_progress_wait(p_s_sb);
3551 goto out ;
3552 }
3553
3554 /* check_journal_end might set these, check again */
3555 if (journal->j_next_full_flush) {
3556 flush = 1 ;
3557 }
3558
3559 /*
3560 ** j must wait means we have to flush the log blocks, and the real blocks for
3561 ** this transaction
3562 */
3563 if (journal->j_must_wait > 0) {
3564 flush = 1 ;
3565 }
3566 3876
3877 lock_journal(p_s_sb);
3878 if (journal->j_next_full_flush) {
3879 flags |= FLUSH_ALL;
3880 flush = 1;
3881 }
3882 if (journal->j_next_async_flush) {
3883 flags |= COMMIT_NOW | WAIT;
3884 wait_on_commit = 1;
3885 }
3886
3887 /* check_journal_end locks the journal, and unlocks if it does not return 1
3888 ** it tells us if we should continue with the journal_end, or just return
3889 */
3890 if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
3891 p_s_sb->s_dirt = 1;
3892 wake_queued_writers(p_s_sb);
3893 reiserfs_async_progress_wait(p_s_sb);
3894 goto out;
3895 }
3896
3897 /* check_journal_end might set these, check again */
3898 if (journal->j_next_full_flush) {
3899 flush = 1;
3900 }
3901
3902 /*
3903 ** j must wait means we have to flush the log blocks, and the real blocks for
3904 ** this transaction
3905 */
3906 if (journal->j_must_wait > 0) {
3907 flush = 1;
3908 }
3567#ifdef REISERFS_PREALLOCATE 3909#ifdef REISERFS_PREALLOCATE
3568 /* quota ops might need to nest, setup the journal_info pointer for them */ 3910 /* quota ops might need to nest, setup the journal_info pointer for them */
3569 current->journal_info = th ; 3911 current->journal_info = th;
3570 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 3912 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
3571 * the transaction */ 3913 * the transaction */
3572 current->journal_info = th->t_handle_save ; 3914 current->journal_info = th->t_handle_save;
3573#endif 3915#endif
3574 3916
3575 /* setup description block */ 3917 /* setup description block */
3576 d_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start) ; 3918 d_bh =
3577 set_buffer_uptodate(d_bh); 3919 journal_getblk(p_s_sb,
3578 desc = (struct reiserfs_journal_desc *)(d_bh)->b_data ; 3920 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3579 memset(d_bh->b_data, 0, d_bh->b_size) ; 3921 journal->j_start);
3580 memcpy(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8) ; 3922 set_buffer_uptodate(d_bh);
3581 set_desc_trans_id(desc, journal->j_trans_id) ; 3923 desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
3582 3924 memset(d_bh->b_data, 0, d_bh->b_size);
3583 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ 3925 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
3584 c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 3926 set_desc_trans_id(desc, journal->j_trans_id);
3585 ((journal->j_start + journal->j_len + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 3927
3586 commit = (struct reiserfs_journal_commit *)c_bh->b_data ; 3928 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */
3587 memset(c_bh->b_data, 0, c_bh->b_size) ; 3929 c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3588 set_commit_trans_id(commit, journal->j_trans_id) ; 3930 ((journal->j_start + journal->j_len +
3589 set_buffer_uptodate(c_bh) ; 3931 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
3590 3932 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
3591 /* init this journal list */ 3933 memset(c_bh->b_data, 0, c_bh->b_size);
3592 jl = journal->j_current_jl; 3934 set_commit_trans_id(commit, journal->j_trans_id);
3593 3935 set_buffer_uptodate(c_bh);
3594 /* we lock the commit before doing anything because 3936
3595 * we want to make sure nobody tries to run flush_commit_list until 3937 /* init this journal list */
3596 * the new transaction is fully setup, and we've already flushed the 3938 jl = journal->j_current_jl;
3597 * ordered bh list 3939
3598 */ 3940 /* we lock the commit before doing anything because
3599 down(&jl->j_commit_lock); 3941 * we want to make sure nobody tries to run flush_commit_list until
3600 3942 * the new transaction is fully setup, and we've already flushed the
3601 /* save the transaction id in case we need to commit it later */ 3943 * ordered bh list
3602 commit_trans_id = jl->j_trans_id; 3944 */
3603 3945 down(&jl->j_commit_lock);
3604 atomic_set(&jl->j_older_commits_done, 0) ; 3946
3605 jl->j_trans_id = journal->j_trans_id ; 3947 /* save the transaction id in case we need to commit it later */
3606 jl->j_timestamp = journal->j_trans_start_time ; 3948 commit_trans_id = jl->j_trans_id;
3607 jl->j_commit_bh = c_bh ; 3949
3608 jl->j_start = journal->j_start ; 3950 atomic_set(&jl->j_older_commits_done, 0);
3609 jl->j_len = journal->j_len ; 3951 jl->j_trans_id = journal->j_trans_id;
3610 atomic_set(&jl->j_nonzerolen, journal->j_len) ; 3952 jl->j_timestamp = journal->j_trans_start_time;
3611 atomic_set(&jl->j_commit_left, journal->j_len + 2); 3953 jl->j_commit_bh = c_bh;
3612 jl->j_realblock = NULL ; 3954 jl->j_start = journal->j_start;
3613 3955 jl->j_len = journal->j_len;
3614 /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 3956 atomic_set(&jl->j_nonzerolen, journal->j_len);
3615 ** for each real block, add it to the journal list hash, 3957 atomic_set(&jl->j_commit_left, journal->j_len + 2);
3616 ** copy into real block index array in the commit or desc block 3958 jl->j_realblock = NULL;
3617 */ 3959
3618 trans_half = journal_trans_half(p_s_sb->s_blocksize); 3960 /* The ENTIRE FOR LOOP MUST not cause schedule to occur.
3619 for (i = 0, cn = journal->j_first ; cn ; cn = cn->next, i++) { 3961 ** for each real block, add it to the journal list hash,
3620 if (buffer_journaled (cn->bh)) { 3962 ** copy into real block index array in the commit or desc block
3621 jl_cn = get_cnode(p_s_sb) ; 3963 */
3622 if (!jl_cn) { 3964 trans_half = journal_trans_half(p_s_sb->s_blocksize);
3623 reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ; 3965 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
3624 } 3966 if (buffer_journaled(cn->bh)) {
3625 if (i == 0) { 3967 jl_cn = get_cnode(p_s_sb);
3626 jl->j_realblock = jl_cn ; 3968 if (!jl_cn) {
3627 } 3969 reiserfs_panic(p_s_sb,
3628 jl_cn->prev = last_cn ; 3970 "journal-1676, get_cnode returned NULL\n");
3629 jl_cn->next = NULL ; 3971 }
3630 if (last_cn) { 3972 if (i == 0) {
3631 last_cn->next = jl_cn ; 3973 jl->j_realblock = jl_cn;
3632 } 3974 }
3633 last_cn = jl_cn ; 3975 jl_cn->prev = last_cn;
3634 /* make sure the block we are trying to log is not a block 3976 jl_cn->next = NULL;
3635 of journal or reserved area */ 3977 if (last_cn) {
3636 3978 last_cn->next = jl_cn;
3637 if (is_block_in_log_or_reserved_area(p_s_sb, cn->bh->b_blocknr)) { 3979 }
3638 reiserfs_panic(p_s_sb, "journal-2332: Trying to log block %lu, which is a log block\n", cn->bh->b_blocknr) ; 3980 last_cn = jl_cn;
3639 } 3981 /* make sure the block we are trying to log is not a block
3640 jl_cn->blocknr = cn->bh->b_blocknr ; 3982 of journal or reserved area */
3641 jl_cn->state = 0 ; 3983
3642 jl_cn->sb = p_s_sb; 3984 if (is_block_in_log_or_reserved_area
3643 jl_cn->bh = cn->bh ; 3985 (p_s_sb, cn->bh->b_blocknr)) {
3644 jl_cn->jlist = jl; 3986 reiserfs_panic(p_s_sb,
3645 insert_journal_hash(journal->j_list_hash_table, jl_cn) ; 3987 "journal-2332: Trying to log block %lu, which is a log block\n",
3646 if (i < trans_half) { 3988 cn->bh->b_blocknr);
3647 desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ; 3989 }
3648 } else { 3990 jl_cn->blocknr = cn->bh->b_blocknr;
3649 commit->j_realblock[i - trans_half] = cpu_to_le32(cn->bh->b_blocknr) ; 3991 jl_cn->state = 0;
3650 } 3992 jl_cn->sb = p_s_sb;
3651 } else { 3993 jl_cn->bh = cn->bh;
3652 i-- ; 3994 jl_cn->jlist = jl;
3653 } 3995 insert_journal_hash(journal->j_list_hash_table, jl_cn);
3654 } 3996 if (i < trans_half) {
3655 set_desc_trans_len(desc, journal->j_len) ; 3997 desc->j_realblock[i] =
3656 set_desc_mount_id(desc, journal->j_mount_id) ; 3998 cpu_to_le32(cn->bh->b_blocknr);
3657 set_desc_trans_id(desc, journal->j_trans_id) ; 3999 } else {
3658 set_commit_trans_len(commit, journal->j_len); 4000 commit->j_realblock[i - trans_half] =
3659 4001 cpu_to_le32(cn->bh->b_blocknr);
3660 /* special check in case all buffers in the journal were marked for not logging */ 4002 }
3661 if (journal->j_len == 0) { 4003 } else {
3662 BUG(); 4004 i--;
3663 } 4005 }
3664 4006 }
3665 /* we're about to dirty all the log blocks, mark the description block 4007 set_desc_trans_len(desc, journal->j_len);
3666 * dirty now too. Don't mark the commit block dirty until all the 4008 set_desc_mount_id(desc, journal->j_mount_id);
3667 * others are on disk 4009 set_desc_trans_id(desc, journal->j_trans_id);
3668 */ 4010 set_commit_trans_len(commit, journal->j_len);
3669 mark_buffer_dirty(d_bh); 4011
3670 4012 /* special check in case all buffers in the journal were marked for not logging */
3671 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 4013 if (journal->j_len == 0) {
3672 cur_write_start = journal->j_start ; 4014 BUG();
3673 cn = journal->j_first ; 4015 }
3674 jindex = 1 ; /* start at one so we don't get the desc again */ 4016
3675 while(cn) { 4017 /* we're about to dirty all the log blocks, mark the description block
3676 clear_buffer_journal_new (cn->bh); 4018 * dirty now too. Don't mark the commit block dirty until all the
3677 /* copy all the real blocks into log area. dirty log blocks */ 4019 * others are on disk
3678 if (buffer_journaled (cn->bh)) { 4020 */
3679 struct buffer_head *tmp_bh ; 4021 mark_buffer_dirty(d_bh);
3680 char *addr; 4022
3681 struct page *page; 4023 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
3682 tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 4024 cur_write_start = journal->j_start;
3683 ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 4025 cn = journal->j_first;
3684 set_buffer_uptodate(tmp_bh); 4026 jindex = 1; /* start at one so we don't get the desc again */
3685 page = cn->bh->b_page; 4027 while (cn) {
3686 addr = kmap(page); 4028 clear_buffer_journal_new(cn->bh);
3687 memcpy(tmp_bh->b_data, addr + offset_in_page(cn->bh->b_data), 4029 /* copy all the real blocks into log area. dirty log blocks */
3688 cn->bh->b_size); 4030 if (buffer_journaled(cn->bh)) {
3689 kunmap(page); 4031 struct buffer_head *tmp_bh;
3690 mark_buffer_dirty(tmp_bh); 4032 char *addr;
3691 jindex++ ; 4033 struct page *page;
3692 set_buffer_journal_dirty (cn->bh); 4034 tmp_bh =
3693 clear_buffer_journaled (cn->bh); 4035 journal_getblk(p_s_sb,
3694 } else { 4036 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3695 /* JDirty cleared sometime during transaction. don't log this one */ 4037 ((cur_write_start +
3696 reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!") ; 4038 jindex) %
3697 brelse(cn->bh) ; 4039 SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
3698 } 4040 set_buffer_uptodate(tmp_bh);
3699 next = cn->next ; 4041 page = cn->bh->b_page;
3700 free_cnode(p_s_sb, cn) ; 4042 addr = kmap(page);
3701 cn = next ; 4043 memcpy(tmp_bh->b_data,
3702 cond_resched(); 4044 addr + offset_in_page(cn->bh->b_data),
3703 } 4045 cn->bh->b_size);
3704 4046 kunmap(page);
3705 /* we are done with both the c_bh and d_bh, but 4047 mark_buffer_dirty(tmp_bh);
3706 ** c_bh must be written after all other commit blocks, 4048 jindex++;
3707 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 4049 set_buffer_journal_dirty(cn->bh);
3708 */ 4050 clear_buffer_journaled(cn->bh);
3709 4051 } else {
3710 journal->j_current_jl = alloc_journal_list(p_s_sb); 4052 /* JDirty cleared sometime during transaction. don't log this one */
3711 4053 reiserfs_warning(p_s_sb,
3712 /* now it is safe to insert this transaction on the main list */ 4054 "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!");
3713 list_add_tail(&jl->j_list, &journal->j_journal_list); 4055 brelse(cn->bh);
3714 list_add_tail(&jl->j_working_list, &journal->j_working_list); 4056 }
3715 journal->j_num_work_lists++; 4057 next = cn->next;
3716 4058 free_cnode(p_s_sb, cn);
3717 /* reset journal values for the next transaction */ 4059 cn = next;
3718 old_start = journal->j_start ; 4060 cond_resched();
3719 journal->j_start = (journal->j_start + journal->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); 4061 }
3720 atomic_set(&(journal->j_wcount), 0) ; 4062
3721 journal->j_bcount = 0 ; 4063 /* we are done with both the c_bh and d_bh, but
3722 journal->j_last = NULL ; 4064 ** c_bh must be written after all other commit blocks,
3723 journal->j_first = NULL ; 4065 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
3724 journal->j_len = 0 ; 4066 */
3725 journal->j_trans_start_time = 0 ; 4067
3726 journal->j_trans_id++ ; 4068 journal->j_current_jl = alloc_journal_list(p_s_sb);
3727 journal->j_current_jl->j_trans_id = journal->j_trans_id; 4069
3728 journal->j_must_wait = 0 ; 4070 /* now it is safe to insert this transaction on the main list */
3729 journal->j_len_alloc = 0 ; 4071 list_add_tail(&jl->j_list, &journal->j_journal_list);
3730 journal->j_next_full_flush = 0 ; 4072 list_add_tail(&jl->j_working_list, &journal->j_working_list);
3731 journal->j_next_async_flush = 0 ; 4073 journal->j_num_work_lists++;
3732 init_journal_hash(p_s_sb) ; 4074
3733 4075 /* reset journal values for the next transaction */
3734 // make sure reiserfs_add_jh sees the new current_jl before we 4076 old_start = journal->j_start;
3735 // write out the tails 4077 journal->j_start =
3736 smp_mb(); 4078 (journal->j_start + journal->j_len +
3737 4079 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
3738 /* tail conversion targets have to hit the disk before we end the 4080 atomic_set(&(journal->j_wcount), 0);
3739 * transaction. Otherwise a later transaction might repack the tail 4081 journal->j_bcount = 0;
3740 * before this transaction commits, leaving the data block unflushed and 4082 journal->j_last = NULL;
3741 * clean, if we crash before the later transaction commits, the data block 4083 journal->j_first = NULL;
3742 * is lost. 4084 journal->j_len = 0;
3743 */ 4085 journal->j_trans_start_time = 0;
3744 if (!list_empty(&jl->j_tail_bh_list)) { 4086 journal->j_trans_id++;
3745 unlock_kernel(); 4087 journal->j_current_jl->j_trans_id = journal->j_trans_id;
3746 write_ordered_buffers(&journal->j_dirty_buffers_lock, 4088 journal->j_must_wait = 0;
3747 journal, jl, &jl->j_tail_bh_list); 4089 journal->j_len_alloc = 0;
3748 lock_kernel(); 4090 journal->j_next_full_flush = 0;
3749 } 4091 journal->j_next_async_flush = 0;
3750 if (!list_empty(&jl->j_tail_bh_list)) 4092 init_journal_hash(p_s_sb);
3751 BUG(); 4093
3752 up(&jl->j_commit_lock); 4094 // make sure reiserfs_add_jh sees the new current_jl before we
3753 4095 // write out the tails
3754 /* honor the flush wishes from the caller, simple commits can 4096 smp_mb();
3755 ** be done outside the journal lock, they are done below 4097
3756 ** 4098 /* tail conversion targets have to hit the disk before we end the
3757 ** if we don't flush the commit list right now, we put it into 4099 * transaction. Otherwise a later transaction might repack the tail
3758 ** the work queue so the people waiting on the async progress work 4100 * before this transaction commits, leaving the data block unflushed and
3759 ** queue don't wait for this proc to flush journal lists and such. 4101 * clean, if we crash before the later transaction commits, the data block
3760 */ 4102 * is lost.
3761 if (flush) { 4103 */
3762 flush_commit_list(p_s_sb, jl, 1) ; 4104 if (!list_empty(&jl->j_tail_bh_list)) {
3763 flush_journal_list(p_s_sb, jl, 1) ; 4105 unlock_kernel();
3764 } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 4106 write_ordered_buffers(&journal->j_dirty_buffers_lock,
3765 queue_delayed_work(commit_wq, &journal->j_work, HZ/10); 4107 journal, jl, &jl->j_tail_bh_list);
3766 4108 lock_kernel();
3767 4109 }
3768 /* if the next transaction has any chance of wrapping, flush 4110 if (!list_empty(&jl->j_tail_bh_list))
3769 ** transactions that might get overwritten. If any journal lists are very 4111 BUG();
3770 ** old flush them as well. 4112 up(&jl->j_commit_lock);
3771 */ 4113
3772first_jl: 4114 /* honor the flush wishes from the caller, simple commits can
3773 list_for_each_safe(entry, safe, &journal->j_journal_list) { 4115 ** be done outside the journal lock, they are done below
3774 temp_jl = JOURNAL_LIST_ENTRY(entry); 4116 **
3775 if (journal->j_start <= temp_jl->j_start) { 4117 ** if we don't flush the commit list right now, we put it into
3776 if ((journal->j_start + journal->j_trans_max + 1) >= 4118 ** the work queue so the people waiting on the async progress work
3777 temp_jl->j_start) 4119 ** queue don't wait for this proc to flush journal lists and such.
3778 { 4120 */
3779 flush_used_journal_lists(p_s_sb, temp_jl); 4121 if (flush) {
3780 goto first_jl; 4122 flush_commit_list(p_s_sb, jl, 1);
3781 } else if ((journal->j_start + 4123 flush_journal_list(p_s_sb, jl, 1);
3782 journal->j_trans_max + 1) < 4124 } else if (!(jl->j_state & LIST_COMMIT_PENDING))
3783 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) 4125 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
3784 { 4126
3785 /* if we don't cross into the next transaction and we don't 4127 /* if the next transaction has any chance of wrapping, flush
3786 * wrap, there is no way we can overlap any later transactions 4128 ** transactions that might get overwritten. If any journal lists are very
3787 * break now 4129 ** old flush them as well.
3788 */ 4130 */
3789 break; 4131 first_jl:
3790 } 4132 list_for_each_safe(entry, safe, &journal->j_journal_list) {
3791 } else if ((journal->j_start + 4133 temp_jl = JOURNAL_LIST_ENTRY(entry);
3792 journal->j_trans_max + 1) > 4134 if (journal->j_start <= temp_jl->j_start) {
3793 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) 4135 if ((journal->j_start + journal->j_trans_max + 1) >=
3794 { 4136 temp_jl->j_start) {
3795 if (((journal->j_start + journal->j_trans_max + 1) % 4137 flush_used_journal_lists(p_s_sb, temp_jl);
3796 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start) 4138 goto first_jl;
3797 { 4139 } else if ((journal->j_start +
3798 flush_used_journal_lists(p_s_sb, temp_jl); 4140 journal->j_trans_max + 1) <
3799 goto first_jl; 4141 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3800 } else { 4142 /* if we don't cross into the next transaction and we don't
3801 /* we don't overlap anything from out start to the end of the 4143 * wrap, there is no way we can overlap any later transactions
3802 * log, and our wrapped portion doesn't overlap anything at 4144 * break now
3803 * the start of the log. We can break 4145 */
3804 */ 4146 break;
3805 break; 4147 }
3806 } 4148 } else if ((journal->j_start +
3807 } 4149 journal->j_trans_max + 1) >
3808 } 4150 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3809 flush_old_journal_lists(p_s_sb); 4151 if (((journal->j_start + journal->j_trans_max + 1) %
3810 4152 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
3811 journal->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, journal->j_current_jl) ; 4153 temp_jl->j_start) {
3812 4154 flush_used_journal_lists(p_s_sb, temp_jl);
3813 if (!(journal->j_current_jl->j_list_bitmap)) { 4155 goto first_jl;
3814 reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ; 4156 } else {
3815 } 4157 /* we don't overlap anything from out start to the end of the
3816 4158 * log, and our wrapped portion doesn't overlap anything at
3817 atomic_set(&(journal->j_jlock), 0) ; 4159 * the start of the log. We can break
3818 unlock_journal(p_s_sb) ; 4160 */
3819 /* wake up any body waiting to join. */ 4161 break;
3820 clear_bit(J_WRITERS_QUEUED, &journal->j_state); 4162 }
3821 wake_up(&(journal->j_join_wait)) ; 4163 }
3822 4164 }
3823 if (!flush && wait_on_commit && 4165 flush_old_journal_lists(p_s_sb);
3824 journal_list_still_alive(p_s_sb, commit_trans_id)) { 4166
3825 flush_commit_list(p_s_sb, jl, 1) ; 4167 journal->j_current_jl->j_list_bitmap =
3826 } 4168 get_list_bitmap(p_s_sb, journal->j_current_jl);
3827out: 4169
3828 reiserfs_check_lock_depth(p_s_sb, "journal end2"); 4170 if (!(journal->j_current_jl->j_list_bitmap)) {
3829 4171 reiserfs_panic(p_s_sb,
3830 memset (th, 0, sizeof (*th)); 4172 "journal-1996: do_journal_end, could not get a list bitmap\n");
3831 /* Re-set th->t_super, so we can properly keep track of how many 4173 }
3832 * persistent transactions there are. We need to do this so if this 4174
3833 * call is part of a failed restart_transaction, we can free it later */ 4175 atomic_set(&(journal->j_jlock), 0);
3834 th->t_super = p_s_sb; 4176 unlock_journal(p_s_sb);
3835 4177 /* wake up any body waiting to join. */
3836 return journal->j_errno; 4178 clear_bit(J_WRITERS_QUEUED, &journal->j_state);
3837} 4179 wake_up(&(journal->j_join_wait));
3838 4180
3839static void 4181 if (!flush && wait_on_commit &&
3840__reiserfs_journal_abort_hard (struct super_block *sb) 4182 journal_list_still_alive(p_s_sb, commit_trans_id)) {
3841{ 4183 flush_commit_list(p_s_sb, jl, 1);
3842 struct reiserfs_journal *journal = SB_JOURNAL (sb); 4184 }
3843 if (test_bit (J_ABORTED, &journal->j_state)) 4185 out:
3844 return; 4186 reiserfs_check_lock_depth(p_s_sb, "journal end2");
3845 4187
3846 printk (KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", 4188 memset(th, 0, sizeof(*th));
3847 reiserfs_bdevname (sb)); 4189 /* Re-set th->t_super, so we can properly keep track of how many
3848 4190 * persistent transactions there are. We need to do this so if this
3849 sb->s_flags |= MS_RDONLY; 4191 * call is part of a failed restart_transaction, we can free it later */
3850 set_bit (J_ABORTED, &journal->j_state); 4192 th->t_super = p_s_sb;
4193
4194 return journal->j_errno;
4195}
4196
4197static void __reiserfs_journal_abort_hard(struct super_block *sb)
4198{
4199 struct reiserfs_journal *journal = SB_JOURNAL(sb);
4200 if (test_bit(J_ABORTED, &journal->j_state))
4201 return;
4202
4203 printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
4204 reiserfs_bdevname(sb));
4205
4206 sb->s_flags |= MS_RDONLY;
4207 set_bit(J_ABORTED, &journal->j_state);
3851 4208
3852#ifdef CONFIG_REISERFS_CHECK 4209#ifdef CONFIG_REISERFS_CHECK
3853 dump_stack(); 4210 dump_stack();
3854#endif 4211#endif
3855} 4212}
3856 4213
3857static void 4214static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
3858__reiserfs_journal_abort_soft (struct super_block *sb, int errno)
3859{ 4215{
3860 struct reiserfs_journal *journal = SB_JOURNAL (sb); 4216 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3861 if (test_bit (J_ABORTED, &journal->j_state)) 4217 if (test_bit(J_ABORTED, &journal->j_state))
3862 return; 4218 return;
3863 4219
3864 if (!journal->j_errno) 4220 if (!journal->j_errno)
3865 journal->j_errno = errno; 4221 journal->j_errno = errno;
3866 4222
3867 __reiserfs_journal_abort_hard (sb); 4223 __reiserfs_journal_abort_hard(sb);
3868} 4224}
3869 4225
3870void 4226void reiserfs_journal_abort(struct super_block *sb, int errno)
3871reiserfs_journal_abort (struct super_block *sb, int errno)
3872{ 4227{
3873 return __reiserfs_journal_abort_soft (sb, errno); 4228 return __reiserfs_journal_abort_soft(sb, errno);
3874} 4229}
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 2406608fc5cd..2533c1f64aba 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -21,648 +21,709 @@
21 leaf_paste_entries 21 leaf_paste_entries
22 */ 22 */
23 23
24
25/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ 24/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */
26static void leaf_copy_dir_entries (struct buffer_info * dest_bi, struct buffer_head * source, 25static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
27 int last_first, int item_num, int from, int copy_count) 26 struct buffer_head *source, int last_first,
27 int item_num, int from, int copy_count)
28{ 28{
29 struct buffer_head * dest = dest_bi->bi_bh; 29 struct buffer_head *dest = dest_bi->bi_bh;
30 int item_num_in_dest; /* either the number of target item, 30 int item_num_in_dest; /* either the number of target item,
31 or if we must create a new item, 31 or if we must create a new item,
32 the number of the item we will 32 the number of the item we will
33 create it next to */ 33 create it next to */
34 struct item_head * ih; 34 struct item_head *ih;
35 struct reiserfs_de_head * deh; 35 struct reiserfs_de_head *deh;
36 int copy_records_len; /* length of all records in item to be copied */ 36 int copy_records_len; /* length of all records in item to be copied */
37 char * records; 37 char *records;
38 38
39 ih = B_N_PITEM_HEAD (source, item_num); 39 ih = B_N_PITEM_HEAD(source, item_num);
40 40
41 RFALSE( !is_direntry_le_ih (ih), "vs-10000: item must be directory item"); 41 RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item");
42 42
43 /* length of all record to be copied and first byte of the last of them */ 43 /* length of all record to be copied and first byte of the last of them */
44 deh = B_I_DEH (source, ih); 44 deh = B_I_DEH(source, ih);
45 if (copy_count) { 45 if (copy_count) {
46 copy_records_len = (from ? deh_location( &(deh[from - 1]) ) : 46 copy_records_len = (from ? deh_location(&(deh[from - 1])) :
47 ih_item_len(ih)) - deh_location( &(deh[from + copy_count - 1])); 47 ih_item_len(ih)) -
48 records = source->b_data + ih_location(ih) + 48 deh_location(&(deh[from + copy_count - 1]));
49 deh_location( &(deh[from + copy_count - 1])); 49 records =
50 } else { 50 source->b_data + ih_location(ih) +
51 copy_records_len = 0; 51 deh_location(&(deh[from + copy_count - 1]));
52 records = NULL; 52 } else {
53 } 53 copy_records_len = 0;
54 54 records = NULL;
55 /* when copy last to first, dest buffer can contain 0 items */ 55 }
56 item_num_in_dest = (last_first == LAST_TO_FIRST) ? (( B_NR_ITEMS(dest) ) ? 0 : -1) : (B_NR_ITEMS(dest) - 1); 56
57 57 /* when copy last to first, dest buffer can contain 0 items */
58 /* if there are no items in dest or the first/last item in dest is not item of the same directory */ 58 item_num_in_dest =
59 if ( (item_num_in_dest == - 1) || 59 (last_first ==
60 (last_first == FIRST_TO_LAST && le_ih_k_offset (ih) == DOT_OFFSET) || 60 LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest)
61 (last_first == LAST_TO_FIRST && comp_short_le_keys/*COMP_SHORT_KEYS*/ (&ih->ih_key, B_N_PKEY (dest, item_num_in_dest)))) { 61 - 1);
62 /* create new item in dest */ 62
63 struct item_head new_ih; 63 /* if there are no items in dest or the first/last item in dest is not item of the same directory */
64 64 if ((item_num_in_dest == -1) ||
65 /* form item header */ 65 (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) ||
66 memcpy (&new_ih.ih_key, &ih->ih_key, KEY_SIZE); 66 (last_first == LAST_TO_FIRST
67 put_ih_version( &new_ih, KEY_FORMAT_3_5 ); 67 && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key,
68 /* calculate item len */ 68 B_N_PKEY(dest,
69 put_ih_item_len( &new_ih, DEH_SIZE * copy_count + copy_records_len ); 69 item_num_in_dest))))
70 put_ih_entry_count( &new_ih, 0 ); 70 {
71 71 /* create new item in dest */
72 if (last_first == LAST_TO_FIRST) { 72 struct item_head new_ih;
73 /* form key by the following way */ 73
74 if (from < I_ENTRY_COUNT(ih)) { 74 /* form item header */
75 set_le_ih_k_offset( &new_ih, deh_offset( &(deh[from]) ) ); 75 memcpy(&new_ih.ih_key, &ih->ih_key, KEY_SIZE);
76 /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE);*/ 76 put_ih_version(&new_ih, KEY_FORMAT_3_5);
77 } else { 77 /* calculate item len */
78 /* no entries will be copied to this item in this function */ 78 put_ih_item_len(&new_ih,
79 set_le_ih_k_offset (&new_ih, U32_MAX); 79 DEH_SIZE * copy_count + copy_records_len);
80 /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ 80 put_ih_entry_count(&new_ih, 0);
81 } 81
82 set_le_key_k_type (KEY_FORMAT_3_5, &(new_ih.ih_key), TYPE_DIRENTRY); 82 if (last_first == LAST_TO_FIRST) {
83 /* form key by the following way */
84 if (from < I_ENTRY_COUNT(ih)) {
85 set_le_ih_k_offset(&new_ih,
86 deh_offset(&(deh[from])));
87 /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */
88 } else {
89 /* no entries will be copied to this item in this function */
90 set_le_ih_k_offset(&new_ih, U32_MAX);
91 /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */
92 }
93 set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key),
94 TYPE_DIRENTRY);
95 }
96
97 /* insert item into dest buffer */
98 leaf_insert_into_buf(dest_bi,
99 (last_first ==
100 LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest),
101 &new_ih, NULL, 0);
102 } else {
103 /* prepare space for entries */
104 leaf_paste_in_buffer(dest_bi,
105 (last_first ==
106 FIRST_TO_LAST) ? (B_NR_ITEMS(dest) -
107 1) : 0, MAX_US_INT,
108 DEH_SIZE * copy_count + copy_records_len,
109 records, 0);
83 } 110 }
84
85 /* insert item into dest buffer */
86 leaf_insert_into_buf (dest_bi, (last_first == LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), &new_ih, NULL, 0);
87 } else {
88 /* prepare space for entries */
89 leaf_paste_in_buffer (dest_bi, (last_first==FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0, MAX_US_INT,
90 DEH_SIZE * copy_count + copy_records_len, records, 0
91 );
92 }
93
94 item_num_in_dest = (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest)-1) : 0;
95
96 leaf_paste_entries (dest_bi->bi_bh, item_num_in_dest,
97 (last_first == FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD (dest, item_num_in_dest)) : 0,
98 copy_count, deh + from, records,
99 DEH_SIZE * copy_count + copy_records_len
100 );
101}
102 111
112 item_num_in_dest =
113 (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0;
114
115 leaf_paste_entries(dest_bi->bi_bh, item_num_in_dest,
116 (last_first ==
117 FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest,
118 item_num_in_dest))
119 : 0, copy_count, deh + from, records,
120 DEH_SIZE * copy_count + copy_records_len);
121}
103 122
104/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or 123/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or
105 part of it or nothing (see the return 0 below) from SOURCE to the end 124 part of it or nothing (see the return 0 below) from SOURCE to the end
106 (if last_first) or beginning (!last_first) of the DEST */ 125 (if last_first) or beginning (!last_first) of the DEST */
107/* returns 1 if anything was copied, else 0 */ 126/* returns 1 if anything was copied, else 0 */
108static int leaf_copy_boundary_item (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, 127static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
109 int bytes_or_entries) 128 struct buffer_head *src, int last_first,
129 int bytes_or_entries)
110{ 130{
111 struct buffer_head * dest = dest_bi->bi_bh; 131 struct buffer_head *dest = dest_bi->bi_bh;
112 int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ 132 int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */
113 struct item_head * ih; 133 struct item_head *ih;
114 struct item_head * dih; 134 struct item_head *dih;
115 135
116 dest_nr_item = B_NR_ITEMS(dest); 136 dest_nr_item = B_NR_ITEMS(dest);
117 137
118 if ( last_first == FIRST_TO_LAST ) { 138 if (last_first == FIRST_TO_LAST) {
119 /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects 139 /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects
120 or of different types ) then there is no need to treat this item differently from the other items 140 or of different types ) then there is no need to treat this item differently from the other items
121 that we copy, so we return */ 141 that we copy, so we return */
122 ih = B_N_PITEM_HEAD (src, 0); 142 ih = B_N_PITEM_HEAD(src, 0);
123 dih = B_N_PITEM_HEAD (dest, dest_nr_item - 1); 143 dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1);
124 if (!dest_nr_item || (!op_is_left_mergeable (&(ih->ih_key), src->b_size))) 144 if (!dest_nr_item
125 /* there is nothing to merge */ 145 || (!op_is_left_mergeable(&(ih->ih_key), src->b_size)))
126 return 0; 146 /* there is nothing to merge */
127 147 return 0;
128 RFALSE( ! ih_item_len(ih), "vs-10010: item can not have empty length"); 148
129 149 RFALSE(!ih_item_len(ih),
130 if ( is_direntry_le_ih (ih) ) { 150 "vs-10010: item can not have empty length");
131 if ( bytes_or_entries == -1 ) 151
132 /* copy all entries to dest */ 152 if (is_direntry_le_ih(ih)) {
133 bytes_or_entries = ih_entry_count(ih); 153 if (bytes_or_entries == -1)
134 leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, 0, 0, bytes_or_entries); 154 /* copy all entries to dest */
135 return 1; 155 bytes_or_entries = ih_entry_count(ih);
136 } 156 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 0, 0,
137 157 bytes_or_entries);
138 /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST 158 return 1;
139 part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header 159 }
140 */ 160
141 if ( bytes_or_entries == -1 ) 161 /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST
142 bytes_or_entries = ih_item_len(ih); 162 part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header
163 */
164 if (bytes_or_entries == -1)
165 bytes_or_entries = ih_item_len(ih);
143 166
144#ifdef CONFIG_REISERFS_CHECK 167#ifdef CONFIG_REISERFS_CHECK
145 else { 168 else {
146 if (bytes_or_entries == ih_item_len(ih) && is_indirect_le_ih(ih)) 169 if (bytes_or_entries == ih_item_len(ih)
147 if (get_ih_free_space (ih)) 170 && is_indirect_le_ih(ih))
148 reiserfs_panic (NULL, "vs-10020: leaf_copy_boundary_item: " 171 if (get_ih_free_space(ih))
149 "last unformatted node must be filled entirely (%h)", 172 reiserfs_panic(NULL,
150 ih); 173 "vs-10020: leaf_copy_boundary_item: "
151 } 174 "last unformatted node must be filled entirely (%h)",
175 ih);
176 }
152#endif 177#endif
153
154 /* merge first item (or its part) of src buffer with the last
155 item of dest buffer. Both are of the same file */
156 leaf_paste_in_buffer (dest_bi,
157 dest_nr_item - 1, ih_item_len(dih), bytes_or_entries, B_I_PITEM(src,ih), 0
158 );
159
160 if (is_indirect_le_ih (dih)) {
161 RFALSE( get_ih_free_space (dih),
162 "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space",
163 ih);
164 if (bytes_or_entries == ih_item_len(ih))
165 set_ih_free_space (dih, get_ih_free_space (ih));
166 }
167
168 return 1;
169 }
170
171
172 /* copy boundary item to right (last_first == LAST_TO_FIRST) */
173
174 /* ( DEST is empty or last item of SOURCE and first item of DEST
175 are the items of different object or of different types )
176 */
177 src_nr_item = B_NR_ITEMS (src);
178 ih = B_N_PITEM_HEAD (src, src_nr_item - 1);
179 dih = B_N_PITEM_HEAD (dest, 0);
180
181 if (!dest_nr_item || !op_is_left_mergeable (&(dih->ih_key), src->b_size))
182 return 0;
183
184 if ( is_direntry_le_ih (ih)) {
185 if ( bytes_or_entries == -1 )
186 /* bytes_or_entries = entries number in last item body of SOURCE */
187 bytes_or_entries = ih_entry_count(ih);
188
189 leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, src_nr_item - 1, ih_entry_count(ih) - bytes_or_entries, bytes_or_entries);
190 return 1;
191 }
192
193 /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST;
194 part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST;
195 don't create new item header
196 */
197
198 RFALSE( is_indirect_le_ih(ih) && get_ih_free_space (ih),
199 "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)",
200 ih);
201
202 if ( bytes_or_entries == -1 ) {
203 /* bytes_or_entries = length of last item body of SOURCE */
204 bytes_or_entries = ih_item_len(ih);
205
206 RFALSE( le_ih_k_offset (dih) !=
207 le_ih_k_offset (ih) + op_bytes_number (ih, src->b_size),
208 "vs-10050: items %h and %h do not match", ih, dih);
209
210 /* change first item key of the DEST */
211 set_le_ih_k_offset (dih, le_ih_k_offset (ih));
212
213 /* item becomes non-mergeable */
214 /* or mergeable if left item was */
215 set_le_ih_k_type (dih, le_ih_k_type (ih));
216 } else {
217 /* merge to right only part of item */
218 RFALSE( ih_item_len(ih) <= bytes_or_entries,
219 "vs-10060: no so much bytes %lu (needed %lu)",
220 ( unsigned long )ih_item_len(ih), ( unsigned long )bytes_or_entries);
221
222 /* change first item key of the DEST */
223 if ( is_direct_le_ih (dih) ) {
224 RFALSE( le_ih_k_offset (dih) <= (unsigned long)bytes_or_entries,
225 "vs-10070: dih %h, bytes_or_entries(%d)", dih, bytes_or_entries);
226 set_le_ih_k_offset (dih, le_ih_k_offset (dih) - bytes_or_entries);
227 } else {
228 RFALSE( le_ih_k_offset (dih) <=
229 (bytes_or_entries / UNFM_P_SIZE) * dest->b_size,
230 "vs-10080: dih %h, bytes_or_entries(%d)",
231 dih, (bytes_or_entries/UNFM_P_SIZE)*dest->b_size);
232 set_le_ih_k_offset (dih, le_ih_k_offset (dih) - ((bytes_or_entries / UNFM_P_SIZE) * dest->b_size));
233 }
234 }
235
236 leaf_paste_in_buffer (dest_bi, 0, 0, bytes_or_entries, B_I_PITEM(src,ih) + ih_item_len(ih) - bytes_or_entries, 0);
237 return 1;
238}
239 178
179 /* merge first item (or its part) of src buffer with the last
180 item of dest buffer. Both are of the same file */
181 leaf_paste_in_buffer(dest_bi,
182 dest_nr_item - 1, ih_item_len(dih),
183 bytes_or_entries, B_I_PITEM(src, ih), 0);
184
185 if (is_indirect_le_ih(dih)) {
186 RFALSE(get_ih_free_space(dih),
187 "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space",
188 ih);
189 if (bytes_or_entries == ih_item_len(ih))
190 set_ih_free_space(dih, get_ih_free_space(ih));
191 }
192
193 return 1;
194 }
195
196 /* copy boundary item to right (last_first == LAST_TO_FIRST) */
197
198 /* ( DEST is empty or last item of SOURCE and first item of DEST
199 are the items of different object or of different types )
200 */
201 src_nr_item = B_NR_ITEMS(src);
202 ih = B_N_PITEM_HEAD(src, src_nr_item - 1);
203 dih = B_N_PITEM_HEAD(dest, 0);
204
205 if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size))
206 return 0;
207
208 if (is_direntry_le_ih(ih)) {
209 if (bytes_or_entries == -1)
210 /* bytes_or_entries = entries number in last item body of SOURCE */
211 bytes_or_entries = ih_entry_count(ih);
212
213 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
214 src_nr_item - 1,
215 ih_entry_count(ih) - bytes_or_entries,
216 bytes_or_entries);
217 return 1;
218 }
219
220 /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST;
221 part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST;
222 don't create new item header
223 */
224
225 RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih),
226 "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)",
227 ih);
228
229 if (bytes_or_entries == -1) {
230 /* bytes_or_entries = length of last item body of SOURCE */
231 bytes_or_entries = ih_item_len(ih);
232
233 RFALSE(le_ih_k_offset(dih) !=
234 le_ih_k_offset(ih) + op_bytes_number(ih, src->b_size),
235 "vs-10050: items %h and %h do not match", ih, dih);
236
237 /* change first item key of the DEST */
238 set_le_ih_k_offset(dih, le_ih_k_offset(ih));
239
240 /* item becomes non-mergeable */
241 /* or mergeable if left item was */
242 set_le_ih_k_type(dih, le_ih_k_type(ih));
243 } else {
244 /* merge to right only part of item */
245 RFALSE(ih_item_len(ih) <= bytes_or_entries,
246 "vs-10060: no so much bytes %lu (needed %lu)",
247 (unsigned long)ih_item_len(ih),
248 (unsigned long)bytes_or_entries);
249
250 /* change first item key of the DEST */
251 if (is_direct_le_ih(dih)) {
252 RFALSE(le_ih_k_offset(dih) <=
253 (unsigned long)bytes_or_entries,
254 "vs-10070: dih %h, bytes_or_entries(%d)", dih,
255 bytes_or_entries);
256 set_le_ih_k_offset(dih,
257 le_ih_k_offset(dih) -
258 bytes_or_entries);
259 } else {
260 RFALSE(le_ih_k_offset(dih) <=
261 (bytes_or_entries / UNFM_P_SIZE) * dest->b_size,
262 "vs-10080: dih %h, bytes_or_entries(%d)",
263 dih,
264 (bytes_or_entries / UNFM_P_SIZE) * dest->b_size);
265 set_le_ih_k_offset(dih,
266 le_ih_k_offset(dih) -
267 ((bytes_or_entries / UNFM_P_SIZE) *
268 dest->b_size));
269 }
270 }
271
272 leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries,
273 B_I_PITEM(src,
274 ih) + ih_item_len(ih) - bytes_or_entries,
275 0);
276 return 1;
277}
240 278
241/* copy cpy_mun items from buffer src to buffer dest 279/* copy cpy_mun items from buffer src to buffer dest
242 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest 280 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest
243 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest 281 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest
244 */ 282 */
245static void leaf_copy_items_entirely (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, 283static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
246 int first, int cpy_num) 284 struct buffer_head *src, int last_first,
285 int first, int cpy_num)
247{ 286{
248 struct buffer_head * dest; 287 struct buffer_head *dest;
249 int nr, free_space; 288 int nr, free_space;
250 int dest_before; 289 int dest_before;
251 int last_loc, last_inserted_loc, location; 290 int last_loc, last_inserted_loc, location;
252 int i, j; 291 int i, j;
253 struct block_head * blkh; 292 struct block_head *blkh;
254 struct item_head * ih; 293 struct item_head *ih;
255 294
256 RFALSE( last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST, 295 RFALSE(last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST,
257 "vs-10090: bad last_first parameter %d", last_first); 296 "vs-10090: bad last_first parameter %d", last_first);
258 RFALSE( B_NR_ITEMS (src) - first < cpy_num, 297 RFALSE(B_NR_ITEMS(src) - first < cpy_num,
259 "vs-10100: too few items in source %d, required %d from %d", 298 "vs-10100: too few items in source %d, required %d from %d",
260 B_NR_ITEMS(src), cpy_num, first); 299 B_NR_ITEMS(src), cpy_num, first);
261 RFALSE( cpy_num < 0, "vs-10110: can not copy negative amount of items"); 300 RFALSE(cpy_num < 0, "vs-10110: can not copy negative amount of items");
262 RFALSE( ! dest_bi, "vs-10120: can not copy negative amount of items"); 301 RFALSE(!dest_bi, "vs-10120: can not copy negative amount of items");
263 302
264 dest = dest_bi->bi_bh; 303 dest = dest_bi->bi_bh;
265 304
266 RFALSE( ! dest, "vs-10130: can not copy negative amount of items"); 305 RFALSE(!dest, "vs-10130: can not copy negative amount of items");
267 306
268 if (cpy_num == 0) 307 if (cpy_num == 0)
269 return; 308 return;
270 309
271 blkh = B_BLK_HEAD(dest); 310 blkh = B_BLK_HEAD(dest);
272 nr = blkh_nr_item( blkh ); 311 nr = blkh_nr_item(blkh);
273 free_space = blkh_free_space(blkh); 312 free_space = blkh_free_space(blkh);
274 313
275 /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ 314 /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */
276 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; 315 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;
277 316
278 /* location of head of first new item */ 317 /* location of head of first new item */
279 ih = B_N_PITEM_HEAD (dest, dest_before); 318 ih = B_N_PITEM_HEAD(dest, dest_before);
280 319
281 RFALSE( blkh_free_space(blkh) < cpy_num * IH_SIZE, 320 RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE,
282 "vs-10140: not enough free space for headers %d (needed %d)", 321 "vs-10140: not enough free space for headers %d (needed %d)",
283 B_FREE_SPACE (dest), cpy_num * IH_SIZE); 322 B_FREE_SPACE(dest), cpy_num * IH_SIZE);
284 323
285 /* prepare space for headers */ 324 /* prepare space for headers */
286 memmove (ih + cpy_num, ih, (nr-dest_before) * IH_SIZE); 325 memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE);
287
288 /* copy item headers */
289 memcpy (ih, B_N_PITEM_HEAD (src, first), cpy_num * IH_SIZE);
290
291 free_space -= (IH_SIZE * cpy_num);
292 set_blkh_free_space( blkh, free_space );
293
294 /* location of unmovable item */
295 j = location = (dest_before == 0) ? dest->b_size : ih_location(ih-1);
296 for (i = dest_before; i < nr + cpy_num; i ++) {
297 location -= ih_item_len( ih + i - dest_before );
298 put_ih_location( ih + i - dest_before, location );
299 }
300
301 /* prepare space for items */
302 last_loc = ih_location( &(ih[nr+cpy_num-1-dest_before]) );
303 last_inserted_loc = ih_location( &(ih[cpy_num-1]) );
304
305 /* check free space */
306 RFALSE( free_space < j - last_inserted_loc,
307 "vs-10150: not enough free space for items %d (needed %d)",
308 free_space, j - last_inserted_loc);
309
310 memmove (dest->b_data + last_loc,
311 dest->b_data + last_loc + j - last_inserted_loc,
312 last_inserted_loc - last_loc);
313
314 /* copy items */
315 memcpy (dest->b_data + last_inserted_loc, B_N_PITEM(src,(first + cpy_num - 1)),
316 j - last_inserted_loc);
317
318 /* sizes, item number */
319 set_blkh_nr_item( blkh, nr + cpy_num );
320 set_blkh_free_space( blkh, free_space - (j - last_inserted_loc) );
321
322 do_balance_mark_leaf_dirty (dest_bi->tb, dest, 0);
323
324 if (dest_bi->bi_parent) {
325 struct disk_child *t_dc;
326 t_dc = B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position);
327 RFALSE( dc_block_number(t_dc) != dest->b_blocknr,
328 "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu",
329 ( long unsigned ) dest->b_blocknr,
330 ( long unsigned ) dc_block_number(t_dc));
331 put_dc_size( t_dc, dc_size(t_dc) + (j - last_inserted_loc + IH_SIZE * cpy_num ) );
332
333 do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent, 0);
334 }
335}
336 326
327 /* copy item headers */
328 memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE);
329
330 free_space -= (IH_SIZE * cpy_num);
331 set_blkh_free_space(blkh, free_space);
332
333 /* location of unmovable item */
334 j = location = (dest_before == 0) ? dest->b_size : ih_location(ih - 1);
335 for (i = dest_before; i < nr + cpy_num; i++) {
336 location -= ih_item_len(ih + i - dest_before);
337 put_ih_location(ih + i - dest_before, location);
338 }
339
340 /* prepare space for items */
341 last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before]));
342 last_inserted_loc = ih_location(&(ih[cpy_num - 1]));
343
344 /* check free space */
345 RFALSE(free_space < j - last_inserted_loc,
346 "vs-10150: not enough free space for items %d (needed %d)",
347 free_space, j - last_inserted_loc);
348
349 memmove(dest->b_data + last_loc,
350 dest->b_data + last_loc + j - last_inserted_loc,
351 last_inserted_loc - last_loc);
352
353 /* copy items */
354 memcpy(dest->b_data + last_inserted_loc,
355 B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc);
356
357 /* sizes, item number */
358 set_blkh_nr_item(blkh, nr + cpy_num);
359 set_blkh_free_space(blkh, free_space - (j - last_inserted_loc));
360
361 do_balance_mark_leaf_dirty(dest_bi->tb, dest, 0);
362
363 if (dest_bi->bi_parent) {
364 struct disk_child *t_dc;
365 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
366 RFALSE(dc_block_number(t_dc) != dest->b_blocknr,
367 "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu",
368 (long unsigned)dest->b_blocknr,
369 (long unsigned)dc_block_number(t_dc));
370 put_dc_size(t_dc,
371 dc_size(t_dc) + (j - last_inserted_loc +
372 IH_SIZE * cpy_num));
373
374 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
375 0);
376 }
377}
337 378
/* This function splits the (liquid) item into two items (useful when
   shifting part of an item into another node.)

   dest_bi   - destination leaf buffer (with parent info) that receives the
               partial item
   src       - source leaf buffer holding the item being split
   last_first- FIRST_TO_LAST: move the head of the item to the end of dest;
               LAST_TO_FIRST: move the tail of the item to the start of dest
   item_num  - index of the item in src to split
   cpy_bytes - number of bytes (or directory entries, for direntry items)
               to move; must not be -1 (that would mean "whole item") */
static void leaf_item_bottle(struct buffer_info *dest_bi,
			     struct buffer_head *src, int last_first,
			     int item_num, int cpy_bytes)
{
	struct buffer_head *dest = dest_bi->bi_bh;
	struct item_head *ih;

	RFALSE(cpy_bytes == -1,
	       "vs-10170: bytes == - 1 means: do not split item");

	if (last_first == FIRST_TO_LAST) {
		/* if ( if item in position item_num in buffer SOURCE is directory item ) */
		/* directory items are split entry-wise by a dedicated helper */
		if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num)))
			leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
					      item_num, 0, cpy_bytes);
		else {
			struct item_head n_ih;

			/* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST
			   part defined by 'cpy_bytes'; create new item header; change old item_header (????);
			   n_ih = new item_header;
			 */
			memcpy(&n_ih, ih, IH_SIZE);
			put_ih_item_len(&n_ih, cpy_bytes);
			if (is_indirect_le_ih(ih)) {
				RFALSE(cpy_bytes == ih_item_len(ih)
				       && get_ih_free_space(ih),
				       "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)",
				       (long unsigned)get_ih_free_space(ih));
				set_ih_free_space(&n_ih, 0);
			}

			RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size),
			       "vs-10190: bad mergeability of item %h", ih);
			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */
			/* append the leading cpy_bytes of the item body after
			   the last item already in dest */
			leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih,
					     B_N_PITEM(src, item_num), 0);
		}
	} else {
		/* if ( if item in position item_num in buffer SOURCE is directory item ) */
		if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num)))
			leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
					      item_num,
					      I_ENTRY_COUNT(ih) - cpy_bytes,
					      cpy_bytes);
		else {
			struct item_head n_ih;

			/* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST
			   part defined by 'cpy_bytes'; create new item header;
			   n_ih = new item_header;
			 */
			/* only the short key prefix is copied here; offset and
			   type are recomputed below for the tail fragment */
			memcpy(&n_ih, ih, SHORT_KEY_SIZE);

			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */

			if (is_direct_le_ih(ih)) {
				/* tail of a direct item: new key offset is in
				   bytes past the retained head */
				set_le_ih_k_offset(&n_ih,
						   le_ih_k_offset(ih) +
						   ih_item_len(ih) - cpy_bytes);
				set_le_ih_k_type(&n_ih, TYPE_DIRECT);
				set_ih_free_space(&n_ih, MAX_US_INT);
			} else {
				/* indirect item */
				RFALSE(!cpy_bytes && get_ih_free_space(ih),
				       "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended");
				/* each unformatted pointer covers b_size bytes
				   of file data, hence the scaling */
				set_le_ih_k_offset(&n_ih,
						   le_ih_k_offset(ih) +
						   (ih_item_len(ih) -
						    cpy_bytes) / UNFM_P_SIZE *
						   dest->b_size);
				set_le_ih_k_type(&n_ih, TYPE_INDIRECT);
				set_ih_free_space(&n_ih, get_ih_free_space(ih));
			}

			/* set item length */
			put_ih_item_len(&n_ih, cpy_bytes);

			n_ih.ih_version = ih->ih_version;	/* JDM Endian safe, both le */

			/* insert the trailing cpy_bytes of the item body as
			   item 0 of dest */
			leaf_insert_into_buf(dest_bi, 0, &n_ih,
					     B_N_PITEM(src,
						       item_num) +
					     ih_item_len(ih) - cpy_bytes, 0);
		}
	}
}
410 468
411
/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST.
   If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST.
   From last item copy cpy_num bytes for regular item and cpy_num directory entries for
   directory item.
   Returns the number of items (0 or 1) absorbed by merging the boundary
   item into an existing mergeable item of dest. */
static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
			   int last_first, int cpy_num, int cpy_bytes)
{
	struct buffer_head *dest;
	int pos, i, src_nr_item, bytes;

	dest = dest_bi->bi_bh;
	RFALSE(!dest || !src, "vs-10210: !dest || !src");
	RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
	       "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST");
	RFALSE(B_NR_ITEMS(src) < cpy_num,
	       "vs-10230: No enough items: %d, req. %d", B_NR_ITEMS(src),
	       cpy_num);
	RFALSE(cpy_num < 0, "vs-10240: cpy_num < 0 (%d)", cpy_num);

	if (cpy_num == 0)
		return 0;

	if (last_first == FIRST_TO_LAST) {
		/* copy items to left */
		pos = 0;
		/* only a single item may be partially copied */
		if (cpy_num == 1)
			bytes = cpy_bytes;
		else
			bytes = -1;

		/* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */
		i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes);
		cpy_num -= i;
		if (cpy_num == 0)
			return i;
		pos += i;
		if (cpy_bytes == -1)
			/* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */
			leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
						 pos, cpy_num);
		else {
			/* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */
			leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
						 pos, cpy_num - 1);

			/* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */
			leaf_item_bottle(dest_bi, src, FIRST_TO_LAST,
					 cpy_num + pos - 1, cpy_bytes);
		}
	} else {
		/* copy items to right */
		src_nr_item = B_NR_ITEMS(src);
		if (cpy_num == 1)
			bytes = cpy_bytes;
		else
			bytes = -1;

		/* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */
		i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes);

		cpy_num -= i;
		if (cpy_num == 0)
			return i;

		/* index of the first whole item to move from the tail of src */
		pos = src_nr_item - cpy_num - i;
		if (cpy_bytes == -1) {
			/* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */
			leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
						 pos, cpy_num);
		} else {
			/* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */
			leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
						 pos + 1, cpy_num - 1);

			/* copy part of the item which number is pos to the begin of the DEST */
			leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos,
					 cpy_bytes);
		}
	}
	return i;
}
486 550
487
/* there are types of coping: from S[0] to L[0], from S[0] to R[0],
   from R[0] to L[0]. for each of these we have to define parent and
   positions of destination and source buffers.

   Zero-fills both buffer_info structures, then fills in buffer, parent
   buffer and parent position for source and destination according to
   shift_mode, and sets *first_last to the copy direction that
   leaf_copy_items expects.  Snew is used only for LEAF_FROM_S_TO_SNEW
   (the freshly allocated node has no parent yet). */
static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
				       struct buffer_info *dest_bi,
				       struct buffer_info *src_bi,
				       int *first_last,
				       struct buffer_head *Snew)
{
	memset(dest_bi, 0, sizeof(struct buffer_info));
	memset(src_bi, 0, sizeof(struct buffer_info));

	/* define dest, src, dest parent, dest position */
	switch (shift_mode) {
	case LEAF_FROM_S_TO_L:	/* it is used in leaf_shift_left */
		src_bi->tb = tb;
		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);	/* src->b_item_order */
		dest_bi->tb = tb;
		dest_bi->bi_bh = tb->L[0];
		dest_bi->bi_parent = tb->FL[0];
		dest_bi->bi_position = get_left_neighbor_position(tb, 0);
		*first_last = FIRST_TO_LAST;
		break;

	case LEAF_FROM_S_TO_R:	/* it is used in leaf_shift_right */
		src_bi->tb = tb;
		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
		dest_bi->tb = tb;
		dest_bi->bi_bh = tb->R[0];
		dest_bi->bi_parent = tb->FR[0];
		dest_bi->bi_position = get_right_neighbor_position(tb, 0);
		*first_last = LAST_TO_FIRST;
		break;

	case LEAF_FROM_R_TO_L:	/* it is used in balance_leaf_when_delete */
		src_bi->tb = tb;
		src_bi->bi_bh = tb->R[0];
		src_bi->bi_parent = tb->FR[0];
		src_bi->bi_position = get_right_neighbor_position(tb, 0);
		dest_bi->tb = tb;
		dest_bi->bi_bh = tb->L[0];
		dest_bi->bi_parent = tb->FL[0];
		dest_bi->bi_position = get_left_neighbor_position(tb, 0);
		*first_last = FIRST_TO_LAST;
		break;

	case LEAF_FROM_L_TO_R:	/* it is used in balance_leaf_when_delete */
		src_bi->tb = tb;
		src_bi->bi_bh = tb->L[0];
		src_bi->bi_parent = tb->FL[0];
		src_bi->bi_position = get_left_neighbor_position(tb, 0);
		dest_bi->tb = tb;
		dest_bi->bi_bh = tb->R[0];
		dest_bi->bi_parent = tb->FR[0];
		dest_bi->bi_position = get_right_neighbor_position(tb, 0);
		*first_last = LAST_TO_FIRST;
		break;

	case LEAF_FROM_S_TO_SNEW:
		src_bi->tb = tb;
		src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
		src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
		src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
		dest_bi->tb = tb;
		dest_bi->bi_bh = Snew;
		dest_bi->bi_parent = NULL;	/* Snew is not linked into the tree yet */
		dest_bi->bi_position = 0;
		*first_last = LAST_TO_FIRST;
		break;

	default:
		reiserfs_panic(NULL,
			       "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)",
			       shift_mode);
	}
	RFALSE(src_bi->bi_bh == 0 || dest_bi->bi_bh == 0,
	       "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly",
	       shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
}
567 634
568
569
570
/* copy mov_num items and mov_bytes of the (mov_num-1)th item to
   neighbor. Delete them from source.

   shift_mode selects source/destination (see leaf_define_dest_src_infos);
   Snew is the new node for LEAF_FROM_S_TO_SNEW, NULL otherwise.
   Returns the value of leaf_copy_items (number of boundary items merged). */
int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
		    int mov_bytes, struct buffer_head *Snew)
{
	int ret_value;
	struct buffer_info dest_bi, src_bi;
	int first_last;

	leaf_define_dest_src_infos(shift_mode, tb, &dest_bi, &src_bi,
				   &first_last, Snew);

	ret_value =
	    leaf_copy_items(&dest_bi, src_bi.bi_bh, first_last, mov_num,
			    mov_bytes);

	/* items were shifted from the head (FIRST_TO_LAST) or from the
	   tail (LAST_TO_FIRST) of src; delete the same range there */
	leaf_delete_items(&src_bi, first_last,
			  (first_last ==
			   FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) -
						 mov_num), mov_num, mov_bytes);

	return ret_value;
}
588 658
589
/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1)
   from S[0] to L[0] and replace the delimiting key.
   Returns the value of leaf_move_items. */
int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
{
	struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path);
	int i;

	/* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */
	i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);

	if (shift_num) {
		if (B_NR_ITEMS(S0) == 0) {	/* number of items in S[0] == 0 */

			RFALSE(shift_bytes != -1,
			       "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)",
			       shift_bytes);
#ifdef CONFIG_REISERFS_CHECK
			/* an insert or paste must never leave S[0] empty */
			if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) {
				print_cur_tb("vs-10275");
				reiserfs_panic(tb->tb_sb,
					       "vs-10275: leaf_shift_left: balance condition corrupted (%c)",
					       tb->tb_mode);
			}
#endif

			/* S[0] is empty: its delimiting key comes from the
			   parent instead */
			if (PATH_H_POSITION(tb->tb_path, 1) == 0)
				replace_key(tb, tb->CFL[0], tb->lkey[0],
					    PATH_H_PPARENT(tb->tb_path, 0), 0);

		} else {
			/* replace lkey in CFL[0] by 0-th key from S[0]; */
			replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0);

			RFALSE((shift_bytes != -1 &&
				!(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0))
				  && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) &&
			       (!op_is_left_mergeable
				(B_N_PKEY(S0, 0), S0->b_size)),
			       "vs-10280: item must be mergeable");
		}
	}

	return i;
}
634 703
635/* CLEANING STOPPED HERE */ 704/* CLEANING STOPPED HERE */
636 705
637
638
639
/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key.
   Returns the value of leaf_move_items. */
int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
{
	// struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path);
	int ret_value;

	/* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */
	ret_value =
	    leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL);

	/* replace rkey in CFR[0] by the 0-th key from R[0] */
	if (shift_num) {
		replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);

	}

	return ret_value;
}
661 724
static void leaf_delete_items_entirely(struct buffer_info *bi,
				       int first, int del_num);
/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR.
   If not.
   If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of
   the first item. Part defined by del_bytes. Don't delete first item header.
   If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of
   the last item . Part defined by del_bytes. Don't delete last item header.
*/
void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
		       int first, int del_num, int del_bytes)
{
	struct buffer_head *bh;
	/* note the assignment inside the macro argument: bh is set here */
	int item_amount = B_NR_ITEMS(bh = cur_bi->bi_bh);

	RFALSE(!bh, "10155: bh is not defined");
	RFALSE(del_num < 0, "10160: del_num can not be < 0. del_num==%d",
	       del_num);
	RFALSE(first < 0
	       || first + del_num > item_amount,
	       "10165: invalid number of first item to be deleted (%d) or "
	       "no so much items (%d) to delete (only %d)", first,
	       first + del_num, item_amount);

	if (del_num == 0)
		return;

	/* deleting every item in whole: just reinitialize the node header */
	if (first == 0 && del_num == item_amount && del_bytes == -1) {
		make_empty_node(cur_bi);
		do_balance_mark_leaf_dirty(cur_bi->tb, bh, 0);
		return;
	}

	if (del_bytes == -1)
		/* delete del_num items beginning from item in position first */
		leaf_delete_items_entirely(cur_bi, first, del_num);
	else {
		if (last_first == FIRST_TO_LAST) {
			/* delete del_num-1 items beginning from item in position first */
			leaf_delete_items_entirely(cur_bi, first, del_num - 1);

			/* delete the part of the first item of the bh
			   do not delete item header
			 */
			leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes);
		} else {
			struct item_head *ih;
			int len;

			/* delete del_num-1 items beginning from item in position first+1 */
			leaf_delete_items_entirely(cur_bi, first + 1,
						   del_num - 1);

			if (is_direntry_le_ih
			    (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1)))
				/* the last item is directory */
				/* len = numbers of directory entries in this item */
				len = ih_entry_count(ih);
			else
				/* len = body len of item */
				len = ih_item_len(ih);

			/* delete the part of the last item of the bh
			   do not delete item header
			 */
			leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
					     len - del_bytes, del_bytes);
		}
	}
}
729 795
/* insert item into the leaf node in position before
 *
 * bi                 - buffer info of the leaf (buffer head, parent, position,
 *                      and tree-balance descriptor)
 * before             - index the new item is inserted at (existing items at
 *                      and after this index shift right)
 * inserted_item_ih   - header of the new item (its length drives all moves)
 * inserted_item_body - item body to copy in, or NULL to zero-fill the body
 * zeros_number       - number of leading body bytes to zero instead of copy
 */
void leaf_insert_into_buf(struct buffer_info *bi, int before,
			  struct item_head *inserted_item_ih,
			  const char *inserted_item_body, int zeros_number)
{
	struct buffer_head *bh = bi->bi_bh;
	int nr, free_space;
	struct block_head *blkh;
	struct item_head *ih;
	int i;
	int last_loc, unmoved_loc;
	char *to;

	blkh = B_BLK_HEAD(bh);
	nr = blkh_nr_item(blkh);
	free_space = blkh_free_space(blkh);

	/* check free space */
	RFALSE(free_space < ih_item_len(inserted_item_ih) + IH_SIZE,
	       "vs-10170: not enough free space in block %z, new item %h",
	       bh, inserted_item_ih);
	RFALSE(zeros_number > ih_item_len(inserted_item_ih),
	       "vs-10172: zero number == %d, item length == %d",
	       zeros_number, ih_item_len(inserted_item_ih));

	/* get item new item must be inserted before */
	ih = B_N_PITEM_HEAD(bh, before);

	/* prepare space for the body of new item: item bodies grow from the
	 * end of the block toward the headers, so "last_loc" is the lowest
	 * used body offset and "unmoved_loc" the body offset just before the
	 * insertion point */
	last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size;
	unmoved_loc = before ? ih_location(ih - 1) : bh->b_size;

	/* slide bodies of items at/after 'before' down to open a gap */
	memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih),
		bh->b_data + last_loc, unmoved_loc - last_loc);

	to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih);
	memset(to, 0, zeros_number);
	to += zeros_number;

	/* copy body to prepared space */
	if (inserted_item_body)
		memmove(to, inserted_item_body,
			ih_item_len(inserted_item_ih) - zeros_number);
	else
		memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number);

	/* insert item header (shift following headers right by one slot) */
	memmove(ih + 1, ih, IH_SIZE * (nr - before));
	memmove(ih, inserted_item_ih, IH_SIZE);

	/* change locations: recompute body offsets for the new item and every
	 * item after it, walking from the insertion point to the end */
	for (i = before; i < nr + 1; i++) {
		unmoved_loc -= ih_item_len(&(ih[i - before]));
		put_ih_location(&(ih[i - before]), unmoved_loc);
	}

	/* sizes, free space, item number */
	set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
	set_blkh_free_space(blkh,
			    free_space - (IH_SIZE +
					  ih_item_len(inserted_item_ih)));
	do_balance_mark_leaf_dirty(bi->tb, bh, 1);

	/* keep the parent's accounting of this child's used size in sync */
	if (bi->bi_parent) {
		struct disk_child *t_dc;
		t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position);
		put_dc_size(t_dc,
			    dc_size(t_dc) + (IH_SIZE +
					     ih_item_len(inserted_item_ih)));
		do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
	}
}
804 868
805/* paste paste_size bytes to affected_item_num-th item. 869/* paste paste_size bytes to affected_item_num-th item.
806 When item is a directory, this only prepare space for new entries */ 870 When item is a directory, this only prepare space for new entries */
807void leaf_paste_in_buffer (struct buffer_info * bi, int affected_item_num, 871void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
808 int pos_in_item, int paste_size, 872 int pos_in_item, int paste_size,
809 const char * body, 873 const char *body, int zeros_number)
810 int zeros_number)
811{ 874{
812 struct buffer_head * bh = bi->bi_bh; 875 struct buffer_head *bh = bi->bi_bh;
813 int nr, free_space; 876 int nr, free_space;
814 struct block_head * blkh; 877 struct block_head *blkh;
815 struct item_head * ih; 878 struct item_head *ih;
816 int i; 879 int i;
817 int last_loc, unmoved_loc; 880 int last_loc, unmoved_loc;
818 881
819 blkh = B_BLK_HEAD(bh); 882 blkh = B_BLK_HEAD(bh);
820 nr = blkh_nr_item(blkh); 883 nr = blkh_nr_item(blkh);
821 free_space = blkh_free_space(blkh); 884 free_space = blkh_free_space(blkh);
822 885
823 886 /* check free space */
824 /* check free space */ 887 RFALSE(free_space < paste_size,
825 RFALSE( free_space < paste_size, 888 "vs-10175: not enough free space: needed %d, available %d",
826 "vs-10175: not enough free space: needed %d, available %d", 889 paste_size, free_space);
827 paste_size, free_space);
828 890
829#ifdef CONFIG_REISERFS_CHECK 891#ifdef CONFIG_REISERFS_CHECK
830 if (zeros_number > paste_size) { 892 if (zeros_number > paste_size) {
831 print_cur_tb ("10177"); 893 print_cur_tb("10177");
832 reiserfs_panic ( NULL, "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d", 894 reiserfs_panic(NULL,
833 zeros_number, paste_size); 895 "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d",
834 } 896 zeros_number, paste_size);
835#endif /* CONFIG_REISERFS_CHECK */ 897 }
836 898#endif /* CONFIG_REISERFS_CHECK */
837 899
838 /* item to be appended */ 900 /* item to be appended */
839 ih = B_N_PITEM_HEAD(bh, affected_item_num); 901 ih = B_N_PITEM_HEAD(bh, affected_item_num);
840 902
841 last_loc = ih_location( &(ih[nr - affected_item_num - 1]) ); 903 last_loc = ih_location(&(ih[nr - affected_item_num - 1]));
842 unmoved_loc = affected_item_num ? ih_location( ih-1 ) : bh->b_size; 904 unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size;
843 905
844 /* prepare space */ 906 /* prepare space */
845 memmove (bh->b_data + last_loc - paste_size, bh->b_data + last_loc, 907 memmove(bh->b_data + last_loc - paste_size, bh->b_data + last_loc,
846 unmoved_loc - last_loc); 908 unmoved_loc - last_loc);
847 909
848 910 /* change locations */
849 /* change locations */ 911 for (i = affected_item_num; i < nr; i++)
850 for (i = affected_item_num; i < nr; i ++) 912 put_ih_location(&(ih[i - affected_item_num]),
851 put_ih_location( &(ih[i-affected_item_num]), 913 ih_location(&(ih[i - affected_item_num])) -
852 ih_location( &(ih[i-affected_item_num])) - paste_size ); 914 paste_size);
853 915
854 if ( body ) { 916 if (body) {
855 if (!is_direntry_le_ih (ih)) { 917 if (!is_direntry_le_ih(ih)) {
856 if (!pos_in_item) { 918 if (!pos_in_item) {
857 /* shift data to right */ 919 /* shift data to right */
858 memmove (bh->b_data + ih_location(ih) + paste_size, 920 memmove(bh->b_data + ih_location(ih) +
859 bh->b_data + ih_location(ih), ih_item_len(ih)); 921 paste_size,
860 /* paste data in the head of item */ 922 bh->b_data + ih_location(ih),
861 memset (bh->b_data + ih_location(ih), 0, zeros_number); 923 ih_item_len(ih));
862 memcpy (bh->b_data + ih_location(ih) + zeros_number, body, paste_size - zeros_number); 924 /* paste data in the head of item */
863 } else { 925 memset(bh->b_data + ih_location(ih), 0,
864 memset (bh->b_data + unmoved_loc - paste_size, 0, zeros_number); 926 zeros_number);
865 memcpy (bh->b_data + unmoved_loc - paste_size + zeros_number, body, paste_size - zeros_number); 927 memcpy(bh->b_data + ih_location(ih) +
866 } 928 zeros_number, body,
929 paste_size - zeros_number);
930 } else {
931 memset(bh->b_data + unmoved_loc - paste_size, 0,
932 zeros_number);
933 memcpy(bh->b_data + unmoved_loc - paste_size +
934 zeros_number, body,
935 paste_size - zeros_number);
936 }
937 }
938 } else
939 memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size);
940
941 put_ih_item_len(ih, ih_item_len(ih) + paste_size);
942
943 /* change free space */
944 set_blkh_free_space(blkh, free_space - paste_size);
945
946 do_balance_mark_leaf_dirty(bi->tb, bh, 0);
947
948 if (bi->bi_parent) {
949 struct disk_child *t_dc =
950 B_N_CHILD(bi->bi_parent, bi->bi_position);
951 put_dc_size(t_dc, dc_size(t_dc) + paste_size);
952 do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
867 } 953 }
868 }
869 else
870 memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size);
871
872 put_ih_item_len( ih, ih_item_len(ih) + paste_size );
873
874 /* change free space */
875 set_blkh_free_space( blkh, free_space - paste_size );
876
877 do_balance_mark_leaf_dirty (bi->tb, bh, 0);
878
879 if (bi->bi_parent) {
880 struct disk_child *t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position);
881 put_dc_size( t_dc, dc_size(t_dc) + paste_size );
882 do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0);
883 }
884} 954}
885 955
886
887/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item 956/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
888 does not have free space, so it moves DEHs and remaining records as 957 does not have free space, so it moves DEHs and remaining records as
889 necessary. Return value is size of removed part of directory item 958 necessary. Return value is size of removed part of directory item
890 in bytes. */ 959 in bytes. */
891static int leaf_cut_entries ( 960static int leaf_cut_entries(struct buffer_head *bh,
892 struct buffer_head * bh, 961 struct item_head *ih, int from, int del_count)
893 struct item_head * ih,
894 int from,
895 int del_count
896 )
897{ 962{
898 char * item; 963 char *item;
899 struct reiserfs_de_head * deh; 964 struct reiserfs_de_head *deh;
900 int prev_record_offset; /* offset of record, that is (from-1)th */ 965 int prev_record_offset; /* offset of record, that is (from-1)th */
901 char * prev_record; /* */ 966 char *prev_record; /* */
902 int cut_records_len; /* length of all removed records */ 967 int cut_records_len; /* length of all removed records */
903 int i; 968 int i;
904 969
905 970 /* make sure, that item is directory and there are enough entries to
906 /* make sure, that item is directory and there are enough entries to 971 remove */
907 remove */ 972 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
908 RFALSE( !is_direntry_le_ih (ih), "10180: item is not directory item"); 973 RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
909 RFALSE( I_ENTRY_COUNT(ih) < from + del_count, 974 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d",
910 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", 975 I_ENTRY_COUNT(ih), from, del_count);
911 I_ENTRY_COUNT(ih), from, del_count); 976
912 977 if (del_count == 0)
913 if (del_count == 0) 978 return 0;
914 return 0; 979
915 980 /* first byte of item */
916 /* first byte of item */ 981 item = bh->b_data + ih_location(ih);
917 item = bh->b_data + ih_location(ih); 982
918 983 /* entry head array */
919 /* entry head array */ 984 deh = B_I_DEH(bh, ih);
920 deh = B_I_DEH (bh, ih); 985
921 986 /* first byte of remaining entries, those are BEFORE cut entries
922 /* first byte of remaining entries, those are BEFORE cut entries 987 (prev_record) and length of all removed records (cut_records_len) */
923 (prev_record) and length of all removed records (cut_records_len) */ 988 prev_record_offset =
924 prev_record_offset = (from ? deh_location( &(deh[from - 1])) : ih_item_len(ih)); 989 (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih));
925 cut_records_len = prev_record_offset/*from_record*/ - 990 cut_records_len = prev_record_offset /*from_record */ -
926 deh_location( &(deh[from + del_count - 1])); 991 deh_location(&(deh[from + del_count - 1]));
927 prev_record = item + prev_record_offset; 992 prev_record = item + prev_record_offset;
928 993
929 994 /* adjust locations of remaining entries */
930 /* adjust locations of remaining entries */ 995 for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--)
931 for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i --) 996 put_deh_location(&(deh[i]),
932 put_deh_location( &(deh[i]), 997 deh_location(&deh[i]) -
933 deh_location( &deh[i] ) - (DEH_SIZE * del_count ) ); 998 (DEH_SIZE * del_count));
934 999
935 for (i = 0; i < from; i ++) 1000 for (i = 0; i < from; i++)
936 put_deh_location( &(deh[i]), 1001 put_deh_location(&(deh[i]),
937 deh_location( &deh[i] ) - (DEH_SIZE * del_count + cut_records_len) ); 1002 deh_location(&deh[i]) - (DEH_SIZE * del_count +
938 1003 cut_records_len));
939 put_ih_entry_count( ih, ih_entry_count(ih) - del_count ); 1004
940 1005 put_ih_entry_count(ih, ih_entry_count(ih) - del_count);
941 /* shift entry head array and entries those are AFTER removed entries */ 1006
942 memmove ((char *)(deh + from), 1007 /* shift entry head array and entries those are AFTER removed entries */
943 deh + from + del_count, 1008 memmove((char *)(deh + from),
944 prev_record - cut_records_len - (char *)(deh + from + del_count)); 1009 deh + from + del_count,
945 1010 prev_record - cut_records_len - (char *)(deh + from +
946 /* shift records, those are BEFORE removed entries */ 1011 del_count));
947 memmove (prev_record - cut_records_len - DEH_SIZE * del_count, 1012
948 prev_record, item + ih_item_len(ih) - prev_record); 1013 /* shift records, those are BEFORE removed entries */
949 1014 memmove(prev_record - cut_records_len - DEH_SIZE * del_count,
950 return DEH_SIZE * del_count + cut_records_len; 1015 prev_record, item + ih_item_len(ih) - prev_record);
1016
1017 return DEH_SIZE * del_count + cut_records_len;
951} 1018}
952 1019
953
954/* when cut item is part of regular file 1020/* when cut item is part of regular file
955 pos_in_item - first byte that must be cut 1021 pos_in_item - first byte that must be cut
956 cut_size - number of bytes to be cut beginning from pos_in_item 1022 cut_size - number of bytes to be cut beginning from pos_in_item
@@ -959,264 +1025,278 @@ static int leaf_cut_entries (
959 pos_in_item - number of first deleted entry 1025 pos_in_item - number of first deleted entry
960 cut_size - count of deleted entries 1026 cut_size - count of deleted entries
961 */ 1027 */
962void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, 1028void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
963 int pos_in_item, int cut_size) 1029 int pos_in_item, int cut_size)
964{ 1030{
965 int nr; 1031 int nr;
966 struct buffer_head * bh = bi->bi_bh; 1032 struct buffer_head *bh = bi->bi_bh;
967 struct block_head * blkh; 1033 struct block_head *blkh;
968 struct item_head * ih; 1034 struct item_head *ih;
969 int last_loc, unmoved_loc; 1035 int last_loc, unmoved_loc;
970 int i; 1036 int i;
971 1037
972 blkh = B_BLK_HEAD(bh); 1038 blkh = B_BLK_HEAD(bh);
973 nr = blkh_nr_item(blkh); 1039 nr = blkh_nr_item(blkh);
974 1040
975 /* item head of truncated item */ 1041 /* item head of truncated item */
976 ih = B_N_PITEM_HEAD (bh, cut_item_num); 1042 ih = B_N_PITEM_HEAD(bh, cut_item_num);
977 1043
978 if (is_direntry_le_ih (ih)) { 1044 if (is_direntry_le_ih(ih)) {
979 /* first cut entry ()*/ 1045 /* first cut entry () */
980 cut_size = leaf_cut_entries (bh, ih, pos_in_item, cut_size); 1046 cut_size = leaf_cut_entries(bh, ih, pos_in_item, cut_size);
981 if (pos_in_item == 0) { 1047 if (pos_in_item == 0) {
982 /* change key */ 1048 /* change key */
983 RFALSE( cut_item_num, 1049 RFALSE(cut_item_num,
984 "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", cut_item_num); 1050 "when 0-th enrty of item is cut, that item must be first in the node, not %d-th",
985 /* change item key by key of first entry in the item */ 1051 cut_item_num);
986 set_le_ih_k_offset (ih, deh_offset(B_I_DEH (bh, ih))); 1052 /* change item key by key of first entry in the item */
987 /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE);*/ 1053 set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih)));
988 } 1054 /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */
989 } else { 1055 }
990 /* item is direct or indirect */ 1056 } else {
991 RFALSE( is_statdata_le_ih (ih), "10195: item is stat data"); 1057 /* item is direct or indirect */
992 RFALSE( pos_in_item && pos_in_item + cut_size != ih_item_len(ih), 1058 RFALSE(is_statdata_le_ih(ih), "10195: item is stat data");
993 "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", 1059 RFALSE(pos_in_item && pos_in_item + cut_size != ih_item_len(ih),
994 ( long unsigned ) pos_in_item, ( long unsigned ) cut_size, 1060 "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)",
995 ( long unsigned ) ih_item_len (ih)); 1061 (long unsigned)pos_in_item, (long unsigned)cut_size,
996 1062 (long unsigned)ih_item_len(ih));
997 /* shift item body to left if cut is from the head of item */ 1063
998 if (pos_in_item == 0) { 1064 /* shift item body to left if cut is from the head of item */
999 memmove( bh->b_data + ih_location(ih), 1065 if (pos_in_item == 0) {
1000 bh->b_data + ih_location(ih) + cut_size, 1066 memmove(bh->b_data + ih_location(ih),
1001 ih_item_len(ih) - cut_size); 1067 bh->b_data + ih_location(ih) + cut_size,
1002 1068 ih_item_len(ih) - cut_size);
1003 /* change key of item */ 1069
1004 if (is_direct_le_ih (ih)) 1070 /* change key of item */
1005 set_le_ih_k_offset (ih, le_ih_k_offset (ih) + cut_size); 1071 if (is_direct_le_ih(ih))
1006 else { 1072 set_le_ih_k_offset(ih,
1007 set_le_ih_k_offset (ih, le_ih_k_offset (ih) + (cut_size / UNFM_P_SIZE) * bh->b_size); 1073 le_ih_k_offset(ih) +
1008 RFALSE( ih_item_len(ih) == cut_size && get_ih_free_space (ih), 1074 cut_size);
1009 "10205: invalid ih_free_space (%h)", ih); 1075 else {
1010 } 1076 set_le_ih_k_offset(ih,
1011 } 1077 le_ih_k_offset(ih) +
1012 } 1078 (cut_size / UNFM_P_SIZE) *
1013 1079 bh->b_size);
1014 1080 RFALSE(ih_item_len(ih) == cut_size
1015 /* location of the last item */ 1081 && get_ih_free_space(ih),
1016 last_loc = ih_location( &(ih[nr - cut_item_num - 1]) ); 1082 "10205: invalid ih_free_space (%h)", ih);
1017 1083 }
1018 /* location of the item, which is remaining at the same place */ 1084 }
1019 unmoved_loc = cut_item_num ? ih_location(ih-1) : bh->b_size; 1085 }
1020 1086
1021 1087 /* location of the last item */
1022 /* shift */ 1088 last_loc = ih_location(&(ih[nr - cut_item_num - 1]));
1023 memmove (bh->b_data + last_loc + cut_size, bh->b_data + last_loc, 1089
1024 unmoved_loc - last_loc - cut_size); 1090 /* location of the item, which is remaining at the same place */
1025 1091 unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size;
1026 /* change item length */ 1092
1027 put_ih_item_len( ih, ih_item_len(ih) - cut_size ); 1093 /* shift */
1028 1094 memmove(bh->b_data + last_loc + cut_size, bh->b_data + last_loc,
1029 if (is_indirect_le_ih (ih)) { 1095 unmoved_loc - last_loc - cut_size);
1030 if (pos_in_item) 1096
1031 set_ih_free_space (ih, 0); 1097 /* change item length */
1032 } 1098 put_ih_item_len(ih, ih_item_len(ih) - cut_size);
1033
1034 /* change locations */
1035 for (i = cut_item_num; i < nr; i ++)
1036 put_ih_location( &(ih[i-cut_item_num]), ih_location( &ih[i-cut_item_num]) + cut_size );
1037
1038 /* size, free space */
1039 set_blkh_free_space( blkh, blkh_free_space(blkh) + cut_size );
1040
1041 do_balance_mark_leaf_dirty (bi->tb, bh, 0);
1042
1043 if (bi->bi_parent) {
1044 struct disk_child *t_dc;
1045 t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position);
1046 put_dc_size( t_dc, dc_size(t_dc) - cut_size );
1047 do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0);
1048 }
1049}
1050 1099
1100 if (is_indirect_le_ih(ih)) {
1101 if (pos_in_item)
1102 set_ih_free_space(ih, 0);
1103 }
1104
1105 /* change locations */
1106 for (i = cut_item_num; i < nr; i++)
1107 put_ih_location(&(ih[i - cut_item_num]),
1108 ih_location(&ih[i - cut_item_num]) + cut_size);
1109
1110 /* size, free space */
1111 set_blkh_free_space(blkh, blkh_free_space(blkh) + cut_size);
1112
1113 do_balance_mark_leaf_dirty(bi->tb, bh, 0);
1114
1115 if (bi->bi_parent) {
1116 struct disk_child *t_dc;
1117 t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position);
1118 put_dc_size(t_dc, dc_size(t_dc) - cut_size);
1119 do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
1120 }
1121}
1051 1122
/* delete del_num items from buffer starting from the first'th item
 *
 * bi      - buffer info of the leaf being modified
 * first   - index of the first item to delete
 * del_num - number of whole items to delete (headers and bodies)
 */
static void leaf_delete_items_entirely(struct buffer_info *bi,
				       int first, int del_num)
{
	struct buffer_head *bh = bi->bi_bh;
	int nr;
	int i, j;
	int last_loc, last_removed_loc;
	struct block_head *blkh;
	struct item_head *ih;

	RFALSE(bh == NULL, "10210: buffer is 0");
	RFALSE(del_num < 0, "10215: del_num less than 0 (%d)", del_num);

	if (del_num == 0)
		return;

	blkh = B_BLK_HEAD(bh);
	nr = blkh_nr_item(blkh);

	RFALSE(first < 0 || first + del_num > nr,
	       "10220: first=%d, number=%d, there is %d items", first, del_num,
	       nr);

	if (first == 0 && del_num == nr) {
		/* this does not work */
		make_empty_node(bi);

		do_balance_mark_leaf_dirty(bi->tb, bh, 0);
		return;
	}

	ih = B_N_PITEM_HEAD(bh, first);

	/* location of unmovable item: body offset just above the first
	 * deleted item (bodies grow downward from the end of the block) */
	j = (first == 0) ? bh->b_size : ih_location(ih - 1);

	/* delete items: close the body gap left by the removed items */
	last_loc = ih_location(&(ih[nr - 1 - first]));
	last_removed_loc = ih_location(&(ih[del_num - 1]));

	memmove(bh->b_data + last_loc + j - last_removed_loc,
		bh->b_data + last_loc, last_removed_loc - last_loc);

	/* delete item headers */
	memmove(ih, ih + del_num, (nr - first - del_num) * IH_SIZE);

	/* change item location: surviving items moved up by the total body
	 * size that was removed (j - last_removed_loc) */
	for (i = first; i < nr - del_num; i++)
		put_ih_location(&(ih[i - first]),
				ih_location(&(ih[i - first])) + (j -
								 last_removed_loc));

	/* sizes, item number */
	set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
	set_blkh_free_space(blkh,
			    blkh_free_space(blkh) + (j - last_removed_loc +
						     IH_SIZE * del_num));

	do_balance_mark_leaf_dirty(bi->tb, bh, 0);

	/* keep the parent's accounting of this child's used size in sync */
	if (bi->bi_parent) {
		struct disk_child *t_dc =
		    B_N_CHILD(bi->bi_parent, bi->bi_position);
		put_dc_size(t_dc,
			    dc_size(t_dc) - (j - last_removed_loc +
					     IH_SIZE * del_num));
		do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
	}
}
1119 1193
1120/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ 1194/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */
1121void leaf_paste_entries ( 1195void leaf_paste_entries(struct buffer_head *bh,
1122 struct buffer_head * bh,
1123 int item_num, 1196 int item_num,
1124 int before, 1197 int before,
1125 int new_entry_count, 1198 int new_entry_count,
1126 struct reiserfs_de_head * new_dehs, 1199 struct reiserfs_de_head *new_dehs,
1127 const char * records, 1200 const char *records, int paste_size)
1128 int paste_size
1129 )
1130{ 1201{
1131 struct item_head * ih; 1202 struct item_head *ih;
1132 char * item; 1203 char *item;
1133 struct reiserfs_de_head * deh; 1204 struct reiserfs_de_head *deh;
1134 char * insert_point; 1205 char *insert_point;
1135 int i, old_entry_num; 1206 int i, old_entry_num;
1136 1207
1137 if (new_entry_count == 0) 1208 if (new_entry_count == 0)
1138 return; 1209 return;
1139 1210
1140 ih = B_N_PITEM_HEAD(bh, item_num); 1211 ih = B_N_PITEM_HEAD(bh, item_num);
1141 1212
1142 /* make sure, that item is directory, and there are enough records in it */ 1213 /* make sure, that item is directory, and there are enough records in it */
1143 RFALSE( !is_direntry_le_ih (ih), "10225: item is not directory item"); 1214 RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item");
1144 RFALSE( I_ENTRY_COUNT (ih) < before, 1215 RFALSE(I_ENTRY_COUNT(ih) < before,
1145 "10230: there are no entry we paste entries before. entry_count = %d, before = %d", 1216 "10230: there are no entry we paste entries before. entry_count = %d, before = %d",
1146 I_ENTRY_COUNT (ih), before); 1217 I_ENTRY_COUNT(ih), before);
1147 1218
1148 1219 /* first byte of dest item */
1149 /* first byte of dest item */ 1220 item = bh->b_data + ih_location(ih);
1150 item = bh->b_data + ih_location(ih); 1221
1151 1222 /* entry head array */
1152 /* entry head array */ 1223 deh = B_I_DEH(bh, ih);
1153 deh = B_I_DEH (bh, ih); 1224
1154 1225 /* new records will be pasted at this point */
1155 /* new records will be pasted at this point */ 1226 insert_point =
1156 insert_point = item + (before ? deh_location( &(deh[before - 1])) : (ih_item_len(ih) - paste_size)); 1227 item +
1157 1228 (before ? deh_location(&(deh[before - 1]))
1158 /* adjust locations of records that will be AFTER new records */ 1229 : (ih_item_len(ih) - paste_size));
1159 for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i --) 1230
1160 put_deh_location( &(deh[i]), 1231 /* adjust locations of records that will be AFTER new records */
1161 deh_location(&(deh[i])) + (DEH_SIZE * new_entry_count )); 1232 for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--)
1162 1233 put_deh_location(&(deh[i]),
1163 /* adjust locations of records that will be BEFORE new records */ 1234 deh_location(&(deh[i])) +
1164 for (i = 0; i < before; i ++) 1235 (DEH_SIZE * new_entry_count));
1165 put_deh_location( &(deh[i]), deh_location(&(deh[i])) + paste_size ); 1236
1166 1237 /* adjust locations of records that will be BEFORE new records */
1167 old_entry_num = I_ENTRY_COUNT(ih); 1238 for (i = 0; i < before; i++)
1168 put_ih_entry_count( ih, ih_entry_count(ih) + new_entry_count ); 1239 put_deh_location(&(deh[i]),
1169 1240 deh_location(&(deh[i])) + paste_size);
1170 /* prepare space for pasted records */ 1241
1171 memmove (insert_point + paste_size, insert_point, item + (ih_item_len(ih) - paste_size) - insert_point); 1242 old_entry_num = I_ENTRY_COUNT(ih);
1172 1243 put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
1173 /* copy new records */ 1244
1174 memcpy (insert_point + DEH_SIZE * new_entry_count, records, 1245 /* prepare space for pasted records */
1175 paste_size - DEH_SIZE * new_entry_count); 1246 memmove(insert_point + paste_size, insert_point,
1176 1247 item + (ih_item_len(ih) - paste_size) - insert_point);
1177 /* prepare space for new entry heads */ 1248
1178 deh += before; 1249 /* copy new records */
1179 memmove ((char *)(deh + new_entry_count), deh, insert_point - (char *)deh); 1250 memcpy(insert_point + DEH_SIZE * new_entry_count, records,
1180 1251 paste_size - DEH_SIZE * new_entry_count);
1181 /* copy new entry heads */ 1252
1182 deh = (struct reiserfs_de_head *)((char *)deh); 1253 /* prepare space for new entry heads */
1183 memcpy (deh, new_dehs, DEH_SIZE * new_entry_count); 1254 deh += before;
1184 1255 memmove((char *)(deh + new_entry_count), deh,
1185 /* set locations of new records */ 1256 insert_point - (char *)deh);
1186 for (i = 0; i < new_entry_count; i ++) 1257
1187 { 1258 /* copy new entry heads */
1188 put_deh_location( &(deh[i]), 1259 deh = (struct reiserfs_de_head *)((char *)deh);
1189 deh_location( &(deh[i] )) + 1260 memcpy(deh, new_dehs, DEH_SIZE * new_entry_count);
1190 (- deh_location( &(new_dehs[new_entry_count - 1])) + 1261
1191 insert_point + DEH_SIZE * new_entry_count - item)); 1262 /* set locations of new records */
1192 } 1263 for (i = 0; i < new_entry_count; i++) {
1193 1264 put_deh_location(&(deh[i]),
1194 1265 deh_location(&(deh[i])) +
1195 /* change item key if necessary (when we paste before 0-th entry */ 1266 (-deh_location
1196 if (!before) 1267 (&(new_dehs[new_entry_count - 1])) +
1197 { 1268 insert_point + DEH_SIZE * new_entry_count -
1198 set_le_ih_k_offset (ih, deh_offset(new_dehs)); 1269 item));
1270 }
1271
1272 /* change item key if necessary (when we paste before 0-th entry */
1273 if (!before) {
1274 set_le_ih_k_offset(ih, deh_offset(new_dehs));
1199/* memcpy (&ih->ih_key.k_offset, 1275/* memcpy (&ih->ih_key.k_offset,
1200 &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ 1276 &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
1201 } 1277 }
1202
1203#ifdef CONFIG_REISERFS_CHECK 1278#ifdef CONFIG_REISERFS_CHECK
1204 { 1279 {
1205 int prev, next; 1280 int prev, next;
1206 /* check record locations */ 1281 /* check record locations */
1207 deh = B_I_DEH (bh, ih); 1282 deh = B_I_DEH(bh, ih);
1208 for (i = 0; i < I_ENTRY_COUNT(ih); i ++) { 1283 for (i = 0; i < I_ENTRY_COUNT(ih); i++) {
1209 next = (i < I_ENTRY_COUNT(ih) - 1) ? deh_location( &(deh[i + 1])) : 0; 1284 next =
1210 prev = (i != 0) ? deh_location( &(deh[i - 1]) ) : 0; 1285 (i <
1211 1286 I_ENTRY_COUNT(ih) -
1212 if (prev && prev <= deh_location( &(deh[i]))) 1287 1) ? deh_location(&(deh[i + 1])) : 0;
1213 reiserfs_warning (NULL, "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)", 1288 prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0;
1214 ih, deh + i - 1, i, deh + i); 1289
1215 if (next && next >= deh_location( &(deh[i]))) 1290 if (prev && prev <= deh_location(&(deh[i])))
1216 reiserfs_warning (NULL, "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)", 1291 reiserfs_warning(NULL,
1217 ih, i, deh + i, deh + i + 1); 1292 "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)",
1218 } 1293 ih, deh + i - 1, i, deh + i);
1219 } 1294 if (next && next >= deh_location(&(deh[i])))
1295 reiserfs_warning(NULL,
1296 "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)",
1297 ih, i, deh + i, deh + i + 1);
1298 }
1299 }
1220#endif 1300#endif
1221 1301
1222} 1302}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 7d4dc5f5aa8b..3549067c42d9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -25,86 +25,85 @@
25 25
26// directory item contains array of entry headers. This performs 26// directory item contains array of entry headers. This performs
27// binary search through that array 27// binary search through that array
28static int bin_search_in_dir_item (struct reiserfs_dir_entry * de, loff_t off) 28static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
29{ 29{
30 struct item_head * ih = de->de_ih; 30 struct item_head *ih = de->de_ih;
31 struct reiserfs_de_head * deh = de->de_deh; 31 struct reiserfs_de_head *deh = de->de_deh;
32 int rbound, lbound, j; 32 int rbound, lbound, j;
33 33
34 lbound = 0; 34 lbound = 0;
35 rbound = I_ENTRY_COUNT (ih) - 1; 35 rbound = I_ENTRY_COUNT(ih) - 1;
36 36
37 for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) { 37 for (j = (rbound + lbound) / 2; lbound <= rbound;
38 if (off < deh_offset (deh + j)) { 38 j = (rbound + lbound) / 2) {
39 rbound = j - 1; 39 if (off < deh_offset(deh + j)) {
40 continue; 40 rbound = j - 1;
41 continue;
42 }
43 if (off > deh_offset(deh + j)) {
44 lbound = j + 1;
45 continue;
46 }
47 // this is not name found, but matched third key component
48 de->de_entry_num = j;
49 return NAME_FOUND;
41 } 50 }
42 if (off > deh_offset (deh + j)) {
43 lbound = j + 1;
44 continue;
45 }
46 // this is not name found, but matched third key component
47 de->de_entry_num = j;
48 return NAME_FOUND;
49 }
50 51
51 de->de_entry_num = lbound; 52 de->de_entry_num = lbound;
52 return NAME_NOT_FOUND; 53 return NAME_NOT_FOUND;
53} 54}
54 55
55
56// comment? maybe something like set de to point to what the path points to? 56// comment? maybe something like set de to point to what the path points to?
57static inline void set_de_item_location (struct reiserfs_dir_entry * de, struct path * path) 57static inline void set_de_item_location(struct reiserfs_dir_entry *de,
58 struct path *path)
58{ 59{
59 de->de_bh = get_last_bh (path); 60 de->de_bh = get_last_bh(path);
60 de->de_ih = get_ih (path); 61 de->de_ih = get_ih(path);
61 de->de_deh = B_I_DEH (de->de_bh, de->de_ih); 62 de->de_deh = B_I_DEH(de->de_bh, de->de_ih);
62 de->de_item_num = PATH_LAST_POSITION (path); 63 de->de_item_num = PATH_LAST_POSITION(path);
63} 64}
64
65 65
66// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set 66// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
67inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de) 67inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
68{ 68{
69 struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; 69 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
70 70
71 if (de->de_entry_num >= ih_entry_count (de->de_ih)) 71 if (de->de_entry_num >= ih_entry_count(de->de_ih))
72 BUG (); 72 BUG();
73 73
74 de->de_entrylen = entry_length (de->de_bh, de->de_ih, de->de_entry_num); 74 de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num);
75 de->de_namelen = de->de_entrylen - (de_with_sd (deh) ? SD_SIZE : 0); 75 de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0);
76 de->de_name = B_I_PITEM (de->de_bh, de->de_ih) + deh_location(deh); 76 de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh);
77 if (de->de_name[de->de_namelen - 1] == 0) 77 if (de->de_name[de->de_namelen - 1] == 0)
78 de->de_namelen = strlen (de->de_name); 78 de->de_namelen = strlen(de->de_name);
79} 79}
80 80
81
82// what entry points to 81// what entry points to
83static inline void set_de_object_key (struct reiserfs_dir_entry * de) 82static inline void set_de_object_key(struct reiserfs_dir_entry *de)
84{ 83{
85 if (de->de_entry_num >= ih_entry_count (de->de_ih)) 84 if (de->de_entry_num >= ih_entry_count(de->de_ih))
86 BUG (); 85 BUG();
87 de->de_dir_id = deh_dir_id( &(de->de_deh[de->de_entry_num])); 86 de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num]));
88 de->de_objectid = deh_objectid( &(de->de_deh[de->de_entry_num])); 87 de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num]));
89} 88}
90 89
91 90static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
92static inline void store_de_entry_key (struct reiserfs_dir_entry * de)
93{ 91{
94 struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; 92 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
95 93
96 if (de->de_entry_num >= ih_entry_count (de->de_ih)) 94 if (de->de_entry_num >= ih_entry_count(de->de_ih))
97 BUG (); 95 BUG();
98 96
99 /* store key of the found entry */ 97 /* store key of the found entry */
100 de->de_entry_key.version = KEY_FORMAT_3_5; 98 de->de_entry_key.version = KEY_FORMAT_3_5;
101 de->de_entry_key.on_disk_key.k_dir_id = le32_to_cpu (de->de_ih->ih_key.k_dir_id); 99 de->de_entry_key.on_disk_key.k_dir_id =
102 de->de_entry_key.on_disk_key.k_objectid = le32_to_cpu (de->de_ih->ih_key.k_objectid); 100 le32_to_cpu(de->de_ih->ih_key.k_dir_id);
103 set_cpu_key_k_offset (&(de->de_entry_key), deh_offset (deh)); 101 de->de_entry_key.on_disk_key.k_objectid =
104 set_cpu_key_k_type (&(de->de_entry_key), TYPE_DIRENTRY); 102 le32_to_cpu(de->de_ih->ih_key.k_objectid);
103 set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh));
104 set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY);
105} 105}
106 106
107
108/* We assign a key to each directory item, and place multiple entries 107/* We assign a key to each directory item, and place multiple entries
109in a single directory item. A directory item has a key equal to the 108in a single directory item. A directory item has a key equal to the
110key of the first directory entry in it. 109key of the first directory entry in it.
@@ -117,58 +116,60 @@ entry position in the item
117*/ 116*/
118 117
119/* The function is NOT SCHEDULE-SAFE! */ 118/* The function is NOT SCHEDULE-SAFE! */
120int search_by_entry_key (struct super_block * sb, const struct cpu_key * key, 119int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
121 struct path * path, struct reiserfs_dir_entry * de) 120 struct path *path, struct reiserfs_dir_entry *de)
122{ 121{
123 int retval; 122 int retval;
124 123
125 retval = search_item (sb, key, path); 124 retval = search_item(sb, key, path);
126 switch (retval) { 125 switch (retval) {
127 case ITEM_NOT_FOUND: 126 case ITEM_NOT_FOUND:
128 if (!PATH_LAST_POSITION (path)) { 127 if (!PATH_LAST_POSITION(path)) {
129 reiserfs_warning (sb, "vs-7000: search_by_entry_key: search_by_key returned item position == 0"); 128 reiserfs_warning(sb,
130 pathrelse(path) ; 129 "vs-7000: search_by_entry_key: search_by_key returned item position == 0");
131 return IO_ERROR ; 130 pathrelse(path);
131 return IO_ERROR;
132 }
133 PATH_LAST_POSITION(path)--;
134
135 case ITEM_FOUND:
136 break;
137
138 case IO_ERROR:
139 return retval;
140
141 default:
142 pathrelse(path);
143 reiserfs_warning(sb,
144 "vs-7002: search_by_entry_key: no path to here");
145 return IO_ERROR;
132 } 146 }
133 PATH_LAST_POSITION (path) --;
134
135 case ITEM_FOUND:
136 break;
137
138 case IO_ERROR:
139 return retval;
140
141 default:
142 pathrelse (path);
143 reiserfs_warning (sb, "vs-7002: search_by_entry_key: no path to here");
144 return IO_ERROR;
145 }
146 147
147 set_de_item_location (de, path); 148 set_de_item_location(de, path);
148 149
149#ifdef CONFIG_REISERFS_CHECK 150#ifdef CONFIG_REISERFS_CHECK
150 if (!is_direntry_le_ih (de->de_ih) || 151 if (!is_direntry_le_ih(de->de_ih) ||
151 COMP_SHORT_KEYS (&(de->de_ih->ih_key), key)) { 152 COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) {
152 print_block (de->de_bh, 0, -1, -1); 153 print_block(de->de_bh, 0, -1, -1);
153 reiserfs_panic (sb, "vs-7005: search_by_entry_key: found item %h is not directory item or " 154 reiserfs_panic(sb,
154 "does not belong to the same directory as key %K", de->de_ih, key); 155 "vs-7005: search_by_entry_key: found item %h is not directory item or "
155 } 156 "does not belong to the same directory as key %K",
156#endif /* CONFIG_REISERFS_CHECK */ 157 de->de_ih, key);
157 158 }
158 /* binary search in directory item by third componen t of the 159#endif /* CONFIG_REISERFS_CHECK */
159 key. sets de->de_entry_num of de */ 160
160 retval = bin_search_in_dir_item (de, cpu_key_k_offset (key)); 161 /* binary search in directory item by third componen t of the
161 path->pos_in_item = de->de_entry_num; 162 key. sets de->de_entry_num of de */
162 if (retval != NAME_NOT_FOUND) { 163 retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
163 // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set 164 path->pos_in_item = de->de_entry_num;
164 set_de_name_and_namelen (de); 165 if (retval != NAME_NOT_FOUND) {
165 set_de_object_key (de); 166 // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set
166 } 167 set_de_name_and_namelen(de);
167 return retval; 168 set_de_object_key(de);
169 }
170 return retval;
168} 171}
169 172
170
171
172/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ 173/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
173 174
174/* The third component is hashed, and you can choose from more than 175/* The third component is hashed, and you can choose from more than
@@ -176,197 +177,210 @@ int search_by_entry_key (struct super_block * sb, const struct cpu_key * key,
176 but are thought about. This function should be moved to hashes.c 177 but are thought about. This function should be moved to hashes.c
177 Jedi, please do so. -Hans */ 178 Jedi, please do so. -Hans */
178 179
179static __u32 get_third_component (struct super_block * s, 180static __u32 get_third_component(struct super_block *s,
180 const char * name, int len) 181 const char *name, int len)
181{ 182{
182 __u32 res; 183 __u32 res;
183 184
184 if (!len || (len == 1 && name[0] == '.')) 185 if (!len || (len == 1 && name[0] == '.'))
185 return DOT_OFFSET; 186 return DOT_OFFSET;
186 if (len == 2 && name[0] == '.' && name[1] == '.') 187 if (len == 2 && name[0] == '.' && name[1] == '.')
187 return DOT_DOT_OFFSET; 188 return DOT_DOT_OFFSET;
188 189
189 res = REISERFS_SB(s)->s_hash_function (name, len); 190 res = REISERFS_SB(s)->s_hash_function(name, len);
190 191
191 // take bits from 7-th to 30-th including both bounds 192 // take bits from 7-th to 30-th including both bounds
192 res = GET_HASH_VALUE(res); 193 res = GET_HASH_VALUE(res);
193 if (res == 0) 194 if (res == 0)
194 // needed to have no names before "." and ".." those have hash 195 // needed to have no names before "." and ".." those have hash
195 // value == 0 and generation conters 1 and 2 accordingly 196 // value == 0 and generation conters 1 and 2 accordingly
196 res = 128; 197 res = 128;
197 return res + MAX_GENERATION_NUMBER; 198 return res + MAX_GENERATION_NUMBER;
198} 199}
199 200
200 201static int reiserfs_match(struct reiserfs_dir_entry *de,
201static int reiserfs_match (struct reiserfs_dir_entry * de, 202 const char *name, int namelen)
202 const char * name, int namelen)
203{ 203{
204 int retval = NAME_NOT_FOUND; 204 int retval = NAME_NOT_FOUND;
205 205
206 if ((namelen == de->de_namelen) && 206 if ((namelen == de->de_namelen) &&
207 !memcmp(de->de_name, name, de->de_namelen)) 207 !memcmp(de->de_name, name, de->de_namelen))
208 retval = (de_visible (de->de_deh + de->de_entry_num) ? NAME_FOUND : NAME_FOUND_INVISIBLE); 208 retval =
209 (de_visible(de->de_deh + de->de_entry_num) ? NAME_FOUND :
210 NAME_FOUND_INVISIBLE);
209 211
210 return retval; 212 return retval;
211} 213}
212 214
213
214/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ 215/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */
215 216
216 /* used when hash collisions exist */ 217 /* used when hash collisions exist */
217 218
218 219static int linear_search_in_dir_item(struct cpu_key *key,
219static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_entry * de, 220 struct reiserfs_dir_entry *de,
220 const char * name, int namelen) 221 const char *name, int namelen)
221{ 222{
222 struct reiserfs_de_head * deh = de->de_deh; 223 struct reiserfs_de_head *deh = de->de_deh;
223 int retval; 224 int retval;
224 int i; 225 int i;
225 226
226 i = de->de_entry_num; 227 i = de->de_entry_num;
227 228
228 if (i == I_ENTRY_COUNT (de->de_ih) || 229 if (i == I_ENTRY_COUNT(de->de_ih) ||
229 GET_HASH_VALUE (deh_offset (deh + i)) != GET_HASH_VALUE (cpu_key_k_offset (key))) { 230 GET_HASH_VALUE(deh_offset(deh + i)) !=
230 i --; 231 GET_HASH_VALUE(cpu_key_k_offset(key))) {
231 } 232 i--;
233 }
232 234
233 RFALSE( de->de_deh != B_I_DEH (de->de_bh, de->de_ih), 235 RFALSE(de->de_deh != B_I_DEH(de->de_bh, de->de_ih),
234 "vs-7010: array of entry headers not found"); 236 "vs-7010: array of entry headers not found");
235 237
236 deh += i; 238 deh += i;
237 239
238 for (; i >= 0; i --, deh --) { 240 for (; i >= 0; i--, deh--) {
239 if (GET_HASH_VALUE (deh_offset (deh)) != 241 if (GET_HASH_VALUE(deh_offset(deh)) !=
240 GET_HASH_VALUE (cpu_key_k_offset (key))) { 242 GET_HASH_VALUE(cpu_key_k_offset(key))) {
241 // hash value does not match, no need to check whole name 243 // hash value does not match, no need to check whole name
242 return NAME_NOT_FOUND; 244 return NAME_NOT_FOUND;
243 } 245 }
244 246
245 /* mark, that this generation number is used */ 247 /* mark, that this generation number is used */
246 if (de->de_gen_number_bit_string) 248 if (de->de_gen_number_bit_string)
247 set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), (unsigned long *)de->de_gen_number_bit_string); 249 set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
250 (unsigned long *)de->de_gen_number_bit_string);
248 251
249 // calculate pointer to name and namelen 252 // calculate pointer to name and namelen
250 de->de_entry_num = i; 253 de->de_entry_num = i;
251 set_de_name_and_namelen (de); 254 set_de_name_and_namelen(de);
252 255
253 if ((retval = reiserfs_match (de, name, namelen)) != NAME_NOT_FOUND) { 256 if ((retval =
254 // de's de_name, de_namelen, de_recordlen are set. Fill the rest: 257 reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) {
258 // de's de_name, de_namelen, de_recordlen are set. Fill the rest:
255 259
256 // key of pointed object 260 // key of pointed object
257 set_de_object_key (de); 261 set_de_object_key(de);
258 262
259 store_de_entry_key (de); 263 store_de_entry_key(de);
260 264
261 // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE 265 // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE
262 return retval; 266 return retval;
267 }
263 } 268 }
264 }
265
266 if (GET_GENERATION_NUMBER (le_ih_k_offset (de->de_ih)) == 0)
267 /* we have reached left most entry in the node. In common we
268 have to go to the left neighbor, but if generation counter
269 is 0 already, we know for sure, that there is no name with
270 the same hash value */
271 // FIXME: this work correctly only because hash value can not
272 // be 0. Btw, in case of Yura's hash it is probably possible,
273 // so, this is a bug
274 return NAME_NOT_FOUND;
275 269
276 RFALSE( de->de_item_num, 270 if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
277 "vs-7015: two diritems of the same directory in one node?"); 271 /* we have reached left most entry in the node. In common we
272 have to go to the left neighbor, but if generation counter
273 is 0 already, we know for sure, that there is no name with
274 the same hash value */
275 // FIXME: this work correctly only because hash value can not
276 // be 0. Btw, in case of Yura's hash it is probably possible,
277 // so, this is a bug
278 return NAME_NOT_FOUND;
278 279
279 return GOTO_PREVIOUS_ITEM; 280 RFALSE(de->de_item_num,
280} 281 "vs-7015: two diritems of the same directory in one node?");
281 282
283 return GOTO_PREVIOUS_ITEM;
284}
282 285
283// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND 286// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
284// FIXME: should add something like IOERROR 287// FIXME: should add something like IOERROR
285static int reiserfs_find_entry (struct inode * dir, const char * name, int namelen, 288static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
286 struct path * path_to_entry, struct reiserfs_dir_entry * de) 289 struct path *path_to_entry,
290 struct reiserfs_dir_entry *de)
287{ 291{
288 struct cpu_key key_to_search; 292 struct cpu_key key_to_search;
289 int retval; 293 int retval;
290 294
291 295 if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize))
292 if (namelen > REISERFS_MAX_NAME (dir->i_sb->s_blocksize)) 296 return NAME_NOT_FOUND;
293 return NAME_NOT_FOUND; 297
294 298 /* we will search for this key in the tree */
295 /* we will search for this key in the tree */ 299 make_cpu_key(&key_to_search, dir,
296 make_cpu_key (&key_to_search, dir, 300 get_third_component(dir->i_sb, name, namelen),
297 get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); 301 TYPE_DIRENTRY, 3);
298 302
299 while (1) { 303 while (1) {
300 retval = search_by_entry_key (dir->i_sb, &key_to_search, path_to_entry, de); 304 retval =
301 if (retval == IO_ERROR) { 305 search_by_entry_key(dir->i_sb, &key_to_search,
302 reiserfs_warning (dir->i_sb, "zam-7001: io error in %s", 306 path_to_entry, de);
303 __FUNCTION__); 307 if (retval == IO_ERROR) {
304 return IO_ERROR; 308 reiserfs_warning(dir->i_sb, "zam-7001: io error in %s",
305 } 309 __FUNCTION__);
306 310 return IO_ERROR;
307 /* compare names for all entries having given hash value */ 311 }
308 retval = linear_search_in_dir_item (&key_to_search, de, name, namelen); 312
309 if (retval != GOTO_PREVIOUS_ITEM) { 313 /* compare names for all entries having given hash value */
310 /* there is no need to scan directory anymore. Given entry found or does not exist */ 314 retval =
311 path_to_entry->pos_in_item = de->de_entry_num; 315 linear_search_in_dir_item(&key_to_search, de, name,
312 return retval; 316 namelen);
313 } 317 if (retval != GOTO_PREVIOUS_ITEM) {
314 318 /* there is no need to scan directory anymore. Given entry found or does not exist */
315 /* there is left neighboring item of this directory and given entry can be there */ 319 path_to_entry->pos_in_item = de->de_entry_num;
316 set_cpu_key_k_offset (&key_to_search, le_ih_k_offset (de->de_ih) - 1); 320 return retval;
317 pathrelse (path_to_entry); 321 }
318 322
319 } /* while (1) */ 323 /* there is left neighboring item of this directory and given entry can be there */
324 set_cpu_key_k_offset(&key_to_search,
325 le_ih_k_offset(de->de_ih) - 1);
326 pathrelse(path_to_entry);
327
328 } /* while (1) */
320} 329}
321 330
322 331static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
323static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry, struct nameidata *nd) 332 struct nameidata *nd)
324{ 333{
325 int retval; 334 int retval;
326 struct inode * inode = NULL; 335 struct inode *inode = NULL;
327 struct reiserfs_dir_entry de; 336 struct reiserfs_dir_entry de;
328 INITIALIZE_PATH (path_to_entry); 337 INITIALIZE_PATH(path_to_entry);
329 338
330 if (REISERFS_MAX_NAME (dir->i_sb->s_blocksize) < dentry->d_name.len) 339 if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
331 return ERR_PTR(-ENAMETOOLONG); 340 return ERR_PTR(-ENAMETOOLONG);
332 341
333 reiserfs_write_lock(dir->i_sb); 342 reiserfs_write_lock(dir->i_sb);
334 de.de_gen_number_bit_string = NULL; 343 de.de_gen_number_bit_string = NULL;
335 retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path_to_entry, &de); 344 retval =
336 pathrelse (&path_to_entry); 345 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
337 if (retval == NAME_FOUND) { 346 &path_to_entry, &de);
338 /* Hide the .reiserfs_priv directory */ 347 pathrelse(&path_to_entry);
339 if (reiserfs_xattrs (dir->i_sb) && 348 if (retval == NAME_FOUND) {
340 !old_format_only(dir->i_sb) && 349 /* Hide the .reiserfs_priv directory */
341 REISERFS_SB(dir->i_sb)->priv_root && 350 if (reiserfs_xattrs(dir->i_sb) &&
342 REISERFS_SB(dir->i_sb)->priv_root->d_inode && 351 !old_format_only(dir->i_sb) &&
343 de.de_objectid == le32_to_cpu (INODE_PKEY(REISERFS_SB(dir->i_sb)->priv_root->d_inode)->k_objectid)) { 352 REISERFS_SB(dir->i_sb)->priv_root &&
344 reiserfs_write_unlock (dir->i_sb); 353 REISERFS_SB(dir->i_sb)->priv_root->d_inode &&
345 return ERR_PTR (-EACCES); 354 de.de_objectid ==
355 le32_to_cpu(INODE_PKEY
356 (REISERFS_SB(dir->i_sb)->priv_root->d_inode)->
357 k_objectid)) {
358 reiserfs_write_unlock(dir->i_sb);
359 return ERR_PTR(-EACCES);
360 }
361
362 inode =
363 reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
364 if (!inode || IS_ERR(inode)) {
365 reiserfs_write_unlock(dir->i_sb);
366 return ERR_PTR(-EACCES);
367 }
368
369 /* Propogate the priv_object flag so we know we're in the priv tree */
370 if (is_reiserfs_priv_object(dir))
371 reiserfs_mark_inode_private(inode);
372 }
373 reiserfs_write_unlock(dir->i_sb);
374 if (retval == IO_ERROR) {
375 return ERR_PTR(-EIO);
346 } 376 }
347 377
348 inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); 378 if (inode)
349 if (!inode || IS_ERR(inode)) { 379 return d_splice_alias(inode, dentry);
350 reiserfs_write_unlock(dir->i_sb);
351 return ERR_PTR(-EACCES);
352 }
353
354 /* Propogate the priv_object flag so we know we're in the priv tree */
355 if (is_reiserfs_priv_object (dir))
356 reiserfs_mark_inode_private (inode);
357 }
358 reiserfs_write_unlock(dir->i_sb);
359 if ( retval == IO_ERROR ) {
360 return ERR_PTR(-EIO);
361 }
362
363 if (inode)
364 return d_splice_alias(inode, dentry);
365
366 d_add(dentry, inode);
367 return NULL;
368}
369 380
381 d_add(dentry, inode);
382 return NULL;
383}
370 384
371/* 385/*
372** looks up the dentry of the parent directory for child. 386** looks up the dentry of the parent directory for child.
@@ -374,40 +388,38 @@ static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dent
374*/ 388*/
375struct dentry *reiserfs_get_parent(struct dentry *child) 389struct dentry *reiserfs_get_parent(struct dentry *child)
376{ 390{
377 int retval; 391 int retval;
378 struct inode * inode = NULL; 392 struct inode *inode = NULL;
379 struct reiserfs_dir_entry de; 393 struct reiserfs_dir_entry de;
380 INITIALIZE_PATH (path_to_entry); 394 INITIALIZE_PATH(path_to_entry);
381 struct dentry *parent; 395 struct dentry *parent;
382 struct inode *dir = child->d_inode ; 396 struct inode *dir = child->d_inode;
383 397
384 398 if (dir->i_nlink == 0) {
385 if (dir->i_nlink == 0) { 399 return ERR_PTR(-ENOENT);
386 return ERR_PTR(-ENOENT); 400 }
387 } 401 de.de_gen_number_bit_string = NULL;
388 de.de_gen_number_bit_string = NULL; 402
389 403 reiserfs_write_lock(dir->i_sb);
390 reiserfs_write_lock(dir->i_sb); 404 retval = reiserfs_find_entry(dir, "..", 2, &path_to_entry, &de);
391 retval = reiserfs_find_entry (dir, "..", 2, &path_to_entry, &de); 405 pathrelse(&path_to_entry);
392 pathrelse (&path_to_entry); 406 if (retval != NAME_FOUND) {
393 if (retval != NAME_FOUND) { 407 reiserfs_write_unlock(dir->i_sb);
408 return ERR_PTR(-ENOENT);
409 }
410 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
394 reiserfs_write_unlock(dir->i_sb); 411 reiserfs_write_unlock(dir->i_sb);
395 return ERR_PTR(-ENOENT);
396 }
397 inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
398 reiserfs_write_unlock(dir->i_sb);
399
400 if (!inode || IS_ERR(inode)) {
401 return ERR_PTR(-EACCES);
402 }
403 parent = d_alloc_anon(inode);
404 if (!parent) {
405 iput(inode);
406 parent = ERR_PTR(-ENOMEM);
407 }
408 return parent;
409}
410 412
413 if (!inode || IS_ERR(inode)) {
414 return ERR_PTR(-EACCES);
415 }
416 parent = d_alloc_anon(inode);
417 if (!parent) {
418 iput(inode);
419 parent = ERR_PTR(-ENOMEM);
420 }
421 return parent;
422}
411 423
412/* add entry to the directory (entry can be hidden). 424/* add entry to the directory (entry can be hidden).
413 425
@@ -415,132 +427,143 @@ insert definition of when hidden directories are used here -Hans
415 427
416 Does not mark dir inode dirty, do it after successesfull call to it */ 428 Does not mark dir inode dirty, do it after successesfull call to it */
417 429
418static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct inode * dir, 430static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
419 const char * name, int namelen, struct inode * inode, 431 struct inode *dir, const char *name, int namelen,
420 int visible) 432 struct inode *inode, int visible)
421{ 433{
422 struct cpu_key entry_key; 434 struct cpu_key entry_key;
423 struct reiserfs_de_head * deh; 435 struct reiserfs_de_head *deh;
424 INITIALIZE_PATH (path); 436 INITIALIZE_PATH(path);
425 struct reiserfs_dir_entry de; 437 struct reiserfs_dir_entry de;
426 int bit_string [MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; 438 int bit_string[MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1];
427 int gen_number; 439 int gen_number;
428 char small_buf[32+DEH_SIZE] ; /* 48 bytes now and we avoid kmalloc 440 char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc
429 if we create file with short name */ 441 if we create file with short name */
430 char * buffer; 442 char *buffer;
431 int buflen, paste_size; 443 int buflen, paste_size;
432 int retval; 444 int retval;
433 445
434 BUG_ON (!th->t_trans_id); 446 BUG_ON(!th->t_trans_id);
435 447
436 /* cannot allow items to be added into a busy deleted directory */ 448 /* cannot allow items to be added into a busy deleted directory */
437 if (!namelen) 449 if (!namelen)
438 return -EINVAL; 450 return -EINVAL;
439 451
440 if (namelen > REISERFS_MAX_NAME (dir->i_sb->s_blocksize)) 452 if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize))
441 return -ENAMETOOLONG; 453 return -ENAMETOOLONG;
442 454
443 /* each entry has unique key. compose it */ 455 /* each entry has unique key. compose it */
444 make_cpu_key (&entry_key, dir, 456 make_cpu_key(&entry_key, dir,
445 get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); 457 get_third_component(dir->i_sb, name, namelen),
446 458 TYPE_DIRENTRY, 3);
447 /* get memory for composing the entry */ 459
448 buflen = DEH_SIZE + ROUND_UP (namelen); 460 /* get memory for composing the entry */
449 if (buflen > sizeof (small_buf)) { 461 buflen = DEH_SIZE + ROUND_UP(namelen);
450 buffer = reiserfs_kmalloc (buflen, GFP_NOFS, dir->i_sb); 462 if (buflen > sizeof(small_buf)) {
451 if (buffer == 0) 463 buffer = reiserfs_kmalloc(buflen, GFP_NOFS, dir->i_sb);
452 return -ENOMEM; 464 if (buffer == 0)
453 } else 465 return -ENOMEM;
454 buffer = small_buf; 466 } else
455 467 buffer = small_buf;
456 paste_size = (get_inode_sd_version (dir) == STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; 468
457 469 paste_size =
458 /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ 470 (get_inode_sd_version(dir) ==
459 deh = (struct reiserfs_de_head *)buffer; 471 STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen;
460 deh->deh_location = 0; /* JDM Endian safe if 0 */ 472
461 put_deh_offset( deh, cpu_key_k_offset( &entry_key ) ); 473 /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */
462 deh->deh_state = 0; /* JDM Endian safe if 0 */ 474 deh = (struct reiserfs_de_head *)buffer;
463 /* put key (ino analog) to de */ 475 deh->deh_location = 0; /* JDM Endian safe if 0 */
464 deh->deh_dir_id = INODE_PKEY (inode)->k_dir_id; /* safe: k_dir_id is le */ 476 put_deh_offset(deh, cpu_key_k_offset(&entry_key));
465 deh->deh_objectid = INODE_PKEY (inode)->k_objectid; /* safe: k_objectid is le */ 477 deh->deh_state = 0; /* JDM Endian safe if 0 */
466 478 /* put key (ino analog) to de */
467 /* copy name */ 479 deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */
468 memcpy ((char *)(deh + 1), name, namelen); 480 deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */
469 /* padd by 0s to the 4 byte boundary */ 481
470 padd_item ((char *)(deh + 1), ROUND_UP (namelen), namelen); 482 /* copy name */
471 483 memcpy((char *)(deh + 1), name, namelen);
472 /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ 484 /* padd by 0s to the 4 byte boundary */
473 mark_de_without_sd (deh); 485 padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen);
474 visible ? mark_de_visible (deh) : mark_de_hidden (deh); 486
475 487 /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */
476 /* find the proper place for the new entry */ 488 mark_de_without_sd(deh);
477 memset (bit_string, 0, sizeof (bit_string)); 489 visible ? mark_de_visible(deh) : mark_de_hidden(deh);
478 de.de_gen_number_bit_string = (char *)bit_string; 490
479 retval = reiserfs_find_entry (dir, name, namelen, &path, &de); 491 /* find the proper place for the new entry */
480 if( retval != NAME_NOT_FOUND ) { 492 memset(bit_string, 0, sizeof(bit_string));
481 if (buffer != small_buf) 493 de.de_gen_number_bit_string = (char *)bit_string;
482 reiserfs_kfree (buffer, buflen, dir->i_sb); 494 retval = reiserfs_find_entry(dir, name, namelen, &path, &de);
483 pathrelse (&path); 495 if (retval != NAME_NOT_FOUND) {
496 if (buffer != small_buf)
497 reiserfs_kfree(buffer, buflen, dir->i_sb);
498 pathrelse(&path);
499
500 if (retval == IO_ERROR) {
501 return -EIO;
502 }
503
504 if (retval != NAME_FOUND) {
505 reiserfs_warning(dir->i_sb,
506 "zam-7002:%s: \"reiserfs_find_entry\" "
507 "has returned unexpected value (%d)",
508 __FUNCTION__, retval);
509 }
510
511 return -EEXIST;
512 }
484 513
485 if ( retval == IO_ERROR ) { 514 gen_number =
486 return -EIO; 515 find_first_zero_bit((unsigned long *)bit_string,
516 MAX_GENERATION_NUMBER + 1);
517 if (gen_number > MAX_GENERATION_NUMBER) {
518 /* there is no free generation number */
519 reiserfs_warning(dir->i_sb,
520 "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
521 if (buffer != small_buf)
522 reiserfs_kfree(buffer, buflen, dir->i_sb);
523 pathrelse(&path);
524 return -EBUSY;
525 }
526 /* adjust offset of directory enrty */
527 put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number));
528 set_cpu_key_k_offset(&entry_key, deh_offset(deh));
529
530 /* update max-hash-collisions counter in reiserfs_sb_info */
531 PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number);
532
533 if (gen_number != 0) { /* we need to re-search for the insertion point */
534 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
535 NAME_NOT_FOUND) {
536 reiserfs_warning(dir->i_sb,
537 "vs-7032: reiserfs_add_entry: "
538 "entry with this key (%K) already exists",
539 &entry_key);
540
541 if (buffer != small_buf)
542 reiserfs_kfree(buffer, buflen, dir->i_sb);
543 pathrelse(&path);
544 return -EBUSY;
545 }
487 } 546 }
488 547
489 if (retval != NAME_FOUND) { 548 /* perform the insertion of the entry that we have prepared */
490 reiserfs_warning (dir->i_sb, "zam-7002:%s: \"reiserfs_find_entry\" " 549 retval =
491 "has returned unexpected value (%d)", 550 reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer,
492 __FUNCTION__, retval); 551 paste_size);
493 } 552 if (buffer != small_buf)
494 553 reiserfs_kfree(buffer, buflen, dir->i_sb);
495 return -EEXIST; 554 if (retval) {
496 } 555 reiserfs_check_path(&path);
497 556 return retval;
498 gen_number = find_first_zero_bit ((unsigned long *)bit_string, MAX_GENERATION_NUMBER + 1);
499 if (gen_number > MAX_GENERATION_NUMBER) {
500 /* there is no free generation number */
501 reiserfs_warning (dir->i_sb, "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
502 if (buffer != small_buf)
503 reiserfs_kfree (buffer, buflen, dir->i_sb);
504 pathrelse (&path);
505 return -EBUSY;
506 }
507 /* adjust offset of directory enrty */
508 put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number));
509 set_cpu_key_k_offset (&entry_key, deh_offset(deh));
510
511 /* update max-hash-collisions counter in reiserfs_sb_info */
512 PROC_INFO_MAX( th -> t_super, max_hash_collisions, gen_number );
513
514 if (gen_number != 0) { /* we need to re-search for the insertion point */
515 if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) != NAME_NOT_FOUND) {
516 reiserfs_warning (dir->i_sb, "vs-7032: reiserfs_add_entry: "
517 "entry with this key (%K) already exists",
518 &entry_key);
519
520 if (buffer != small_buf)
521 reiserfs_kfree (buffer, buflen, dir->i_sb);
522 pathrelse (&path);
523 return -EBUSY;
524 } 557 }
525 }
526
527 /* perform the insertion of the entry that we have prepared */
528 retval = reiserfs_paste_into_item (th, &path, &entry_key, dir, buffer, paste_size);
529 if (buffer != small_buf)
530 reiserfs_kfree (buffer, buflen, dir->i_sb);
531 if (retval) {
532 reiserfs_check_path(&path) ;
533 return retval;
534 }
535 558
536 dir->i_size += paste_size; 559 dir->i_size += paste_size;
537 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 560 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
538 if (!S_ISDIR (inode->i_mode) && visible) 561 if (!S_ISDIR(inode->i_mode) && visible)
539 // reiserfs_mkdir or reiserfs_rename will do that by itself 562 // reiserfs_mkdir or reiserfs_rename will do that by itself
540 reiserfs_update_sd (th, dir); 563 reiserfs_update_sd(th, dir);
541 564
542 reiserfs_check_path(&path) ; 565 reiserfs_check_path(&path);
543 return 0; 566 return 0;
544} 567}
545 568
546/* quota utility function, call if you've had to abort after calling 569/* quota utility function, call if you've had to abort after calling
@@ -548,12 +571,13 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in
548** This should only be called on inodes that do not have stat data 571** This should only be called on inodes that do not have stat data
549** inserted into the tree yet. 572** inserted into the tree yet.
550*/ 573*/
551static int drop_new_inode(struct inode *inode) { 574static int drop_new_inode(struct inode *inode)
552 DQUOT_DROP(inode); 575{
553 make_bad_inode(inode) ; 576 DQUOT_DROP(inode);
554 inode->i_flags |= S_NOQUOTA; 577 make_bad_inode(inode);
555 iput(inode) ; 578 inode->i_flags |= S_NOQUOTA;
556 return 0 ; 579 iput(inode);
580 return 0;
557} 581}
558 582
559/* utility function that does setup for reiserfs_new_inode. 583/* utility function that does setup for reiserfs_new_inode.
@@ -561,902 +585,971 @@ static int drop_new_inode(struct inode *inode) {
561** outside of a transaction, so we had to pull some bits of 585** outside of a transaction, so we had to pull some bits of
562** reiserfs_new_inode out into this func. 586** reiserfs_new_inode out into this func.
563*/ 587*/
564static int new_inode_init(struct inode *inode, struct inode *dir, int mode) { 588static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
565 589{
566 /* the quota init calls have to know who to charge the quota to, so 590
567 ** we have to set uid and gid here 591 /* the quota init calls have to know who to charge the quota to, so
568 */ 592 ** we have to set uid and gid here
569 inode->i_uid = current->fsuid; 593 */
570 inode->i_mode = mode; 594 inode->i_uid = current->fsuid;
571 595 inode->i_mode = mode;
572 if (dir->i_mode & S_ISGID) { 596 /* Make inode invalid - just in case we are going to drop it before
573 inode->i_gid = dir->i_gid; 597 * the initialization happens */
574 if (S_ISDIR(mode)) 598 INODE_PKEY(inode)->k_objectid = 0;
575 inode->i_mode |= S_ISGID; 599
576 } else { 600 if (dir->i_mode & S_ISGID) {
577 inode->i_gid = current->fsgid; 601 inode->i_gid = dir->i_gid;
578 } 602 if (S_ISDIR(mode))
579 DQUOT_INIT(inode); 603 inode->i_mode |= S_ISGID;
580 return 0 ; 604 } else {
605 inode->i_gid = current->fsgid;
606 }
607 DQUOT_INIT(inode);
608 return 0;
581} 609}
582 610
583static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode, 611static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
584 struct nameidata *nd) 612 struct nameidata *nd)
585{ 613{
586 int retval; 614 int retval;
587 struct inode * inode; 615 struct inode *inode;
588 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 616 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
589 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 617 int jbegin_count =
590 struct reiserfs_transaction_handle th ; 618 JOURNAL_PER_BALANCE_CNT * 2 +
591 int locked; 619 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
592 620 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
593 if (!(inode = new_inode(dir->i_sb))) { 621 struct reiserfs_transaction_handle th;
594 return -ENOMEM ; 622 int locked;
595 } 623
596 new_inode_init(inode, dir, mode); 624 if (!(inode = new_inode(dir->i_sb))) {
597 625 return -ENOMEM;
598 locked = reiserfs_cache_default_acl (dir); 626 }
599 627 new_inode_init(inode, dir, mode);
600 reiserfs_write_lock(dir->i_sb);
601
602 if (locked)
603 reiserfs_write_lock_xattrs (dir->i_sb);
604
605 retval = journal_begin(&th, dir->i_sb, jbegin_count);
606 if (retval) {
607 drop_new_inode (inode);
608 goto out_failed;
609 }
610
611 retval = reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode);
612 if (retval)
613 goto out_failed;
614
615 if (locked) {
616 reiserfs_write_unlock_xattrs (dir->i_sb);
617 locked = 0;
618 }
619
620 inode->i_op = &reiserfs_file_inode_operations;
621 inode->i_fop = &reiserfs_file_operations;
622 inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
623
624 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len,
625 inode, 1/*visible*/);
626 if (retval) {
627 int err;
628 inode->i_nlink--;
629 reiserfs_update_sd (&th, inode);
630 err = journal_end(&th, dir->i_sb, jbegin_count) ;
631 if (err)
632 retval = err;
633 iput (inode);
634 goto out_failed;
635 }
636 reiserfs_update_inode_transaction(inode) ;
637 reiserfs_update_inode_transaction(dir) ;
638
639 d_instantiate(dentry, inode);
640 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
641
642out_failed:
643 if (locked)
644 reiserfs_write_unlock_xattrs (dir->i_sb);
645 reiserfs_write_unlock(dir->i_sb);
646 return retval;
647}
648 628
629 locked = reiserfs_cache_default_acl(dir);
649 630
650static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 631 reiserfs_write_lock(dir->i_sb);
651{
652 int retval;
653 struct inode * inode;
654 struct reiserfs_transaction_handle th ;
655 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
656 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
657 int locked;
658 632
659 if (!new_valid_dev(rdev)) 633 if (locked)
660 return -EINVAL; 634 reiserfs_write_lock_xattrs(dir->i_sb);
635
636 retval = journal_begin(&th, dir->i_sb, jbegin_count);
637 if (retval) {
638 drop_new_inode(inode);
639 goto out_failed;
640 }
641
642 retval =
643 reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
644 inode);
645 if (retval)
646 goto out_failed;
647
648 if (locked) {
649 reiserfs_write_unlock_xattrs(dir->i_sb);
650 locked = 0;
651 }
652
653 inode->i_op = &reiserfs_file_inode_operations;
654 inode->i_fop = &reiserfs_file_operations;
655 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
656
657 retval =
658 reiserfs_add_entry(&th, dir, dentry->d_name.name,
659 dentry->d_name.len, inode, 1 /*visible */ );
660 if (retval) {
661 int err;
662 inode->i_nlink--;
663 reiserfs_update_sd(&th, inode);
664 err = journal_end(&th, dir->i_sb, jbegin_count);
665 if (err)
666 retval = err;
667 iput(inode);
668 goto out_failed;
669 }
670 reiserfs_update_inode_transaction(inode);
671 reiserfs_update_inode_transaction(dir);
661 672
662 if (!(inode = new_inode(dir->i_sb))) { 673 d_instantiate(dentry, inode);
663 return -ENOMEM ; 674 retval = journal_end(&th, dir->i_sb, jbegin_count);
664 }
665 new_inode_init(inode, dir, mode);
666 675
667 locked = reiserfs_cache_default_acl (dir); 676 out_failed:
677 if (locked)
678 reiserfs_write_unlock_xattrs(dir->i_sb);
679 reiserfs_write_unlock(dir->i_sb);
680 return retval;
681}
668 682
669 reiserfs_write_lock(dir->i_sb); 683static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
684 dev_t rdev)
685{
686 int retval;
687 struct inode *inode;
688 struct reiserfs_transaction_handle th;
689 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
690 int jbegin_count =
691 JOURNAL_PER_BALANCE_CNT * 3 +
692 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
693 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
694 int locked;
695
696 if (!new_valid_dev(rdev))
697 return -EINVAL;
698
699 if (!(inode = new_inode(dir->i_sb))) {
700 return -ENOMEM;
701 }
702 new_inode_init(inode, dir, mode);
670 703
671 if (locked) 704 locked = reiserfs_cache_default_acl(dir);
672 reiserfs_write_lock_xattrs (dir->i_sb);
673 705
674 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 706 reiserfs_write_lock(dir->i_sb);
675 if (retval) {
676 drop_new_inode (inode);
677 goto out_failed;
678 }
679 707
680 retval = reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode); 708 if (locked)
681 if (retval) { 709 reiserfs_write_lock_xattrs(dir->i_sb);
682 goto out_failed;
683 }
684 710
685 if (locked) { 711 retval = journal_begin(&th, dir->i_sb, jbegin_count);
686 reiserfs_write_unlock_xattrs (dir->i_sb); 712 if (retval) {
687 locked = 0; 713 drop_new_inode(inode);
688 } 714 goto out_failed;
715 }
689 716
717 retval =
718 reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
719 inode);
720 if (retval) {
721 goto out_failed;
722 }
690 723
691 inode->i_op = &reiserfs_special_inode_operations; 724 if (locked) {
692 init_special_inode(inode, inode->i_mode, rdev) ; 725 reiserfs_write_unlock_xattrs(dir->i_sb);
726 locked = 0;
727 }
693 728
694 //FIXME: needed for block and char devices only 729 inode->i_op = &reiserfs_special_inode_operations;
695 reiserfs_update_sd (&th, inode); 730 init_special_inode(inode, inode->i_mode, rdev);
731
732 //FIXME: needed for block and char devices only
733 reiserfs_update_sd(&th, inode);
734
735 reiserfs_update_inode_transaction(inode);
736 reiserfs_update_inode_transaction(dir);
737
738 retval =
739 reiserfs_add_entry(&th, dir, dentry->d_name.name,
740 dentry->d_name.len, inode, 1 /*visible */ );
741 if (retval) {
742 int err;
743 inode->i_nlink--;
744 reiserfs_update_sd(&th, inode);
745 err = journal_end(&th, dir->i_sb, jbegin_count);
746 if (err)
747 retval = err;
748 iput(inode);
749 goto out_failed;
750 }
696 751
697 reiserfs_update_inode_transaction(inode) ; 752 d_instantiate(dentry, inode);
698 reiserfs_update_inode_transaction(dir) ; 753 retval = journal_end(&th, dir->i_sb, jbegin_count);
699 754
700 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 755 out_failed:
701 inode, 1/*visible*/); 756 if (locked)
702 if (retval) { 757 reiserfs_write_unlock_xattrs(dir->i_sb);
703 int err; 758 reiserfs_write_unlock(dir->i_sb);
704 inode->i_nlink--; 759 return retval;
705 reiserfs_update_sd (&th, inode);
706 err = journal_end(&th, dir->i_sb, jbegin_count) ;
707 if (err)
708 retval = err;
709 iput (inode);
710 goto out_failed;
711 }
712
713 d_instantiate(dentry, inode);
714 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
715
716out_failed:
717 if (locked)
718 reiserfs_write_unlock_xattrs (dir->i_sb);
719 reiserfs_write_unlock(dir->i_sb);
720 return retval;
721} 760}
722 761
723 762static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
724static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
725{ 763{
726 int retval; 764 int retval;
727 struct inode * inode; 765 struct inode *inode;
728 struct reiserfs_transaction_handle th ; 766 struct reiserfs_transaction_handle th;
729 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 767 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
730 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 768 int jbegin_count =
731 int locked; 769 JOURNAL_PER_BALANCE_CNT * 3 +
770 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
771 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
772 int locked;
732 773
733#ifdef DISPLACE_NEW_PACKING_LOCALITIES 774#ifdef DISPLACE_NEW_PACKING_LOCALITIES
734 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ 775 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */
735 REISERFS_I(dir)->new_packing_locality = 1; 776 REISERFS_I(dir)->new_packing_locality = 1;
736#endif 777#endif
737 mode = S_IFDIR | mode; 778 mode = S_IFDIR | mode;
738 if (!(inode = new_inode(dir->i_sb))) { 779 if (!(inode = new_inode(dir->i_sb))) {
739 return -ENOMEM ; 780 return -ENOMEM;
740 } 781 }
741 new_inode_init(inode, dir, mode); 782 new_inode_init(inode, dir, mode);
742 783
743 locked = reiserfs_cache_default_acl (dir); 784 locked = reiserfs_cache_default_acl(dir);
744 785
745 reiserfs_write_lock(dir->i_sb); 786 reiserfs_write_lock(dir->i_sb);
746 if (locked) 787 if (locked)
747 reiserfs_write_lock_xattrs (dir->i_sb); 788 reiserfs_write_lock_xattrs(dir->i_sb);
748 789
749 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 790 retval = journal_begin(&th, dir->i_sb, jbegin_count);
750 if (retval) { 791 if (retval) {
751 drop_new_inode (inode); 792 drop_new_inode(inode);
752 goto out_failed; 793 goto out_failed;
753 } 794 }
754
755
756 /* inc the link count now, so another writer doesn't overflow it while
757 ** we sleep later on.
758 */
759 INC_DIR_INODE_NLINK(dir)
760
761 retval = reiserfs_new_inode (&th, dir, mode, NULL/*symlink*/,
762 old_format_only (dir->i_sb) ?
763 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
764 dentry, inode);
765 if (retval) {
766 dir->i_nlink-- ;
767 goto out_failed;
768 }
769
770 if (locked) {
771 reiserfs_write_unlock_xattrs (dir->i_sb);
772 locked = 0;
773 }
774
775 reiserfs_update_inode_transaction(inode) ;
776 reiserfs_update_inode_transaction(dir) ;
777
778 inode->i_op = &reiserfs_dir_inode_operations;
779 inode->i_fop = &reiserfs_dir_operations;
780
781 // note, _this_ add_entry will not update dir's stat data
782 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len,
783 inode, 1/*visible*/);
784 if (retval) {
785 int err;
786 inode->i_nlink = 0;
787 DEC_DIR_INODE_NLINK(dir);
788 reiserfs_update_sd (&th, inode);
789 err = journal_end(&th, dir->i_sb, jbegin_count) ;
790 if (err)
791 retval = err;
792 iput (inode);
793 goto out_failed;
794 }
795
796 // the above add_entry did not update dir's stat data
797 reiserfs_update_sd (&th, dir);
798
799 d_instantiate(dentry, inode);
800 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
801out_failed:
802 if (locked)
803 reiserfs_write_unlock_xattrs (dir->i_sb);
804 reiserfs_write_unlock(dir->i_sb);
805 return retval;
806}
807 795
808static inline int reiserfs_empty_dir(struct inode *inode) { 796 /* inc the link count now, so another writer doesn't overflow it while
809 /* we can cheat because an old format dir cannot have 797 ** we sleep later on.
810 ** EMPTY_DIR_SIZE, and a new format dir cannot have 798 */
811 ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, 799 INC_DIR_INODE_NLINK(dir)
812 ** regardless of disk format version, the directory is empty. 800
813 */ 801 retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ ,
814 if (inode->i_size != EMPTY_DIR_SIZE && 802 old_format_only(dir->i_sb) ?
815 inode->i_size != EMPTY_DIR_SIZE_V1) { 803 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
816 return 0 ; 804 dentry, inode);
817 } 805 if (retval) {
818 return 1 ; 806 dir->i_nlink--;
807 goto out_failed;
808 }
809
810 if (locked) {
811 reiserfs_write_unlock_xattrs(dir->i_sb);
812 locked = 0;
813 }
814
815 reiserfs_update_inode_transaction(inode);
816 reiserfs_update_inode_transaction(dir);
817
818 inode->i_op = &reiserfs_dir_inode_operations;
819 inode->i_fop = &reiserfs_dir_operations;
820
821 // note, _this_ add_entry will not update dir's stat data
822 retval =
823 reiserfs_add_entry(&th, dir, dentry->d_name.name,
824 dentry->d_name.len, inode, 1 /*visible */ );
825 if (retval) {
826 int err;
827 inode->i_nlink = 0;
828 DEC_DIR_INODE_NLINK(dir);
829 reiserfs_update_sd(&th, inode);
830 err = journal_end(&th, dir->i_sb, jbegin_count);
831 if (err)
832 retval = err;
833 iput(inode);
834 goto out_failed;
835 }
836 // the above add_entry did not update dir's stat data
837 reiserfs_update_sd(&th, dir);
838
839 d_instantiate(dentry, inode);
840 retval = journal_end(&th, dir->i_sb, jbegin_count);
841 out_failed:
842 if (locked)
843 reiserfs_write_unlock_xattrs(dir->i_sb);
844 reiserfs_write_unlock(dir->i_sb);
845 return retval;
819} 846}
820 847
821static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry) 848static inline int reiserfs_empty_dir(struct inode *inode)
822{ 849{
823 int retval, err; 850 /* we can cheat because an old format dir cannot have
824 struct inode * inode; 851 ** EMPTY_DIR_SIZE, and a new format dir cannot have
825 struct reiserfs_transaction_handle th ; 852 ** EMPTY_DIR_SIZE_V1. So, if the inode is either size,
826 int jbegin_count; 853 ** regardless of disk format version, the directory is empty.
827 INITIALIZE_PATH (path); 854 */
828 struct reiserfs_dir_entry de; 855 if (inode->i_size != EMPTY_DIR_SIZE &&
829 856 inode->i_size != EMPTY_DIR_SIZE_V1) {
830 857 return 0;
831 /* we will be doing 2 balancings and update 2 stat data, we change quotas 858 }
832 * of the owner of the directory and of the owner of the parent directory */ 859 return 1;
833 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
834
835 reiserfs_write_lock(dir->i_sb);
836 retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
837 if (retval)
838 goto out_rmdir;
839
840 de.de_gen_number_bit_string = NULL;
841 if ( (retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de)) == NAME_NOT_FOUND) {
842 retval = -ENOENT;
843 goto end_rmdir;
844 } else if ( retval == IO_ERROR) {
845 retval = -EIO;
846 goto end_rmdir;
847 }
848
849 inode = dentry->d_inode;
850
851 reiserfs_update_inode_transaction(inode) ;
852 reiserfs_update_inode_transaction(dir) ;
853
854 if (de.de_objectid != inode->i_ino) {
855 // FIXME: compare key of an object and a key found in the
856 // entry
857 retval = -EIO;
858 goto end_rmdir;
859 }
860 if (!reiserfs_empty_dir(inode)) {
861 retval = -ENOTEMPTY;
862 goto end_rmdir;
863 }
864
865 /* cut entry from dir directory */
866 retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir,
867 NULL, /* page */
868 0/*new file size - not used here*/);
869 if (retval < 0)
870 goto end_rmdir;
871
872 if ( inode->i_nlink != 2 && inode->i_nlink != 1 )
873 reiserfs_warning (inode->i_sb, "%s: empty directory has nlink "
874 "!= 2 (%d)", __FUNCTION__, inode->i_nlink);
875
876 inode->i_nlink = 0;
877 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
878 reiserfs_update_sd (&th, inode);
879
880 DEC_DIR_INODE_NLINK(dir)
881 dir->i_size -= (DEH_SIZE + de.de_entrylen);
882 reiserfs_update_sd (&th, dir);
883
884 /* prevent empty directory from getting lost */
885 add_save_link (&th, inode, 0/* not truncate */);
886
887 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
888 reiserfs_check_path(&path) ;
889out_rmdir:
890 reiserfs_write_unlock(dir->i_sb);
891 return retval;
892
893 end_rmdir:
894 /* we must release path, because we did not call
895 reiserfs_cut_from_item, or reiserfs_cut_from_item does not
896 release path if operation was not complete */
897 pathrelse (&path);
898 err = journal_end(&th, dir->i_sb, jbegin_count) ;
899 reiserfs_write_unlock(dir->i_sb);
900 return err ? err : retval;
901} 860}
902 861
903static int reiserfs_unlink (struct inode * dir, struct dentry *dentry) 862static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
904{ 863{
905 int retval, err; 864 int retval, err;
906 struct inode * inode; 865 struct inode *inode;
907 struct reiserfs_dir_entry de; 866 struct reiserfs_transaction_handle th;
908 INITIALIZE_PATH (path); 867 int jbegin_count;
909 struct reiserfs_transaction_handle th ; 868 INITIALIZE_PATH(path);
910 int jbegin_count; 869 struct reiserfs_dir_entry de;
911 unsigned long savelink; 870
912 871 /* we will be doing 2 balancings and update 2 stat data, we change quotas
913 inode = dentry->d_inode; 872 * of the owner of the directory and of the owner of the parent directory.
914 873 * The quota structure is possibly deleted only on last iput => outside
915 /* in this transaction we can be doing at max two balancings and update 874 * of this transaction */
916 two stat datas, we change quotas of the owner of the directory and of 875 jbegin_count =
917 the owner of the parent directory */ 876 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
918 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 877 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
919 878
920 reiserfs_write_lock(dir->i_sb); 879 reiserfs_write_lock(dir->i_sb);
921 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 880 retval = journal_begin(&th, dir->i_sb, jbegin_count);
922 if (retval) 881 if (retval)
923 goto out_unlink; 882 goto out_rmdir;
924 883
925 de.de_gen_number_bit_string = NULL; 884 de.de_gen_number_bit_string = NULL;
926 if ( (retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de)) == NAME_NOT_FOUND) { 885 if ((retval =
927 retval = -ENOENT; 886 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
928 goto end_unlink; 887 &path, &de)) == NAME_NOT_FOUND) {
929 } else if (retval == IO_ERROR) { 888 retval = -ENOENT;
930 retval = -EIO; 889 goto end_rmdir;
931 goto end_unlink; 890 } else if (retval == IO_ERROR) {
932 } 891 retval = -EIO;
933 892 goto end_rmdir;
934 reiserfs_update_inode_transaction(inode) ; 893 }
935 reiserfs_update_inode_transaction(dir) ; 894
936 895 inode = dentry->d_inode;
937 if (de.de_objectid != inode->i_ino) { 896
938 // FIXME: compare key of an object and a key found in the 897 reiserfs_update_inode_transaction(inode);
939 // entry 898 reiserfs_update_inode_transaction(dir);
940 retval = -EIO; 899
941 goto end_unlink; 900 if (de.de_objectid != inode->i_ino) {
942 } 901 // FIXME: compare key of an object and a key found in the
943 902 // entry
944 if (!inode->i_nlink) { 903 retval = -EIO;
945 reiserfs_warning (inode->i_sb, "%s: deleting nonexistent file " 904 goto end_rmdir;
946 "(%s:%lu), %d", __FUNCTION__, 905 }
947 reiserfs_bdevname (inode->i_sb), inode->i_ino, 906 if (!reiserfs_empty_dir(inode)) {
948 inode->i_nlink); 907 retval = -ENOTEMPTY;
949 inode->i_nlink = 1; 908 goto end_rmdir;
950 } 909 }
951 910
952 inode->i_nlink--; 911 /* cut entry from dir directory */
953 912 retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, /* page */
954 /* 913 0 /*new file size - not used here */ );
955 * we schedule before doing the add_save_link call, save the link 914 if (retval < 0)
956 * count so we don't race 915 goto end_rmdir;
957 */ 916
958 savelink = inode->i_nlink; 917 if (inode->i_nlink != 2 && inode->i_nlink != 1)
959 918 reiserfs_warning(inode->i_sb, "%s: empty directory has nlink "
960 919 "!= 2 (%d)", __FUNCTION__, inode->i_nlink);
961 retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, NULL, 0); 920
962 if (retval < 0) { 921 inode->i_nlink = 0;
963 inode->i_nlink++; 922 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
964 goto end_unlink; 923 reiserfs_update_sd(&th, inode);
965 } 924
966 inode->i_ctime = CURRENT_TIME_SEC; 925 DEC_DIR_INODE_NLINK(dir)
967 reiserfs_update_sd (&th, inode); 926 dir->i_size -= (DEH_SIZE + de.de_entrylen);
968 927 reiserfs_update_sd(&th, dir);
969 dir->i_size -= (de.de_entrylen + DEH_SIZE); 928
970 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; 929 /* prevent empty directory from getting lost */
971 reiserfs_update_sd (&th, dir); 930 add_save_link(&th, inode, 0 /* not truncate */ );
972 931
973 if (!savelink) 932 retval = journal_end(&th, dir->i_sb, jbegin_count);
974 /* prevent file from getting lost */ 933 reiserfs_check_path(&path);
975 add_save_link (&th, inode, 0/* not truncate */); 934 out_rmdir:
976 935 reiserfs_write_unlock(dir->i_sb);
977 retval = journal_end(&th, dir->i_sb, jbegin_count) ; 936 return retval;
978 reiserfs_check_path(&path) ; 937
979 reiserfs_write_unlock(dir->i_sb); 938 end_rmdir:
980 return retval; 939 /* we must release path, because we did not call
981 940 reiserfs_cut_from_item, or reiserfs_cut_from_item does not
982 end_unlink: 941 release path if operation was not complete */
983 pathrelse (&path); 942 pathrelse(&path);
984 err = journal_end(&th, dir->i_sb, jbegin_count) ; 943 err = journal_end(&th, dir->i_sb, jbegin_count);
985 reiserfs_check_path(&path) ; 944 reiserfs_write_unlock(dir->i_sb);
986 if (err) 945 return err ? err : retval;
987 retval = err;
988out_unlink:
989 reiserfs_write_unlock(dir->i_sb);
990 return retval;
991} 946}
992 947
993static int reiserfs_symlink (struct inode * parent_dir, 948static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
994 struct dentry * dentry, const char * symname)
995{ 949{
996 int retval; 950 int retval, err;
997 struct inode * inode; 951 struct inode *inode;
998 char * name; 952 struct reiserfs_dir_entry de;
999 int item_len; 953 INITIALIZE_PATH(path);
1000 struct reiserfs_transaction_handle th ; 954 struct reiserfs_transaction_handle th;
1001 int mode = S_IFLNK | S_IRWXUGO; 955 int jbegin_count;
1002 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 956 unsigned long savelink;
1003 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 957
1004 958 inode = dentry->d_inode;
1005 if (!(inode = new_inode(parent_dir->i_sb))) { 959
1006 return -ENOMEM ; 960 /* in this transaction we can be doing at max two balancings and update
1007 } 961 * two stat datas, we change quotas of the owner of the directory and of
1008 new_inode_init(inode, parent_dir, mode); 962 * the owner of the parent directory. The quota structure is possibly
1009 963 * deleted only on iput => outside of this transaction */
1010 reiserfs_write_lock(parent_dir->i_sb); 964 jbegin_count =
1011 item_len = ROUND_UP (strlen (symname)); 965 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
1012 if (item_len > MAX_DIRECT_ITEM_LEN (parent_dir->i_sb->s_blocksize)) { 966 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
1013 retval = -ENAMETOOLONG; 967
1014 drop_new_inode(inode); 968 reiserfs_write_lock(dir->i_sb);
1015 goto out_failed; 969 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1016 } 970 if (retval)
1017 971 goto out_unlink;
1018 name = reiserfs_kmalloc (item_len, GFP_NOFS, parent_dir->i_sb); 972
1019 if (!name) { 973 de.de_gen_number_bit_string = NULL;
1020 drop_new_inode(inode); 974 if ((retval =
1021 retval = -ENOMEM; 975 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
1022 goto out_failed; 976 &path, &de)) == NAME_NOT_FOUND) {
1023 } 977 retval = -ENOENT;
1024 memcpy (name, symname, strlen (symname)); 978 goto end_unlink;
1025 padd_item (name, item_len, strlen (symname)); 979 } else if (retval == IO_ERROR) {
1026 980 retval = -EIO;
1027 /* We would inherit the default ACL here, but symlinks don't get ACLs */ 981 goto end_unlink;
1028 982 }
1029 retval = journal_begin(&th, parent_dir->i_sb, jbegin_count) ; 983
1030 if (retval) { 984 reiserfs_update_inode_transaction(inode);
1031 drop_new_inode (inode); 985 reiserfs_update_inode_transaction(dir);
1032 reiserfs_kfree (name, item_len, parent_dir->i_sb); 986
1033 goto out_failed; 987 if (de.de_objectid != inode->i_ino) {
1034 } 988 // FIXME: compare key of an object and a key found in the
1035 989 // entry
1036 retval = reiserfs_new_inode (&th, parent_dir, mode, name, strlen (symname), 990 retval = -EIO;
1037 dentry, inode); 991 goto end_unlink;
1038 reiserfs_kfree (name, item_len, parent_dir->i_sb); 992 }
1039 if (retval) { /* reiserfs_new_inode iputs for us */ 993
1040 goto out_failed; 994 if (!inode->i_nlink) {
1041 } 995 reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file "
1042 996 "(%s:%lu), %d", __FUNCTION__,
1043 reiserfs_update_inode_transaction(inode) ; 997 reiserfs_bdevname(inode->i_sb), inode->i_ino,
1044 reiserfs_update_inode_transaction(parent_dir) ; 998 inode->i_nlink);
1045 999 inode->i_nlink = 1;
1046 inode->i_op = &reiserfs_symlink_inode_operations; 1000 }
1047 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1001
1048
1049 // must be sure this inode is written with this transaction
1050 //
1051 //reiserfs_update_sd (&th, inode, READ_BLOCKS);
1052
1053 retval = reiserfs_add_entry (&th, parent_dir, dentry->d_name.name,
1054 dentry->d_name.len, inode, 1/*visible*/);
1055 if (retval) {
1056 int err;
1057 inode->i_nlink--; 1002 inode->i_nlink--;
1058 reiserfs_update_sd (&th, inode); 1003
1059 err = journal_end(&th, parent_dir->i_sb, jbegin_count) ; 1004 /*
1005 * we schedule before doing the add_save_link call, save the link
1006 * count so we don't race
1007 */
1008 savelink = inode->i_nlink;
1009
1010 retval =
1011 reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL,
1012 0);
1013 if (retval < 0) {
1014 inode->i_nlink++;
1015 goto end_unlink;
1016 }
1017 inode->i_ctime = CURRENT_TIME_SEC;
1018 reiserfs_update_sd(&th, inode);
1019
1020 dir->i_size -= (de.de_entrylen + DEH_SIZE);
1021 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
1022 reiserfs_update_sd(&th, dir);
1023
1024 if (!savelink)
1025 /* prevent file from getting lost */
1026 add_save_link(&th, inode, 0 /* not truncate */ );
1027
1028 retval = journal_end(&th, dir->i_sb, jbegin_count);
1029 reiserfs_check_path(&path);
1030 reiserfs_write_unlock(dir->i_sb);
1031 return retval;
1032
1033 end_unlink:
1034 pathrelse(&path);
1035 err = journal_end(&th, dir->i_sb, jbegin_count);
1036 reiserfs_check_path(&path);
1060 if (err) 1037 if (err)
1061 retval = err; 1038 retval = err;
1062 iput (inode); 1039 out_unlink:
1063 goto out_failed; 1040 reiserfs_write_unlock(dir->i_sb);
1064 } 1041 return retval;
1065
1066 d_instantiate(dentry, inode);
1067 retval = journal_end(&th, parent_dir->i_sb, jbegin_count) ;
1068out_failed:
1069 reiserfs_write_unlock(parent_dir->i_sb);
1070 return retval;
1071} 1042}
1072 1043
1073static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry * dentry) 1044static int reiserfs_symlink(struct inode *parent_dir,
1045 struct dentry *dentry, const char *symname)
1074{ 1046{
1075 int retval; 1047 int retval;
1076 struct inode *inode = old_dentry->d_inode; 1048 struct inode *inode;
1077 struct reiserfs_transaction_handle th ; 1049 char *name;
1078 /* We need blocks for transaction + update of quotas for the owners of the directory */ 1050 int item_len;
1079 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; 1051 struct reiserfs_transaction_handle th;
1080 1052 int mode = S_IFLNK | S_IRWXUGO;
1081 reiserfs_write_lock(dir->i_sb); 1053 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
1082 if (inode->i_nlink >= REISERFS_LINK_MAX) { 1054 int jbegin_count =
1083 //FIXME: sd_nlink is 32 bit for new files 1055 JOURNAL_PER_BALANCE_CNT * 3 +
1084 reiserfs_write_unlock(dir->i_sb); 1056 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
1085 return -EMLINK; 1057 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
1086 } 1058
1087 if (inode->i_nlink == 0) { 1059 if (!(inode = new_inode(parent_dir->i_sb))) {
1088 reiserfs_write_unlock(dir->i_sb); 1060 return -ENOMEM;
1089 return -ENOENT; 1061 }
1090 } 1062 new_inode_init(inode, parent_dir, mode);
1091 1063
1092 /* inc before scheduling so reiserfs_unlink knows we are here */ 1064 reiserfs_write_lock(parent_dir->i_sb);
1093 inode->i_nlink++; 1065 item_len = ROUND_UP(strlen(symname));
1094 1066 if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) {
1095 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 1067 retval = -ENAMETOOLONG;
1096 if (retval) { 1068 drop_new_inode(inode);
1097 inode->i_nlink--; 1069 goto out_failed;
1098 reiserfs_write_unlock (dir->i_sb); 1070 }
1099 return retval; 1071
1100 } 1072 name = reiserfs_kmalloc(item_len, GFP_NOFS, parent_dir->i_sb);
1101 1073 if (!name) {
1102 /* create new entry */ 1074 drop_new_inode(inode);
1103 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 1075 retval = -ENOMEM;
1104 inode, 1/*visible*/); 1076 goto out_failed;
1105 1077 }
1106 reiserfs_update_inode_transaction(inode) ; 1078 memcpy(name, symname, strlen(symname));
1107 reiserfs_update_inode_transaction(dir) ; 1079 padd_item(name, item_len, strlen(symname));
1108 1080
1109 if (retval) { 1081 /* We would inherit the default ACL here, but symlinks don't get ACLs */
1110 int err; 1082
1111 inode->i_nlink--; 1083 retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
1112 err = journal_end(&th, dir->i_sb, jbegin_count) ; 1084 if (retval) {
1113 reiserfs_write_unlock(dir->i_sb); 1085 drop_new_inode(inode);
1114 return err ? err : retval; 1086 reiserfs_kfree(name, item_len, parent_dir->i_sb);
1115 } 1087 goto out_failed;
1088 }
1089
1090 retval =
1091 reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
1092 dentry, inode);
1093 reiserfs_kfree(name, item_len, parent_dir->i_sb);
1094 if (retval) { /* reiserfs_new_inode iputs for us */
1095 goto out_failed;
1096 }
1116 1097
1117 inode->i_ctime = CURRENT_TIME_SEC; 1098 reiserfs_update_inode_transaction(inode);
1118 reiserfs_update_sd (&th, inode); 1099 reiserfs_update_inode_transaction(parent_dir);
1100
1101 inode->i_op = &reiserfs_symlink_inode_operations;
1102 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1103
1104 // must be sure this inode is written with this transaction
1105 //
1106 //reiserfs_update_sd (&th, inode, READ_BLOCKS);
1107
1108 retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
1109 dentry->d_name.len, inode, 1 /*visible */ );
1110 if (retval) {
1111 int err;
1112 inode->i_nlink--;
1113 reiserfs_update_sd(&th, inode);
1114 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1115 if (err)
1116 retval = err;
1117 iput(inode);
1118 goto out_failed;
1119 }
1119 1120
1120 atomic_inc(&inode->i_count) ; 1121 d_instantiate(dentry, inode);
1121 d_instantiate(dentry, inode); 1122 retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
1122 retval = journal_end(&th, dir->i_sb, jbegin_count) ; 1123 out_failed:
1123 reiserfs_write_unlock(dir->i_sb); 1124 reiserfs_write_unlock(parent_dir->i_sb);
1124 return retval; 1125 return retval;
1125} 1126}
1126 1127
1128static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1129 struct dentry *dentry)
1130{
1131 int retval;
1132 struct inode *inode = old_dentry->d_inode;
1133 struct reiserfs_transaction_handle th;
1134 /* We need blocks for transaction + update of quotas for the owners of the directory */
1135 int jbegin_count =
1136 JOURNAL_PER_BALANCE_CNT * 3 +
1137 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
1138
1139 reiserfs_write_lock(dir->i_sb);
1140 if (inode->i_nlink >= REISERFS_LINK_MAX) {
1141 //FIXME: sd_nlink is 32 bit for new files
1142 reiserfs_write_unlock(dir->i_sb);
1143 return -EMLINK;
1144 }
1145 if (inode->i_nlink == 0) {
1146 reiserfs_write_unlock(dir->i_sb);
1147 return -ENOENT;
1148 }
1149
1150 /* inc before scheduling so reiserfs_unlink knows we are here */
1151 inode->i_nlink++;
1152
1153 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1154 if (retval) {
1155 inode->i_nlink--;
1156 reiserfs_write_unlock(dir->i_sb);
1157 return retval;
1158 }
1159
1160 /* create new entry */
1161 retval =
1162 reiserfs_add_entry(&th, dir, dentry->d_name.name,
1163 dentry->d_name.len, inode, 1 /*visible */ );
1164
1165 reiserfs_update_inode_transaction(inode);
1166 reiserfs_update_inode_transaction(dir);
1167
1168 if (retval) {
1169 int err;
1170 inode->i_nlink--;
1171 err = journal_end(&th, dir->i_sb, jbegin_count);
1172 reiserfs_write_unlock(dir->i_sb);
1173 return err ? err : retval;
1174 }
1175
1176 inode->i_ctime = CURRENT_TIME_SEC;
1177 reiserfs_update_sd(&th, inode);
1178
1179 atomic_inc(&inode->i_count);
1180 d_instantiate(dentry, inode);
1181 retval = journal_end(&th, dir->i_sb, jbegin_count);
1182 reiserfs_write_unlock(dir->i_sb);
1183 return retval;
1184}
1127 1185
1128// de contains information pointing to an entry which 1186// de contains information pointing to an entry which
1129static int de_still_valid (const char * name, int len, struct reiserfs_dir_entry * de) 1187static int de_still_valid(const char *name, int len,
1188 struct reiserfs_dir_entry *de)
1130{ 1189{
1131 struct reiserfs_dir_entry tmp = *de; 1190 struct reiserfs_dir_entry tmp = *de;
1132 1191
1133 // recalculate pointer to name and name length 1192 // recalculate pointer to name and name length
1134 set_de_name_and_namelen (&tmp); 1193 set_de_name_and_namelen(&tmp);
1135 // FIXME: could check more 1194 // FIXME: could check more
1136 if (tmp.de_namelen != len || memcmp (name, de->de_name, len)) 1195 if (tmp.de_namelen != len || memcmp(name, de->de_name, len))
1137 return 0; 1196 return 0;
1138 return 1; 1197 return 1;
1139} 1198}
1140 1199
1141 1200static int entry_points_to_object(const char *name, int len,
1142static int entry_points_to_object (const char * name, int len, struct reiserfs_dir_entry * de, struct inode * inode) 1201 struct reiserfs_dir_entry *de,
1202 struct inode *inode)
1143{ 1203{
1144 if (!de_still_valid (name, len, de)) 1204 if (!de_still_valid(name, len, de))
1145 return 0; 1205 return 0;
1146 1206
1147 if (inode) { 1207 if (inode) {
1148 if (!de_visible (de->de_deh + de->de_entry_num)) 1208 if (!de_visible(de->de_deh + de->de_entry_num))
1149 reiserfs_panic (NULL, "vs-7042: entry_points_to_object: entry must be visible"); 1209 reiserfs_panic(NULL,
1150 return (de->de_objectid == inode->i_ino) ? 1 : 0; 1210 "vs-7042: entry_points_to_object: entry must be visible");
1151 } 1211 return (de->de_objectid == inode->i_ino) ? 1 : 0;
1212 }
1152 1213
1153 /* this must be added hidden entry */ 1214 /* this must be added hidden entry */
1154 if (de_visible (de->de_deh + de->de_entry_num)) 1215 if (de_visible(de->de_deh + de->de_entry_num))
1155 reiserfs_panic (NULL, "vs-7043: entry_points_to_object: entry must be visible"); 1216 reiserfs_panic(NULL,
1217 "vs-7043: entry_points_to_object: entry must be visible");
1156 1218
1157 return 1; 1219 return 1;
1158} 1220}
1159 1221
1160
1161/* sets key of objectid the entry has to point to */ 1222/* sets key of objectid the entry has to point to */
1162static void set_ino_in_dir_entry (struct reiserfs_dir_entry * de, struct reiserfs_key * key) 1223static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
1224 struct reiserfs_key *key)
1163{ 1225{
1164 /* JDM These operations are endian safe - both are le */ 1226 /* JDM These operations are endian safe - both are le */
1165 de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; 1227 de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id;
1166 de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; 1228 de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
1167} 1229}
1168 1230
1169
1170/* 1231/*
1171 * process, that is going to call fix_nodes/do_balance must hold only 1232 * process, that is going to call fix_nodes/do_balance must hold only
1172 * one path. If it holds 2 or more, it can get into endless waiting in 1233 * one path. If it holds 2 or more, it can get into endless waiting in
1173 * get_empty_nodes or its clones 1234 * get_empty_nodes or its clones
1174 */ 1235 */
1175static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, 1236static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1176 struct inode * new_dir, struct dentry *new_dentry) 1237 struct inode *new_dir, struct dentry *new_dentry)
1177{ 1238{
1178 int retval; 1239 int retval;
1179 INITIALIZE_PATH (old_entry_path); 1240 INITIALIZE_PATH(old_entry_path);
1180 INITIALIZE_PATH (new_entry_path); 1241 INITIALIZE_PATH(new_entry_path);
1181 INITIALIZE_PATH (dot_dot_entry_path); 1242 INITIALIZE_PATH(dot_dot_entry_path);
1182 struct item_head new_entry_ih, old_entry_ih, dot_dot_ih ; 1243 struct item_head new_entry_ih, old_entry_ih, dot_dot_ih;
1183 struct reiserfs_dir_entry old_de, new_de, dot_dot_de; 1244 struct reiserfs_dir_entry old_de, new_de, dot_dot_de;
1184 struct inode * old_inode, * new_dentry_inode; 1245 struct inode *old_inode, *new_dentry_inode;
1185 struct reiserfs_transaction_handle th ; 1246 struct reiserfs_transaction_handle th;
1186 int jbegin_count ; 1247 int jbegin_count;
1187 umode_t old_inode_mode; 1248 umode_t old_inode_mode;
1188 unsigned long savelink = 1; 1249 unsigned long savelink = 1;
1189 struct timespec ctime; 1250 struct timespec ctime;
1190 1251
1191 /* three balancings: (1) old name removal, (2) new name insertion 1252 /* three balancings: (1) old name removal, (2) new name insertion
1192 and (3) maybe "save" link insertion 1253 and (3) maybe "save" link insertion
1193 stat data updates: (1) old directory, 1254 stat data updates: (1) old directory,
1194 (2) new directory and (3) maybe old object stat data (when it is 1255 (2) new directory and (3) maybe old object stat data (when it is
1195 directory) and (4) maybe stat data of object to which new entry 1256 directory) and (4) maybe stat data of object to which new entry
1196 pointed initially and (5) maybe block containing ".." of 1257 pointed initially and (5) maybe block containing ".." of
1197 renamed directory 1258 renamed directory
1198 quota updates: two parent directories */ 1259 quota updates: two parent directories */
1199 jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS; 1260 jbegin_count =
1200 1261 JOURNAL_PER_BALANCE_CNT * 3 + 5 +
1201 old_inode = old_dentry->d_inode; 1262 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
1202 new_dentry_inode = new_dentry->d_inode; 1263
1203 1264 old_inode = old_dentry->d_inode;
1204 // make sure, that oldname still exists and points to an object we 1265 new_dentry_inode = new_dentry->d_inode;
1205 // are going to rename 1266
1206 old_de.de_gen_number_bit_string = NULL; 1267 // make sure, that oldname still exists and points to an object we
1207 reiserfs_write_lock(old_dir->i_sb); 1268 // are going to rename
1208 retval = reiserfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, 1269 old_de.de_gen_number_bit_string = NULL;
1209 &old_entry_path, &old_de); 1270 reiserfs_write_lock(old_dir->i_sb);
1210 pathrelse (&old_entry_path); 1271 retval =
1211 if (retval == IO_ERROR) { 1272 reiserfs_find_entry(old_dir, old_dentry->d_name.name,
1212 reiserfs_write_unlock(old_dir->i_sb); 1273 old_dentry->d_name.len, &old_entry_path,
1213 return -EIO; 1274 &old_de);
1214 } 1275 pathrelse(&old_entry_path);
1215 1276 if (retval == IO_ERROR) {
1216 if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) {
1217 reiserfs_write_unlock(old_dir->i_sb);
1218 return -ENOENT;
1219 }
1220
1221 old_inode_mode = old_inode->i_mode;
1222 if (S_ISDIR(old_inode_mode)) {
1223 // make sure, that directory being renamed has correct ".."
1224 // and that its new parent directory has not too many links
1225 // already
1226
1227 if (new_dentry_inode) {
1228 if (!reiserfs_empty_dir(new_dentry_inode)) {
1229 reiserfs_write_unlock(old_dir->i_sb); 1277 reiserfs_write_unlock(old_dir->i_sb);
1230 return -ENOTEMPTY; 1278 return -EIO;
1231 }
1232 } 1279 }
1233 1280
1234 /* directory is renamed, its parent directory will be changed, 1281 if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) {
1235 ** so find ".." entry 1282 reiserfs_write_unlock(old_dir->i_sb);
1236 */ 1283 return -ENOENT;
1237 dot_dot_de.de_gen_number_bit_string = NULL;
1238 retval = reiserfs_find_entry (old_inode, "..", 2, &dot_dot_entry_path, &dot_dot_de);
1239 pathrelse (&dot_dot_entry_path);
1240 if (retval != NAME_FOUND) {
1241 reiserfs_write_unlock(old_dir->i_sb);
1242 return -EIO;
1243 } 1284 }
1244 1285
1245 /* inode number of .. must equal old_dir->i_ino */ 1286 old_inode_mode = old_inode->i_mode;
1246 if (dot_dot_de.de_objectid != old_dir->i_ino) { 1287 if (S_ISDIR(old_inode_mode)) {
1247 reiserfs_write_unlock(old_dir->i_sb); 1288 // make sure, that directory being renamed has correct ".."
1248 return -EIO; 1289 // and that its new parent directory has not too many links
1290 // already
1291
1292 if (new_dentry_inode) {
1293 if (!reiserfs_empty_dir(new_dentry_inode)) {
1294 reiserfs_write_unlock(old_dir->i_sb);
1295 return -ENOTEMPTY;
1296 }
1297 }
1298
1299 /* directory is renamed, its parent directory will be changed,
1300 ** so find ".." entry
1301 */
1302 dot_dot_de.de_gen_number_bit_string = NULL;
1303 retval =
1304 reiserfs_find_entry(old_inode, "..", 2, &dot_dot_entry_path,
1305 &dot_dot_de);
1306 pathrelse(&dot_dot_entry_path);
1307 if (retval != NAME_FOUND) {
1308 reiserfs_write_unlock(old_dir->i_sb);
1309 return -EIO;
1310 }
1311
1312 /* inode number of .. must equal old_dir->i_ino */
1313 if (dot_dot_de.de_objectid != old_dir->i_ino) {
1314 reiserfs_write_unlock(old_dir->i_sb);
1315 return -EIO;
1316 }
1249 } 1317 }
1250 } 1318
1251 1319 retval = journal_begin(&th, old_dir->i_sb, jbegin_count);
1252 retval = journal_begin(&th, old_dir->i_sb, jbegin_count) ; 1320 if (retval) {
1253 if (retval) { 1321 reiserfs_write_unlock(old_dir->i_sb);
1254 reiserfs_write_unlock (old_dir->i_sb); 1322 return retval;
1255 return retval;
1256 }
1257
1258 /* add new entry (or find the existing one) */
1259 retval = reiserfs_add_entry (&th, new_dir, new_dentry->d_name.name, new_dentry->d_name.len,
1260 old_inode, 0);
1261 if (retval == -EEXIST) {
1262 if (!new_dentry_inode) {
1263 reiserfs_panic (old_dir->i_sb,
1264 "vs-7050: new entry is found, new inode == 0\n");
1265 } 1323 }
1266 } else if (retval) { 1324
1267 int err = journal_end(&th, old_dir->i_sb, jbegin_count) ; 1325 /* add new entry (or find the existing one) */
1268 reiserfs_write_unlock(old_dir->i_sb); 1326 retval =
1269 return err ? err : retval; 1327 reiserfs_add_entry(&th, new_dir, new_dentry->d_name.name,
1270 } 1328 new_dentry->d_name.len, old_inode, 0);
1271 1329 if (retval == -EEXIST) {
1272 reiserfs_update_inode_transaction(old_dir) ; 1330 if (!new_dentry_inode) {
1273 reiserfs_update_inode_transaction(new_dir) ; 1331 reiserfs_panic(old_dir->i_sb,
1274 1332 "vs-7050: new entry is found, new inode == 0\n");
1275 /* this makes it so an fsync on an open fd for the old name will 1333 }
1276 ** commit the rename operation 1334 } else if (retval) {
1277 */ 1335 int err = journal_end(&th, old_dir->i_sb, jbegin_count);
1278 reiserfs_update_inode_transaction(old_inode) ; 1336 reiserfs_write_unlock(old_dir->i_sb);
1279 1337 return err ? err : retval;
1280 if (new_dentry_inode)
1281 reiserfs_update_inode_transaction(new_dentry_inode) ;
1282
1283 while (1) {
1284 // look for old name using corresponding entry key (found by reiserfs_find_entry)
1285 if ((retval = search_by_entry_key (new_dir->i_sb, &old_de.de_entry_key,
1286 &old_entry_path, &old_de)) != NAME_FOUND) {
1287 pathrelse(&old_entry_path);
1288 journal_end(&th, old_dir->i_sb, jbegin_count);
1289 reiserfs_write_unlock(old_dir->i_sb);
1290 return -EIO;
1291 } 1338 }
1292 1339
1293 copy_item_head(&old_entry_ih, get_ih(&old_entry_path)) ; 1340 reiserfs_update_inode_transaction(old_dir);
1294 1341 reiserfs_update_inode_transaction(new_dir);
1295 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1) ; 1342
1296 1343 /* this makes it so an fsync on an open fd for the old name will
1297 // look for new name by reiserfs_find_entry 1344 ** commit the rename operation
1298 new_de.de_gen_number_bit_string = NULL; 1345 */
1299 retval = reiserfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, 1346 reiserfs_update_inode_transaction(old_inode);
1300 &new_entry_path, &new_de); 1347
1301 // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from 1348 if (new_dentry_inode)
1302 // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 1349 reiserfs_update_inode_transaction(new_dentry_inode);
1303 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { 1350
1304 pathrelse(&new_entry_path); 1351 while (1) {
1305 pathrelse(&old_entry_path); 1352 // look for old name using corresponding entry key (found by reiserfs_find_entry)
1306 journal_end(&th, old_dir->i_sb, jbegin_count); 1353 if ((retval =
1307 reiserfs_write_unlock(old_dir->i_sb); 1354 search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key,
1308 return -EIO; 1355 &old_entry_path,
1356 &old_de)) != NAME_FOUND) {
1357 pathrelse(&old_entry_path);
1358 journal_end(&th, old_dir->i_sb, jbegin_count);
1359 reiserfs_write_unlock(old_dir->i_sb);
1360 return -EIO;
1361 }
1362
1363 copy_item_head(&old_entry_ih, get_ih(&old_entry_path));
1364
1365 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1);
1366
1367 // look for new name by reiserfs_find_entry
1368 new_de.de_gen_number_bit_string = NULL;
1369 retval =
1370 reiserfs_find_entry(new_dir, new_dentry->d_name.name,
1371 new_dentry->d_name.len, &new_entry_path,
1372 &new_de);
1373 // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from
1374 // reiserfs_add_entry above, and we'll catch any i/o errors before we get here.
1375 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) {
1376 pathrelse(&new_entry_path);
1377 pathrelse(&old_entry_path);
1378 journal_end(&th, old_dir->i_sb, jbegin_count);
1379 reiserfs_write_unlock(old_dir->i_sb);
1380 return -EIO;
1381 }
1382
1383 copy_item_head(&new_entry_ih, get_ih(&new_entry_path));
1384
1385 reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1);
1386
1387 if (S_ISDIR(old_inode->i_mode)) {
1388 if ((retval =
1389 search_by_entry_key(new_dir->i_sb,
1390 &dot_dot_de.de_entry_key,
1391 &dot_dot_entry_path,
1392 &dot_dot_de)) != NAME_FOUND) {
1393 pathrelse(&dot_dot_entry_path);
1394 pathrelse(&new_entry_path);
1395 pathrelse(&old_entry_path);
1396 journal_end(&th, old_dir->i_sb, jbegin_count);
1397 reiserfs_write_unlock(old_dir->i_sb);
1398 return -EIO;
1399 }
1400 copy_item_head(&dot_dot_ih,
1401 get_ih(&dot_dot_entry_path));
1402 // node containing ".." gets into transaction
1403 reiserfs_prepare_for_journal(old_inode->i_sb,
1404 dot_dot_de.de_bh, 1);
1405 }
1406 /* we should check seals here, not do
1407 this stuff, yes? Then, having
1408 gathered everything into RAM we
1409 should lock the buffers, yes? -Hans */
1410 /* probably. our rename needs to hold more
1411 ** than one path at once. The seals would
1412 ** have to be written to deal with multi-path
1413 ** issues -chris
1414 */
1415 /* sanity checking before doing the rename - avoid races many
1416 ** of the above checks could have scheduled. We have to be
1417 ** sure our items haven't been shifted by another process.
1418 */
1419 if (item_moved(&new_entry_ih, &new_entry_path) ||
1420 !entry_points_to_object(new_dentry->d_name.name,
1421 new_dentry->d_name.len,
1422 &new_de, new_dentry_inode) ||
1423 item_moved(&old_entry_ih, &old_entry_path) ||
1424 !entry_points_to_object(old_dentry->d_name.name,
1425 old_dentry->d_name.len,
1426 &old_de, old_inode)) {
1427 reiserfs_restore_prepared_buffer(old_inode->i_sb,
1428 new_de.de_bh);
1429 reiserfs_restore_prepared_buffer(old_inode->i_sb,
1430 old_de.de_bh);
1431 if (S_ISDIR(old_inode_mode))
1432 reiserfs_restore_prepared_buffer(old_inode->
1433 i_sb,
1434 dot_dot_de.
1435 de_bh);
1436 continue;
1437 }
1438 if (S_ISDIR(old_inode_mode)) {
1439 if (item_moved(&dot_dot_ih, &dot_dot_entry_path) ||
1440 !entry_points_to_object("..", 2, &dot_dot_de,
1441 old_dir)) {
1442 reiserfs_restore_prepared_buffer(old_inode->
1443 i_sb,
1444 old_de.de_bh);
1445 reiserfs_restore_prepared_buffer(old_inode->
1446 i_sb,
1447 new_de.de_bh);
1448 reiserfs_restore_prepared_buffer(old_inode->
1449 i_sb,
1450 dot_dot_de.
1451 de_bh);
1452 continue;
1453 }
1454 }
1455
1456 RFALSE(S_ISDIR(old_inode_mode) &&
1457 !buffer_journal_prepared(dot_dot_de.de_bh), "");
1458
1459 break;
1309 } 1460 }
1310 1461
1311 copy_item_head(&new_entry_ih, get_ih(&new_entry_path)) ; 1462 /* ok, all the changes can be done in one fell swoop when we
1463 have claimed all the buffers needed. */
1312 1464
1313 reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1) ; 1465 mark_de_visible(new_de.de_deh + new_de.de_entry_num);
1466 set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode));
1467 journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh);
1314 1468
1315 if (S_ISDIR(old_inode->i_mode)) { 1469 mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
1316 if ((retval = search_by_entry_key (new_dir->i_sb, &dot_dot_de.de_entry_key, 1470 journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh);
1317 &dot_dot_entry_path, &dot_dot_de)) != NAME_FOUND) { 1471 ctime = CURRENT_TIME_SEC;
1318 pathrelse(&dot_dot_entry_path); 1472 old_dir->i_ctime = old_dir->i_mtime = ctime;
1319 pathrelse(&new_entry_path); 1473 new_dir->i_ctime = new_dir->i_mtime = ctime;
1320 pathrelse(&old_entry_path); 1474 /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of
1321 journal_end(&th, old_dir->i_sb, jbegin_count); 1475 renamed object */
1322 reiserfs_write_unlock(old_dir->i_sb); 1476 old_inode->i_ctime = ctime;
1323 return -EIO; 1477
1324 } 1478 if (new_dentry_inode) {
1325 copy_item_head(&dot_dot_ih, get_ih(&dot_dot_entry_path)) ; 1479 // adjust link number of the victim
1326 // node containing ".." gets into transaction 1480 if (S_ISDIR(new_dentry_inode->i_mode)) {
1327 reiserfs_prepare_for_journal(old_inode->i_sb, dot_dot_de.de_bh, 1) ; 1481 new_dentry_inode->i_nlink = 0;
1328 } 1482 } else {
1329 /* we should check seals here, not do 1483 new_dentry_inode->i_nlink--;
1330 this stuff, yes? Then, having 1484 }
1331 gathered everything into RAM we 1485 new_dentry_inode->i_ctime = ctime;
1332 should lock the buffers, yes? -Hans */ 1486 savelink = new_dentry_inode->i_nlink;
1333 /* probably. our rename needs to hold more
1334 ** than one path at once. The seals would
1335 ** have to be written to deal with multi-path
1336 ** issues -chris
1337 */
1338 /* sanity checking before doing the rename - avoid races many
1339 ** of the above checks could have scheduled. We have to be
1340 ** sure our items haven't been shifted by another process.
1341 */
1342 if (item_moved(&new_entry_ih, &new_entry_path) ||
1343 !entry_points_to_object(new_dentry->d_name.name,
1344 new_dentry->d_name.len,
1345 &new_de, new_dentry_inode) ||
1346 item_moved(&old_entry_ih, &old_entry_path) ||
1347 !entry_points_to_object (old_dentry->d_name.name,
1348 old_dentry->d_name.len,
1349 &old_de, old_inode)) {
1350 reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh);
1351 reiserfs_restore_prepared_buffer (old_inode->i_sb, old_de.de_bh);
1352 if (S_ISDIR(old_inode_mode))
1353 reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh);
1354 continue;
1355 } 1487 }
1488
1356 if (S_ISDIR(old_inode_mode)) { 1489 if (S_ISDIR(old_inode_mode)) {
1357 if ( item_moved(&dot_dot_ih, &dot_dot_entry_path) || 1490 // adjust ".." of renamed directory
1358 !entry_points_to_object ( "..", 2, &dot_dot_de, old_dir) ) { 1491 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
1359 reiserfs_restore_prepared_buffer (old_inode->i_sb, old_de.de_bh); 1492 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
1360 reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); 1493
1361 reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); 1494 if (!new_dentry_inode)
1362 continue; 1495 /* there (in new_dir) was no directory, so it got new link
1363 } 1496 (".." of renamed directory) */
1497 INC_DIR_INODE_NLINK(new_dir);
1498
1499 /* old directory lost one link - ".. " of renamed directory */
1500 DEC_DIR_INODE_NLINK(old_dir);
1364 } 1501 }
1502 // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
1503 pathrelse(&new_entry_path);
1504 pathrelse(&dot_dot_entry_path);
1365 1505
1366 RFALSE( S_ISDIR(old_inode_mode) && 1506 // FIXME: this reiserfs_cut_from_item's return value may screw up
1367 !buffer_journal_prepared(dot_dot_de.de_bh), "" ); 1507 // anybody, but it will panic if will not be able to find the
1368 1508 // entry. This needs one more clean up
1369 break; 1509 if (reiserfs_cut_from_item
1370 } 1510 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
1371 1511 0) < 0)
1372 /* ok, all the changes can be done in one fell swoop when we 1512 reiserfs_warning(old_dir->i_sb,
1373 have claimed all the buffers needed.*/ 1513 "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?");
1374 1514
1375 mark_de_visible (new_de.de_deh + new_de.de_entry_num); 1515 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
1376 set_ino_in_dir_entry (&new_de, INODE_PKEY (old_inode)); 1516
1377 journal_mark_dirty (&th, old_dir->i_sb, new_de.de_bh); 1517 reiserfs_update_sd(&th, old_dir);
1378 1518 reiserfs_update_sd(&th, new_dir);
1379 mark_de_hidden (old_de.de_deh + old_de.de_entry_num); 1519 reiserfs_update_sd(&th, old_inode);
1380 journal_mark_dirty (&th, old_dir->i_sb, old_de.de_bh); 1520
1381 ctime = CURRENT_TIME_SEC; 1521 if (new_dentry_inode) {
1382 old_dir->i_ctime = old_dir->i_mtime = ctime; 1522 if (savelink == 0)
1383 new_dir->i_ctime = new_dir->i_mtime = ctime; 1523 add_save_link(&th, new_dentry_inode,
1384 /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of 1524 0 /* not truncate */ );
1385 renamed object */ 1525 reiserfs_update_sd(&th, new_dentry_inode);
1386 old_inode->i_ctime = ctime;
1387
1388 if (new_dentry_inode) {
1389 // adjust link number of the victim
1390 if (S_ISDIR(new_dentry_inode->i_mode)) {
1391 new_dentry_inode->i_nlink = 0;
1392 } else {
1393 new_dentry_inode->i_nlink--;
1394 } 1526 }
1395 new_dentry_inode->i_ctime = ctime; 1527
1396 savelink = new_dentry_inode->i_nlink; 1528 retval = journal_end(&th, old_dir->i_sb, jbegin_count);
1397 } 1529 reiserfs_write_unlock(old_dir->i_sb);
1398 1530 return retval;
1399 if (S_ISDIR(old_inode_mode)) {
1400 // adjust ".." of renamed directory
1401 set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir));
1402 journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh);
1403
1404 if (!new_dentry_inode)
1405 /* there (in new_dir) was no directory, so it got new link
1406 (".." of renamed directory) */
1407 INC_DIR_INODE_NLINK(new_dir);
1408
1409 /* old directory lost one link - ".. " of renamed directory */
1410 DEC_DIR_INODE_NLINK(old_dir);
1411 }
1412
1413 // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
1414 pathrelse (&new_entry_path);
1415 pathrelse (&dot_dot_entry_path);
1416
1417 // FIXME: this reiserfs_cut_from_item's return value may screw up
1418 // anybody, but it will panic if will not be able to find the
1419 // entry. This needs one more clean up
1420 if (reiserfs_cut_from_item (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 0) < 0)
1421 reiserfs_warning (old_dir->i_sb, "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?");
1422
1423 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
1424
1425 reiserfs_update_sd (&th, old_dir);
1426 reiserfs_update_sd (&th, new_dir);
1427 reiserfs_update_sd (&th, old_inode);
1428
1429 if (new_dentry_inode) {
1430 if (savelink == 0)
1431 add_save_link (&th, new_dentry_inode, 0/* not truncate */);
1432 reiserfs_update_sd (&th, new_dentry_inode);
1433 }
1434
1435 retval = journal_end(&th, old_dir->i_sb, jbegin_count) ;
1436 reiserfs_write_unlock(old_dir->i_sb);
1437 return retval;
1438} 1531}
1439 1532
1440/* 1533/*
1441 * directories can handle most operations... 1534 * directories can handle most operations...
1442 */ 1535 */
1443struct inode_operations reiserfs_dir_inode_operations = { 1536struct inode_operations reiserfs_dir_inode_operations = {
1444 //&reiserfs_dir_operations, /* default_file_ops */ 1537 //&reiserfs_dir_operations, /* default_file_ops */
1445 .create = reiserfs_create, 1538 .create = reiserfs_create,
1446 .lookup = reiserfs_lookup, 1539 .lookup = reiserfs_lookup,
1447 .link = reiserfs_link, 1540 .link = reiserfs_link,
1448 .unlink = reiserfs_unlink, 1541 .unlink = reiserfs_unlink,
1449 .symlink = reiserfs_symlink, 1542 .symlink = reiserfs_symlink,
1450 .mkdir = reiserfs_mkdir, 1543 .mkdir = reiserfs_mkdir,
1451 .rmdir = reiserfs_rmdir, 1544 .rmdir = reiserfs_rmdir,
1452 .mknod = reiserfs_mknod, 1545 .mknod = reiserfs_mknod,
1453 .rename = reiserfs_rename, 1546 .rename = reiserfs_rename,
1454 .setattr = reiserfs_setattr, 1547 .setattr = reiserfs_setattr,
1455 .setxattr = reiserfs_setxattr, 1548 .setxattr = reiserfs_setxattr,
1456 .getxattr = reiserfs_getxattr, 1549 .getxattr = reiserfs_getxattr,
1457 .listxattr = reiserfs_listxattr, 1550 .listxattr = reiserfs_listxattr,
1458 .removexattr = reiserfs_removexattr, 1551 .removexattr = reiserfs_removexattr,
1459 .permission = reiserfs_permission, 1552 .permission = reiserfs_permission,
1460}; 1553};
1461 1554
1462/* 1555/*
@@ -1464,28 +1557,27 @@ struct inode_operations reiserfs_dir_inode_operations = {
1464 * stuff added 1557 * stuff added
1465 */ 1558 */
1466struct inode_operations reiserfs_symlink_inode_operations = { 1559struct inode_operations reiserfs_symlink_inode_operations = {
1467 .readlink = generic_readlink, 1560 .readlink = generic_readlink,
1468 .follow_link = page_follow_link_light, 1561 .follow_link = page_follow_link_light,
1469 .put_link = page_put_link, 1562 .put_link = page_put_link,
1470 .setattr = reiserfs_setattr, 1563 .setattr = reiserfs_setattr,
1471 .setxattr = reiserfs_setxattr, 1564 .setxattr = reiserfs_setxattr,
1472 .getxattr = reiserfs_getxattr, 1565 .getxattr = reiserfs_getxattr,
1473 .listxattr = reiserfs_listxattr, 1566 .listxattr = reiserfs_listxattr,
1474 .removexattr = reiserfs_removexattr, 1567 .removexattr = reiserfs_removexattr,
1475 .permission = reiserfs_permission, 1568 .permission = reiserfs_permission,
1476 1569
1477}; 1570};
1478 1571
1479
1480/* 1572/*
1481 * special file operations.. just xattr/acl stuff 1573 * special file operations.. just xattr/acl stuff
1482 */ 1574 */
1483struct inode_operations reiserfs_special_inode_operations = { 1575struct inode_operations reiserfs_special_inode_operations = {
1484 .setattr = reiserfs_setattr, 1576 .setattr = reiserfs_setattr,
1485 .setxattr = reiserfs_setxattr, 1577 .setxattr = reiserfs_setxattr,
1486 .getxattr = reiserfs_getxattr, 1578 .getxattr = reiserfs_getxattr,
1487 .listxattr = reiserfs_listxattr, 1579 .listxattr = reiserfs_listxattr,
1488 .removexattr = reiserfs_removexattr, 1580 .removexattr = reiserfs_removexattr,
1489 .permission = reiserfs_permission, 1581 .permission = reiserfs_permission,
1490 1582
1491}; 1583};
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index bfe8e25ef293..f62590aa9c95 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -14,24 +14,24 @@
14 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ 14 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
15 (__le32 *)((rs) + 1)) 15 (__le32 *)((rs) + 1))
16 16
17
18#ifdef CONFIG_REISERFS_CHECK 17#ifdef CONFIG_REISERFS_CHECK
19 18
20static void check_objectid_map (struct super_block * s, __le32 * map) 19static void check_objectid_map(struct super_block *s, __le32 * map)
21{ 20{
22 if (le32_to_cpu (map[0]) != 1) 21 if (le32_to_cpu(map[0]) != 1)
23 reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx", 22 reiserfs_panic(s,
24 ( long unsigned int ) le32_to_cpu (map[0])); 23 "vs-15010: check_objectid_map: map corrupted: %lx",
24 (long unsigned int)le32_to_cpu(map[0]));
25 25
26 // FIXME: add something else here 26 // FIXME: add something else here
27} 27}
28 28
29#else 29#else
30static void check_objectid_map (struct super_block * s, __le32 * map) 30static void check_objectid_map(struct super_block *s, __le32 * map)
31{;} 31{;
32}
32#endif 33#endif
33 34
34
35/* When we allocate objectids we allocate the first unused objectid. 35/* When we allocate objectids we allocate the first unused objectid.
36 Each sequence of objectids in use (the odd sequences) is followed 36 Each sequence of objectids in use (the odd sequences) is followed
37 by a sequence of objectids not in use (the even sequences). We 37 by a sequence of objectids not in use (the even sequences). We
@@ -46,161 +46,162 @@ static void check_objectid_map (struct super_block * s, __le32 * map)
46 interesting optimizations of layout could result from complicating 46 interesting optimizations of layout could result from complicating
47 objectid assignment, but we have deferred making them for now. */ 47 objectid assignment, but we have deferred making them for now. */
48 48
49
50/* get unique object identifier */ 49/* get unique object identifier */
51__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th) 50__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
52{ 51{
53 struct super_block * s = th->t_super; 52 struct super_block *s = th->t_super;
54 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 53 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
55 __le32 * map = objectid_map (s, rs); 54 __le32 *map = objectid_map(s, rs);
56 __u32 unused_objectid; 55 __u32 unused_objectid;
57 56
58 BUG_ON (!th->t_trans_id); 57 BUG_ON(!th->t_trans_id);
58
59 check_objectid_map(s, map);
60
61 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
62 /* comment needed -Hans */
63 unused_objectid = le32_to_cpu(map[1]);
64 if (unused_objectid == U32_MAX) {
65 reiserfs_warning(s, "%s: no more object ids", __FUNCTION__);
66 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
67 return 0;
68 }
59 69
60 check_objectid_map (s, map); 70 /* This incrementation allocates the first unused objectid. That
71 is to say, the first entry on the objectid map is the first
72 unused objectid, and by incrementing it we use it. See below
73 where we check to see if we eliminated a sequence of unused
74 objectids.... */
75 map[1] = cpu_to_le32(unused_objectid + 1);
76
77 /* Now we check to see if we eliminated the last remaining member of
78 the first even sequence (and can eliminate the sequence by
79 eliminating its last objectid from oids), and can collapse the
80 first two odd sequences into one sequence. If so, then the net
81 result is to eliminate a pair of objectids from oids. We do this
82 by shifting the entire map to the left. */
83 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
84 memmove(map + 1, map + 3,
85 (sb_oid_cursize(rs) - 3) * sizeof(__u32));
86 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
87 }
61 88
62 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 89 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
63 /* comment needed -Hans */ 90 return unused_objectid;
64 unused_objectid = le32_to_cpu (map[1]);
65 if (unused_objectid == U32_MAX) {
66 reiserfs_warning (s, "%s: no more object ids", __FUNCTION__);
67 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)) ;
68 return 0;
69 }
70
71 /* This incrementation allocates the first unused objectid. That
72 is to say, the first entry on the objectid map is the first
73 unused objectid, and by incrementing it we use it. See below
74 where we check to see if we eliminated a sequence of unused
75 objectids.... */
76 map[1] = cpu_to_le32 (unused_objectid + 1);
77
78 /* Now we check to see if we eliminated the last remaining member of
79 the first even sequence (and can eliminate the sequence by
80 eliminating its last objectid from oids), and can collapse the
81 first two odd sequences into one sequence. If so, then the net
82 result is to eliminate a pair of objectids from oids. We do this
83 by shifting the entire map to the left. */
84 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
85 memmove (map + 1, map + 3, (sb_oid_cursize(rs) - 3) * sizeof(__u32));
86 set_sb_oid_cursize( rs, sb_oid_cursize(rs) - 2 );
87 }
88
89 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s));
90 return unused_objectid;
91} 91}
92 92
93
94/* makes object identifier unused */ 93/* makes object identifier unused */
95void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, 94void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
96 __u32 objectid_to_release) 95 __u32 objectid_to_release)
97{ 96{
98 struct super_block * s = th->t_super; 97 struct super_block *s = th->t_super;
99 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 98 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
100 __le32 * map = objectid_map (s, rs); 99 __le32 *map = objectid_map(s, rs);
101 int i = 0; 100 int i = 0;
102 101
103 BUG_ON (!th->t_trans_id); 102 BUG_ON(!th->t_trans_id);
104 //return; 103 //return;
105 check_objectid_map (s, map); 104 check_objectid_map(s, map);
106 105
107 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 106 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
108 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); 107 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
109 108
110 /* start at the beginning of the objectid map (i = 0) and go to 109 /* start at the beginning of the objectid map (i = 0) and go to
111 the end of it (i = disk_sb->s_oid_cursize). Linear search is 110 the end of it (i = disk_sb->s_oid_cursize). Linear search is
112 what we use, though it is possible that binary search would be 111 what we use, though it is possible that binary search would be
113 more efficient after performing lots of deletions (which is 112 more efficient after performing lots of deletions (which is
114 when oids is large.) We only check even i's. */ 113 when oids is large.) We only check even i's. */
115 while (i < sb_oid_cursize(rs)) { 114 while (i < sb_oid_cursize(rs)) {
116 if (objectid_to_release == le32_to_cpu (map[i])) { 115 if (objectid_to_release == le32_to_cpu(map[i])) {
117 /* This incrementation unallocates the objectid. */ 116 /* This incrementation unallocates the objectid. */
118 //map[i]++; 117 //map[i]++;
119 map[i] = cpu_to_le32 (le32_to_cpu (map[i]) + 1); 118 map[i] = cpu_to_le32(le32_to_cpu(map[i]) + 1);
120 119
121 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ 120 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */
122 if (map[i] == map[i+1]) { 121 if (map[i] == map[i + 1]) {
123 /* shrink objectid map */ 122 /* shrink objectid map */
124 memmove (map + i, map + i + 2, 123 memmove(map + i, map + i + 2,
125 (sb_oid_cursize(rs) - i - 2) * sizeof (__u32)); 124 (sb_oid_cursize(rs) - i -
126 //disk_sb->s_oid_cursize -= 2; 125 2) * sizeof(__u32));
127 set_sb_oid_cursize( rs, sb_oid_cursize(rs) - 2 ); 126 //disk_sb->s_oid_cursize -= 2;
128 127 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
129 RFALSE( sb_oid_cursize(rs) < 2 || 128
130 sb_oid_cursize(rs) > sb_oid_maxsize(rs), 129 RFALSE(sb_oid_cursize(rs) < 2 ||
131 "vs-15005: objectid map corrupted cur_size == %d (max == %d)", 130 sb_oid_cursize(rs) > sb_oid_maxsize(rs),
132 sb_oid_cursize(rs), sb_oid_maxsize(rs)); 131 "vs-15005: objectid map corrupted cur_size == %d (max == %d)",
133 } 132 sb_oid_cursize(rs), sb_oid_maxsize(rs));
134 return; 133 }
134 return;
135 }
136
137 if (objectid_to_release > le32_to_cpu(map[i]) &&
138 objectid_to_release < le32_to_cpu(map[i + 1])) {
139 /* size of objectid map is not changed */
140 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
141 //objectid_map[i+1]--;
142 map[i + 1] =
143 cpu_to_le32(le32_to_cpu(map[i + 1]) - 1);
144 return;
145 }
146
147 /* JDM comparing two little-endian values for equality -- safe */
148 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
149 /* objectid map must be expanded, but there is no space */
150 PROC_INFO_INC(s, leaked_oid);
151 return;
152 }
153
154 /* expand the objectid map */
155 memmove(map + i + 3, map + i + 1,
156 (sb_oid_cursize(rs) - i - 1) * sizeof(__u32));
157 map[i + 1] = cpu_to_le32(objectid_to_release);
158 map[i + 2] = cpu_to_le32(objectid_to_release + 1);
159 set_sb_oid_cursize(rs, sb_oid_cursize(rs) + 2);
160 return;
161 }
162 i += 2;
135 } 163 }
136 164
137 if (objectid_to_release > le32_to_cpu (map[i]) && 165 reiserfs_warning(s,
138 objectid_to_release < le32_to_cpu (map[i + 1])) { 166 "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)",
139 /* size of objectid map is not changed */ 167 (long unsigned)objectid_to_release);
140 if (objectid_to_release + 1 == le32_to_cpu (map[i + 1])) { 168}
141 //objectid_map[i+1]--;
142 map[i + 1] = cpu_to_le32 (le32_to_cpu (map[i + 1]) - 1);
143 return;
144 }
145
146 /* JDM comparing two little-endian values for equality -- safe */
147 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
148 /* objectid map must be expanded, but there is no space */
149 PROC_INFO_INC( s, leaked_oid );
150 return;
151 }
152 169
153 /* expand the objectid map*/ 170int reiserfs_convert_objectid_map_v1(struct super_block *s)
154 memmove (map + i + 3, map + i + 1, 171{
155 (sb_oid_cursize(rs) - i - 1) * sizeof(__u32)); 172 struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK(s);
156 map[i + 1] = cpu_to_le32 (objectid_to_release); 173 int cur_size = sb_oid_cursize(disk_sb);
157 map[i + 2] = cpu_to_le32 (objectid_to_release + 1); 174 int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2;
158 set_sb_oid_cursize( rs, sb_oid_cursize(rs) + 2 ); 175 int old_max = sb_oid_maxsize(disk_sb);
159 return; 176 struct reiserfs_super_block_v1 *disk_sb_v1;
177 __le32 *objectid_map, *new_objectid_map;
178 int i;
179
180 disk_sb_v1 =
181 (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
182 objectid_map = (__le32 *) (disk_sb_v1 + 1);
183 new_objectid_map = (__le32 *) (disk_sb + 1);
184
185 if (cur_size > new_size) {
186 /* mark everyone used that was listed as free at the end of the objectid
187 ** map
188 */
189 objectid_map[new_size - 1] = objectid_map[cur_size - 1];
190 set_sb_oid_cursize(disk_sb, new_size);
191 }
192 /* move the smaller objectid map past the end of the new super */
193 for (i = new_size - 1; i >= 0; i--) {
194 objectid_map[i + (old_max - new_size)] = objectid_map[i];
160 } 195 }
161 i += 2;
162 }
163 196
164 reiserfs_warning (s, "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", 197 /* set the max size so we don't overflow later */
165 ( long unsigned ) objectid_to_release); 198 set_sb_oid_maxsize(disk_sb, new_size);
166}
167 199
200 /* Zero out label and generate random UUID */
201 memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label));
202 generate_random_uuid(disk_sb->s_uuid);
168 203
169int reiserfs_convert_objectid_map_v1(struct super_block *s) { 204 /* finally, zero out the unused chunk of the new super */
170 struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK (s); 205 memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused));
171 int cur_size = sb_oid_cursize(disk_sb); 206 return 0;
172 int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ;
173 int old_max = sb_oid_maxsize(disk_sb);
174 struct reiserfs_super_block_v1 *disk_sb_v1 ;
175 __le32 *objectid_map, *new_objectid_map ;
176 int i ;
177
178 disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
179 objectid_map = (__le32 *)(disk_sb_v1 + 1) ;
180 new_objectid_map = (__le32 *)(disk_sb + 1) ;
181
182 if (cur_size > new_size) {
183 /* mark everyone used that was listed as free at the end of the objectid
184 ** map
185 */
186 objectid_map[new_size - 1] = objectid_map[cur_size - 1] ;
187 set_sb_oid_cursize(disk_sb,new_size) ;
188 }
189 /* move the smaller objectid map past the end of the new super */
190 for (i = new_size - 1 ; i >= 0 ; i--) {
191 objectid_map[i + (old_max - new_size)] = objectid_map[i] ;
192 }
193
194
195 /* set the max size so we don't overflow later */
196 set_sb_oid_maxsize(disk_sb,new_size) ;
197
198 /* Zero out label and generate random UUID */
199 memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label)) ;
200 generate_random_uuid(disk_sb->s_uuid);
201
202 /* finally, zero out the unused chunk of the new super */
203 memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)) ;
204 return 0 ;
205} 207}
206
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 16fdca1d4bd7..d55e164bd5c2 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -15,168 +15,166 @@ static char error_buf[1024];
15static char fmt_buf[1024]; 15static char fmt_buf[1024];
16static char off_buf[80]; 16static char off_buf[80];
17 17
18 18static char *reiserfs_cpu_offset(struct cpu_key *key)
19static char * reiserfs_cpu_offset (struct cpu_key * key)
20{ 19{
21 if (cpu_key_k_type(key) == TYPE_DIRENTRY) 20 if (cpu_key_k_type(key) == TYPE_DIRENTRY)
22 sprintf (off_buf, "%Lu(%Lu)", 21 sprintf(off_buf, "%Lu(%Lu)",
23 (unsigned long long)GET_HASH_VALUE (cpu_key_k_offset (key)), 22 (unsigned long long)
24 (unsigned long long)GET_GENERATION_NUMBER (cpu_key_k_offset (key))); 23 GET_HASH_VALUE(cpu_key_k_offset(key)),
25 else 24 (unsigned long long)
26 sprintf (off_buf, "0x%Lx", (unsigned long long)cpu_key_k_offset (key)); 25 GET_GENERATION_NUMBER(cpu_key_k_offset(key)));
27 return off_buf; 26 else
27 sprintf(off_buf, "0x%Lx",
28 (unsigned long long)cpu_key_k_offset(key));
29 return off_buf;
28} 30}
29 31
30 32static char *le_offset(struct reiserfs_key *key)
31static char * le_offset (struct reiserfs_key * key)
32{ 33{
33 int version; 34 int version;
34 35
35 version = le_key_version (key); 36 version = le_key_version(key);
36 if (le_key_k_type (version, key) == TYPE_DIRENTRY) 37 if (le_key_k_type(version, key) == TYPE_DIRENTRY)
37 sprintf (off_buf, "%Lu(%Lu)", 38 sprintf(off_buf, "%Lu(%Lu)",
38 (unsigned long long)GET_HASH_VALUE (le_key_k_offset (version, key)), 39 (unsigned long long)
39 (unsigned long long)GET_GENERATION_NUMBER (le_key_k_offset (version, key))); 40 GET_HASH_VALUE(le_key_k_offset(version, key)),
40 else 41 (unsigned long long)
41 sprintf (off_buf, "0x%Lx", (unsigned long long)le_key_k_offset (version, key)); 42 GET_GENERATION_NUMBER(le_key_k_offset(version, key)));
42 return off_buf; 43 else
44 sprintf(off_buf, "0x%Lx",
45 (unsigned long long)le_key_k_offset(version, key));
46 return off_buf;
43} 47}
44 48
45 49static char *cpu_type(struct cpu_key *key)
46static char * cpu_type (struct cpu_key * key)
47{ 50{
48 if (cpu_key_k_type (key) == TYPE_STAT_DATA) 51 if (cpu_key_k_type(key) == TYPE_STAT_DATA)
49 return "SD"; 52 return "SD";
50 if (cpu_key_k_type (key) == TYPE_DIRENTRY) 53 if (cpu_key_k_type(key) == TYPE_DIRENTRY)
51 return "DIR"; 54 return "DIR";
52 if (cpu_key_k_type (key) == TYPE_DIRECT) 55 if (cpu_key_k_type(key) == TYPE_DIRECT)
53 return "DIRECT"; 56 return "DIRECT";
54 if (cpu_key_k_type (key) == TYPE_INDIRECT) 57 if (cpu_key_k_type(key) == TYPE_INDIRECT)
55 return "IND"; 58 return "IND";
56 return "UNKNOWN"; 59 return "UNKNOWN";
57} 60}
58 61
59 62static char *le_type(struct reiserfs_key *key)
60static char * le_type (struct reiserfs_key * key)
61{ 63{
62 int version; 64 int version;
63
64 version = le_key_version (key);
65 65
66 if (le_key_k_type (version, key) == TYPE_STAT_DATA) 66 version = le_key_version(key);
67 return "SD";
68 if (le_key_k_type (version, key) == TYPE_DIRENTRY)
69 return "DIR";
70 if (le_key_k_type (version, key) == TYPE_DIRECT)
71 return "DIRECT";
72 if (le_key_k_type (version, key) == TYPE_INDIRECT)
73 return "IND";
74 return "UNKNOWN";
75}
76 67
68 if (le_key_k_type(version, key) == TYPE_STAT_DATA)
69 return "SD";
70 if (le_key_k_type(version, key) == TYPE_DIRENTRY)
71 return "DIR";
72 if (le_key_k_type(version, key) == TYPE_DIRECT)
73 return "DIRECT";
74 if (le_key_k_type(version, key) == TYPE_INDIRECT)
75 return "IND";
76 return "UNKNOWN";
77}
77 78
78/* %k */ 79/* %k */
79static void sprintf_le_key (char * buf, struct reiserfs_key * key) 80static void sprintf_le_key(char *buf, struct reiserfs_key *key)
80{ 81{
81 if (key) 82 if (key)
82 sprintf (buf, "[%d %d %s %s]", le32_to_cpu (key->k_dir_id), 83 sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
83 le32_to_cpu (key->k_objectid), le_offset (key), le_type (key)); 84 le32_to_cpu(key->k_objectid), le_offset(key),
84 else 85 le_type(key));
85 sprintf (buf, "[NULL]"); 86 else
87 sprintf(buf, "[NULL]");
86} 88}
87 89
88
89/* %K */ 90/* %K */
90static void sprintf_cpu_key (char * buf, struct cpu_key * key) 91static void sprintf_cpu_key(char *buf, struct cpu_key *key)
91{ 92{
92 if (key) 93 if (key)
93 sprintf (buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, 94 sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
94 key->on_disk_key.k_objectid, reiserfs_cpu_offset (key), 95 key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
95 cpu_type (key)); 96 cpu_type(key));
96 else 97 else
97 sprintf (buf, "[NULL]"); 98 sprintf(buf, "[NULL]");
98} 99}
99 100
100static void sprintf_de_head( char *buf, struct reiserfs_de_head *deh ) 101static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
101{ 102{
102 if( deh ) 103 if (deh)
103 sprintf( buf, "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", deh_offset(deh), deh_dir_id(deh), 104 sprintf(buf,
104 deh_objectid(deh), deh_location(deh), deh_state(deh) ); 105 "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
105 else 106 deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
106 sprintf( buf, "[NULL]" ); 107 deh_location(deh), deh_state(deh));
108 else
109 sprintf(buf, "[NULL]");
107 110
108} 111}
109 112
110static void sprintf_item_head (char * buf, struct item_head * ih) 113static void sprintf_item_head(char *buf, struct item_head *ih)
111{ 114{
112 if (ih) { 115 if (ih) {
113 strcpy (buf, (ih_version (ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); 116 strcpy(buf,
114 sprintf_le_key (buf + strlen (buf), &(ih->ih_key)); 117 (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
115 sprintf (buf + strlen (buf), ", item_len %d, item_location %d, " 118 sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
116 "free_space(entry_count) %d", 119 sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
117 ih_item_len(ih), ih_location(ih), ih_free_space (ih)); 120 "free_space(entry_count) %d",
118 } else 121 ih_item_len(ih), ih_location(ih), ih_free_space(ih));
119 sprintf (buf, "[NULL]"); 122 } else
123 sprintf(buf, "[NULL]");
120} 124}
121 125
122 126static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
123static void sprintf_direntry (char * buf, struct reiserfs_dir_entry * de)
124{ 127{
125 char name[20]; 128 char name[20];
126 129
127 memcpy (name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); 130 memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
128 name [de->de_namelen > 19 ? 19 : de->de_namelen] = 0; 131 name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
129 sprintf (buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); 132 sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
130} 133}
131 134
132 135static void sprintf_block_head(char *buf, struct buffer_head *bh)
133static void sprintf_block_head (char * buf, struct buffer_head * bh)
134{ 136{
135 sprintf (buf, "level=%d, nr_items=%d, free_space=%d rdkey ", 137 sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
136 B_LEVEL (bh), B_NR_ITEMS (bh), B_FREE_SPACE (bh)); 138 B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
137} 139}
138 140
139 141static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
140static void sprintf_buffer_head (char * buf, struct buffer_head * bh)
141{ 142{
142 char b[BDEVNAME_SIZE]; 143 char b[BDEVNAME_SIZE];
143 144
144 sprintf (buf, "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", 145 sprintf(buf,
145 bdevname (bh->b_bdev, b), bh->b_size, 146 "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
146 (unsigned long long)bh->b_blocknr, 147 bdevname(bh->b_bdev, b), bh->b_size,
147 atomic_read (&(bh->b_count)), 148 (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
148 bh->b_state, bh->b_page, 149 bh->b_state, bh->b_page,
149 buffer_uptodate (bh) ? "UPTODATE" : "!UPTODATE", 150 buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
150 buffer_dirty (bh) ? "DIRTY" : "CLEAN", 151 buffer_dirty(bh) ? "DIRTY" : "CLEAN",
151 buffer_locked (bh) ? "LOCKED" : "UNLOCKED"); 152 buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
152} 153}
153 154
154 155static void sprintf_disk_child(char *buf, struct disk_child *dc)
155static void sprintf_disk_child (char * buf, struct disk_child * dc)
156{ 156{
157 sprintf (buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), dc_size(dc)); 157 sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
158 dc_size(dc));
158} 159}
159 160
160 161static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip)
161static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip)
162{ 162{
163 char * k = fmt; 163 char *k = fmt;
164 164
165 *skip = 0; 165 *skip = 0;
166
167 while ((k = strchr (k, '%')) != NULL)
168 {
169 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
170 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a' ) {
171 *what = k[1];
172 break;
173 }
174 (*skip) ++;
175 k ++;
176 }
177 return k;
178}
179 166
167 while ((k = strchr(k, '%')) != NULL) {
168 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
169 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') {
170 *what = k[1];
171 break;
172 }
173 (*skip)++;
174 k++;
175 }
176 return k;
177}
180 178
181/* debugging reiserfs we used to print out a lot of different 179/* debugging reiserfs we used to print out a lot of different
182 variables, like keys, item headers, buffer heads etc. Values of 180 variables, like keys, item headers, buffer heads etc. Values of
@@ -191,61 +189,64 @@ static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip)
191 key->k_offset, key->k_uniqueness); 189 key->k_offset, key->k_uniqueness);
192*/ 190*/
193 191
194 192static void prepare_error_buf(const char *fmt, va_list args)
195static void 193{
196prepare_error_buf( const char *fmt, va_list args ) 194 char *fmt1 = fmt_buf;
197{ 195 char *k;
198 char * fmt1 = fmt_buf; 196 char *p = error_buf;
199 char * k; 197 int i, j, what, skip;
200 char * p = error_buf; 198
201 int i, j, what, skip; 199 strcpy(fmt1, fmt);
202 200
203 strcpy (fmt1, fmt); 201 while ((k = is_there_reiserfs_struct(fmt1, &what, &skip)) != NULL) {
204 202 *k = 0;
205 while( (k = is_there_reiserfs_struct( fmt1, &what, &skip )) != NULL ) 203
206 { 204 p += vsprintf(p, fmt1, args);
207 *k = 0; 205
208 206 for (i = 0; i < skip; i++)
209 p += vsprintf (p, fmt1, args); 207 j = va_arg(args, int);
210 208
211 for (i = 0; i < skip; i ++) 209 switch (what) {
212 j = va_arg (args, int); 210 case 'k':
213 211 sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
214 switch (what) { 212 break;
215 case 'k': 213 case 'K':
216 sprintf_le_key (p, va_arg(args, struct reiserfs_key *)); 214 sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
217 break; 215 break;
218 case 'K': 216 case 'h':
219 sprintf_cpu_key (p, va_arg(args, struct cpu_key *)); 217 sprintf_item_head(p, va_arg(args, struct item_head *));
220 break; 218 break;
221 case 'h': 219 case 't':
222 sprintf_item_head (p, va_arg(args, struct item_head *)); 220 sprintf_direntry(p,
223 break; 221 va_arg(args,
224 case 't': 222 struct reiserfs_dir_entry *));
225 sprintf_direntry (p, va_arg(args, struct reiserfs_dir_entry *)); 223 break;
226 break; 224 case 'y':
227 case 'y': 225 sprintf_disk_child(p,
228 sprintf_disk_child (p, va_arg(args, struct disk_child *)); 226 va_arg(args, struct disk_child *));
229 break; 227 break;
230 case 'z': 228 case 'z':
231 sprintf_block_head (p, va_arg(args, struct buffer_head *)); 229 sprintf_block_head(p,
232 break; 230 va_arg(args, struct buffer_head *));
233 case 'b': 231 break;
234 sprintf_buffer_head (p, va_arg(args, struct buffer_head *)); 232 case 'b':
235 break; 233 sprintf_buffer_head(p,
236 case 'a': 234 va_arg(args, struct buffer_head *));
237 sprintf_de_head (p, va_arg(args, struct reiserfs_de_head *)); 235 break;
238 break; 236 case 'a':
239 } 237 sprintf_de_head(p,
240 238 va_arg(args,
241 p += strlen (p); 239 struct reiserfs_de_head *));
242 fmt1 = k + 2; 240 break;
243 } 241 }
244 vsprintf (p, fmt1, args); 242
243 p += strlen(p);
244 fmt1 = k + 2;
245 }
246 vsprintf(p, fmt1, args);
245 247
246} 248}
247 249
248
249/* in addition to usual conversion specifiers this accepts reiserfs 250/* in addition to usual conversion specifiers this accepts reiserfs
250 specific conversion specifiers: 251 specific conversion specifiers:
251 %k to print little endian key, 252 %k to print little endian key,
@@ -264,43 +265,43 @@ prepare_error_buf( const char *fmt, va_list args )
264 va_end( args );\ 265 va_end( args );\
265} 266}
266 267
267void reiserfs_warning (struct super_block *sb, const char * fmt, ...) 268void reiserfs_warning(struct super_block *sb, const char *fmt, ...)
268{ 269{
269 do_reiserfs_warning(fmt); 270 do_reiserfs_warning(fmt);
270 if (sb) 271 if (sb)
271 printk (KERN_WARNING "ReiserFS: %s: warning: %s\n", 272 printk(KERN_WARNING "ReiserFS: %s: warning: %s\n",
272 reiserfs_bdevname (sb), error_buf); 273 reiserfs_bdevname(sb), error_buf);
273 else 274 else
274 printk (KERN_WARNING "ReiserFS: warning: %s\n", error_buf); 275 printk(KERN_WARNING "ReiserFS: warning: %s\n", error_buf);
275} 276}
276 277
277/* No newline.. reiserfs_info calls can be followed by printk's */ 278/* No newline.. reiserfs_info calls can be followed by printk's */
278void reiserfs_info (struct super_block *sb, const char * fmt, ...) 279void reiserfs_info(struct super_block *sb, const char *fmt, ...)
279{ 280{
280 do_reiserfs_warning(fmt); 281 do_reiserfs_warning(fmt);
281 if (sb) 282 if (sb)
282 printk (KERN_NOTICE "ReiserFS: %s: %s", 283 printk(KERN_NOTICE "ReiserFS: %s: %s",
283 reiserfs_bdevname (sb), error_buf); 284 reiserfs_bdevname(sb), error_buf);
284 else 285 else
285 printk (KERN_NOTICE "ReiserFS: %s", error_buf); 286 printk(KERN_NOTICE "ReiserFS: %s", error_buf);
286} 287}
287 288
288/* No newline.. reiserfs_printk calls can be followed by printk's */ 289/* No newline.. reiserfs_printk calls can be followed by printk's */
289static void reiserfs_printk (const char * fmt, ...) 290static void reiserfs_printk(const char *fmt, ...)
290{ 291{
291 do_reiserfs_warning(fmt); 292 do_reiserfs_warning(fmt);
292 printk (error_buf); 293 printk(error_buf);
293} 294}
294 295
295void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...) 296void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
296{ 297{
297#ifdef CONFIG_REISERFS_CHECK 298#ifdef CONFIG_REISERFS_CHECK
298 do_reiserfs_warning(fmt); 299 do_reiserfs_warning(fmt);
299 if (s) 300 if (s)
300 printk (KERN_DEBUG "ReiserFS: %s: %s\n", 301 printk(KERN_DEBUG "ReiserFS: %s: %s\n",
301 reiserfs_bdevname (s), error_buf); 302 reiserfs_bdevname(s), error_buf);
302 else 303 else
303 printk (KERN_DEBUG "ReiserFS: %s\n", error_buf); 304 printk(KERN_DEBUG "ReiserFS: %s\n", error_buf);
304#endif 305#endif
305} 306}
306 307
@@ -349,379 +350,403 @@ void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...)
349 350
350 . */ 351 . */
351 352
352
353#ifdef CONFIG_REISERFS_CHECK 353#ifdef CONFIG_REISERFS_CHECK
354extern struct tree_balance * cur_tb; 354extern struct tree_balance *cur_tb;
355#endif 355#endif
356 356
357void reiserfs_panic (struct super_block * sb, const char * fmt, ...) 357void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
358{ 358{
359 do_reiserfs_warning(fmt); 359 do_reiserfs_warning(fmt);
360 printk (KERN_EMERG "REISERFS: panic (device %s): %s\n", 360 printk(KERN_EMERG "REISERFS: panic (device %s): %s\n",
361 reiserfs_bdevname (sb), error_buf); 361 reiserfs_bdevname(sb), error_buf);
362 BUG (); 362 BUG();
363 363
364 /* this is not actually called, but makes reiserfs_panic() "noreturn" */ 364 /* this is not actually called, but makes reiserfs_panic() "noreturn" */
365 panic ("REISERFS: panic (device %s): %s\n", 365 panic("REISERFS: panic (device %s): %s\n",
366 reiserfs_bdevname (sb), error_buf); 366 reiserfs_bdevname(sb), error_buf);
367} 367}
368 368
369void 369void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
370reiserfs_abort (struct super_block *sb, int errno, const char *fmt, ...)
371{ 370{
372 do_reiserfs_warning (fmt); 371 do_reiserfs_warning(fmt);
373 372
374 if (reiserfs_error_panic (sb)) { 373 if (reiserfs_error_panic(sb)) {
375 panic (KERN_CRIT "REISERFS: panic (device %s): %s\n", 374 panic(KERN_CRIT "REISERFS: panic (device %s): %s\n",
376 reiserfs_bdevname (sb), error_buf); 375 reiserfs_bdevname(sb), error_buf);
377 } 376 }
378 377
379 if (sb->s_flags & MS_RDONLY) 378 if (sb->s_flags & MS_RDONLY)
380 return; 379 return;
381 380
382 printk (KERN_CRIT "REISERFS: abort (device %s): %s\n", 381 printk(KERN_CRIT "REISERFS: abort (device %s): %s\n",
383 reiserfs_bdevname (sb), error_buf); 382 reiserfs_bdevname(sb), error_buf);
384 383
385 sb->s_flags |= MS_RDONLY; 384 sb->s_flags |= MS_RDONLY;
386 reiserfs_journal_abort (sb, errno); 385 reiserfs_journal_abort(sb, errno);
387} 386}
388 387
389/* this prints internal nodes (4 keys/items in line) (dc_number, 388/* this prints internal nodes (4 keys/items in line) (dc_number,
390 dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, 389 dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
391 dc_size)...*/ 390 dc_size)...*/
392static int print_internal (struct buffer_head * bh, int first, int last) 391static int print_internal(struct buffer_head *bh, int first, int last)
393{ 392{
394 struct reiserfs_key * key; 393 struct reiserfs_key *key;
395 struct disk_child * dc; 394 struct disk_child *dc;
396 int i; 395 int i;
397 int from, to; 396 int from, to;
398
399 if (!B_IS_KEYS_LEVEL (bh))
400 return 1;
401
402 check_internal (bh);
403
404 if (first == -1) {
405 from = 0;
406 to = B_NR_ITEMS (bh);
407 } else {
408 from = first;
409 to = last < B_NR_ITEMS (bh) ? last : B_NR_ITEMS (bh);
410 }
411
412 reiserfs_printk ("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh);
413
414 dc = B_N_CHILD (bh, from);
415 reiserfs_printk ("PTR %d: %y ", from, dc);
416
417 for (i = from, key = B_N_PDELIM_KEY (bh, from), dc ++; i < to; i ++, key ++, dc ++) {
418 reiserfs_printk ("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
419 if (i && i % 4 == 0)
420 printk ("\n");
421 }
422 printk ("\n");
423 return 0;
424}
425 397
398 if (!B_IS_KEYS_LEVEL(bh))
399 return 1;
426 400
401 check_internal(bh);
427 402
403 if (first == -1) {
404 from = 0;
405 to = B_NR_ITEMS(bh);
406 } else {
407 from = first;
408 to = last < B_NR_ITEMS(bh) ? last : B_NR_ITEMS(bh);
409 }
428 410
411 reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh);
429 412
430static int print_leaf (struct buffer_head * bh, int print_mode, int first, int last) 413 dc = B_N_CHILD(bh, from);
431{ 414 reiserfs_printk("PTR %d: %y ", from, dc);
432 struct block_head * blkh;
433 struct item_head * ih;
434 int i, nr;
435 int from, to;
436 415
437 if (!B_IS_ITEMS_LEVEL (bh)) 416 for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to;
438 return 1; 417 i++, key++, dc++) {
418 reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
419 if (i && i % 4 == 0)
420 printk("\n");
421 }
422 printk("\n");
423 return 0;
424}
439 425
440 check_leaf (bh); 426static int print_leaf(struct buffer_head *bh, int print_mode, int first,
427 int last)
428{
429 struct block_head *blkh;
430 struct item_head *ih;
431 int i, nr;
432 int from, to;
441 433
442 blkh = B_BLK_HEAD (bh); 434 if (!B_IS_ITEMS_LEVEL(bh))
443 ih = B_N_PITEM_HEAD (bh,0); 435 return 1;
444 nr = blkh_nr_item(blkh);
445 436
446 printk ("\n===================================================================\n"); 437 check_leaf(bh);
447 reiserfs_printk ("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh);
448 438
449 if (!(print_mode & PRINT_LEAF_ITEMS)) { 439 blkh = B_BLK_HEAD(bh);
450 reiserfs_printk ("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", 440 ih = B_N_PITEM_HEAD(bh, 0);
451 &(ih->ih_key), &((ih + nr - 1)->ih_key)); 441 nr = blkh_nr_item(blkh);
452 return 0;
453 }
454 442
455 if (first < 0 || first > nr - 1) 443 printk
456 from = 0; 444 ("\n===================================================================\n");
457 else 445 reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh);
458 from = first;
459 446
460 if (last < 0 || last > nr ) 447 if (!(print_mode & PRINT_LEAF_ITEMS)) {
461 to = nr; 448 reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n",
462 else 449 &(ih->ih_key), &((ih + nr - 1)->ih_key));
463 to = last; 450 return 0;
451 }
464 452
465 ih += from; 453 if (first < 0 || first > nr - 1)
466 printk ("-------------------------------------------------------------------------------\n"); 454 from = 0;
467 printk ("|##| type | key | ilen | free_space | version | loc |\n"); 455 else
468 for (i = from; i < to; i++, ih ++) { 456 from = first;
469 printk ("-------------------------------------------------------------------------------\n"); 457
470 reiserfs_printk ("|%2d| %h |\n", i, ih); 458 if (last < 0 || last > nr)
471 if (print_mode & PRINT_LEAF_ITEMS) 459 to = nr;
472 op_print_item (ih, B_I_PITEM (bh, ih)); 460 else
473 } 461 to = last;
462
463 ih += from;
464 printk
465 ("-------------------------------------------------------------------------------\n");
466 printk
467 ("|##| type | key | ilen | free_space | version | loc |\n");
468 for (i = from; i < to; i++, ih++) {
469 printk
470 ("-------------------------------------------------------------------------------\n");
471 reiserfs_printk("|%2d| %h |\n", i, ih);
472 if (print_mode & PRINT_LEAF_ITEMS)
473 op_print_item(ih, B_I_PITEM(bh, ih));
474 }
474 475
475 printk ("===================================================================\n"); 476 printk
477 ("===================================================================\n");
476 478
477 return 0; 479 return 0;
478} 480}
479 481
480char * reiserfs_hashname(int code) 482char *reiserfs_hashname(int code)
481{ 483{
482 if ( code == YURA_HASH) 484 if (code == YURA_HASH)
483 return "rupasov"; 485 return "rupasov";
484 if ( code == TEA_HASH) 486 if (code == TEA_HASH)
485 return "tea"; 487 return "tea";
486 if ( code == R5_HASH) 488 if (code == R5_HASH)
487 return "r5"; 489 return "r5";
488 490
489 return "unknown"; 491 return "unknown";
490} 492}
491 493
492/* return 1 if this is not super block */ 494/* return 1 if this is not super block */
493static int print_super_block (struct buffer_head * bh) 495static int print_super_block(struct buffer_head *bh)
494{ 496{
495 struct reiserfs_super_block * rs = (struct reiserfs_super_block *)(bh->b_data); 497 struct reiserfs_super_block *rs =
496 int skipped, data_blocks; 498 (struct reiserfs_super_block *)(bh->b_data);
497 char *version; 499 int skipped, data_blocks;
498 char b[BDEVNAME_SIZE]; 500 char *version;
499 501 char b[BDEVNAME_SIZE];
500 if (is_reiserfs_3_5(rs)) { 502
501 version = "3.5"; 503 if (is_reiserfs_3_5(rs)) {
502 } else if (is_reiserfs_3_6(rs)) { 504 version = "3.5";
503 version = "3.6"; 505 } else if (is_reiserfs_3_6(rs)) {
504 } else if (is_reiserfs_jr(rs)) { 506 version = "3.6";
505 version = ((sb_version(rs) == REISERFS_VERSION_2) ? 507 } else if (is_reiserfs_jr(rs)) {
506 "3.6" : "3.5"); 508 version = ((sb_version(rs) == REISERFS_VERSION_2) ?
507 } else { 509 "3.6" : "3.5");
508 return 1; 510 } else {
509 } 511 return 1;
510 512 }
511 printk ("%s\'s super block is in block %llu\n", bdevname (bh->b_bdev, b), 513
512 (unsigned long long)bh->b_blocknr); 514 printk("%s\'s super block is in block %llu\n", bdevname(bh->b_bdev, b),
513 printk ("Reiserfs version %s\n", version ); 515 (unsigned long long)bh->b_blocknr);
514 printk ("Block count %u\n", sb_block_count(rs)); 516 printk("Reiserfs version %s\n", version);
515 printk ("Blocksize %d\n", sb_blocksize(rs)); 517 printk("Block count %u\n", sb_block_count(rs));
516 printk ("Free blocks %u\n", sb_free_blocks(rs)); 518 printk("Blocksize %d\n", sb_blocksize(rs));
517 // FIXME: this would be confusing if 519 printk("Free blocks %u\n", sb_free_blocks(rs));
518 // someone stores reiserfs super block in some data block ;) 520 // FIXME: this would be confusing if
521 // someone stores reiserfs super block in some data block ;)
519// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); 522// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs);
520 skipped = bh->b_blocknr; 523 skipped = bh->b_blocknr;
521 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - 524 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) -
522 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + 1 : sb_reserved_for_journal(rs)) - 525 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) +
523 sb_free_blocks(rs); 526 1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs);
524 printk ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" 527 printk
525 "1 super block, %d data blocks\n", 528 ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n"
526 skipped, sb_bmap_nr(rs), (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) : 529 "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs),
527 sb_reserved_for_journal(rs)) , data_blocks); 530 (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) :
528 printk ("Root block %u\n", sb_root_block(rs)); 531 sb_reserved_for_journal(rs)), data_blocks);
529 printk ("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); 532 printk("Root block %u\n", sb_root_block(rs));
530 printk ("Journal dev %d\n", sb_jp_journal_dev(rs)); 533 printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs));
531 printk ("Journal orig size %d\n", sb_jp_journal_size(rs)); 534 printk("Journal dev %d\n", sb_jp_journal_dev(rs));
532 printk ("FS state %d\n", sb_fs_state(rs)); 535 printk("Journal orig size %d\n", sb_jp_journal_size(rs));
533 printk ("Hash function \"%s\"\n", 536 printk("FS state %d\n", sb_fs_state(rs));
534 reiserfs_hashname(sb_hash_function_code(rs))); 537 printk("Hash function \"%s\"\n",
535 538 reiserfs_hashname(sb_hash_function_code(rs)));
536 printk ("Tree height %d\n", sb_tree_height(rs)); 539
537 return 0; 540 printk("Tree height %d\n", sb_tree_height(rs));
541 return 0;
538} 542}
539 543
540static int print_desc_block (struct buffer_head * bh) 544static int print_desc_block(struct buffer_head *bh)
541{ 545{
542 struct reiserfs_journal_desc * desc; 546 struct reiserfs_journal_desc *desc;
543 547
544 if (memcmp(get_journal_desc_magic (bh), JOURNAL_DESC_MAGIC, 8)) 548 if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8))
545 return 1; 549 return 1;
546 550
547 desc = (struct reiserfs_journal_desc *)(bh->b_data); 551 desc = (struct reiserfs_journal_desc *)(bh->b_data);
548 printk ("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", 552 printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)",
549 (unsigned long long)bh->b_blocknr, get_desc_trans_id (desc), get_desc_mount_id (desc), 553 (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc),
550 get_desc_trans_len (desc)); 554 get_desc_mount_id(desc), get_desc_trans_len(desc));
551 555
552 return 0; 556 return 0;
553} 557}
554 558
555 559void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last)
556void print_block (struct buffer_head * bh, ...)//int print_mode, int first, int last)
557{ 560{
558 va_list args; 561 va_list args;
559 int mode, first, last; 562 int mode, first, last;
560 563
561 va_start (args, bh); 564 va_start(args, bh);
562 565
563 if ( ! bh ) { 566 if (!bh) {
564 printk("print_block: buffer is NULL\n"); 567 printk("print_block: buffer is NULL\n");
565 return; 568 return;
566 } 569 }
567 570
568 mode = va_arg (args, int); 571 mode = va_arg(args, int);
569 first = va_arg (args, int); 572 first = va_arg(args, int);
570 last = va_arg (args, int); 573 last = va_arg(args, int);
571 if (print_leaf (bh, mode, first, last)) 574 if (print_leaf(bh, mode, first, last))
572 if (print_internal (bh, first, last)) 575 if (print_internal(bh, first, last))
573 if (print_super_block (bh)) 576 if (print_super_block(bh))
574 if (print_desc_block (bh)) 577 if (print_desc_block(bh))
575 printk ("Block %llu contains unformatted data\n", (unsigned long long)bh->b_blocknr); 578 printk
579 ("Block %llu contains unformatted data\n",
580 (unsigned long long)bh->b_blocknr);
576} 581}
577 582
578
579
580static char print_tb_buf[2048]; 583static char print_tb_buf[2048];
581 584
582/* this stores initial state of tree balance in the print_tb_buf */ 585/* this stores initial state of tree balance in the print_tb_buf */
583void store_print_tb (struct tree_balance * tb) 586void store_print_tb(struct tree_balance *tb)
584{ 587{
585 int h = 0; 588 int h = 0;
586 int i; 589 int i;
587 struct buffer_head * tbSh, * tbFh; 590 struct buffer_head *tbSh, *tbFh;
588 591
589 if (!tb) 592 if (!tb)
590 return; 593 return;
591 594
592 sprintf (print_tb_buf, "\n" 595 sprintf(print_tb_buf, "\n"
593 "BALANCING %d\n" 596 "BALANCING %d\n"
594 "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" 597 "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n"
595 "=====================================================================\n" 598 "=====================================================================\n"
596 "* h * S * L * R * F * FL * FR * CFL * CFR *\n", 599 "* h * S * L * R * F * FL * FR * CFL * CFR *\n",
597 REISERFS_SB(tb->tb_sb)->s_do_balance, 600 REISERFS_SB(tb->tb_sb)->s_do_balance,
598 tb->tb_mode, PATH_LAST_POSITION (tb->tb_path), tb->tb_path->pos_in_item); 601 tb->tb_mode, PATH_LAST_POSITION(tb->tb_path),
599 602 tb->tb_path->pos_in_item);
600 for (h = 0; h < sizeof(tb->insert_size) / sizeof (tb->insert_size[0]); h ++) { 603
601 if (PATH_H_PATH_OFFSET (tb->tb_path, h) <= tb->tb_path->path_length && 604 for (h = 0; h < sizeof(tb->insert_size) / sizeof(tb->insert_size[0]);
602 PATH_H_PATH_OFFSET (tb->tb_path, h) > ILLEGAL_PATH_ELEMENT_OFFSET) { 605 h++) {
603 tbSh = PATH_H_PBUFFER (tb->tb_path, h); 606 if (PATH_H_PATH_OFFSET(tb->tb_path, h) <=
604 tbFh = PATH_H_PPARENT (tb->tb_path, h); 607 tb->tb_path->path_length
605 } else { 608 && PATH_H_PATH_OFFSET(tb->tb_path,
606 tbSh = NULL; 609 h) > ILLEGAL_PATH_ELEMENT_OFFSET) {
607 tbFh = NULL; 610 tbSh = PATH_H_PBUFFER(tb->tb_path, h);
611 tbFh = PATH_H_PPARENT(tb->tb_path, h);
612 } else {
613 tbSh = NULL;
614 tbFh = NULL;
615 }
616 sprintf(print_tb_buf + strlen(print_tb_buf),
617 "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
618 h,
619 (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL),
620 (tbSh) ? atomic_read(&(tbSh->b_count)) : -1,
621 (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL),
622 (tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1,
623 (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL),
624 (tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1,
625 (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL),
626 (tb->FL[h]) ? (long long)(tb->FL[h]->
627 b_blocknr) : (-1LL),
628 (tb->FR[h]) ? (long long)(tb->FR[h]->
629 b_blocknr) : (-1LL),
630 (tb->CFL[h]) ? (long long)(tb->CFL[h]->
631 b_blocknr) : (-1LL),
632 (tb->CFR[h]) ? (long long)(tb->CFR[h]->
633 b_blocknr) : (-1LL));
608 } 634 }
609 sprintf (print_tb_buf + strlen (print_tb_buf),
610 "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
611 h,
612 (tbSh) ? (long long)(tbSh->b_blocknr):(-1LL),
613 (tbSh) ? atomic_read (&(tbSh->b_count)) : -1,
614 (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr):(-1LL),
615 (tb->L[h]) ? atomic_read (&(tb->L[h]->b_count)) : -1,
616 (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr):(-1LL),
617 (tb->R[h]) ? atomic_read (&(tb->R[h]->b_count)) : -1,
618 (tbFh) ? (long long)(tbFh->b_blocknr):(-1LL),
619 (tb->FL[h]) ? (long long)(tb->FL[h]->b_blocknr):(-1LL),
620 (tb->FR[h]) ? (long long)(tb->FR[h]->b_blocknr):(-1LL),
621 (tb->CFL[h]) ? (long long)(tb->CFL[h]->b_blocknr):(-1LL),
622 (tb->CFR[h]) ? (long long)(tb->CFR[h]->b_blocknr):(-1LL));
623 }
624
625 sprintf (print_tb_buf + strlen (print_tb_buf),
626 "=====================================================================\n"
627 "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
628 "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
629 tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],tb->rbytes, tb->blknum[0],
630 tb->s0num, tb->s1num,tb->s1bytes, tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], tb->rkey[0]);
631
632 /* this prints balance parameters for non-leaf levels */
633 h = 0;
634 do {
635 h++;
636 sprintf (print_tb_buf + strlen (print_tb_buf),
637 "* %d * %4d * %2d * * %2d * * %2d *\n",
638 h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], tb->blknum[h]);
639 } while (tb->insert_size[h]);
640
641 sprintf (print_tb_buf + strlen (print_tb_buf),
642 "=====================================================================\n"
643 "FEB list: ");
644
645 /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */
646 h = 0;
647 for (i = 0; i < sizeof (tb->FEB) / sizeof (tb->FEB[0]); i ++)
648 sprintf (print_tb_buf + strlen (print_tb_buf),
649 "%p (%llu %d)%s", tb->FEB[i], tb->FEB[i] ? (unsigned long long)tb->FEB[i]->b_blocknr : 0ULL,
650 tb->FEB[i] ? atomic_read (&(tb->FEB[i]->b_count)) : 0,
651 (i == sizeof (tb->FEB) / sizeof (tb->FEB[0]) - 1) ? "\n" : ", ");
652
653 sprintf (print_tb_buf + strlen (print_tb_buf),
654 "======================== the end ====================================\n");
655}
656
657void print_cur_tb (char * mes)
658{
659 printk ("%s\n%s", mes, print_tb_buf);
660}
661
662static void check_leaf_block_head (struct buffer_head * bh)
663{
664 struct block_head * blkh;
665 int nr;
666
667 blkh = B_BLK_HEAD (bh);
668 nr = blkh_nr_item(blkh);
669 if ( nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
670 reiserfs_panic (NULL, "vs-6010: check_leaf_block_head: invalid item number %z", bh);
671 if ( blkh_free_space(blkh) >
672 bh->b_size - BLKH_SIZE - IH_SIZE * nr )
673 reiserfs_panic (NULL, "vs-6020: check_leaf_block_head: invalid free space %z", bh);
674
675}
676 635
677static void check_internal_block_head (struct buffer_head * bh) 636 sprintf(print_tb_buf + strlen(print_tb_buf),
678{ 637 "=====================================================================\n"
679 struct block_head * blkh; 638 "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
680 639 "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
681 blkh = B_BLK_HEAD (bh); 640 tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],
682 if (!(B_LEVEL (bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL (bh) <= MAX_HEIGHT)) 641 tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes,
683 reiserfs_panic (NULL, "vs-6025: check_internal_block_head: invalid level %z", bh); 642 tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0],
643 tb->rkey[0]);
644
645 /* this prints balance parameters for non-leaf levels */
646 h = 0;
647 do {
648 h++;
649 sprintf(print_tb_buf + strlen(print_tb_buf),
650 "* %d * %4d * %2d * * %2d * * %2d *\n",
651 h, tb->insert_size[h], tb->lnum[h], tb->rnum[h],
652 tb->blknum[h]);
653 } while (tb->insert_size[h]);
684 654
685 if (B_NR_ITEMS (bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) 655 sprintf(print_tb_buf + strlen(print_tb_buf),
686 reiserfs_panic (NULL, "vs-6030: check_internal_block_head: invalid item number %z", bh); 656 "=====================================================================\n"
657 "FEB list: ");
687 658
688 if (B_FREE_SPACE (bh) != 659 /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */
689 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS (bh) - DC_SIZE * (B_NR_ITEMS (bh) + 1)) 660 h = 0;
690 reiserfs_panic (NULL, "vs-6040: check_internal_block_head: invalid free space %z", bh); 661 for (i = 0; i < sizeof(tb->FEB) / sizeof(tb->FEB[0]); i++)
662 sprintf(print_tb_buf + strlen(print_tb_buf),
663 "%p (%llu %d)%s", tb->FEB[i],
664 tb->FEB[i] ? (unsigned long long)tb->FEB[i]->
665 b_blocknr : 0ULL,
666 tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0,
667 (i ==
668 sizeof(tb->FEB) / sizeof(tb->FEB[0]) -
669 1) ? "\n" : ", ");
691 670
671 sprintf(print_tb_buf + strlen(print_tb_buf),
672 "======================== the end ====================================\n");
692} 673}
693 674
675void print_cur_tb(char *mes)
676{
677 printk("%s\n%s", mes, print_tb_buf);
678}
694 679
695void check_leaf (struct buffer_head * bh) 680static void check_leaf_block_head(struct buffer_head *bh)
696{ 681{
697 int i; 682 struct block_head *blkh;
698 struct item_head * ih; 683 int nr;
684
685 blkh = B_BLK_HEAD(bh);
686 nr = blkh_nr_item(blkh);
687 if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
688 reiserfs_panic(NULL,
689 "vs-6010: check_leaf_block_head: invalid item number %z",
690 bh);
691 if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr)
692 reiserfs_panic(NULL,
693 "vs-6020: check_leaf_block_head: invalid free space %z",
694 bh);
699 695
700 if (!bh)
701 return;
702 check_leaf_block_head (bh);
703 for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++)
704 op_check_item (ih, B_I_PITEM (bh, ih));
705} 696}
706 697
698static void check_internal_block_head(struct buffer_head *bh)
699{
700 struct block_head *blkh;
701
702 blkh = B_BLK_HEAD(bh);
703 if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
704 reiserfs_panic(NULL,
705 "vs-6025: check_internal_block_head: invalid level %z",
706 bh);
707
708 if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE)
709 reiserfs_panic(NULL,
710 "vs-6030: check_internal_block_head: invalid item number %z",
711 bh);
712
713 if (B_FREE_SPACE(bh) !=
714 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) -
715 DC_SIZE * (B_NR_ITEMS(bh) + 1))
716 reiserfs_panic(NULL,
717 "vs-6040: check_internal_block_head: invalid free space %z",
718 bh);
719
720}
707 721
708void check_internal (struct buffer_head * bh) 722void check_leaf(struct buffer_head *bh)
709{ 723{
710 if (!bh) 724 int i;
711 return; 725 struct item_head *ih;
712 check_internal_block_head (bh); 726
727 if (!bh)
728 return;
729 check_leaf_block_head(bh);
730 for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++)
731 op_check_item(ih, B_I_PITEM(bh, ih));
713} 732}
714 733
734void check_internal(struct buffer_head *bh)
735{
736 if (!bh)
737 return;
738 check_internal_block_head(bh);
739}
715 740
716void print_statistics (struct super_block * s) 741void print_statistics(struct super_block *s)
717{ 742{
718 743
719 /* 744 /*
720 printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ 745 printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \
721bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", 746 bmap with search %d, without %d, dir2ind %d, ind2dir %d\n",
722 REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, 747 REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes,
723 REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, 748 REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search,
724 REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); 749 REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct);
725 */ 750 */
726 751
727} 752}
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e242ebc7f6f6..fc2f43c75df4 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -33,28 +33,27 @@
33static int show_version(struct seq_file *m, struct super_block *sb) 33static int show_version(struct seq_file *m, struct super_block *sb)
34{ 34{
35 char *format; 35 char *format;
36 36
37 if ( REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6) ) { 37 if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
38 format = "3.6"; 38 format = "3.6";
39 } else if ( REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5) ) { 39 } else if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5)) {
40 format = "3.5"; 40 format = "3.5";
41 } else { 41 } else {
42 format = "unknown"; 42 format = "unknown";
43 } 43 }
44 44
45 seq_printf(m, "%s format\twith checks %s\n", 45 seq_printf(m, "%s format\twith checks %s\n", format,
46 format,
47#if defined( CONFIG_REISERFS_CHECK ) 46#if defined( CONFIG_REISERFS_CHECK )
48 "on" 47 "on"
49#else 48#else
50 "off" 49 "off"
51#endif 50#endif
52 ); 51 );
53 return 0; 52 return 0;
54} 53}
55 54
56int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, 55int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
57 int count, int *eof, void *data ) 56 int count, int *eof, void *data)
58{ 57{
59 *start = buffer; 58 *start = buffer;
60 *eof = 1; 59 *eof = 1;
@@ -79,87 +78,68 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset,
79 78
80#define DJF( x ) le32_to_cpu( rs -> x ) 79#define DJF( x ) le32_to_cpu( rs -> x )
81#define DJV( x ) le32_to_cpu( s_v1 -> x ) 80#define DJV( x ) le32_to_cpu( s_v1 -> x )
82#define DJP( x ) le32_to_cpu( jp -> x ) 81#define DJP( x ) le32_to_cpu( jp -> x )
83#define JF( x ) ( r -> s_journal -> x ) 82#define JF( x ) ( r -> s_journal -> x )
84 83
85static int show_super(struct seq_file *m, struct super_block *sb) 84static int show_super(struct seq_file *m, struct super_block *sb)
86{ 85{
87 struct reiserfs_sb_info *r = REISERFS_SB(sb); 86 struct reiserfs_sb_info *r = REISERFS_SB(sb);
88 87
89 seq_printf(m, "state: \t%s\n" 88 seq_printf(m, "state: \t%s\n"
90 "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" 89 "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n"
91 "gen. counter: \t%i\n" 90 "gen. counter: \t%i\n"
92 "s_kmallocs: \t%i\n" 91 "s_kmallocs: \t%i\n"
93 "s_disk_reads: \t%i\n" 92 "s_disk_reads: \t%i\n"
94 "s_disk_writes: \t%i\n" 93 "s_disk_writes: \t%i\n"
95 "s_fix_nodes: \t%i\n" 94 "s_fix_nodes: \t%i\n"
96 "s_do_balance: \t%i\n" 95 "s_do_balance: \t%i\n"
97 "s_unneeded_left_neighbor: \t%i\n" 96 "s_unneeded_left_neighbor: \t%i\n"
98 "s_good_search_by_key_reada: \t%i\n" 97 "s_good_search_by_key_reada: \t%i\n"
99 "s_bmaps: \t%i\n" 98 "s_bmaps: \t%i\n"
100 "s_bmaps_without_search: \t%i\n" 99 "s_bmaps_without_search: \t%i\n"
101 "s_direct2indirect: \t%i\n" 100 "s_direct2indirect: \t%i\n"
102 "s_indirect2direct: \t%i\n" 101 "s_indirect2direct: \t%i\n"
103 "\n" 102 "\n"
104 "max_hash_collisions: \t%i\n" 103 "max_hash_collisions: \t%i\n"
105 104 "breads: \t%lu\n"
106 "breads: \t%lu\n" 105 "bread_misses: \t%lu\n"
107 "bread_misses: \t%lu\n" 106 "search_by_key: \t%lu\n"
108 107 "search_by_key_fs_changed: \t%lu\n"
109 "search_by_key: \t%lu\n" 108 "search_by_key_restarted: \t%lu\n"
110 "search_by_key_fs_changed: \t%lu\n" 109 "insert_item_restarted: \t%lu\n"
111 "search_by_key_restarted: \t%lu\n" 110 "paste_into_item_restarted: \t%lu\n"
112 111 "cut_from_item_restarted: \t%lu\n"
113 "insert_item_restarted: \t%lu\n" 112 "delete_solid_item_restarted: \t%lu\n"
114 "paste_into_item_restarted: \t%lu\n" 113 "delete_item_restarted: \t%lu\n"
115 "cut_from_item_restarted: \t%lu\n" 114 "leaked_oid: \t%lu\n"
116 "delete_solid_item_restarted: \t%lu\n" 115 "leaves_removable: \t%lu\n",
117 "delete_item_restarted: \t%lu\n" 116 SF(s_mount_state) == REISERFS_VALID_FS ?
118 117 "REISERFS_VALID_FS" : "REISERFS_ERROR_FS",
119 "leaked_oid: \t%lu\n" 118 reiserfs_r5_hash(sb) ? "FORCE_R5 " : "",
120 "leaves_removable: \t%lu\n", 119 reiserfs_rupasov_hash(sb) ? "FORCE_RUPASOV " : "",
121 120 reiserfs_tea_hash(sb) ? "FORCE_TEA " : "",
122 SF( s_mount_state ) == REISERFS_VALID_FS ? 121 reiserfs_hash_detect(sb) ? "DETECT_HASH " : "",
123 "REISERFS_VALID_FS" : "REISERFS_ERROR_FS", 122 reiserfs_no_border(sb) ? "NO_BORDER " : "BORDER ",
124 reiserfs_r5_hash( sb ) ? "FORCE_R5 " : "", 123 reiserfs_no_unhashed_relocation(sb) ?
125 reiserfs_rupasov_hash( sb ) ? "FORCE_RUPASOV " : "", 124 "NO_UNHASHED_RELOCATION " : "",
126 reiserfs_tea_hash( sb ) ? "FORCE_TEA " : "", 125 reiserfs_hashed_relocation(sb) ? "UNHASHED_RELOCATION " : "",
127 reiserfs_hash_detect( sb ) ? "DETECT_HASH " : "", 126 reiserfs_test4(sb) ? "TEST4 " : "",
128 reiserfs_no_border( sb ) ? "NO_BORDER " : "BORDER ", 127 have_large_tails(sb) ? "TAILS " : have_small_tails(sb) ?
129 reiserfs_no_unhashed_relocation( sb ) ? "NO_UNHASHED_RELOCATION " : "", 128 "SMALL_TAILS " : "NO_TAILS ",
130 reiserfs_hashed_relocation( sb ) ? "UNHASHED_RELOCATION " : "", 129 replay_only(sb) ? "REPLAY_ONLY " : "",
131 reiserfs_test4( sb ) ? "TEST4 " : "", 130 convert_reiserfs(sb) ? "CONV " : "",
132 have_large_tails( sb ) ? "TAILS " : have_small_tails(sb)?"SMALL_TAILS ":"NO_TAILS ", 131 atomic_read(&r->s_generation_counter), SF(s_kmallocs),
133 replay_only( sb ) ? "REPLAY_ONLY " : "", 132 SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes),
134 convert_reiserfs( sb ) ? "CONV " : "", 133 SF(s_do_balance), SF(s_unneeded_left_neighbor),
135 134 SF(s_good_search_by_key_reada), SF(s_bmaps),
136 atomic_read( &r -> s_generation_counter ), 135 SF(s_bmaps_without_search), SF(s_direct2indirect),
137 SF( s_kmallocs ), 136 SF(s_indirect2direct), SFP(max_hash_collisions), SFP(breads),
138 SF( s_disk_reads ), 137 SFP(bread_miss), SFP(search_by_key),
139 SF( s_disk_writes ), 138 SFP(search_by_key_fs_changed), SFP(search_by_key_restarted),
140 SF( s_fix_nodes ), 139 SFP(insert_item_restarted), SFP(paste_into_item_restarted),
141 SF( s_do_balance ), 140 SFP(cut_from_item_restarted),
142 SF( s_unneeded_left_neighbor ), 141 SFP(delete_solid_item_restarted), SFP(delete_item_restarted),
143 SF( s_good_search_by_key_reada ), 142 SFP(leaked_oid), SFP(leaves_removable));
144 SF( s_bmaps ),
145 SF( s_bmaps_without_search ),
146 SF( s_direct2indirect ),
147 SF( s_indirect2direct ),
148 SFP( max_hash_collisions ),
149 SFP( breads ),
150 SFP( bread_miss ),
151 SFP( search_by_key ),
152 SFP( search_by_key_fs_changed ),
153 SFP( search_by_key_restarted ),
154
155 SFP( insert_item_restarted ),
156 SFP( paste_into_item_restarted ),
157 SFP( cut_from_item_restarted ),
158 SFP( delete_solid_item_restarted ),
159 SFP( delete_item_restarted ),
160
161 SFP( leaked_oid ),
162 SFP( leaves_removable ) );
163 143
164 return 0; 144 return 0;
165} 145}
@@ -169,61 +149,55 @@ static int show_per_level(struct seq_file *m, struct super_block *sb)
169 struct reiserfs_sb_info *r = REISERFS_SB(sb); 149 struct reiserfs_sb_info *r = REISERFS_SB(sb);
170 int level; 150 int level;
171 151
172 seq_printf(m, "level\t" 152 seq_printf(m, "level\t"
173 " balances" 153 " balances"
174 " [sbk: reads" 154 " [sbk: reads"
175 " fs_changed" 155 " fs_changed"
176 " restarted]" 156 " restarted]"
177 " free space" 157 " free space"
178 " items" 158 " items"
179 " can_remove" 159 " can_remove"
180 " lnum" 160 " lnum"
181 " rnum" 161 " rnum"
182 " lbytes" 162 " lbytes"
183 " rbytes" 163 " rbytes"
184 " get_neig" 164 " get_neig"
185 " get_neig_res" 165 " get_neig_res" " need_l_neig" " need_r_neig" "\n");
186 " need_l_neig" 166
187 " need_r_neig" 167 for (level = 0; level < MAX_HEIGHT; ++level) {
188 "\n" 168 seq_printf(m, "%i\t"
189 169 " %12lu"
190 ); 170 " %12lu"
191 171 " %12lu"
192 for( level = 0 ; level < MAX_HEIGHT ; ++ level ) { 172 " %12lu"
193 seq_printf(m, "%i\t" 173 " %12lu"
194 " %12lu" 174 " %12lu"
195 " %12lu" 175 " %12lu"
196 " %12lu" 176 " %12li"
197 " %12lu" 177 " %12li"
198 " %12lu" 178 " %12li"
199 " %12lu" 179 " %12li"
200 " %12lu" 180 " %12lu"
201 " %12li" 181 " %12lu"
202 " %12li" 182 " %12lu"
203 " %12li" 183 " %12lu"
204 " %12li" 184 "\n",
205 " %12lu" 185 level,
206 " %12lu" 186 SFPL(balance_at),
207 " %12lu" 187 SFPL(sbk_read_at),
208 " %12lu" 188 SFPL(sbk_fs_changed),
209 "\n", 189 SFPL(sbk_restarted),
210 level, 190 SFPL(free_at),
211 SFPL( balance_at ), 191 SFPL(items_at),
212 SFPL( sbk_read_at ), 192 SFPL(can_node_be_removed),
213 SFPL( sbk_fs_changed ), 193 SFPL(lnum),
214 SFPL( sbk_restarted ), 194 SFPL(rnum),
215 SFPL( free_at ), 195 SFPL(lbytes),
216 SFPL( items_at ), 196 SFPL(rbytes),
217 SFPL( can_node_be_removed ), 197 SFPL(get_neighbors),
218 SFPL( lnum ), 198 SFPL(get_neighbors_restart),
219 SFPL( rnum ), 199 SFPL(need_l_neighbor), SFPL(need_r_neighbor)
220 SFPL( lbytes ), 200 );
221 SFPL( rbytes ),
222 SFPL( get_neighbors ),
223 SFPL( get_neighbors_restart ),
224 SFPL( need_l_neighbor ),
225 SFPL( need_r_neighbor )
226 );
227 } 201 }
228 return 0; 202 return 0;
229} 203}
@@ -232,31 +206,30 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
232{ 206{
233 struct reiserfs_sb_info *r = REISERFS_SB(sb); 207 struct reiserfs_sb_info *r = REISERFS_SB(sb);
234 208
235 seq_printf(m, "free_block: %lu\n" 209 seq_printf(m, "free_block: %lu\n"
236 " scan_bitmap:" 210 " scan_bitmap:"
237 " wait" 211 " wait"
238 " bmap" 212 " bmap"
239 " retry" 213 " retry"
240 " stolen" 214 " stolen"
241 " journal_hint" 215 " journal_hint"
242 "journal_nohint" 216 "journal_nohint"
243 "\n" 217 "\n"
244 " %14lu" 218 " %14lu"
245 " %14lu" 219 " %14lu"
246 " %14lu" 220 " %14lu"
247 " %14lu" 221 " %14lu"
248 " %14lu" 222 " %14lu"
249 " %14lu" 223 " %14lu"
250 " %14lu" 224 " %14lu"
251 "\n", 225 "\n",
252 SFP( free_block ), 226 SFP(free_block),
253 SFPF( call ), 227 SFPF(call),
254 SFPF( wait ), 228 SFPF(wait),
255 SFPF( bmap ), 229 SFPF(bmap),
256 SFPF( retry ), 230 SFPF(retry),
257 SFPF( stolen ), 231 SFPF(stolen),
258 SFPF( in_journal_hint ), 232 SFPF(in_journal_hint), SFPF(in_journal_nohint));
259 SFPF( in_journal_nohint ) );
260 233
261 return 0; 234 return 0;
262} 235}
@@ -264,46 +237,42 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
264static int show_on_disk_super(struct seq_file *m, struct super_block *sb) 237static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
265{ 238{
266 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); 239 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
267 struct reiserfs_super_block *rs = sb_info -> s_rs; 240 struct reiserfs_super_block *rs = sb_info->s_rs;
268 int hash_code = DFL( s_hash_function_code ); 241 int hash_code = DFL(s_hash_function_code);
269 __u32 flags = DJF( s_flags ); 242 __u32 flags = DJF(s_flags);
270 243
271 seq_printf(m, "block_count: \t%i\n" 244 seq_printf(m, "block_count: \t%i\n"
272 "free_blocks: \t%i\n" 245 "free_blocks: \t%i\n"
273 "root_block: \t%i\n" 246 "root_block: \t%i\n"
274 "blocksize: \t%i\n" 247 "blocksize: \t%i\n"
275 "oid_maxsize: \t%i\n" 248 "oid_maxsize: \t%i\n"
276 "oid_cursize: \t%i\n" 249 "oid_cursize: \t%i\n"
277 "umount_state: \t%i\n" 250 "umount_state: \t%i\n"
278 "magic: \t%10.10s\n" 251 "magic: \t%10.10s\n"
279 "fs_state: \t%i\n" 252 "fs_state: \t%i\n"
280 "hash: \t%s\n" 253 "hash: \t%s\n"
281 "tree_height: \t%i\n" 254 "tree_height: \t%i\n"
282 "bmap_nr: \t%i\n" 255 "bmap_nr: \t%i\n"
283 "version: \t%i\n" 256 "version: \t%i\n"
284 "flags: \t%x[%s]\n" 257 "flags: \t%x[%s]\n"
285 "reserved_for_journal: \t%i\n", 258 "reserved_for_journal: \t%i\n",
286 259 DFL(s_block_count),
287 DFL( s_block_count ), 260 DFL(s_free_blocks),
288 DFL( s_free_blocks ), 261 DFL(s_root_block),
289 DFL( s_root_block ), 262 DF(s_blocksize),
290 DF( s_blocksize ), 263 DF(s_oid_maxsize),
291 DF( s_oid_maxsize ), 264 DF(s_oid_cursize),
292 DF( s_oid_cursize ), 265 DF(s_umount_state),
293 DF( s_umount_state ), 266 rs->s_v1.s_magic,
294 rs -> s_v1.s_magic, 267 DF(s_fs_state),
295 DF( s_fs_state ), 268 hash_code == TEA_HASH ? "tea" :
296 hash_code == TEA_HASH ? "tea" : 269 (hash_code == YURA_HASH) ? "rupasov" :
297 ( hash_code == YURA_HASH ) ? "rupasov" : 270 (hash_code == R5_HASH) ? "r5" :
298 ( hash_code == R5_HASH ) ? "r5" : 271 (hash_code == UNSET_HASH) ? "unset" : "unknown",
299 ( hash_code == UNSET_HASH ) ? "unset" : "unknown", 272 DF(s_tree_height),
300 DF( s_tree_height ), 273 DF(s_bmap_nr),
301 DF( s_bmap_nr ), 274 DF(s_version), flags, (flags & reiserfs_attrs_cleared)
302 DF( s_version ), 275 ? "attrs_cleared" : "", DF(s_reserved_for_journal));
303 flags,
304 ( flags & reiserfs_attrs_cleared )
305 ? "attrs_cleared" : "",
306 DF (s_reserved_for_journal));
307 276
308 return 0; 277 return 0;
309} 278}
@@ -311,131 +280,122 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
311static int show_oidmap(struct seq_file *m, struct super_block *sb) 280static int show_oidmap(struct seq_file *m, struct super_block *sb)
312{ 281{
313 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); 282 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
314 struct reiserfs_super_block *rs = sb_info -> s_rs; 283 struct reiserfs_super_block *rs = sb_info->s_rs;
315 unsigned int mapsize = le16_to_cpu( rs -> s_v1.s_oid_cursize ); 284 unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
316 unsigned long total_used = 0; 285 unsigned long total_used = 0;
317 int i; 286 int i;
318 287
319 for( i = 0 ; i < mapsize ; ++i ) { 288 for (i = 0; i < mapsize; ++i) {
320 __u32 right; 289 __u32 right;
321 290
322 right = ( i == mapsize - 1 ) ? MAX_KEY_OBJECTID : MAP( i + 1 ); 291 right = (i == mapsize - 1) ? MAX_KEY_OBJECTID : MAP(i + 1);
323 seq_printf(m, "%s: [ %x .. %x )\n", 292 seq_printf(m, "%s: [ %x .. %x )\n",
324 ( i & 1 ) ? "free" : "used", MAP( i ), right ); 293 (i & 1) ? "free" : "used", MAP(i), right);
325 if( ! ( i & 1 ) ) { 294 if (!(i & 1)) {
326 total_used += right - MAP( i ); 295 total_used += right - MAP(i);
327 } 296 }
328 } 297 }
329#if defined( REISERFS_USE_OIDMAPF ) 298#if defined( REISERFS_USE_OIDMAPF )
330 if( sb_info -> oidmap.use_file && ( sb_info -> oidmap.mapf != NULL ) ) { 299 if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
331 loff_t size = sb_info->oidmap.mapf->f_dentry->d_inode->i_size; 300 loff_t size = sb_info->oidmap.mapf->f_dentry->d_inode->i_size;
332 total_used += size / sizeof( reiserfs_oidinterval_d_t ); 301 total_used += size / sizeof(reiserfs_oidinterval_d_t);
333 } 302 }
334#endif 303#endif
335 seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n", 304 seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n",
336 mapsize, 305 mapsize,
337 mapsize, le16_to_cpu( rs -> s_v1.s_oid_maxsize ), 306 mapsize, le16_to_cpu(rs->s_v1.s_oid_maxsize), total_used);
338 total_used);
339 return 0; 307 return 0;
340} 308}
341 309
342static int show_journal(struct seq_file *m, struct super_block *sb) 310static int show_journal(struct seq_file *m, struct super_block *sb)
343{ 311{
344 struct reiserfs_sb_info *r = REISERFS_SB(sb); 312 struct reiserfs_sb_info *r = REISERFS_SB(sb);
345 struct reiserfs_super_block *rs = r -> s_rs; 313 struct reiserfs_super_block *rs = r->s_rs;
346 struct journal_params *jp = &rs->s_v1.s_journal; 314 struct journal_params *jp = &rs->s_v1.s_journal;
347 char b[BDEVNAME_SIZE]; 315 char b[BDEVNAME_SIZE];
348 316
349 317 seq_printf(m, /* on-disk fields */
350 seq_printf(m, /* on-disk fields */ 318 "jp_journal_1st_block: \t%i\n"
351 "jp_journal_1st_block: \t%i\n" 319 "jp_journal_dev: \t%s[%x]\n"
352 "jp_journal_dev: \t%s[%x]\n" 320 "jp_journal_size: \t%i\n"
353 "jp_journal_size: \t%i\n" 321 "jp_journal_trans_max: \t%i\n"
354 "jp_journal_trans_max: \t%i\n" 322 "jp_journal_magic: \t%i\n"
355 "jp_journal_magic: \t%i\n" 323 "jp_journal_max_batch: \t%i\n"
356 "jp_journal_max_batch: \t%i\n" 324 "jp_journal_max_commit_age: \t%i\n"
357 "jp_journal_max_commit_age: \t%i\n" 325 "jp_journal_max_trans_age: \t%i\n"
358 "jp_journal_max_trans_age: \t%i\n" 326 /* incore fields */
359 /* incore fields */ 327 "j_1st_reserved_block: \t%i\n"
360 "j_1st_reserved_block: \t%i\n" 328 "j_state: \t%li\n"
361 "j_state: \t%li\n" 329 "j_trans_id: \t%lu\n"
362 "j_trans_id: \t%lu\n" 330 "j_mount_id: \t%lu\n"
363 "j_mount_id: \t%lu\n" 331 "j_start: \t%lu\n"
364 "j_start: \t%lu\n" 332 "j_len: \t%lu\n"
365 "j_len: \t%lu\n" 333 "j_len_alloc: \t%lu\n"
366 "j_len_alloc: \t%lu\n" 334 "j_wcount: \t%i\n"
367 "j_wcount: \t%i\n" 335 "j_bcount: \t%lu\n"
368 "j_bcount: \t%lu\n" 336 "j_first_unflushed_offset: \t%lu\n"
369 "j_first_unflushed_offset: \t%lu\n" 337 "j_last_flush_trans_id: \t%lu\n"
370 "j_last_flush_trans_id: \t%lu\n" 338 "j_trans_start_time: \t%li\n"
371 "j_trans_start_time: \t%li\n" 339 "j_list_bitmap_index: \t%i\n"
372 "j_list_bitmap_index: \t%i\n" 340 "j_must_wait: \t%i\n"
373 "j_must_wait: \t%i\n" 341 "j_next_full_flush: \t%i\n"
374 "j_next_full_flush: \t%i\n" 342 "j_next_async_flush: \t%i\n"
375 "j_next_async_flush: \t%i\n" 343 "j_cnode_used: \t%i\n" "j_cnode_free: \t%i\n" "\n"
376 "j_cnode_used: \t%i\n" 344 /* reiserfs_proc_info_data_t.journal fields */
377 "j_cnode_free: \t%i\n" 345 "in_journal: \t%12lu\n"
378 "\n" 346 "in_journal_bitmap: \t%12lu\n"
379 /* reiserfs_proc_info_data_t.journal fields */ 347 "in_journal_reusable: \t%12lu\n"
380 "in_journal: \t%12lu\n" 348 "lock_journal: \t%12lu\n"
381 "in_journal_bitmap: \t%12lu\n" 349 "lock_journal_wait: \t%12lu\n"
382 "in_journal_reusable: \t%12lu\n" 350 "journal_begin: \t%12lu\n"
383 "lock_journal: \t%12lu\n" 351 "journal_relock_writers: \t%12lu\n"
384 "lock_journal_wait: \t%12lu\n" 352 "journal_relock_wcount: \t%12lu\n"
385 "journal_begin: \t%12lu\n" 353 "mark_dirty: \t%12lu\n"
386 "journal_relock_writers: \t%12lu\n" 354 "mark_dirty_already: \t%12lu\n"
387 "journal_relock_wcount: \t%12lu\n" 355 "mark_dirty_notjournal: \t%12lu\n"
388 "mark_dirty: \t%12lu\n" 356 "restore_prepared: \t%12lu\n"
389 "mark_dirty_already: \t%12lu\n" 357 "prepare: \t%12lu\n"
390 "mark_dirty_notjournal: \t%12lu\n" 358 "prepare_retry: \t%12lu\n",
391 "restore_prepared: \t%12lu\n" 359 DJP(jp_journal_1st_block),
392 "prepare: \t%12lu\n" 360 bdevname(SB_JOURNAL(sb)->j_dev_bd, b),
393 "prepare_retry: \t%12lu\n", 361 DJP(jp_journal_dev),
394 362 DJP(jp_journal_size),
395 DJP( jp_journal_1st_block ), 363 DJP(jp_journal_trans_max),
396 bdevname(SB_JOURNAL(sb)->j_dev_bd, b), 364 DJP(jp_journal_magic),
397 DJP( jp_journal_dev ), 365 DJP(jp_journal_max_batch),
398 DJP( jp_journal_size ), 366 SB_JOURNAL(sb)->j_max_commit_age,
399 DJP( jp_journal_trans_max ), 367 DJP(jp_journal_max_trans_age),
400 DJP( jp_journal_magic ), 368 JF(j_1st_reserved_block),
401 DJP( jp_journal_max_batch ), 369 JF(j_state),
402 SB_JOURNAL(sb)->j_max_commit_age, 370 JF(j_trans_id),
403 DJP( jp_journal_max_trans_age ), 371 JF(j_mount_id),
404 372 JF(j_start),
405 JF( j_1st_reserved_block ), 373 JF(j_len),
406 JF( j_state ), 374 JF(j_len_alloc),
407 JF( j_trans_id ), 375 atomic_read(&r->s_journal->j_wcount),
408 JF( j_mount_id ), 376 JF(j_bcount),
409 JF( j_start ), 377 JF(j_first_unflushed_offset),
410 JF( j_len ), 378 JF(j_last_flush_trans_id),
411 JF( j_len_alloc ), 379 JF(j_trans_start_time),
412 atomic_read( & r -> s_journal -> j_wcount ), 380 JF(j_list_bitmap_index),
413 JF( j_bcount ), 381 JF(j_must_wait),
414 JF( j_first_unflushed_offset ), 382 JF(j_next_full_flush),
415 JF( j_last_flush_trans_id ), 383 JF(j_next_async_flush),
416 JF( j_trans_start_time ), 384 JF(j_cnode_used),
417 JF( j_list_bitmap_index ), 385 JF(j_cnode_free),
418 JF( j_must_wait ), 386 SFPJ(in_journal),
419 JF( j_next_full_flush ), 387 SFPJ(in_journal_bitmap),
420 JF( j_next_async_flush ), 388 SFPJ(in_journal_reusable),
421 JF( j_cnode_used ), 389 SFPJ(lock_journal),
422 JF( j_cnode_free ), 390 SFPJ(lock_journal_wait),
423 391 SFPJ(journal_being),
424 SFPJ( in_journal ), 392 SFPJ(journal_relock_writers),
425 SFPJ( in_journal_bitmap ), 393 SFPJ(journal_relock_wcount),
426 SFPJ( in_journal_reusable ), 394 SFPJ(mark_dirty),
427 SFPJ( lock_journal ), 395 SFPJ(mark_dirty_already),
428 SFPJ( lock_journal_wait ), 396 SFPJ(mark_dirty_notjournal),
429 SFPJ( journal_being ), 397 SFPJ(restore_prepared), SFPJ(prepare), SFPJ(prepare_retry)
430 SFPJ( journal_relock_writers ), 398 );
431 SFPJ( journal_relock_wcount ),
432 SFPJ( mark_dirty ),
433 SFPJ( mark_dirty_already ),
434 SFPJ( mark_dirty_notjournal ),
435 SFPJ( restore_prepared ),
436 SFPJ( prepare ),
437 SFPJ( prepare_retry )
438 );
439 return 0; 399 return 0;
440} 400}
441 401
@@ -450,7 +410,7 @@ static int set_sb(struct super_block *sb, void *data)
450 return -ENOENT; 410 return -ENOENT;
451} 411}
452 412
453static void *r_start(struct seq_file *m, loff_t *pos) 413static void *r_start(struct seq_file *m, loff_t * pos)
454{ 414{
455 struct proc_dir_entry *de = m->private; 415 struct proc_dir_entry *de = m->private;
456 struct super_block *s = de->parent->data; 416 struct super_block *s = de->parent->data;
@@ -472,7 +432,7 @@ static void *r_start(struct seq_file *m, loff_t *pos)
472 return s; 432 return s;
473} 433}
474 434
475static void *r_next(struct seq_file *m, void *v, loff_t *pos) 435static void *r_next(struct seq_file *m, void *v, loff_t * pos)
476{ 436{
477 ++*pos; 437 ++*pos;
478 if (v) 438 if (v)
@@ -489,7 +449,7 @@ static void r_stop(struct seq_file *m, void *v)
489static int r_show(struct seq_file *m, void *v) 449static int r_show(struct seq_file *m, void *v)
490{ 450{
491 struct proc_dir_entry *de = m->private; 451 struct proc_dir_entry *de = m->private;
492 int (*show)(struct seq_file *, struct super_block *) = de->data; 452 int (*show) (struct seq_file *, struct super_block *) = de->data;
493 return show(m, v); 453 return show(m, v);
494} 454}
495 455
@@ -512,17 +472,17 @@ static int r_open(struct inode *inode, struct file *file)
512} 472}
513 473
514static struct file_operations r_file_operations = { 474static struct file_operations r_file_operations = {
515 .open = r_open, 475 .open = r_open,
516 .read = seq_read, 476 .read = seq_read,
517 .llseek = seq_lseek, 477 .llseek = seq_lseek,
518 .release = seq_release, 478 .release = seq_release,
519}; 479};
520 480
521static struct proc_dir_entry *proc_info_root = NULL; 481static struct proc_dir_entry *proc_info_root = NULL;
522static const char proc_info_root_name[] = "fs/reiserfs"; 482static const char proc_info_root_name[] = "fs/reiserfs";
523 483
524static void add_file(struct super_block *sb, char *name, 484static void add_file(struct super_block *sb, char *name,
525 int (*func)(struct seq_file *, struct super_block *)) 485 int (*func) (struct seq_file *, struct super_block *))
526{ 486{
527 struct proc_dir_entry *de; 487 struct proc_dir_entry *de;
528 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 488 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir);
@@ -532,11 +492,12 @@ static void add_file(struct super_block *sb, char *name,
532 } 492 }
533} 493}
534 494
535int reiserfs_proc_info_init( struct super_block *sb ) 495int reiserfs_proc_info_init(struct super_block *sb)
536{ 496{
537 spin_lock_init( & __PINFO( sb ).lock ); 497 spin_lock_init(&__PINFO(sb).lock);
538 REISERFS_SB(sb)->procdir = proc_mkdir(reiserfs_bdevname (sb), proc_info_root); 498 REISERFS_SB(sb)->procdir =
539 if( REISERFS_SB(sb)->procdir ) { 499 proc_mkdir(reiserfs_bdevname(sb), proc_info_root);
500 if (REISERFS_SB(sb)->procdir) {
540 REISERFS_SB(sb)->procdir->owner = THIS_MODULE; 501 REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
541 REISERFS_SB(sb)->procdir->data = sb; 502 REISERFS_SB(sb)->procdir->data = sb;
542 add_file(sb, "version", show_version); 503 add_file(sb, "version", show_version);
@@ -549,11 +510,11 @@ int reiserfs_proc_info_init( struct super_block *sb )
549 return 0; 510 return 0;
550 } 511 }
551 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", 512 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s",
552 proc_info_root_name, reiserfs_bdevname (sb) ); 513 proc_info_root_name, reiserfs_bdevname(sb));
553 return 1; 514 return 1;
554} 515}
555 516
556int reiserfs_proc_info_done( struct super_block *sb ) 517int reiserfs_proc_info_done(struct super_block *sb)
557{ 518{
558 struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; 519 struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
559 if (de) { 520 if (de) {
@@ -565,48 +526,48 @@ int reiserfs_proc_info_done( struct super_block *sb )
565 remove_proc_entry("super", de); 526 remove_proc_entry("super", de);
566 remove_proc_entry("version", de); 527 remove_proc_entry("version", de);
567 } 528 }
568 spin_lock( & __PINFO( sb ).lock ); 529 spin_lock(&__PINFO(sb).lock);
569 __PINFO( sb ).exiting = 1; 530 __PINFO(sb).exiting = 1;
570 spin_unlock( & __PINFO( sb ).lock ); 531 spin_unlock(&__PINFO(sb).lock);
571 if ( proc_info_root ) { 532 if (proc_info_root) {
572 remove_proc_entry( reiserfs_bdevname (sb), proc_info_root ); 533 remove_proc_entry(reiserfs_bdevname(sb), proc_info_root);
573 REISERFS_SB(sb)->procdir = NULL; 534 REISERFS_SB(sb)->procdir = NULL;
574 } 535 }
575 return 0; 536 return 0;
576} 537}
577 538
578struct proc_dir_entry *reiserfs_proc_register_global( char *name, 539struct proc_dir_entry *reiserfs_proc_register_global(char *name,
579 read_proc_t *func ) 540 read_proc_t * func)
580{ 541{
581 return ( proc_info_root ) ? create_proc_read_entry( name, 0, 542 return (proc_info_root) ? create_proc_read_entry(name, 0,
582 proc_info_root, 543 proc_info_root,
583 func, NULL ) : NULL; 544 func, NULL) : NULL;
584} 545}
585 546
586void reiserfs_proc_unregister_global( const char *name ) 547void reiserfs_proc_unregister_global(const char *name)
587{ 548{
588 remove_proc_entry( name, proc_info_root ); 549 remove_proc_entry(name, proc_info_root);
589} 550}
590 551
591int reiserfs_proc_info_global_init( void ) 552int reiserfs_proc_info_global_init(void)
592{ 553{
593 if( proc_info_root == NULL ) { 554 if (proc_info_root == NULL) {
594 proc_info_root = proc_mkdir(proc_info_root_name, NULL); 555 proc_info_root = proc_mkdir(proc_info_root_name, NULL);
595 if( proc_info_root ) { 556 if (proc_info_root) {
596 proc_info_root -> owner = THIS_MODULE; 557 proc_info_root->owner = THIS_MODULE;
597 } else { 558 } else {
598 reiserfs_warning (NULL, 559 reiserfs_warning(NULL,
599 "reiserfs: cannot create /proc/%s", 560 "reiserfs: cannot create /proc/%s",
600 proc_info_root_name ); 561 proc_info_root_name);
601 return 1; 562 return 1;
602 } 563 }
603 } 564 }
604 return 0; 565 return 0;
605} 566}
606 567
607int reiserfs_proc_info_global_done( void ) 568int reiserfs_proc_info_global_done(void)
608{ 569{
609 if ( proc_info_root != NULL ) { 570 if (proc_info_root != NULL) {
610 proc_info_root = NULL; 571 proc_info_root = NULL;
611 remove_proc_entry(proc_info_root_name, NULL); 572 remove_proc_entry(proc_info_root_name, NULL);
612 } 573 }
@@ -616,22 +577,40 @@ int reiserfs_proc_info_global_done( void )
616/* REISERFS_PROC_INFO */ 577/* REISERFS_PROC_INFO */
617#else 578#else
618 579
619int reiserfs_proc_info_init( struct super_block *sb ) { return 0; } 580int reiserfs_proc_info_init(struct super_block *sb)
620int reiserfs_proc_info_done( struct super_block *sb ) { return 0; } 581{
582 return 0;
583}
584int reiserfs_proc_info_done(struct super_block *sb)
585{
586 return 0;
587}
621 588
622struct proc_dir_entry *reiserfs_proc_register_global( char *name, 589struct proc_dir_entry *reiserfs_proc_register_global(char *name,
623 read_proc_t *func ) 590 read_proc_t * func)
624{ return NULL; } 591{
592 return NULL;
593}
625 594
626void reiserfs_proc_unregister_global( const char *name ) {;} 595void reiserfs_proc_unregister_global(const char *name)
596{;
597}
627 598
628int reiserfs_proc_info_global_init( void ) { return 0; } 599int reiserfs_proc_info_global_init(void)
629int reiserfs_proc_info_global_done( void ) { return 0; } 600{
601 return 0;
602}
603int reiserfs_proc_info_global_done(void)
604{
605 return 0;
606}
630 607
631int reiserfs_global_version_in_proc( char *buffer, char **start, 608int reiserfs_global_version_in_proc(char *buffer, char **start,
632 off_t offset, 609 off_t offset,
633 int count, int *eof, void *data ) 610 int count, int *eof, void *data)
634{ return 0; } 611{
612 return 0;
613}
635 614
636/* REISERFS_PROC_INFO */ 615/* REISERFS_PROC_INFO */
637#endif 616#endif
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 170012078b76..39cc7f47f5dc 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/* 5/*
6 * Written by Alexander Zarochentcev. 6 * Written by Alexander Zarochentcev.
7 * 7 *
@@ -17,23 +17,23 @@
17#include <linux/reiserfs_fs_sb.h> 17#include <linux/reiserfs_fs_sb.h>
18#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
19 19
20int reiserfs_resize (struct super_block * s, unsigned long block_count_new) 20int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
21{ 21{
22 int err = 0; 22 int err = 0;
23 struct reiserfs_super_block * sb; 23 struct reiserfs_super_block *sb;
24 struct reiserfs_bitmap_info *bitmap; 24 struct reiserfs_bitmap_info *bitmap;
25 struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s); 25 struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s);
26 struct buffer_head * bh; 26 struct buffer_head *bh;
27 struct reiserfs_transaction_handle th; 27 struct reiserfs_transaction_handle th;
28 unsigned int bmap_nr_new, bmap_nr; 28 unsigned int bmap_nr_new, bmap_nr;
29 unsigned int block_r_new, block_r; 29 unsigned int block_r_new, block_r;
30 30
31 struct reiserfs_list_bitmap * jb; 31 struct reiserfs_list_bitmap *jb;
32 struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; 32 struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS];
33 33
34 unsigned long int block_count, free_blocks; 34 unsigned long int block_count, free_blocks;
35 int i; 35 int i;
36 int copy_size ; 36 int copy_size;
37 37
38 sb = SB_DISK_SUPER_BLOCK(s); 38 sb = SB_DISK_SUPER_BLOCK(s);
39 39
@@ -47,136 +47,145 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new)
47 if (!bh) { 47 if (!bh) {
48 printk("reiserfs_resize: can\'t read last block\n"); 48 printk("reiserfs_resize: can\'t read last block\n");
49 return -EINVAL; 49 return -EINVAL;
50 } 50 }
51 bforget(bh); 51 bforget(bh);
52 52
53 /* old disk layout detection; those partitions can be mounted, but 53 /* old disk layout detection; those partitions can be mounted, but
54 * cannot be resized */ 54 * cannot be resized */
55 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size 55 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size
56 != REISERFS_DISK_OFFSET_IN_BYTES ) { 56 != REISERFS_DISK_OFFSET_IN_BYTES) {
57 printk("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); 57 printk
58 ("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n");
58 return -ENOTSUPP; 59 return -ENOTSUPP;
59 } 60 }
60 61
61 /* count used bits in last bitmap block */ 62 /* count used bits in last bitmap block */
62 block_r = SB_BLOCK_COUNT(s) - 63 block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8;
63 (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; 64
64
65 /* count bitmap blocks in new fs */ 65 /* count bitmap blocks in new fs */
66 bmap_nr_new = block_count_new / ( s->s_blocksize * 8 ); 66 bmap_nr_new = block_count_new / (s->s_blocksize * 8);
67 block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; 67 block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8;
68 if (block_r_new) 68 if (block_r_new)
69 bmap_nr_new++; 69 bmap_nr_new++;
70 else 70 else
71 block_r_new = s->s_blocksize * 8; 71 block_r_new = s->s_blocksize * 8;
72 72
73 /* save old values */ 73 /* save old values */
74 block_count = SB_BLOCK_COUNT(s); 74 block_count = SB_BLOCK_COUNT(s);
75 bmap_nr = SB_BMAP_NR(s); 75 bmap_nr = SB_BMAP_NR(s);
76 76
77 /* resizing of reiserfs bitmaps (journal and real), if needed */ 77 /* resizing of reiserfs bitmaps (journal and real), if needed */
78 if (bmap_nr_new > bmap_nr) { 78 if (bmap_nr_new > bmap_nr) {
79 /* reallocate journal bitmaps */ 79 /* reallocate journal bitmaps */
80 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { 80 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
81 printk("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 81 printk
82 unlock_super(s) ; 82 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
83 return -ENOMEM ; 83 unlock_super(s);
84 } 84 return -ENOMEM;
85 /* the new journal bitmaps are zero filled, now we copy in the bitmap 85 }
86 ** node pointers from the old journal bitmap structs, and then 86 /* the new journal bitmaps are zero filled, now we copy in the bitmap
87 ** transfer the new data structures into the journal struct. 87 ** node pointers from the old journal bitmap structs, and then
88 ** 88 ** transfer the new data structures into the journal struct.
89 ** using the copy_size var below allows this code to work for 89 **
90 ** both shrinking and expanding the FS. 90 ** using the copy_size var below allows this code to work for
91 */ 91 ** both shrinking and expanding the FS.
92 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr ; 92 */
93 copy_size = copy_size * sizeof(struct reiserfs_list_bitmap_node *) ; 93 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
94 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 94 copy_size =
95 struct reiserfs_bitmap_node **node_tmp ; 95 copy_size * sizeof(struct reiserfs_list_bitmap_node *);
96 jb = SB_JOURNAL(s)->j_list_bitmap + i ; 96 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
97 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size) ; 97 struct reiserfs_bitmap_node **node_tmp;
98 98 jb = SB_JOURNAL(s)->j_list_bitmap + i;
99 /* just in case vfree schedules on us, copy the new 99 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
100 ** pointer into the journal struct before freeing the 100
101 ** old one 101 /* just in case vfree schedules on us, copy the new
102 */ 102 ** pointer into the journal struct before freeing the
103 node_tmp = jb->bitmaps ; 103 ** old one
104 jb->bitmaps = jbitmap[i].bitmaps ; 104 */
105 vfree(node_tmp) ; 105 node_tmp = jb->bitmaps;
106 } 106 jb->bitmaps = jbitmap[i].bitmaps;
107 107 vfree(node_tmp);
108 /* allocate additional bitmap blocks, reallocate array of bitmap 108 }
109 * block pointers */ 109
110 bitmap = vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); 110 /* allocate additional bitmap blocks, reallocate array of bitmap
111 if (!bitmap) { 111 * block pointers */
112 /* Journal bitmaps are still supersized, but the memory isn't 112 bitmap =
113 * leaked, so I guess it's ok */ 113 vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
114 printk("reiserfs_resize: unable to allocate memory.\n"); 114 if (!bitmap) {
115 return -ENOMEM; 115 /* Journal bitmaps are still supersized, but the memory isn't
116 } 116 * leaked, so I guess it's ok */
117 memset (bitmap, 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 117 printk("reiserfs_resize: unable to allocate memory.\n");
118 for (i = 0; i < bmap_nr; i++) 118 return -ENOMEM;
119 bitmap[i] = old_bitmap[i]; 119 }
120 120 memset(bitmap, 0,
121 /* This doesn't go through the journal, but it doesn't have to. 121 sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
122 * The changes are still atomic: We're synced up when the journal 122 for (i = 0; i < bmap_nr; i++)
123 * transaction begins, and the new bitmaps don't matter if the 123 bitmap[i] = old_bitmap[i];
124 * transaction fails. */ 124
125 for (i = bmap_nr; i < bmap_nr_new; i++) { 125 /* This doesn't go through the journal, but it doesn't have to.
126 bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8); 126 * The changes are still atomic: We're synced up when the journal
127 memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb)); 127 * transaction begins, and the new bitmaps don't matter if the
128 reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data); 128 * transaction fails. */
129 129 for (i = bmap_nr; i < bmap_nr_new; i++) {
130 set_buffer_uptodate(bitmap[i].bh); 130 bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8);
131 mark_buffer_dirty(bitmap[i].bh) ; 131 memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb));
132 sync_dirty_buffer(bitmap[i].bh); 132 reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data);
133 // update bitmap_info stuff 133
134 bitmap[i].first_zero_hint=1; 134 set_buffer_uptodate(bitmap[i].bh);
135 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 135 mark_buffer_dirty(bitmap[i].bh);
136 } 136 sync_dirty_buffer(bitmap[i].bh);
137 /* free old bitmap blocks array */ 137 // update bitmap_info stuff
138 SB_AP_BITMAP(s) = bitmap; 138 bitmap[i].first_zero_hint = 1;
139 vfree (old_bitmap); 139 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
140 }
141 /* free old bitmap blocks array */
142 SB_AP_BITMAP(s) = bitmap;
143 vfree(old_bitmap);
140 } 144 }
141 145
142 /* begin transaction, if there was an error, it's fine. Yes, we have 146 /* begin transaction, if there was an error, it's fine. Yes, we have
143 * incorrect bitmaps now, but none of it is ever going to touch the 147 * incorrect bitmaps now, but none of it is ever going to touch the
144 * disk anyway. */ 148 * disk anyway. */
145 err = journal_begin(&th, s, 10); 149 err = journal_begin(&th, s, 10);
146 if (err) 150 if (err)
147 return err; 151 return err;
148 152
149 /* correct last bitmap blocks in old and new disk layout */ 153 /* correct last bitmap blocks in old and new disk layout */
150 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1); 154 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1);
151 for (i = block_r; i < s->s_blocksize * 8; i++) 155 for (i = block_r; i < s->s_blocksize * 8; i++)
152 reiserfs_test_and_clear_le_bit(i, 156 reiserfs_test_and_clear_le_bit(i,
153 SB_AP_BITMAP(s)[bmap_nr - 1].bh->b_data); 157 SB_AP_BITMAP(s)[bmap_nr -
158 1].bh->b_data);
154 SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r; 159 SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r;
155 if ( !SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint) 160 if (!SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint)
156 SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r; 161 SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r;
157 162
158 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh); 163 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh);
159 164
160 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1); 165 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1);
161 for (i = block_r_new; i < s->s_blocksize * 8; i++) 166 for (i = block_r_new; i < s->s_blocksize * 8; i++)
162 reiserfs_test_and_set_le_bit(i, 167 reiserfs_test_and_set_le_bit(i,
163 SB_AP_BITMAP(s)[bmap_nr_new - 1].bh->b_data); 168 SB_AP_BITMAP(s)[bmap_nr_new -
169 1].bh->b_data);
164 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh); 170 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh);
165 171
166 SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= s->s_blocksize * 8 - block_r_new; 172 SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -=
173 s->s_blocksize * 8 - block_r_new;
167 /* Extreme case where last bitmap is the only valid block in itself. */ 174 /* Extreme case where last bitmap is the only valid block in itself. */
168 if ( !SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count ) 175 if (!SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count)
169 SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0; 176 SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0;
170 /* update super */ 177 /* update super */
171 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 178 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
172 free_blocks = SB_FREE_BLOCKS(s); 179 free_blocks = SB_FREE_BLOCKS(s);
173 PUT_SB_FREE_BLOCKS(s, free_blocks + (block_count_new - block_count - (bmap_nr_new - bmap_nr))); 180 PUT_SB_FREE_BLOCKS(s,
181 free_blocks + (block_count_new - block_count -
182 (bmap_nr_new - bmap_nr)));
174 PUT_SB_BLOCK_COUNT(s, block_count_new); 183 PUT_SB_BLOCK_COUNT(s, block_count_new);
175 PUT_SB_BMAP_NR(s, bmap_nr_new); 184 PUT_SB_BMAP_NR(s, bmap_nr_new);
176 s->s_dirt = 1; 185 s->s_dirt = 1;
177 186
178 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 187 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
179 188
180 SB_JOURNAL(s)->j_must_wait = 1; 189 SB_JOURNAL(s)->j_must_wait = 1;
181 return journal_end(&th, s, 10); 190 return journal_end(&th, s, 10);
182} 191}
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index c47f8fd31a2d..e2d08d7bcffc 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -59,46 +59,45 @@
59#include <linux/quotaops.h> 59#include <linux/quotaops.h>
60 60
61/* Does the buffer contain a disk block which is in the tree. */ 61/* Does the buffer contain a disk block which is in the tree. */
62inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh) 62inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
63{ 63{
64 64
65 RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT, 65 RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT,
66 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); 66 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
67 67
68 return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); 68 return (B_LEVEL(p_s_bh) != FREE_LEVEL);
69} 69}
70 70
71// 71//
72// to gets item head in le form 72// to gets item head in le form
73// 73//
74inline void copy_item_head(struct item_head * p_v_to, 74inline void copy_item_head(struct item_head *p_v_to,
75 const struct item_head * p_v_from) 75 const struct item_head *p_v_from)
76{ 76{
77 memcpy (p_v_to, p_v_from, IH_SIZE); 77 memcpy(p_v_to, p_v_from, IH_SIZE);
78} 78}
79 79
80
81/* k1 is pointer to on-disk structure which is stored in little-endian 80/* k1 is pointer to on-disk structure which is stored in little-endian
82 form. k2 is pointer to cpu variable. For key of items of the same 81 form. k2 is pointer to cpu variable. For key of items of the same
83 object this returns 0. 82 object this returns 0.
84 Returns: -1 if key1 < key2 83 Returns: -1 if key1 < key2
85 0 if key1 == key2 84 0 if key1 == key2
86 1 if key1 > key2 */ 85 1 if key1 > key2 */
87inline int comp_short_keys (const struct reiserfs_key * le_key, 86inline int comp_short_keys(const struct reiserfs_key *le_key,
88 const struct cpu_key * cpu_key) 87 const struct cpu_key *cpu_key)
89{ 88{
90 __u32 n; 89 __u32 n;
91 n = le32_to_cpu(le_key->k_dir_id); 90 n = le32_to_cpu(le_key->k_dir_id);
92 if (n < cpu_key->on_disk_key.k_dir_id) 91 if (n < cpu_key->on_disk_key.k_dir_id)
93 return -1; 92 return -1;
94 if (n > cpu_key->on_disk_key.k_dir_id) 93 if (n > cpu_key->on_disk_key.k_dir_id)
95 return 1; 94 return 1;
96 n = le32_to_cpu(le_key->k_objectid); 95 n = le32_to_cpu(le_key->k_objectid);
97 if (n < cpu_key->on_disk_key.k_objectid) 96 if (n < cpu_key->on_disk_key.k_objectid)
98 return -1; 97 return -1;
99 if (n > cpu_key->on_disk_key.k_objectid) 98 if (n > cpu_key->on_disk_key.k_objectid)
100 return 1; 99 return 1;
101 return 0; 100 return 0;
102} 101}
103 102
104/* k1 is pointer to on-disk structure which is stored in little-endian 103/* k1 is pointer to on-disk structure which is stored in little-endian
@@ -106,68 +105,72 @@ inline int comp_short_keys (const struct reiserfs_key * le_key,
106 Compare keys using all 4 key fields. 105 Compare keys using all 4 key fields.
107 Returns: -1 if key1 < key2 0 106 Returns: -1 if key1 < key2 0
108 if key1 = key2 1 if key1 > key2 */ 107 if key1 = key2 1 if key1 > key2 */
109static inline int comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key) 108static inline int comp_keys(const struct reiserfs_key *le_key,
109 const struct cpu_key *cpu_key)
110{ 110{
111 int retval; 111 int retval;
112 112
113 retval = comp_short_keys (le_key, cpu_key); 113 retval = comp_short_keys(le_key, cpu_key);
114 if (retval) 114 if (retval)
115 return retval; 115 return retval;
116 if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key)) 116 if (le_key_k_offset(le_key_version(le_key), le_key) <
117 return -1; 117 cpu_key_k_offset(cpu_key))
118 if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key)) 118 return -1;
119 return 1; 119 if (le_key_k_offset(le_key_version(le_key), le_key) >
120 120 cpu_key_k_offset(cpu_key))
121 if (cpu_key->key_length == 3) 121 return 1;
122 return 0; 122
123 123 if (cpu_key->key_length == 3)
124 /* this part is needed only when tail conversion is in progress */ 124 return 0;
125 if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key)) 125
126 return -1; 126 /* this part is needed only when tail conversion is in progress */
127 if (le_key_k_type(le_key_version(le_key), le_key) <
128 cpu_key_k_type(cpu_key))
129 return -1;
130
131 if (le_key_k_type(le_key_version(le_key), le_key) >
132 cpu_key_k_type(cpu_key))
133 return 1;
127 134
128 if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key)) 135 return 0;
129 return 1;
130
131 return 0;
132} 136}
133 137
134 138inline int comp_short_le_keys(const struct reiserfs_key *key1,
135inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2) 139 const struct reiserfs_key *key2)
136{ 140{
137 __u32 * p_s_1_u32, * p_s_2_u32; 141 __u32 *p_s_1_u32, *p_s_2_u32;
138 int n_key_length = REISERFS_SHORT_KEY_LEN; 142 int n_key_length = REISERFS_SHORT_KEY_LEN;
139 143
140 p_s_1_u32 = (__u32 *)key1; 144 p_s_1_u32 = (__u32 *) key1;
141 p_s_2_u32 = (__u32 *)key2; 145 p_s_2_u32 = (__u32 *) key2;
142 for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { 146 for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) {
143 if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) ) 147 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32))
144 return -1; 148 return -1;
145 if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) ) 149 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32))
146 return 1; 150 return 1;
147 } 151 }
148 return 0; 152 return 0;
149} 153}
150 154
151inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from) 155inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
152{ 156{
153 int version; 157 int version;
154 to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); 158 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
155 to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); 159 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
156 160
157 // find out version of the key 161 // find out version of the key
158 version = le_key_version (from); 162 version = le_key_version(from);
159 to->version = version; 163 to->version = version;
160 to->on_disk_key.k_offset = le_key_k_offset(version, from); 164 to->on_disk_key.k_offset = le_key_k_offset(version, from);
161 to->on_disk_key.k_type = le_key_k_type(version, from); 165 to->on_disk_key.k_type = le_key_k_type(version, from);
162} 166}
163 167
164
165
166// this does not say which one is bigger, it only returns 1 if keys 168// this does not say which one is bigger, it only returns 1 if keys
167// are not equal, 0 otherwise 169// are not equal, 0 otherwise
168inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2) 170inline int comp_le_keys(const struct reiserfs_key *k1,
171 const struct reiserfs_key *k2)
169{ 172{
170 return memcmp (k1, k2, sizeof (struct reiserfs_key)); 173 return memcmp(k1, k2, sizeof(struct reiserfs_key));
171} 174}
172 175
173/************************************************************************** 176/**************************************************************************
@@ -184,373 +187,396 @@ inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_k
184 there are no possible items, and we have not found it. With each examination we 187 there are no possible items, and we have not found it. With each examination we
185 cut the number of possible items it could be by one more than half rounded down, 188 cut the number of possible items it could be by one more than half rounded down,
186 or we find it. */ 189 or we find it. */
187static inline int bin_search ( 190static inline int bin_search(const void *p_v_key, /* Key to search for. */
188 const void * p_v_key, /* Key to search for. */ 191 const void *p_v_base, /* First item in the array. */
189 const void * p_v_base,/* First item in the array. */ 192 int p_n_num, /* Number of items in the array. */
190 int p_n_num, /* Number of items in the array. */ 193 int p_n_width, /* Item size in the array.
191 int p_n_width, /* Item size in the array. 194 searched. Lest the reader be
192 searched. Lest the reader be 195 confused, note that this is crafted
193 confused, note that this is crafted 196 as a general function, and when it
194 as a general function, and when it 197 is applied specifically to the array
195 is applied specifically to the array 198 of item headers in a node, p_n_width
196 of item headers in a node, p_n_width 199 is actually the item header size not
197 is actually the item header size not 200 the item size. */
198 the item size. */ 201 int *p_n_pos /* Number of the searched for element. */
199 int * p_n_pos /* Number of the searched for element. */ 202 )
200 ) { 203{
201 int n_rbound, n_lbound, n_j; 204 int n_rbound, n_lbound, n_j;
202 205
203 for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 ) 206 for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
204 switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) { 207 n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2)
205 case -1: n_lbound = n_j + 1; continue; 208 switch (comp_keys
206 case 1: n_rbound = n_j - 1; continue; 209 ((struct reiserfs_key *)((char *)p_v_base +
207 case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */ 210 n_j * p_n_width),
208 } 211 (struct cpu_key *)p_v_key)) {
209 212 case -1:
210 /* bin_search did not find given key, it returns position of key, 213 n_lbound = n_j + 1;
211 that is minimal and greater than the given one. */ 214 continue;
212 *p_n_pos = n_lbound; 215 case 1:
213 return ITEM_NOT_FOUND; 216 n_rbound = n_j - 1;
217 continue;
218 case 0:
219 *p_n_pos = n_j;
220 return ITEM_FOUND; /* Key found in the array. */
221 }
222
223 /* bin_search did not find given key, it returns position of key,
224 that is minimal and greater than the given one. */
225 *p_n_pos = n_lbound;
226 return ITEM_NOT_FOUND;
214} 227}
215 228
216#ifdef CONFIG_REISERFS_CHECK 229#ifdef CONFIG_REISERFS_CHECK
217extern struct tree_balance * cur_tb; 230extern struct tree_balance *cur_tb;
218#endif 231#endif
219 232
220
221
222/* Minimal possible key. It is never in the tree. */ 233/* Minimal possible key. It is never in the tree. */
223const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; 234const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
224 235
225/* Maximal possible key. It is never in the tree. */ 236/* Maximal possible key. It is never in the tree. */
226const struct reiserfs_key MAX_KEY = { 237static const struct reiserfs_key MAX_KEY = {
227 __constant_cpu_to_le32(0xffffffff), 238 __constant_cpu_to_le32(0xffffffff),
228 __constant_cpu_to_le32(0xffffffff), 239 __constant_cpu_to_le32(0xffffffff),
229 {{__constant_cpu_to_le32(0xffffffff), 240 {{__constant_cpu_to_le32(0xffffffff),
230 __constant_cpu_to_le32(0xffffffff)},} 241 __constant_cpu_to_le32(0xffffffff)},}
231}; 242};
232 243
233
234/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom 244/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
235 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 245 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
236 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this 246 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
237 case we return a special key, either MIN_KEY or MAX_KEY. */ 247 case we return a special key, either MIN_KEY or MAX_KEY. */
238static inline const struct reiserfs_key * get_lkey ( 248static inline const struct reiserfs_key *get_lkey(const struct path
239 const struct path * p_s_chk_path, 249 *p_s_chk_path,
240 const struct super_block * p_s_sb 250 const struct super_block
241 ) { 251 *p_s_sb)
242 int n_position, n_path_offset = p_s_chk_path->path_length; 252{
243 struct buffer_head * p_s_parent; 253 int n_position, n_path_offset = p_s_chk_path->path_length;
244 254 struct buffer_head *p_s_parent;
245 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 255
246 "PAP-5010: invalid offset in the path"); 256 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
247 257 "PAP-5010: invalid offset in the path");
248 /* While not higher in path than first element. */ 258
249 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { 259 /* While not higher in path than first element. */
250 260 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
251 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 261
252 "PAP-5020: parent is not uptodate"); 262 RFALSE(!buffer_uptodate
253 263 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
254 /* Parent at the path is not in the tree now. */ 264 "PAP-5020: parent is not uptodate");
255 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) 265
256 return &MAX_KEY; 266 /* Parent at the path is not in the tree now. */
257 /* Check whether position in the parent is correct. */ 267 if (!B_IS_IN_TREE
258 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) 268 (p_s_parent =
259 return &MAX_KEY; 269 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
260 /* Check whether parent at the path really points to the child. */ 270 return &MAX_KEY;
261 if ( B_N_CHILD_NUM(p_s_parent, n_position) != 271 /* Check whether position in the parent is correct. */
262 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) 272 if ((n_position =
263 return &MAX_KEY; 273 PATH_OFFSET_POSITION(p_s_chk_path,
264 /* Return delimiting key if position in the parent is not equal to zero. */ 274 n_path_offset)) >
265 if ( n_position ) 275 B_NR_ITEMS(p_s_parent))
266 return B_N_PDELIM_KEY(p_s_parent, n_position - 1); 276 return &MAX_KEY;
267 } 277 /* Check whether parent at the path really points to the child. */
268 /* Return MIN_KEY if we are in the root of the buffer tree. */ 278 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
269 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 279 PATH_OFFSET_PBUFFER(p_s_chk_path,
270 SB_ROOT_BLOCK (p_s_sb) ) 280 n_path_offset + 1)->b_blocknr)
271 return &MIN_KEY; 281 return &MAX_KEY;
272 return &MAX_KEY; 282 /* Return delimiting key if position in the parent is not equal to zero. */
283 if (n_position)
284 return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
285 }
286 /* Return MIN_KEY if we are in the root of the buffer tree. */
287 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
288 b_blocknr == SB_ROOT_BLOCK(p_s_sb))
289 return &MIN_KEY;
290 return &MAX_KEY;
273} 291}
274 292
275
276/* Get delimiting key of the buffer at the path and its right neighbor. */ 293/* Get delimiting key of the buffer at the path and its right neighbor. */
277inline const struct reiserfs_key * get_rkey ( 294inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
278 const struct path * p_s_chk_path, 295 const struct super_block *p_s_sb)
279 const struct super_block * p_s_sb 296{
280 ) { 297 int n_position, n_path_offset = p_s_chk_path->path_length;
281 int n_position, 298 struct buffer_head *p_s_parent;
282 n_path_offset = p_s_chk_path->path_length; 299
283 struct buffer_head * p_s_parent; 300 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
284 301 "PAP-5030: invalid offset in the path");
285 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 302
286 "PAP-5030: invalid offset in the path"); 303 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
287 304
288 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { 305 RFALSE(!buffer_uptodate
289 306 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
290 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 307 "PAP-5040: parent is not uptodate");
291 "PAP-5040: parent is not uptodate"); 308
292 309 /* Parent at the path is not in the tree now. */
293 /* Parent at the path is not in the tree now. */ 310 if (!B_IS_IN_TREE
294 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) 311 (p_s_parent =
295 return &MIN_KEY; 312 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
296 /* Check whether position in the parent is correct. */ 313 return &MIN_KEY;
297 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) 314 /* Check whether position in the parent is correct. */
298 return &MIN_KEY; 315 if ((n_position =
299 /* Check whether parent at the path really points to the child. */ 316 PATH_OFFSET_POSITION(p_s_chk_path,
300 if ( B_N_CHILD_NUM(p_s_parent, n_position) != 317 n_path_offset)) >
301 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) 318 B_NR_ITEMS(p_s_parent))
302 return &MIN_KEY; 319 return &MIN_KEY;
303 /* Return delimiting key if position in the parent is not the last one. */ 320 /* Check whether parent at the path really points to the child. */
304 if ( n_position != B_NR_ITEMS(p_s_parent) ) 321 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
305 return B_N_PDELIM_KEY(p_s_parent, n_position); 322 PATH_OFFSET_PBUFFER(p_s_chk_path,
306 } 323 n_path_offset + 1)->b_blocknr)
307 /* Return MAX_KEY if we are in the root of the buffer tree. */ 324 return &MIN_KEY;
308 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 325 /* Return delimiting key if position in the parent is not the last one. */
309 SB_ROOT_BLOCK (p_s_sb) ) 326 if (n_position != B_NR_ITEMS(p_s_parent))
310 return &MAX_KEY; 327 return B_N_PDELIM_KEY(p_s_parent, n_position);
311 return &MIN_KEY; 328 }
329 /* Return MAX_KEY if we are in the root of the buffer tree. */
330 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
331 b_blocknr == SB_ROOT_BLOCK(p_s_sb))
332 return &MAX_KEY;
333 return &MIN_KEY;
312} 334}
313 335
314
315/* Check whether a key is contained in the tree rooted from a buffer at a path. */ 336/* Check whether a key is contained in the tree rooted from a buffer at a path. */
316/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in 337/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
317 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the 338 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
318 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in 339 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
319 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ 340 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
320static inline int key_in_buffer ( 341static inline int key_in_buffer(struct path *p_s_chk_path, /* Path which should be checked. */
321 struct path * p_s_chk_path, /* Path which should be checked. */ 342 const struct cpu_key *p_s_key, /* Key which should be checked. */
322 const struct cpu_key * p_s_key, /* Key which should be checked. */ 343 struct super_block *p_s_sb /* Super block pointer. */
323 struct super_block * p_s_sb /* Super block pointer. */ 344 )
324 ) { 345{
325
326 RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
327 p_s_chk_path->path_length > MAX_HEIGHT,
328 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
329 p_s_key, p_s_chk_path->path_length);
330 RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
331 "PAP-5060: device must not be NODEV");
332
333 if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
334 /* left delimiting key is bigger, that the key we look for */
335 return 0;
336 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
337 if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
338 /* p_s_key must be less than right delimitiing key */
339 return 0;
340 return 1;
341}
342
343 346
344inline void decrement_bcount( 347 RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
345 struct buffer_head * p_s_bh 348 || p_s_chk_path->path_length > MAX_HEIGHT,
346 ) { 349 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
347 if ( p_s_bh ) { 350 p_s_key, p_s_chk_path->path_length);
348 if ( atomic_read (&(p_s_bh->b_count)) ) { 351 RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
349 put_bh(p_s_bh) ; 352 "PAP-5060: device must not be NODEV");
350 return; 353
351 } 354 if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1)
352 reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh); 355 /* left delimiting key is bigger, that the key we look for */
353 } 356 return 0;
357 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
358 if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1)
359 /* p_s_key must be less than right delimitiing key */
360 return 0;
361 return 1;
354} 362}
355 363
364inline void decrement_bcount(struct buffer_head *p_s_bh)
365{
366 if (p_s_bh) {
367 if (atomic_read(&(p_s_bh->b_count))) {
368 put_bh(p_s_bh);
369 return;
370 }
371 reiserfs_panic(NULL,
372 "PAP-5070: decrement_bcount: trying to free free buffer %b",
373 p_s_bh);
374 }
375}
356 376
357/* Decrement b_count field of the all buffers in the path. */ 377/* Decrement b_count field of the all buffers in the path. */
358void decrement_counters_in_path ( 378void decrement_counters_in_path(struct path *p_s_search_path)
359 struct path * p_s_search_path 379{
360 ) { 380 int n_path_offset = p_s_search_path->path_length;
361 int n_path_offset = p_s_search_path->path_length; 381
362 382 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
363 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || 383 n_path_offset > EXTENDED_MAX_HEIGHT - 1,
364 n_path_offset > EXTENDED_MAX_HEIGHT - 1, 384 "PAP-5080: invalid path offset of %d", n_path_offset);
365 "PAP-5080: invalid path offset of %d", n_path_offset);
366
367 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
368 struct buffer_head * bh;
369
370 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
371 decrement_bcount (bh);
372 }
373 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
374}
375 385
386 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
387 struct buffer_head *bh;
376 388
377int reiserfs_check_path(struct path *p) { 389 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
378 RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, 390 decrement_bcount(bh);
379 "path not properly relsed") ; 391 }
380 return 0 ; 392 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
381} 393}
382 394
395int reiserfs_check_path(struct path *p)
396{
397 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
398 "path not properly relsed");
399 return 0;
400}
383 401
384/* Release all buffers in the path. Restore dirty bits clean 402/* Release all buffers in the path. Restore dirty bits clean
385** when preparing the buffer for the log 403** when preparing the buffer for the log
386** 404**
387** only called from fix_nodes() 405** only called from fix_nodes()
388*/ 406*/
389void pathrelse_and_restore ( 407void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
390 struct super_block *s, 408{
391 struct path * p_s_search_path 409 int n_path_offset = p_s_search_path->path_length;
392 ) { 410
393 int n_path_offset = p_s_search_path->path_length; 411 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
394 412 "clm-4000: invalid path offset");
395 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 413
396 "clm-4000: invalid path offset"); 414 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
397 415 reiserfs_restore_prepared_buffer(s,
398 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { 416 PATH_OFFSET_PBUFFER
399 reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, 417 (p_s_search_path,
400 n_path_offset)); 418 n_path_offset));
401 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); 419 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
402 } 420 }
403 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 421 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
404} 422}
405 423
406/* Release all buffers in the path. */ 424/* Release all buffers in the path. */
407void pathrelse ( 425void pathrelse(struct path *p_s_search_path)
408 struct path * p_s_search_path 426{
409 ) { 427 int n_path_offset = p_s_search_path->path_length;
410 int n_path_offset = p_s_search_path->path_length;
411
412 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
413 "PAP-5090: invalid path offset");
414
415 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )
416 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
417
418 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
419}
420 428
429 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
430 "PAP-5090: invalid path offset");
421 431
432 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
433 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
422 434
423static int is_leaf (char * buf, int blocksize, struct buffer_head * bh) 435 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
424{ 436}
425 struct block_head * blkh;
426 struct item_head * ih;
427 int used_space;
428 int prev_location;
429 int i;
430 int nr;
431
432 blkh = (struct block_head *)buf;
433 if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
434 reiserfs_warning (NULL, "is_leaf: this should be caught earlier");
435 return 0;
436 }
437 437
438 nr = blkh_nr_item(blkh); 438static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
439 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { 439{
440 /* item number is too big or too small */ 440 struct block_head *blkh;
441 reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh); 441 struct item_head *ih;
442 return 0; 442 int used_space;
443 } 443 int prev_location;
444 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 444 int i;
445 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); 445 int nr;
446 if (used_space != blocksize - blkh_free_space(blkh)) { 446
447 /* free space does not match to calculated amount of use space */ 447 blkh = (struct block_head *)buf;
448 reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh); 448 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
449 return 0; 449 reiserfs_warning(NULL,
450 } 450 "is_leaf: this should be caught earlier");
451 451 return 0;
452 // FIXME: it is_leaf will hit performance too much - we may have
453 // return 1 here
454
455 /* check tables of item heads */
456 ih = (struct item_head *)(buf + BLKH_SIZE);
457 prev_location = blocksize;
458 for (i = 0; i < nr; i ++, ih ++) {
459 if ( le_ih_k_type(ih) == TYPE_ANY) {
460 reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih);
461 return 0;
462 } 452 }
463 if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) { 453
464 reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih); 454 nr = blkh_nr_item(blkh);
465 return 0; 455 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
456 /* item number is too big or too small */
457 reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh);
458 return 0;
466 } 459 }
467 if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) { 460 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
468 reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih); 461 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
469 return 0; 462 if (used_space != blocksize - blkh_free_space(blkh)) {
463 /* free space does not match to calculated amount of use space */
464 reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z",
465 bh);
466 return 0;
470 } 467 }
471 if (prev_location - ih_location (ih) != ih_item_len (ih)) { 468 // FIXME: it is_leaf will hit performance too much - we may have
472 reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih); 469 // return 1 here
473 return 0; 470
471 /* check tables of item heads */
472 ih = (struct item_head *)(buf + BLKH_SIZE);
473 prev_location = blocksize;
474 for (i = 0; i < nr; i++, ih++) {
475 if (le_ih_k_type(ih) == TYPE_ANY) {
476 reiserfs_warning(NULL,
477 "is_leaf: wrong item type for item %h",
478 ih);
479 return 0;
480 }
481 if (ih_location(ih) >= blocksize
482 || ih_location(ih) < IH_SIZE * nr) {
483 reiserfs_warning(NULL,
484 "is_leaf: item location seems wrong: %h",
485 ih);
486 return 0;
487 }
488 if (ih_item_len(ih) < 1
489 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
490 reiserfs_warning(NULL,
491 "is_leaf: item length seems wrong: %h",
492 ih);
493 return 0;
494 }
495 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
496 reiserfs_warning(NULL,
497 "is_leaf: item location seems wrong (second one): %h",
498 ih);
499 return 0;
500 }
501 prev_location = ih_location(ih);
474 } 502 }
475 prev_location = ih_location (ih);
476 }
477 503
478 // one may imagine much more checks 504 // one may imagine much more checks
479 return 1; 505 return 1;
480} 506}
481 507
482
483/* returns 1 if buf looks like an internal node, 0 otherwise */ 508/* returns 1 if buf looks like an internal node, 0 otherwise */
484static int is_internal (char * buf, int blocksize, struct buffer_head * bh) 509static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
485{ 510{
486 struct block_head * blkh; 511 struct block_head *blkh;
487 int nr; 512 int nr;
488 int used_space; 513 int used_space;
489 514
490 blkh = (struct block_head *)buf; 515 blkh = (struct block_head *)buf;
491 nr = blkh_level(blkh); 516 nr = blkh_level(blkh);
492 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { 517 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
493 /* this level is not possible for internal nodes */ 518 /* this level is not possible for internal nodes */
494 reiserfs_warning (NULL, "is_internal: this should be caught earlier"); 519 reiserfs_warning(NULL,
495 return 0; 520 "is_internal: this should be caught earlier");
496 } 521 return 0;
497 522 }
498 nr = blkh_nr_item(blkh);
499 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
500 /* for internal which is not root we might check min number of keys */
501 reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh);
502 return 0;
503 }
504 523
505 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); 524 nr = blkh_nr_item(blkh);
506 if (used_space != blocksize - blkh_free_space(blkh)) { 525 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
507 reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh); 526 /* for internal which is not root we might check min number of keys */
508 return 0; 527 reiserfs_warning(NULL,
509 } 528 "is_internal: number of key seems wrong: %z",
529 bh);
530 return 0;
531 }
510 532
511 // one may imagine much more checks 533 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
512 return 1; 534 if (used_space != blocksize - blkh_free_space(blkh)) {
535 reiserfs_warning(NULL,
536 "is_internal: free space seems wrong: %z", bh);
537 return 0;
538 }
539 // one may imagine much more checks
540 return 1;
513} 541}
514 542
515
516// make sure that bh contains formatted node of reiserfs tree of 543// make sure that bh contains formatted node of reiserfs tree of
517// 'level'-th level 544// 'level'-th level
518static int is_tree_node (struct buffer_head * bh, int level) 545static int is_tree_node(struct buffer_head *bh, int level)
519{ 546{
520 if (B_LEVEL (bh) != level) { 547 if (B_LEVEL(bh) != level) {
521 reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d", 548 reiserfs_warning(NULL,
522 B_LEVEL (bh), level); 549 "is_tree_node: node level %d does not match to the expected one %d",
523 return 0; 550 B_LEVEL(bh), level);
524 } 551 return 0;
525 if (level == DISK_LEAF_NODE_LEVEL) 552 }
526 return is_leaf (bh->b_data, bh->b_size, bh); 553 if (level == DISK_LEAF_NODE_LEVEL)
554 return is_leaf(bh->b_data, bh->b_size, bh);
527 555
528 return is_internal (bh->b_data, bh->b_size, bh); 556 return is_internal(bh->b_data, bh->b_size, bh);
529} 557}
530 558
531
532
533#define SEARCH_BY_KEY_READA 16 559#define SEARCH_BY_KEY_READA 16
534 560
535/* The function is NOT SCHEDULE-SAFE! */ 561/* The function is NOT SCHEDULE-SAFE! */
536static void search_by_key_reada (struct super_block * s, 562static void search_by_key_reada(struct super_block *s,
537 struct buffer_head **bh, 563 struct buffer_head **bh,
538 unsigned long *b, int num) 564 unsigned long *b, int num)
539{ 565{
540 int i,j; 566 int i, j;
541 567
542 for (i = 0 ; i < num ; i++) { 568 for (i = 0; i < num; i++) {
543 bh[i] = sb_getblk (s, b[i]); 569 bh[i] = sb_getblk(s, b[i]);
544 } 570 }
545 for (j = 0 ; j < i ; j++) { 571 for (j = 0; j < i; j++) {
546 /* 572 /*
547 * note, this needs attention if we are getting rid of the BKL 573 * note, this needs attention if we are getting rid of the BKL
548 * you have to make sure the prepared bit isn't set on this buffer 574 * you have to make sure the prepared bit isn't set on this buffer
549 */ 575 */
550 if (!buffer_uptodate(bh[j])) 576 if (!buffer_uptodate(bh[j]))
551 ll_rw_block(READA, 1, bh + j); 577 ll_rw_block(READA, 1, bh + j);
552 brelse(bh[j]); 578 brelse(bh[j]);
553 } 579 }
554} 580}
555 581
556/************************************************************************** 582/**************************************************************************
@@ -576,194 +602,200 @@ static void search_by_key_reada (struct super_block * s,
576 correctness of the top of the path but need not be checked for the 602 correctness of the top of the path but need not be checked for the
577 correctness of the bottom of the path */ 603 correctness of the bottom of the path */
578/* The function is NOT SCHEDULE-SAFE! */ 604/* The function is NOT SCHEDULE-SAFE! */
579int search_by_key (struct super_block * p_s_sb, 605int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* Key to search. */
580 const struct cpu_key * p_s_key, /* Key to search. */ 606 struct path *p_s_search_path, /* This structure was
581 struct path * p_s_search_path, /* This structure was 607 allocated and initialized
582 allocated and initialized 608 by the calling
583 by the calling 609 function. It is filled up
584 function. It is filled up 610 by this function. */
585 by this function. */ 611 int n_stop_level /* How far down the tree to search. To
586 int n_stop_level /* How far down the tree to search. To 612 stop at leaf level - set to
587 stop at leaf level - set to 613 DISK_LEAF_NODE_LEVEL */
588 DISK_LEAF_NODE_LEVEL */ 614 )
589 ) { 615{
590 int n_block_number; 616 int n_block_number;
591 int expected_level; 617 int expected_level;
592 struct buffer_head * p_s_bh; 618 struct buffer_head *p_s_bh;
593 struct path_element * p_s_last_element; 619 struct path_element *p_s_last_element;
594 int n_node_level, n_retval; 620 int n_node_level, n_retval;
595 int right_neighbor_of_leaf_node; 621 int right_neighbor_of_leaf_node;
596 int fs_gen; 622 int fs_gen;
597 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; 623 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
598 unsigned long reada_blocks[SEARCH_BY_KEY_READA]; 624 unsigned long reada_blocks[SEARCH_BY_KEY_READA];
599 int reada_count = 0; 625 int reada_count = 0;
600 626
601#ifdef CONFIG_REISERFS_CHECK 627#ifdef CONFIG_REISERFS_CHECK
602 int n_repeat_counter = 0; 628 int n_repeat_counter = 0;
603#endif 629#endif
604
605 PROC_INFO_INC( p_s_sb, search_by_key );
606
607 /* As we add each node to a path we increase its count. This means that
608 we must be careful to release all nodes in a path before we either
609 discard the path struct or re-use the path struct, as we do here. */
610 630
611 decrement_counters_in_path(p_s_search_path); 631 PROC_INFO_INC(p_s_sb, search_by_key);
632
633 /* As we add each node to a path we increase its count. This means that
634 we must be careful to release all nodes in a path before we either
635 discard the path struct or re-use the path struct, as we do here. */
612 636
613 right_neighbor_of_leaf_node = 0; 637 decrement_counters_in_path(p_s_search_path);
614 638
615 /* With each iteration of this loop we search through the items in the 639 right_neighbor_of_leaf_node = 0;
616 current node, and calculate the next current node(next path element) 640
617 for the next iteration of this loop.. */ 641 /* With each iteration of this loop we search through the items in the
618 n_block_number = SB_ROOT_BLOCK (p_s_sb); 642 current node, and calculate the next current node(next path element)
619 expected_level = -1; 643 for the next iteration of this loop.. */
620 while ( 1 ) { 644 n_block_number = SB_ROOT_BLOCK(p_s_sb);
645 expected_level = -1;
646 while (1) {
621 647
622#ifdef CONFIG_REISERFS_CHECK 648#ifdef CONFIG_REISERFS_CHECK
623 if ( !(++n_repeat_counter % 50000) ) 649 if (!(++n_repeat_counter % 50000))
624 reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:" 650 reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:"
625 "there were %d iterations of while loop " 651 "there were %d iterations of while loop "
626 "looking for key %K", 652 "looking for key %K",
627 current->comm, n_repeat_counter, p_s_key); 653 current->comm, n_repeat_counter,
654 p_s_key);
628#endif 655#endif
629 656
630 /* prep path to have another element added to it. */ 657 /* prep path to have another element added to it. */
631 p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length); 658 p_s_last_element =
632 fs_gen = get_generation (p_s_sb); 659 PATH_OFFSET_PELEMENT(p_s_search_path,
633 660 ++p_s_search_path->path_length);
634 /* Read the next tree node, and set the last element in the path to 661 fs_gen = get_generation(p_s_sb);
635 have a pointer to it. */ 662
636 if ((p_s_bh = p_s_last_element->pe_buffer = 663 /* Read the next tree node, and set the last element in the path to
637 sb_getblk(p_s_sb, n_block_number)) ) { 664 have a pointer to it. */
638 if (!buffer_uptodate(p_s_bh) && reada_count > 1) { 665 if ((p_s_bh = p_s_last_element->pe_buffer =
639 search_by_key_reada (p_s_sb, reada_bh, 666 sb_getblk(p_s_sb, n_block_number))) {
640 reada_blocks, reada_count); 667 if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
641 } 668 search_by_key_reada(p_s_sb, reada_bh,
642 ll_rw_block(READ, 1, &p_s_bh); 669 reada_blocks, reada_count);
643 wait_on_buffer(p_s_bh); 670 }
644 if (!buffer_uptodate(p_s_bh)) 671 ll_rw_block(READ, 1, &p_s_bh);
645 goto io_error; 672 wait_on_buffer(p_s_bh);
646 } else { 673 if (!buffer_uptodate(p_s_bh))
647io_error: 674 goto io_error;
648 p_s_search_path->path_length --; 675 } else {
649 pathrelse(p_s_search_path); 676 io_error:
650 return IO_ERROR; 677 p_s_search_path->path_length--;
651 } 678 pathrelse(p_s_search_path);
652 reada_count = 0; 679 return IO_ERROR;
653 if (expected_level == -1) 680 }
654 expected_level = SB_TREE_HEIGHT (p_s_sb); 681 reada_count = 0;
655 expected_level --; 682 if (expected_level == -1)
656 683 expected_level = SB_TREE_HEIGHT(p_s_sb);
657 /* It is possible that schedule occurred. We must check whether the key 684 expected_level--;
658 to search is still in the tree rooted from the current buffer. If 685
659 not then repeat search from the root. */ 686 /* It is possible that schedule occurred. We must check whether the key
660 if ( fs_changed (fs_gen, p_s_sb) && 687 to search is still in the tree rooted from the current buffer. If
661 (!B_IS_IN_TREE (p_s_bh) || 688 not then repeat search from the root. */
662 B_LEVEL(p_s_bh) != expected_level || 689 if (fs_changed(fs_gen, p_s_sb) &&
663 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { 690 (!B_IS_IN_TREE(p_s_bh) ||
664 PROC_INFO_INC( p_s_sb, search_by_key_fs_changed ); 691 B_LEVEL(p_s_bh) != expected_level ||
665 PROC_INFO_INC( p_s_sb, search_by_key_restarted ); 692 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
666 PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] ); 693 PROC_INFO_INC(p_s_sb, search_by_key_fs_changed);
667 decrement_counters_in_path(p_s_search_path); 694 PROC_INFO_INC(p_s_sb, search_by_key_restarted);
668 695 PROC_INFO_INC(p_s_sb,
669 /* Get the root block number so that we can repeat the search 696 sbk_restarted[expected_level - 1]);
670 starting from the root. */ 697 decrement_counters_in_path(p_s_search_path);
671 n_block_number = SB_ROOT_BLOCK (p_s_sb); 698
672 expected_level = -1; 699 /* Get the root block number so that we can repeat the search
673 right_neighbor_of_leaf_node = 0; 700 starting from the root. */
674 701 n_block_number = SB_ROOT_BLOCK(p_s_sb);
675 /* repeat search from the root */ 702 expected_level = -1;
676 continue; 703 right_neighbor_of_leaf_node = 0;
677 } 704
705 /* repeat search from the root */
706 continue;
707 }
678 708
679 /* only check that the key is in the buffer if p_s_key is not 709 /* only check that the key is in the buffer if p_s_key is not
680 equal to the MAX_KEY. Latter case is only possible in 710 equal to the MAX_KEY. Latter case is only possible in
681 "finish_unfinished()" processing during mount. */ 711 "finish_unfinished()" processing during mount. */
682 RFALSE( comp_keys( &MAX_KEY, p_s_key ) && 712 RFALSE(comp_keys(&MAX_KEY, p_s_key) &&
683 ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb), 713 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
684 "PAP-5130: key is not in the buffer"); 714 "PAP-5130: key is not in the buffer");
685#ifdef CONFIG_REISERFS_CHECK 715#ifdef CONFIG_REISERFS_CHECK
686 if ( cur_tb ) { 716 if (cur_tb) {
687 print_cur_tb ("5140"); 717 print_cur_tb("5140");
688 reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!"); 718 reiserfs_panic(p_s_sb,
689 } 719 "PAP-5140: search_by_key: schedule occurred in do_balance!");
720 }
690#endif 721#endif
691 722
692 // make sure, that the node contents look like a node of 723 // make sure, that the node contents look like a node of
693 // certain level 724 // certain level
694 if (!is_tree_node (p_s_bh, expected_level)) { 725 if (!is_tree_node(p_s_bh, expected_level)) {
695 reiserfs_warning (p_s_sb, "vs-5150: search_by_key: " 726 reiserfs_warning(p_s_sb, "vs-5150: search_by_key: "
696 "invalid format found in block %ld. Fsck?", 727 "invalid format found in block %ld. Fsck?",
697 p_s_bh->b_blocknr); 728 p_s_bh->b_blocknr);
698 pathrelse (p_s_search_path); 729 pathrelse(p_s_search_path);
699 return IO_ERROR; 730 return IO_ERROR;
700 } 731 }
701
702 /* ok, we have acquired next formatted node in the tree */
703 n_node_level = B_LEVEL (p_s_bh);
704
705 PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );
706
707 RFALSE( n_node_level < n_stop_level,
708 "vs-5152: tree level (%d) is less than stop level (%d)",
709 n_node_level, n_stop_level);
710
711 n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
712 B_NR_ITEMS(p_s_bh),
713 ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
714 &(p_s_last_element->pe_position));
715 if (n_node_level == n_stop_level) {
716 return n_retval;
717 }
718 732
719 /* we are not in the stop level */ 733 /* ok, we have acquired next formatted node in the tree */
720 if (n_retval == ITEM_FOUND) 734 n_node_level = B_LEVEL(p_s_bh);
721 /* item has been found, so we choose the pointer which is to the right of the found one */
722 p_s_last_element->pe_position++;
723 735
724 /* if item was not found we choose the position which is to 736 PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1);
725 the left of the found item. This requires no code,
726 bin_search did it already.*/
727 737
728 /* So we have chosen a position in the current node which is 738 RFALSE(n_node_level < n_stop_level,
729 an internal node. Now we calculate child block number by 739 "vs-5152: tree level (%d) is less than stop level (%d)",
730 position in the node. */ 740 n_node_level, n_stop_level);
731 n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
732 741
733 /* if we are going to read leaf nodes, try for read ahead as well */ 742 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
734 if ((p_s_search_path->reada & PATH_READA) && 743 B_NR_ITEMS(p_s_bh),
735 n_node_level == DISK_LEAF_NODE_LEVEL + 1) 744 (n_node_level ==
736 { 745 DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
737 int pos = p_s_last_element->pe_position; 746 KEY_SIZE,
738 int limit = B_NR_ITEMS(p_s_bh); 747 &(p_s_last_element->pe_position));
739 struct reiserfs_key *le_key; 748 if (n_node_level == n_stop_level) {
740 749 return n_retval;
741 if (p_s_search_path->reada & PATH_READA_BACK) 750 }
742 limit = 0;
743 while(reada_count < SEARCH_BY_KEY_READA) {
744 if (pos == limit)
745 break;
746 reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
747 if (p_s_search_path->reada & PATH_READA_BACK)
748 pos--;
749 else
750 pos++;
751 751
752 /* 752 /* we are not in the stop level */
753 * check to make sure we're in the same object 753 if (n_retval == ITEM_FOUND)
754 */ 754 /* item has been found, so we choose the pointer which is to the right of the found one */
755 le_key = B_N_PDELIM_KEY(p_s_bh, pos); 755 p_s_last_element->pe_position++;
756 if (le32_to_cpu(le_key->k_objectid) != 756
757 p_s_key->on_disk_key.k_objectid) 757 /* if item was not found we choose the position which is to
758 { 758 the left of the found item. This requires no code,
759 break; 759 bin_search did it already. */
760
761 /* So we have chosen a position in the current node which is
762 an internal node. Now we calculate child block number by
763 position in the node. */
764 n_block_number =
765 B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
766
767 /* if we are going to read leaf nodes, try for read ahead as well */
768 if ((p_s_search_path->reada & PATH_READA) &&
769 n_node_level == DISK_LEAF_NODE_LEVEL + 1) {
770 int pos = p_s_last_element->pe_position;
771 int limit = B_NR_ITEMS(p_s_bh);
772 struct reiserfs_key *le_key;
773
774 if (p_s_search_path->reada & PATH_READA_BACK)
775 limit = 0;
776 while (reada_count < SEARCH_BY_KEY_READA) {
777 if (pos == limit)
778 break;
779 reada_blocks[reada_count++] =
780 B_N_CHILD_NUM(p_s_bh, pos);
781 if (p_s_search_path->reada & PATH_READA_BACK)
782 pos--;
783 else
784 pos++;
785
786 /*
787 * check to make sure we're in the same object
788 */
789 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
790 if (le32_to_cpu(le_key->k_objectid) !=
791 p_s_key->on_disk_key.k_objectid) {
792 break;
793 }
794 }
760 } 795 }
761 } 796 }
762 }
763 }
764} 797}
765 798
766
767/* Form the path to an item and position in this item which contains 799/* Form the path to an item and position in this item which contains
768 file byte defined by p_s_key. If there is no such item 800 file byte defined by p_s_key. If there is no such item
769 corresponding to the key, we point the path to the item with 801 corresponding to the key, we point the path to the item with
@@ -780,94 +812,97 @@ io_error:
780 units of directory entries. */ 812 units of directory entries. */
781 813
782/* The function is NOT SCHEDULE-SAFE! */ 814/* The function is NOT SCHEDULE-SAFE! */
783int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */ 815int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the super block. */
784 const struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */ 816 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
785 struct path * p_s_search_path /* Filled up by this function. */ 817 struct path *p_s_search_path /* Filled up by this function. */
786 ) { 818 )
787 struct item_head * p_le_ih; /* pointer to on-disk structure */ 819{
788 int n_blk_size; 820 struct item_head *p_le_ih; /* pointer to on-disk structure */
789 loff_t item_offset, offset; 821 int n_blk_size;
790 struct reiserfs_dir_entry de; 822 loff_t item_offset, offset;
791 int retval; 823 struct reiserfs_dir_entry de;
792 824 int retval;
793 /* If searching for directory entry. */ 825
794 if ( is_direntry_cpu_key (p_cpu_key) ) 826 /* If searching for directory entry. */
795 return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de); 827 if (is_direntry_cpu_key(p_cpu_key))
796 828 return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path,
797 /* If not searching for directory entry. */ 829 &de);
798 830
799 /* If item is found. */ 831 /* If not searching for directory entry. */
800 retval = search_item (p_s_sb, p_cpu_key, p_s_search_path); 832
801 if (retval == IO_ERROR) 833 /* If item is found. */
802 return retval; 834 retval = search_item(p_s_sb, p_cpu_key, p_s_search_path);
803 if ( retval == ITEM_FOUND ) { 835 if (retval == IO_ERROR)
804 836 return retval;
805 RFALSE( ! ih_item_len( 837 if (retval == ITEM_FOUND) {
806 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
807 PATH_LAST_POSITION(p_s_search_path))),
808 "PAP-5165: item length equals zero");
809 838
810 pos_in_item(p_s_search_path) = 0; 839 RFALSE(!ih_item_len
811 return POSITION_FOUND; 840 (B_N_PITEM_HEAD
812 } 841 (PATH_PLAST_BUFFER(p_s_search_path),
842 PATH_LAST_POSITION(p_s_search_path))),
843 "PAP-5165: item length equals zero");
813 844
814 RFALSE( ! PATH_LAST_POSITION(p_s_search_path), 845 pos_in_item(p_s_search_path) = 0;
815 "PAP-5170: position equals zero"); 846 return POSITION_FOUND;
847 }
816 848
817 /* Item is not found. Set path to the previous item. */ 849 RFALSE(!PATH_LAST_POSITION(p_s_search_path),
818 p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path)); 850 "PAP-5170: position equals zero");
819 n_blk_size = p_s_sb->s_blocksize;
820 851
821 if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) { 852 /* Item is not found. Set path to the previous item. */
822 return FILE_NOT_FOUND; 853 p_le_ih =
823 } 854 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
855 --PATH_LAST_POSITION(p_s_search_path));
856 n_blk_size = p_s_sb->s_blocksize;
824 857
825 // FIXME: quite ugly this far 858 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
859 return FILE_NOT_FOUND;
860 }
861 // FIXME: quite ugly this far
826 862
827 item_offset = le_ih_k_offset (p_le_ih); 863 item_offset = le_ih_k_offset(p_le_ih);
828 offset = cpu_key_k_offset (p_cpu_key); 864 offset = cpu_key_k_offset(p_cpu_key);
829 865
830 /* Needed byte is contained in the item pointed to by the path.*/ 866 /* Needed byte is contained in the item pointed to by the path. */
831 if (item_offset <= offset && 867 if (item_offset <= offset &&
832 item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) { 868 item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
833 pos_in_item (p_s_search_path) = offset - item_offset; 869 pos_in_item(p_s_search_path) = offset - item_offset;
834 if ( is_indirect_le_ih(p_le_ih) ) { 870 if (is_indirect_le_ih(p_le_ih)) {
835 pos_in_item (p_s_search_path) /= n_blk_size; 871 pos_in_item(p_s_search_path) /= n_blk_size;
872 }
873 return POSITION_FOUND;
836 } 874 }
837 return POSITION_FOUND;
838 }
839
840 /* Needed byte is not contained in the item pointed to by the
841 path. Set pos_in_item out of the item. */
842 if ( is_indirect_le_ih (p_le_ih) )
843 pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
844 else
845 pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );
846
847 return POSITION_NOT_FOUND;
848}
849 875
876 /* Needed byte is not contained in the item pointed to by the
877 path. Set pos_in_item out of the item. */
878 if (is_indirect_le_ih(p_le_ih))
879 pos_in_item(p_s_search_path) =
880 ih_item_len(p_le_ih) / UNFM_P_SIZE;
881 else
882 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih);
883
884 return POSITION_NOT_FOUND;
885}
850 886
851/* Compare given item and item pointed to by the path. */ 887/* Compare given item and item pointed to by the path. */
852int comp_items (const struct item_head * stored_ih, const struct path * p_s_path) 888int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
853{ 889{
854 struct buffer_head * p_s_bh; 890 struct buffer_head *p_s_bh;
855 struct item_head * ih; 891 struct item_head *ih;
856 892
857 /* Last buffer at the path is not in the tree. */ 893 /* Last buffer at the path is not in the tree. */
858 if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) ) 894 if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)))
859 return 1; 895 return 1;
860 896
861 /* Last path position is invalid. */ 897 /* Last path position is invalid. */
862 if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) ) 898 if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh))
863 return 1; 899 return 1;
864 900
865 /* we need only to know, whether it is the same item */ 901 /* we need only to know, whether it is the same item */
866 ih = get_ih (p_s_path); 902 ih = get_ih(p_s_path);
867 return memcmp (stored_ih, ih, IH_SIZE); 903 return memcmp(stored_ih, ih, IH_SIZE);
868} 904}
869 905
870
871/* unformatted nodes are not logged anymore, ever. This is safe 906/* unformatted nodes are not logged anymore, ever. This is safe
872** now 907** now
873*/ 908*/
@@ -876,461 +911,466 @@ int comp_items (const struct item_head * stored_ih, const struct path * p_s_path
876// block can not be forgotten as it is in I/O or held by someone 911// block can not be forgotten as it is in I/O or held by someone
877#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) 912#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
878 913
879
880
881// prepare for delete or cut of direct item 914// prepare for delete or cut of direct item
882static inline int prepare_for_direct_item (struct path * path, 915static inline int prepare_for_direct_item(struct path *path,
883 struct item_head * le_ih, 916 struct item_head *le_ih,
884 struct inode * inode, 917 struct inode *inode,
885 loff_t new_file_length, 918 loff_t new_file_length, int *cut_size)
886 int * cut_size)
887{ 919{
888 loff_t round_len; 920 loff_t round_len;
889 921
890 922 if (new_file_length == max_reiserfs_offset(inode)) {
891 if ( new_file_length == max_reiserfs_offset (inode) ) { 923 /* item has to be deleted */
892 /* item has to be deleted */ 924 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
893 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 925 return M_DELETE;
894 return M_DELETE; 926 }
895 } 927 // new file gets truncated
896 928 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
897 // new file gets truncated 929 //
898 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) { 930 round_len = ROUND_UP(new_file_length);
899 // 931 /* this was n_new_file_length < le_ih ... */
900 round_len = ROUND_UP (new_file_length); 932 if (round_len < le_ih_k_offset(le_ih)) {
901 /* this was n_new_file_length < le_ih ... */ 933 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
902 if ( round_len < le_ih_k_offset (le_ih) ) { 934 return M_DELETE; /* Delete this item. */
903 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 935 }
904 return M_DELETE; /* Delete this item. */ 936 /* Calculate first position and size for cutting from item. */
937 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
938 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
939
940 return M_CUT; /* Cut from this item. */
941 }
942
943 // old file: items may have any length
944
945 if (new_file_length < le_ih_k_offset(le_ih)) {
946 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
947 return M_DELETE; /* Delete this item. */
905 } 948 }
906 /* Calculate first position and size for cutting from item. */ 949 /* Calculate first position and size for cutting from item. */
907 pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1); 950 *cut_size = -(ih_item_len(le_ih) -
908 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); 951 (pos_in_item(path) =
909 952 new_file_length + 1 - le_ih_k_offset(le_ih)));
910 return M_CUT; /* Cut from this item. */ 953 return M_CUT; /* Cut from this item. */
911 }
912
913
914 // old file: items may have any length
915
916 if ( new_file_length < le_ih_k_offset (le_ih) ) {
917 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
918 return M_DELETE; /* Delete this item. */
919 }
920 /* Calculate first position and size for cutting from item. */
921 *cut_size = -(ih_item_len(le_ih) -
922 (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih)));
923 return M_CUT; /* Cut from this item. */
924} 954}
925 955
926 956static inline int prepare_for_direntry_item(struct path *path,
927static inline int prepare_for_direntry_item (struct path * path, 957 struct item_head *le_ih,
928 struct item_head * le_ih, 958 struct inode *inode,
929 struct inode * inode, 959 loff_t new_file_length,
930 loff_t new_file_length, 960 int *cut_size)
931 int * cut_size)
932{ 961{
933 if (le_ih_k_offset (le_ih) == DOT_OFFSET && 962 if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
934 new_file_length == max_reiserfs_offset (inode)) { 963 new_file_length == max_reiserfs_offset(inode)) {
935 RFALSE( ih_entry_count (le_ih) != 2, 964 RFALSE(ih_entry_count(le_ih) != 2,
936 "PAP-5220: incorrect empty directory item (%h)", le_ih); 965 "PAP-5220: incorrect empty directory item (%h)", le_ih);
937 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 966 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
938 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ 967 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
939 } 968 }
940
941 if ( ih_entry_count (le_ih) == 1 ) {
942 /* Delete the directory item such as there is one record only
943 in this item*/
944 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
945 return M_DELETE;
946 }
947
948 /* Cut one record from the directory item. */
949 *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path)));
950 return M_CUT;
951}
952 969
970 if (ih_entry_count(le_ih) == 1) {
971 /* Delete the directory item such as there is one record only
972 in this item */
973 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
974 return M_DELETE;
975 }
976
977 /* Cut one record from the directory item. */
978 *cut_size =
979 -(DEH_SIZE +
980 entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
981 return M_CUT;
982}
953 983
954/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. 984/* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
955 If the path points to an indirect item, remove some number of its unformatted nodes. 985 If the path points to an indirect item, remove some number of its unformatted nodes.
956 In case of file truncate calculate whether this item must be deleted/truncated or last 986 In case of file truncate calculate whether this item must be deleted/truncated or last
957 unformatted node of this item will be converted to a direct item. 987 unformatted node of this item will be converted to a direct item.
958 This function returns a determination of what balance mode the calling function should employ. */ 988 This function returns a determination of what balance mode the calling function should employ. */
959static char prepare_for_delete_or_cut( 989static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed
960 struct reiserfs_transaction_handle *th, 990 from end of the file. */
961 struct inode * inode, 991 int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
962 struct path * p_s_path, 992 )
963 const struct cpu_key * p_s_item_key, 993{
964 int * p_n_removed, /* Number of unformatted nodes which were removed 994 struct super_block *p_s_sb = inode->i_sb;
965 from end of the file. */ 995 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path);
966 int * p_n_cut_size, 996 struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path);
967 unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
968 ) {
969 struct super_block * p_s_sb = inode->i_sb;
970 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path);
971 struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path);
972
973 BUG_ON (!th->t_trans_id);
974
975 /* Stat_data item. */
976 if ( is_statdata_le_ih (p_le_ih) ) {
977
978 RFALSE( n_new_file_length != max_reiserfs_offset (inode),
979 "PAP-5210: mode must be M_DELETE");
980
981 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
982 return M_DELETE;
983 }
984
985
986 /* Directory item. */
987 if ( is_direntry_le_ih (p_le_ih) )
988 return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
989
990 /* Direct item. */
991 if ( is_direct_le_ih (p_le_ih) )
992 return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
993
994
995 /* Case of an indirect item. */
996 {
997 int n_unfm_number, /* Number of the item unformatted nodes. */
998 n_counter,
999 n_blk_size;
1000 __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1001 __u32 tmp;
1002 struct item_head s_ih; /* Item header. */
1003 char c_mode; /* Returned mode of the balance. */
1004 int need_research;
1005 997
998 BUG_ON(!th->t_trans_id);
1006 999
1007 n_blk_size = p_s_sb->s_blocksize; 1000 /* Stat_data item. */
1001 if (is_statdata_le_ih(p_le_ih)) {
1008 1002
1009 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ 1003 RFALSE(n_new_file_length != max_reiserfs_offset(inode),
1010 do { 1004 "PAP-5210: mode must be M_DELETE");
1011 need_research = 0;
1012 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1013 /* Copy indirect item header to a temp variable. */
1014 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1015 /* Calculate number of unformatted nodes in this item. */
1016 n_unfm_number = I_UNFM_NUM(&s_ih);
1017
1018 RFALSE( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number ||
1019 pos_in_item (p_s_path) + 1 != n_unfm_number,
1020 "PAP-5240: invalid item %h "
1021 "n_unfm_number = %d *p_n_pos_in_item = %d",
1022 &s_ih, n_unfm_number, pos_in_item (p_s_path));
1023
1024 /* Calculate balance mode and position in the item to remove unformatted nodes. */
1025 if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */
1026 pos_in_item (p_s_path) = 0;
1027 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1028 c_mode = M_DELETE;
1029 }
1030 else { /* Case of truncate. */
1031 if ( n_new_file_length < le_ih_k_offset (&s_ih) ) {
1032 pos_in_item (p_s_path) = 0;
1033 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1034 c_mode = M_DELETE; /* Delete this item. */
1035 }
1036 else {
1037 /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1038 pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits;
1039
1040 RFALSE( pos_in_item (p_s_path) > n_unfm_number,
1041 "PAP-5250: invalid position in the item");
1042
1043 /* Either convert last unformatted node of indirect item to direct item or increase
1044 its free space. */
1045 if ( pos_in_item (p_s_path) == n_unfm_number ) {
1046 *p_n_cut_size = 0; /* Nothing to cut. */
1047 return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1048 }
1049 /* Calculate size to cut. */
1050 *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE);
1051
1052 c_mode = M_CUT; /* Cut from this indirect item. */
1053 }
1054 }
1055
1056 RFALSE( n_unfm_number <= pos_in_item (p_s_path),
1057 "PAP-5260: invalid position in the indirect item");
1058
1059 /* pointers to be cut */
1060 n_unfm_number -= pos_in_item (p_s_path);
1061 /* Set pointer to the last unformatted node pointer that is to be cut. */
1062 p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
1063
1064
1065 /* We go through the unformatted nodes pointers of the indirect
1066 item and look for the unformatted nodes in the cache. If we
1067 found some of them we free it, zero corresponding indirect item
1068 entry and log buffer containing that indirect item. For this we
1069 need to prepare last path element for logging. If some
1070 unformatted node has b_count > 1 we must not free this
1071 unformatted node since it is in use. */
1072 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1073 // note: path could be changed, first line in for loop takes care
1074 // of it
1075
1076 for (n_counter = *p_n_removed;
1077 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
1078
1079 cond_resched();
1080 if (item_moved (&s_ih, p_s_path)) {
1081 need_research = 1 ;
1082 break;
1083 }
1084 RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
1085 p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
1086 "vs-5265: pointer out of range");
1087 1005
1088 /* Hole, nothing to remove. */ 1006 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1089 if ( ! get_block_num(p_n_unfm_pointer,0) ) { 1007 return M_DELETE;
1090 (*p_n_removed)++; 1008 }
1091 continue;
1092 }
1093 1009
1094 (*p_n_removed)++; 1010 /* Directory item. */
1011 if (is_direntry_le_ih(p_le_ih))
1012 return prepare_for_direntry_item(p_s_path, p_le_ih, inode,
1013 n_new_file_length,
1014 p_n_cut_size);
1095 1015
1096 tmp = get_block_num(p_n_unfm_pointer,0); 1016 /* Direct item. */
1097 put_block_num(p_n_unfm_pointer, 0, 0); 1017 if (is_direct_le_ih(p_le_ih))
1098 journal_mark_dirty (th, p_s_sb, p_s_bh); 1018 return prepare_for_direct_item(p_s_path, p_le_ih, inode,
1099 reiserfs_free_block(th, inode, tmp, 1); 1019 n_new_file_length, p_n_cut_size);
1100 if ( item_moved (&s_ih, p_s_path) ) { 1020
1101 need_research = 1; 1021 /* Case of an indirect item. */
1102 break ; 1022 {
1103 } 1023 int n_unfm_number, /* Number of the item unformatted nodes. */
1104 } 1024 n_counter, n_blk_size;
1105 1025 __le32 *p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1106 /* a trick. If the buffer has been logged, this 1026 __u32 tmp;
1107 ** will do nothing. If we've broken the loop without 1027 struct item_head s_ih; /* Item header. */
1108 ** logging it, it will restore the buffer 1028 char c_mode; /* Returned mode of the balance. */
1109 ** 1029 int need_research;
1110 */ 1030
1111 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); 1031 n_blk_size = p_s_sb->s_blocksize;
1112 1032
1113 /* This loop can be optimized. */ 1033 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
1114 } while ( (*p_n_removed < n_unfm_number || need_research) && 1034 do {
1115 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND ); 1035 need_research = 0;
1116 1036 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1117 RFALSE( *p_n_removed < n_unfm_number, 1037 /* Copy indirect item header to a temp variable. */
1118 "PAP-5310: indirect item is not found"); 1038 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1119 RFALSE( item_moved (&s_ih, p_s_path), 1039 /* Calculate number of unformatted nodes in this item. */
1120 "after while, comp failed, retry") ; 1040 n_unfm_number = I_UNFM_NUM(&s_ih);
1121 1041
1122 if (c_mode == M_CUT) 1042 RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number ||
1123 pos_in_item (p_s_path) *= UNFM_P_SIZE; 1043 pos_in_item(p_s_path) + 1 != n_unfm_number,
1124 return c_mode; 1044 "PAP-5240: invalid item %h "
1125 } 1045 "n_unfm_number = %d *p_n_pos_in_item = %d",
1046 &s_ih, n_unfm_number, pos_in_item(p_s_path));
1047
1048 /* Calculate balance mode and position in the item to remove unformatted nodes. */
1049 if (n_new_file_length == max_reiserfs_offset(inode)) { /* Case of delete. */
1050 pos_in_item(p_s_path) = 0;
1051 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1052 c_mode = M_DELETE;
1053 } else { /* Case of truncate. */
1054 if (n_new_file_length < le_ih_k_offset(&s_ih)) {
1055 pos_in_item(p_s_path) = 0;
1056 *p_n_cut_size =
1057 -(IH_SIZE + ih_item_len(&s_ih));
1058 c_mode = M_DELETE; /* Delete this item. */
1059 } else {
1060 /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1061 pos_in_item(p_s_path) =
1062 (n_new_file_length + n_blk_size -
1063 le_ih_k_offset(&s_ih)) >> p_s_sb->
1064 s_blocksize_bits;
1065
1066 RFALSE(pos_in_item(p_s_path) >
1067 n_unfm_number,
1068 "PAP-5250: invalid position in the item");
1069
1070 /* Either convert last unformatted node of indirect item to direct item or increase
1071 its free space. */
1072 if (pos_in_item(p_s_path) ==
1073 n_unfm_number) {
1074 *p_n_cut_size = 0; /* Nothing to cut. */
1075 return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1076 }
1077 /* Calculate size to cut. */
1078 *p_n_cut_size =
1079 -(ih_item_len(&s_ih) -
1080 pos_in_item(p_s_path) *
1081 UNFM_P_SIZE);
1082
1083 c_mode = M_CUT; /* Cut from this indirect item. */
1084 }
1085 }
1086
1087 RFALSE(n_unfm_number <= pos_in_item(p_s_path),
1088 "PAP-5260: invalid position in the indirect item");
1089
1090 /* pointers to be cut */
1091 n_unfm_number -= pos_in_item(p_s_path);
1092 /* Set pointer to the last unformatted node pointer that is to be cut. */
1093 p_n_unfm_pointer =
1094 (__le32 *) B_I_PITEM(p_s_bh,
1095 &s_ih) + I_UNFM_NUM(&s_ih) -
1096 1 - *p_n_removed;
1097
1098 /* We go through the unformatted nodes pointers of the indirect
1099 item and look for the unformatted nodes in the cache. If we
1100 found some of them we free it, zero corresponding indirect item
1101 entry and log buffer containing that indirect item. For this we
1102 need to prepare last path element for logging. If some
1103 unformatted node has b_count > 1 we must not free this
1104 unformatted node since it is in use. */
1105 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1106 // note: path could be changed, first line in for loop takes care
1107 // of it
1108
1109 for (n_counter = *p_n_removed;
1110 n_counter < n_unfm_number;
1111 n_counter++, p_n_unfm_pointer--) {
1112
1113 cond_resched();
1114 if (item_moved(&s_ih, p_s_path)) {
1115 need_research = 1;
1116 break;
1117 }
1118 RFALSE(p_n_unfm_pointer <
1119 (__le32 *) B_I_PITEM(p_s_bh, &s_ih)
1120 || p_n_unfm_pointer >
1121 (__le32 *) B_I_PITEM(p_s_bh,
1122 &s_ih) +
1123 I_UNFM_NUM(&s_ih) - 1,
1124 "vs-5265: pointer out of range");
1125
1126 /* Hole, nothing to remove. */
1127 if (!get_block_num(p_n_unfm_pointer, 0)) {
1128 (*p_n_removed)++;
1129 continue;
1130 }
1131
1132 (*p_n_removed)++;
1133
1134 tmp = get_block_num(p_n_unfm_pointer, 0);
1135 put_block_num(p_n_unfm_pointer, 0, 0);
1136 journal_mark_dirty(th, p_s_sb, p_s_bh);
1137 reiserfs_free_block(th, inode, tmp, 1);
1138 if (item_moved(&s_ih, p_s_path)) {
1139 need_research = 1;
1140 break;
1141 }
1142 }
1143
1144 /* a trick. If the buffer has been logged, this
1145 ** will do nothing. If we've broken the loop without
1146 ** logging it, it will restore the buffer
1147 **
1148 */
1149 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1150
1151 /* This loop can be optimized. */
1152 } while ((*p_n_removed < n_unfm_number || need_research) &&
1153 search_for_position_by_key(p_s_sb, p_s_item_key,
1154 p_s_path) ==
1155 POSITION_FOUND);
1156
1157 RFALSE(*p_n_removed < n_unfm_number,
1158 "PAP-5310: indirect item is not found");
1159 RFALSE(item_moved(&s_ih, p_s_path),
1160 "after while, comp failed, retry");
1161
1162 if (c_mode == M_CUT)
1163 pos_in_item(p_s_path) *= UNFM_P_SIZE;
1164 return c_mode;
1165 }
1126} 1166}
1127 1167
1128/* Calculate number of bytes which will be deleted or cut during balance */ 1168/* Calculate number of bytes which will be deleted or cut during balance */
1129static int calc_deleted_bytes_number( 1169static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
1130 struct tree_balance * p_s_tb, 1170{
1131 char c_mode 1171 int n_del_size;
1132 ) { 1172 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1133 int n_del_size; 1173
1134 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); 1174 if (is_statdata_le_ih(p_le_ih))
1135 1175 return 0;
1136 if ( is_statdata_le_ih (p_le_ih) ) 1176
1137 return 0; 1177 n_del_size =
1178 (c_mode ==
1179 M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1180 if (is_direntry_le_ih(p_le_ih)) {
1181 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1182 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1183 // empty size. ick. FIXME, is this right?
1184 //
1185 return n_del_size;
1186 }
1138 1187
1139 n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; 1188 if (is_indirect_le_ih(p_le_ih))
1140 if ( is_direntry_le_ih (p_le_ih) ) { 1189 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih);
1141 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ 1190 return n_del_size;
1142 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1143 // empty size. ick. FIXME, is this right?
1144 //
1145 return n_del_size ;
1146 }
1147
1148 if ( is_indirect_le_ih (p_le_ih) )
1149 n_del_size = (n_del_size/UNFM_P_SIZE)*
1150 (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih);
1151 return n_del_size;
1152} 1191}
1153 1192
1154static void init_tb_struct( 1193static void init_tb_struct(struct reiserfs_transaction_handle *th,
1155 struct reiserfs_transaction_handle *th, 1194 struct tree_balance *p_s_tb,
1156 struct tree_balance * p_s_tb, 1195 struct super_block *p_s_sb,
1157 struct super_block * p_s_sb, 1196 struct path *p_s_path, int n_size)
1158 struct path * p_s_path, 1197{
1159 int n_size
1160 ) {
1161
1162 BUG_ON (!th->t_trans_id);
1163
1164 memset (p_s_tb,'\0',sizeof(struct tree_balance));
1165 p_s_tb->transaction_handle = th ;
1166 p_s_tb->tb_sb = p_s_sb;
1167 p_s_tb->tb_path = p_s_path;
1168 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1169 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1170 p_s_tb->insert_size[0] = n_size;
1171}
1172 1198
1199 BUG_ON(!th->t_trans_id);
1173 1200
1201 memset(p_s_tb, '\0', sizeof(struct tree_balance));
1202 p_s_tb->transaction_handle = th;
1203 p_s_tb->tb_sb = p_s_sb;
1204 p_s_tb->tb_path = p_s_path;
1205 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1206 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1207 p_s_tb->insert_size[0] = n_size;
1208}
1174 1209
1175void padd_item (char * item, int total_length, int length) 1210void padd_item(char *item, int total_length, int length)
1176{ 1211{
1177 int i; 1212 int i;
1178 1213
1179 for (i = total_length; i > length; ) 1214 for (i = total_length; i > length;)
1180 item [--i] = 0; 1215 item[--i] = 0;
1181} 1216}
1182 1217
1183#ifdef REISERQUOTA_DEBUG 1218#ifdef REISERQUOTA_DEBUG
1184char key2type(struct reiserfs_key *ih) 1219char key2type(struct reiserfs_key *ih)
1185{ 1220{
1186 if (is_direntry_le_key(2, ih)) 1221 if (is_direntry_le_key(2, ih))
1187 return 'd'; 1222 return 'd';
1188 if (is_direct_le_key(2, ih)) 1223 if (is_direct_le_key(2, ih))
1189 return 'D'; 1224 return 'D';
1190 if (is_indirect_le_key(2, ih)) 1225 if (is_indirect_le_key(2, ih))
1191 return 'i'; 1226 return 'i';
1192 if (is_statdata_le_key(2, ih)) 1227 if (is_statdata_le_key(2, ih))
1193 return 's'; 1228 return 's';
1194 return 'u'; 1229 return 'u';
1195} 1230}
1196 1231
1197char head2type(struct item_head *ih) 1232char head2type(struct item_head *ih)
1198{ 1233{
1199 if (is_direntry_le_ih(ih)) 1234 if (is_direntry_le_ih(ih))
1200 return 'd'; 1235 return 'd';
1201 if (is_direct_le_ih(ih)) 1236 if (is_direct_le_ih(ih))
1202 return 'D'; 1237 return 'D';
1203 if (is_indirect_le_ih(ih)) 1238 if (is_indirect_le_ih(ih))
1204 return 'i'; 1239 return 'i';
1205 if (is_statdata_le_ih(ih)) 1240 if (is_statdata_le_ih(ih))
1206 return 's'; 1241 return 's';
1207 return 'u'; 1242 return 'u';
1208} 1243}
1209#endif 1244#endif
1210 1245
1211/* Delete object item. */ 1246/* Delete object item. */
1212int reiserfs_delete_item (struct reiserfs_transaction_handle *th, 1247int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the deleted item. */
1213 struct path * p_s_path, /* Path to the deleted item. */ 1248 const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */
1214 const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */ 1249 struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */
1215 struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */ 1250 struct buffer_head *p_s_un_bh)
1216 struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */ 1251{ /* NULL or unformatted node pointer. */
1217{ 1252 struct super_block *p_s_sb = p_s_inode->i_sb;
1218 struct super_block * p_s_sb = p_s_inode->i_sb; 1253 struct tree_balance s_del_balance;
1219 struct tree_balance s_del_balance; 1254 struct item_head s_ih;
1220 struct item_head s_ih; 1255 struct item_head *q_ih;
1221 struct item_head *q_ih; 1256 int quota_cut_bytes;
1222 int quota_cut_bytes; 1257 int n_ret_value, n_del_size, n_removed;
1223 int n_ret_value,
1224 n_del_size,
1225 n_removed;
1226 1258
1227#ifdef CONFIG_REISERFS_CHECK 1259#ifdef CONFIG_REISERFS_CHECK
1228 char c_mode; 1260 char c_mode;
1229 int n_iter = 0; 1261 int n_iter = 0;
1230#endif 1262#endif
1231 1263
1232 BUG_ON (!th->t_trans_id); 1264 BUG_ON(!th->t_trans_id);
1233 1265
1234 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/); 1266 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path,
1267 0 /*size is unknown */ );
1235 1268
1236 while ( 1 ) { 1269 while (1) {
1237 n_removed = 0; 1270 n_removed = 0;
1238 1271
1239#ifdef CONFIG_REISERFS_CHECK 1272#ifdef CONFIG_REISERFS_CHECK
1240 n_iter++; 1273 n_iter++;
1241 c_mode = 1274 c_mode =
1242#endif 1275#endif
1243 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode)); 1276 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1244 1277 p_s_item_key, &n_removed,
1245 RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); 1278 &n_del_size,
1246 1279 max_reiserfs_offset(p_s_inode));
1247 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 1280
1248 s_del_balance.insert_size[0] = n_del_size; 1281 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1249 1282
1250 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); 1283 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1251 if ( n_ret_value != REPEAT_SEARCH ) 1284 s_del_balance.insert_size[0] = n_del_size;
1252 break; 1285
1253 1286 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1254 PROC_INFO_INC( p_s_sb, delete_item_restarted ); 1287 if (n_ret_value != REPEAT_SEARCH)
1288 break;
1289
1290 PROC_INFO_INC(p_s_sb, delete_item_restarted);
1291
1292 // file system changed, repeat search
1293 n_ret_value =
1294 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1295 if (n_ret_value == IO_ERROR)
1296 break;
1297 if (n_ret_value == FILE_NOT_FOUND) {
1298 reiserfs_warning(p_s_sb,
1299 "vs-5340: reiserfs_delete_item: "
1300 "no items of the file %K found",
1301 p_s_item_key);
1302 break;
1303 }
1304 } /* while (1) */
1255 1305
1256 // file system changed, repeat search 1306 if (n_ret_value != CARRY_ON) {
1257 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1307 unfix_nodes(&s_del_balance);
1258 if (n_ret_value == IO_ERROR) 1308 return 0;
1259 break; 1309 }
1260 if (n_ret_value == FILE_NOT_FOUND) { 1310 // reiserfs_delete_item returns item length when success
1261 reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: " 1311 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1262 "no items of the file %K found", p_s_item_key); 1312 q_ih = get_ih(p_s_path);
1263 break; 1313 quota_cut_bytes = ih_item_len(q_ih);
1314
1315 /* hack so the quota code doesn't have to guess if the file
1316 ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1317 ** We test the offset because the tail might have been
1318 ** split into multiple items, and we only want to decrement for
1319 ** the unfm node once
1320 */
1321 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1322 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1323 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1324 } else {
1325 quota_cut_bytes = 0;
1326 }
1264 } 1327 }
1265 } /* while (1) */
1266 1328
1267 if ( n_ret_value != CARRY_ON ) { 1329 if (p_s_un_bh) {
1268 unfix_nodes(&s_del_balance); 1330 int off;
1269 return 0; 1331 char *data;
1270 } 1332
1271 1333 /* We are in direct2indirect conversion, so move tail contents
1272 // reiserfs_delete_item returns item length when success 1334 to the unformatted node */
1273 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1335 /* note, we do the copy before preparing the buffer because we
1274 q_ih = get_ih(p_s_path) ; 1336 ** don't care about the contents of the unformatted node yet.
1275 quota_cut_bytes = ih_item_len(q_ih) ; 1337 ** the only thing we really care about is the direct item's data
1276 1338 ** is in the unformatted node.
1277 /* hack so the quota code doesn't have to guess if the file 1339 **
1278 ** has a tail. On tail insert, we allocate quota for 1 unformatted node. 1340 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1279 ** We test the offset because the tail might have been 1341 ** the unformatted node, which might schedule, meaning we'd have to
1280 ** split into multiple items, and we only want to decrement for 1342 ** loop all the way back up to the start of the while loop.
1281 ** the unfm node once 1343 **
1282 */ 1344 ** The unformatted node must be dirtied later on. We can't be
1283 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { 1345 ** sure here if the entire tail has been deleted yet.
1284 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { 1346 **
1285 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; 1347 ** p_s_un_bh is from the page cache (all unformatted nodes are
1286 } else { 1348 ** from the page cache) and might be a highmem page. So, we
1287 quota_cut_bytes = 0 ; 1349 ** can't use p_s_un_bh->b_data.
1350 ** -clm
1351 */
1352
1353 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1354 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1355 memcpy(data + off,
1356 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih),
1357 n_ret_value);
1358 kunmap_atomic(data, KM_USER0);
1288 } 1359 }
1289 } 1360 /* Perform balancing after all resources have been collected at once. */
1290 1361 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1291 if ( p_s_un_bh ) {
1292 int off;
1293 char *data ;
1294
1295 /* We are in direct2indirect conversion, so move tail contents
1296 to the unformatted node */
1297 /* note, we do the copy before preparing the buffer because we
1298 ** don't care about the contents of the unformatted node yet.
1299 ** the only thing we really care about is the direct item's data
1300 ** is in the unformatted node.
1301 **
1302 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1303 ** the unformatted node, which might schedule, meaning we'd have to
1304 ** loop all the way back up to the start of the while loop.
1305 **
1306 ** The unformatted node must be dirtied later on. We can't be
1307 ** sure here if the entire tail has been deleted yet.
1308 **
1309 ** p_s_un_bh is from the page cache (all unformatted nodes are
1310 ** from the page cache) and might be a highmem page. So, we
1311 ** can't use p_s_un_bh->b_data.
1312 ** -clm
1313 */
1314
1315 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1316 off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1317 memcpy(data + off,
1318 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
1319 kunmap_atomic(data, KM_USER0);
1320 }
1321 /* Perform balancing after all resources have been collected at once. */
1322 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1323 1362
1324#ifdef REISERQUOTA_DEBUG 1363#ifdef REISERQUOTA_DEBUG
1325 reiserfs_debug (p_s_sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); 1364 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1365 "reiserquota delete_item(): freeing %u, id=%u type=%c",
1366 quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1326#endif 1367#endif
1327 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); 1368 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1328 1369
1329 /* Return deleted body length */ 1370 /* Return deleted body length */
1330 return n_ret_value; 1371 return n_ret_value;
1331} 1372}
1332 1373
1333
1334/* Summary Of Mechanisms For Handling Collisions Between Processes: 1374/* Summary Of Mechanisms For Handling Collisions Between Processes:
1335 1375
1336 deletion of the body of the object is performed by iput(), with the 1376 deletion of the body of the object is performed by iput(), with the
@@ -1347,727 +1387,804 @@ int reiserfs_delete_item (struct reiserfs_transaction_handle *th,
1347 - Hans 1387 - Hans
1348*/ 1388*/
1349 1389
1350
1351/* this deletes item which never gets split */ 1390/* this deletes item which never gets split */
1352void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, 1391void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1353 struct inode *inode, 1392 struct inode *inode, struct reiserfs_key *key)
1354 struct reiserfs_key * key)
1355{ 1393{
1356 struct tree_balance tb; 1394 struct tree_balance tb;
1357 INITIALIZE_PATH (path); 1395 INITIALIZE_PATH(path);
1358 int item_len = 0; 1396 int item_len = 0;
1359 int tb_init = 0 ; 1397 int tb_init = 0;
1360 struct cpu_key cpu_key; 1398 struct cpu_key cpu_key;
1361 int retval; 1399 int retval;
1362 int quota_cut_bytes = 0; 1400 int quota_cut_bytes = 0;
1363 1401
1364 BUG_ON (!th->t_trans_id); 1402 BUG_ON(!th->t_trans_id);
1365 1403
1366 le_key2cpu_key (&cpu_key, key); 1404 le_key2cpu_key(&cpu_key, key);
1367 1405
1368 while (1) { 1406 while (1) {
1369 retval = search_item (th->t_super, &cpu_key, &path); 1407 retval = search_item(th->t_super, &cpu_key, &path);
1370 if (retval == IO_ERROR) { 1408 if (retval == IO_ERROR) {
1371 reiserfs_warning (th->t_super, 1409 reiserfs_warning(th->t_super,
1372 "vs-5350: reiserfs_delete_solid_item: " 1410 "vs-5350: reiserfs_delete_solid_item: "
1373 "i/o failure occurred trying to delete %K", 1411 "i/o failure occurred trying to delete %K",
1374 &cpu_key); 1412 &cpu_key);
1375 break; 1413 break;
1376 } 1414 }
1377 if (retval != ITEM_FOUND) { 1415 if (retval != ITEM_FOUND) {
1378 pathrelse (&path); 1416 pathrelse(&path);
1379 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir 1417 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1380 if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \ 1418 if (!
1381 (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) ) 1419 ((unsigned long long)
1382 reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key); 1420 GET_HASH_VALUE(le_key_k_offset
1383 break; 1421 (le_key_version(key), key)) == 0
1384 } 1422 && (unsigned long long)
1385 if (!tb_init) { 1423 GET_GENERATION_NUMBER(le_key_k_offset
1386 tb_init = 1 ; 1424 (le_key_version(key),
1387 item_len = ih_item_len( PATH_PITEM_HEAD(&path) ); 1425 key)) == 1))
1388 init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len)); 1426 reiserfs_warning(th->t_super,
1389 } 1427 "vs-5355: reiserfs_delete_solid_item: %k not found",
1390 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ; 1428 key);
1429 break;
1430 }
1431 if (!tb_init) {
1432 tb_init = 1;
1433 item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1434 init_tb_struct(th, &tb, th->t_super, &path,
1435 -(IH_SIZE + item_len));
1436 }
1437 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1391 1438
1392 retval = fix_nodes (M_DELETE, &tb, NULL, NULL); 1439 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1393 if (retval == REPEAT_SEARCH) { 1440 if (retval == REPEAT_SEARCH) {
1394 PROC_INFO_INC( th -> t_super, delete_solid_item_restarted ); 1441 PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1395 continue; 1442 continue;
1396 } 1443 }
1397 1444
1398 if (retval == CARRY_ON) { 1445 if (retval == CARRY_ON) {
1399 do_balance (&tb, NULL, NULL, M_DELETE); 1446 do_balance(&tb, NULL, NULL, M_DELETE);
1400 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ 1447 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1401#ifdef REISERQUOTA_DEBUG 1448#ifdef REISERQUOTA_DEBUG
1402 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key)); 1449 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1450 "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1451 quota_cut_bytes, inode->i_uid,
1452 key2type(key));
1403#endif 1453#endif
1404 DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); 1454 DQUOT_FREE_SPACE_NODIRTY(inode,
1405 } 1455 quota_cut_bytes);
1406 break; 1456 }
1457 break;
1458 }
1459 // IO_ERROR, NO_DISK_SPACE, etc
1460 reiserfs_warning(th->t_super,
1461 "vs-5360: reiserfs_delete_solid_item: "
1462 "could not delete %K due to fix_nodes failure",
1463 &cpu_key);
1464 unfix_nodes(&tb);
1465 break;
1407 } 1466 }
1408 1467
1409 // IO_ERROR, NO_DISK_SPACE, etc 1468 reiserfs_check_path(&path);
1410 reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: "
1411 "could not delete %K due to fix_nodes failure", &cpu_key);
1412 unfix_nodes (&tb);
1413 break;
1414 }
1415
1416 reiserfs_check_path(&path) ;
1417} 1469}
1418 1470
1419 1471int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1420int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode) 1472 struct inode *inode)
1421{ 1473{
1422 int err; 1474 int err;
1423 inode->i_size = 0; 1475 inode->i_size = 0;
1424 BUG_ON (!th->t_trans_id); 1476 BUG_ON(!th->t_trans_id);
1425 1477
1426 /* for directory this deletes item containing "." and ".." */ 1478 /* for directory this deletes item containing "." and ".." */
1427 err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/); 1479 err =
1428 if (err) 1480 reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1429 return err; 1481 if (err)
1430 1482 return err;
1483
1431#if defined( USE_INODE_GENERATION_COUNTER ) 1484#if defined( USE_INODE_GENERATION_COUNTER )
1432 if( !old_format_only ( th -> t_super ) ) 1485 if (!old_format_only(th->t_super)) {
1433 { 1486 __le32 *inode_generation;
1434 __le32 *inode_generation; 1487
1435 1488 inode_generation =
1436 inode_generation = 1489 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1437 &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation; 1490 *inode_generation =
1438 *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 ); 1491 cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1439 } 1492 }
1440/* USE_INODE_GENERATION_COUNTER */ 1493/* USE_INODE_GENERATION_COUNTER */
1441#endif 1494#endif
1442 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); 1495 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1443 1496
1444 return err; 1497 return err;
1445} 1498}
1446 1499
1447static void 1500static void unmap_buffers(struct page *page, loff_t pos)
1448unmap_buffers(struct page *page, loff_t pos) { 1501{
1449 struct buffer_head *bh ; 1502 struct buffer_head *bh;
1450 struct buffer_head *head ; 1503 struct buffer_head *head;
1451 struct buffer_head *next ; 1504 struct buffer_head *next;
1452 unsigned long tail_index ; 1505 unsigned long tail_index;
1453 unsigned long cur_index ; 1506 unsigned long cur_index;
1454 1507
1455 if (page) { 1508 if (page) {
1456 if (page_has_buffers(page)) { 1509 if (page_has_buffers(page)) {
1457 tail_index = pos & (PAGE_CACHE_SIZE - 1) ; 1510 tail_index = pos & (PAGE_CACHE_SIZE - 1);
1458 cur_index = 0 ; 1511 cur_index = 0;
1459 head = page_buffers(page) ; 1512 head = page_buffers(page);
1460 bh = head ; 1513 bh = head;
1461 do { 1514 do {
1462 next = bh->b_this_page ; 1515 next = bh->b_this_page;
1463 1516
1464 /* we want to unmap the buffers that contain the tail, and 1517 /* we want to unmap the buffers that contain the tail, and
1465 ** all the buffers after it (since the tail must be at the 1518 ** all the buffers after it (since the tail must be at the
1466 ** end of the file). We don't want to unmap file data 1519 ** end of the file). We don't want to unmap file data
1467 ** before the tail, since it might be dirty and waiting to 1520 ** before the tail, since it might be dirty and waiting to
1468 ** reach disk 1521 ** reach disk
1469 */ 1522 */
1470 cur_index += bh->b_size ; 1523 cur_index += bh->b_size;
1471 if (cur_index > tail_index) { 1524 if (cur_index > tail_index) {
1472 reiserfs_unmap_buffer(bh) ; 1525 reiserfs_unmap_buffer(bh);
1526 }
1527 bh = next;
1528 } while (bh != head);
1529 if (PAGE_SIZE == bh->b_size) {
1530 clear_page_dirty(page);
1531 }
1473 } 1532 }
1474 bh = next ;
1475 } while (bh != head) ;
1476 if ( PAGE_SIZE == bh->b_size ) {
1477 clear_page_dirty(page);
1478 }
1479 } 1533 }
1480 }
1481} 1534}
1482 1535
1483static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th, 1536static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1484 struct inode * p_s_inode, 1537 struct inode *p_s_inode,
1485 struct page *page, 1538 struct page *page,
1486 struct path * p_s_path, 1539 struct path *p_s_path,
1487 const struct cpu_key * p_s_item_key, 1540 const struct cpu_key *p_s_item_key,
1488 loff_t n_new_file_size, 1541 loff_t n_new_file_size, char *p_c_mode)
1489 char * p_c_mode 1542{
1490 ) { 1543 struct super_block *p_s_sb = p_s_inode->i_sb;
1491 struct super_block * p_s_sb = p_s_inode->i_sb; 1544 int n_block_size = p_s_sb->s_blocksize;
1492 int n_block_size = p_s_sb->s_blocksize; 1545 int cut_bytes;
1493 int cut_bytes; 1546 BUG_ON(!th->t_trans_id);
1494 BUG_ON (!th->t_trans_id); 1547
1495 1548 if (n_new_file_size != p_s_inode->i_size)
1496 if (n_new_file_size != p_s_inode->i_size) 1549 BUG();
1497 BUG ();
1498
1499 /* the page being sent in could be NULL if there was an i/o error
1500 ** reading in the last block. The user will hit problems trying to
1501 ** read the file, but for now we just skip the indirect2direct
1502 */
1503 if (atomic_read(&p_s_inode->i_count) > 1 ||
1504 !tail_has_to_be_packed (p_s_inode) ||
1505 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1506 // leave tail in an unformatted node
1507 *p_c_mode = M_SKIP_BALANCING;
1508 cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1));
1509 pathrelse(p_s_path);
1510 return cut_bytes;
1511 }
1512 /* Permorm the conversion to a direct_item. */
1513 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/
1514 return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);
1515}
1516 1550
1551 /* the page being sent in could be NULL if there was an i/o error
1552 ** reading in the last block. The user will hit problems trying to
1553 ** read the file, but for now we just skip the indirect2direct
1554 */
1555 if (atomic_read(&p_s_inode->i_count) > 1 ||
1556 !tail_has_to_be_packed(p_s_inode) ||
1557 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1558 // leave tail in an unformatted node
1559 *p_c_mode = M_SKIP_BALANCING;
1560 cut_bytes =
1561 n_block_size - (n_new_file_size & (n_block_size - 1));
1562 pathrelse(p_s_path);
1563 return cut_bytes;
1564 }
1565 /* Permorm the conversion to a direct_item. */
1566 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */
1567 return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key,
1568 n_new_file_size, p_c_mode);
1569}
1517 1570
1518/* we did indirect_to_direct conversion. And we have inserted direct 1571/* we did indirect_to_direct conversion. And we have inserted direct
1519 item successesfully, but there were no disk space to cut unfm 1572 item successesfully, but there were no disk space to cut unfm
1520 pointer being converted. Therefore we have to delete inserted 1573 pointer being converted. Therefore we have to delete inserted
1521 direct item(s) */ 1574 direct item(s) */
1522static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path) 1575static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1576 struct inode *inode, struct path *path)
1523{ 1577{
1524 struct cpu_key tail_key; 1578 struct cpu_key tail_key;
1525 int tail_len; 1579 int tail_len;
1526 int removed; 1580 int removed;
1527 BUG_ON (!th->t_trans_id); 1581 BUG_ON(!th->t_trans_id);
1528 1582
1529 make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!! 1583 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!!
1530 tail_key.key_length = 4; 1584 tail_key.key_length = 4;
1531 1585
1532 tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; 1586 tail_len =
1533 while (tail_len) { 1587 (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1534 /* look for the last byte of the tail */ 1588 while (tail_len) {
1535 if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND) 1589 /* look for the last byte of the tail */
1536 reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item"); 1590 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1537 RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1, 1591 POSITION_NOT_FOUND)
1538 "vs-5616: appended bytes found"); 1592 reiserfs_panic(inode->i_sb,
1539 PATH_LAST_POSITION (path) --; 1593 "vs-5615: indirect_to_direct_roll_back: found invalid item");
1540 1594 RFALSE(path->pos_in_item !=
1541 removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/); 1595 ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1542 RFALSE( removed <= 0 || removed > tail_len, 1596 "vs-5616: appended bytes found");
1543 "vs-5617: there was tail %d bytes, removed item length %d bytes", 1597 PATH_LAST_POSITION(path)--;
1544 tail_len, removed); 1598
1545 tail_len -= removed; 1599 removed =
1546 set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed); 1600 reiserfs_delete_item(th, path, &tail_key, inode,
1547 } 1601 NULL /*unbh not needed */ );
1548 reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); 1602 RFALSE(removed <= 0
1549 //mark_file_without_tail (inode); 1603 || removed > tail_len,
1550 mark_inode_dirty (inode); 1604 "vs-5617: there was tail %d bytes, removed item length %d bytes",
1605 tail_len, removed);
1606 tail_len -= removed;
1607 set_cpu_key_k_offset(&tail_key,
1608 cpu_key_k_offset(&tail_key) - removed);
1609 }
1610 reiserfs_warning(inode->i_sb,
1611 "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1612 //mark_file_without_tail (inode);
1613 mark_inode_dirty(inode);
1551} 1614}
1552 1615
1553
1554/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ 1616/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1555int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, 1617int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1556 struct path * p_s_path, 1618 struct path *p_s_path,
1557 struct cpu_key * p_s_item_key, 1619 struct cpu_key *p_s_item_key,
1558 struct inode * p_s_inode, 1620 struct inode *p_s_inode,
1559 struct page *page, 1621 struct page *page, loff_t n_new_file_size)
1560 loff_t n_new_file_size)
1561{ 1622{
1562 struct super_block * p_s_sb = p_s_inode->i_sb; 1623 struct super_block *p_s_sb = p_s_inode->i_sb;
1563 /* Every function which is going to call do_balance must first 1624 /* Every function which is going to call do_balance must first
1564 create a tree_balance structure. Then it must fill up this 1625 create a tree_balance structure. Then it must fill up this
1565 structure by using the init_tb_struct and fix_nodes functions. 1626 structure by using the init_tb_struct and fix_nodes functions.
1566 After that we can make tree balancing. */ 1627 After that we can make tree balancing. */
1567 struct tree_balance s_cut_balance; 1628 struct tree_balance s_cut_balance;
1568 struct item_head *p_le_ih; 1629 struct item_head *p_le_ih;
1569 int n_cut_size = 0, /* Amount to be cut. */ 1630 int n_cut_size = 0, /* Amount to be cut. */
1570 n_ret_value = CARRY_ON, 1631 n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */
1571 n_removed = 0, /* Number of the removed unformatted nodes. */ 1632 n_is_inode_locked = 0;
1572 n_is_inode_locked = 0; 1633 char c_mode; /* Mode of the balance. */
1573 char c_mode; /* Mode of the balance. */ 1634 int retval2 = -1;
1574 int retval2 = -1; 1635 int quota_cut_bytes;
1575 int quota_cut_bytes; 1636 loff_t tail_pos = 0;
1576 loff_t tail_pos = 0; 1637
1577 1638 BUG_ON(!th->t_trans_id);
1578 BUG_ON (!th->t_trans_id); 1639
1579 1640 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path,
1580 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size); 1641 n_cut_size);
1581 1642
1582 1643 /* Repeat this loop until we either cut the item without needing
1583 /* Repeat this loop until we either cut the item without needing 1644 to balance, or we fix_nodes without schedule occurring */
1584 to balance, or we fix_nodes without schedule occurring */ 1645 while (1) {
1585 while ( 1 ) { 1646 /* Determine the balance mode, position of the first byte to
1586 /* Determine the balance mode, position of the first byte to 1647 be cut, and size to be cut. In case of the indirect item
1587 be cut, and size to be cut. In case of the indirect item 1648 free unformatted nodes which are pointed to by the cut
1588 free unformatted nodes which are pointed to by the cut 1649 pointers. */
1589 pointers. */ 1650
1590 1651 c_mode =
1591 c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, 1652 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1592 &n_cut_size, n_new_file_size); 1653 p_s_item_key, &n_removed,
1593 if ( c_mode == M_CONVERT ) { 1654 &n_cut_size, n_new_file_size);
1594 /* convert last unformatted node to direct item or leave 1655 if (c_mode == M_CONVERT) {
1595 tail in the unformatted node */ 1656 /* convert last unformatted node to direct item or leave
1596 RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice"); 1657 tail in the unformatted node */
1597 1658 RFALSE(n_ret_value != CARRY_ON,
1598 n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key, 1659 "PAP-5570: can not convert twice");
1599 n_new_file_size, &c_mode); 1660
1600 if ( c_mode == M_SKIP_BALANCING ) 1661 n_ret_value =
1601 /* tail has been left in the unformatted node */ 1662 maybe_indirect_to_direct(th, p_s_inode, page,
1602 return n_ret_value; 1663 p_s_path, p_s_item_key,
1603 1664 n_new_file_size, &c_mode);
1604 n_is_inode_locked = 1; 1665 if (c_mode == M_SKIP_BALANCING)
1605 1666 /* tail has been left in the unformatted node */
1606 /* removing of last unformatted node will change value we 1667 return n_ret_value;
1607 have to return to truncate. Save it */ 1668
1608 retval2 = n_ret_value; 1669 n_is_inode_locked = 1;
1609 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/ 1670
1610 1671 /* removing of last unformatted node will change value we
1611 /* So, we have performed the first part of the conversion: 1672 have to return to truncate. Save it */
1612 inserting the new direct item. Now we are removing the 1673 retval2 = n_ret_value;
1613 last unformatted node pointer. Set key to search for 1674 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */
1614 it. */ 1675
1615 set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT); 1676 /* So, we have performed the first part of the conversion:
1616 p_s_item_key->key_length = 4; 1677 inserting the new direct item. Now we are removing the
1617 n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1)); 1678 last unformatted node pointer. Set key to search for
1618 tail_pos = n_new_file_size; 1679 it. */
1619 set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1); 1680 set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT);
1620 if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){ 1681 p_s_item_key->key_length = 4;
1621 print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1); 1682 n_new_file_size -=
1622 reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key); 1683 (n_new_file_size & (p_s_sb->s_blocksize - 1));
1623 } 1684 tail_pos = n_new_file_size;
1624 continue; 1685 set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1);
1625 } 1686 if (search_for_position_by_key
1626 if (n_cut_size == 0) { 1687 (p_s_sb, p_s_item_key,
1627 pathrelse (p_s_path); 1688 p_s_path) == POSITION_NOT_FOUND) {
1628 return 0; 1689 print_block(PATH_PLAST_BUFFER(p_s_path), 3,
1629 } 1690 PATH_LAST_POSITION(p_s_path) - 1,
1691 PATH_LAST_POSITION(p_s_path) + 1);
1692 reiserfs_panic(p_s_sb,
1693 "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)",
1694 p_s_item_key);
1695 }
1696 continue;
1697 }
1698 if (n_cut_size == 0) {
1699 pathrelse(p_s_path);
1700 return 0;
1701 }
1702
1703 s_cut_balance.insert_size[0] = n_cut_size;
1704
1705 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1706 if (n_ret_value != REPEAT_SEARCH)
1707 break;
1708
1709 PROC_INFO_INC(p_s_sb, cut_from_item_restarted);
1710
1711 n_ret_value =
1712 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1713 if (n_ret_value == POSITION_FOUND)
1714 continue;
1630 1715
1631 s_cut_balance.insert_size[0] = n_cut_size; 1716 reiserfs_warning(p_s_sb,
1632 1717 "PAP-5610: reiserfs_cut_from_item: item %K not found",
1633 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); 1718 p_s_item_key);
1634 if ( n_ret_value != REPEAT_SEARCH ) 1719 unfix_nodes(&s_cut_balance);
1635 break; 1720 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1636 1721 } /* while */
1637 PROC_INFO_INC( p_s_sb, cut_from_item_restarted ); 1722
1638 1723 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1639 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1724 if (n_ret_value != CARRY_ON) {
1640 if (n_ret_value == POSITION_FOUND) 1725 if (n_is_inode_locked) {
1641 continue; 1726 // FIXME: this seems to be not needed: we are always able
1642 1727 // to cut item
1643 reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key); 1728 indirect_to_direct_roll_back(th, p_s_inode, p_s_path);
1644 unfix_nodes (&s_cut_balance); 1729 }
1645 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; 1730 if (n_ret_value == NO_DISK_SPACE)
1646 } /* while */ 1731 reiserfs_warning(p_s_sb, "NO_DISK_SPACE");
1647 1732 unfix_nodes(&s_cut_balance);
1648 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) 1733 return -EIO;
1649 if ( n_ret_value != CARRY_ON ) {
1650 if ( n_is_inode_locked ) {
1651 // FIXME: this seems to be not needed: we are always able
1652 // to cut item
1653 indirect_to_direct_roll_back (th, p_s_inode, p_s_path);
1654 } 1734 }
1655 if (n_ret_value == NO_DISK_SPACE) 1735
1656 reiserfs_warning (p_s_sb, "NO_DISK_SPACE"); 1736 /* go ahead and perform balancing */
1657 unfix_nodes (&s_cut_balance); 1737
1658 return -EIO; 1738 RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1659 } 1739
1660 1740 /* Calculate number of bytes that need to be cut from the item. */
1661 /* go ahead and perform balancing */ 1741 quota_cut_bytes =
1662 1742 (c_mode ==
1663 RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); 1743 M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.
1664 1744 insert_size[0];
1665 /* Calculate number of bytes that need to be cut from the item. */ 1745 if (retval2 == -1)
1666 quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0]; 1746 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1667 if (retval2 == -1) 1747 else
1668 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); 1748 n_ret_value = retval2;
1669 else 1749
1670 n_ret_value = retval2; 1750 /* For direct items, we only change the quota when deleting the last
1671 1751 ** item.
1672 1752 */
1673 /* For direct items, we only change the quota when deleting the last 1753 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1674 ** item. 1754 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1675 */ 1755 if (c_mode == M_DELETE &&
1676 p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); 1756 (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) ==
1677 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1757 1) {
1678 if (c_mode == M_DELETE && 1758 // FIXME: this is to keep 3.5 happy
1679 (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { 1759 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1680 // FIXME: this is to keep 3.5 happy 1760 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1681 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; 1761 } else {
1682 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ; 1762 quota_cut_bytes = 0;
1683 } else { 1763 }
1684 quota_cut_bytes = 0 ;
1685 } 1764 }
1686 }
1687#ifdef CONFIG_REISERFS_CHECK 1765#ifdef CONFIG_REISERFS_CHECK
1688 if (n_is_inode_locked) { 1766 if (n_is_inode_locked) {
1689 struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); 1767 struct item_head *le_ih =
1690 /* we are going to complete indirect2direct conversion. Make 1768 PATH_PITEM_HEAD(s_cut_balance.tb_path);
1691 sure, that we exactly remove last unformatted node pointer 1769 /* we are going to complete indirect2direct conversion. Make
1692 of the item */ 1770 sure, that we exactly remove last unformatted node pointer
1693 if (!is_indirect_le_ih (le_ih)) 1771 of the item */
1694 reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: " 1772 if (!is_indirect_le_ih(le_ih))
1695 "item must be indirect %h", le_ih); 1773 reiserfs_panic(p_s_sb,
1696 1774 "vs-5652: reiserfs_cut_from_item: "
1697 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) 1775 "item must be indirect %h", le_ih);
1698 reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: " 1776
1699 "completing indirect2direct conversion indirect item %h " 1777 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1700 "being deleted must be of 4 byte long", le_ih); 1778 reiserfs_panic(p_s_sb,
1701 1779 "vs-5653: reiserfs_cut_from_item: "
1702 if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { 1780 "completing indirect2direct conversion indirect item %h "
1703 reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: " 1781 "being deleted must be of 4 byte long",
1704 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", 1782 le_ih);
1705 le_ih, s_cut_balance.insert_size[0]); 1783
1784 if (c_mode == M_CUT
1785 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1786 reiserfs_panic(p_s_sb,
1787 "vs-5654: reiserfs_cut_from_item: "
1788 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1789 le_ih, s_cut_balance.insert_size[0]);
1790 }
1791 /* it would be useful to make sure, that right neighboring
1792 item is direct item of this file */
1706 } 1793 }
1707 /* it would be useful to make sure, that right neighboring
1708 item is direct item of this file */
1709 }
1710#endif 1794#endif
1711 1795
1712 do_balance(&s_cut_balance, NULL, NULL, c_mode); 1796 do_balance(&s_cut_balance, NULL, NULL, c_mode);
1713 if ( n_is_inode_locked ) { 1797 if (n_is_inode_locked) {
1714 /* we've done an indirect->direct conversion. when the data block 1798 /* we've done an indirect->direct conversion. when the data block
1715 ** was freed, it was removed from the list of blocks that must 1799 ** was freed, it was removed from the list of blocks that must
1716 ** be flushed before the transaction commits, make sure to 1800 ** be flushed before the transaction commits, make sure to
1717 ** unmap and invalidate it 1801 ** unmap and invalidate it
1718 */ 1802 */
1719 unmap_buffers(page, tail_pos); 1803 unmap_buffers(page, tail_pos);
1720 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ; 1804 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask;
1721 } 1805 }
1722#ifdef REISERQUOTA_DEBUG 1806#ifdef REISERQUOTA_DEBUG
1723 reiserfs_debug (p_s_inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); 1807 reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE,
1808 "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1809 quota_cut_bytes, p_s_inode->i_uid, '?');
1724#endif 1810#endif
1725 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); 1811 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1726 return n_ret_value; 1812 return n_ret_value;
1727} 1813}
1728 1814
1729static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode) 1815static void truncate_directory(struct reiserfs_transaction_handle *th,
1816 struct inode *inode)
1730{ 1817{
1731 BUG_ON (!th->t_trans_id); 1818 BUG_ON(!th->t_trans_id);
1732 if (inode->i_nlink) 1819 if (inode->i_nlink)
1733 reiserfs_warning (inode->i_sb, 1820 reiserfs_warning(inode->i_sb,
1734 "vs-5655: truncate_directory: link count != 0"); 1821 "vs-5655: truncate_directory: link count != 0");
1735 1822
1736 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET); 1823 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1737 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY); 1824 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1738 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); 1825 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1739 reiserfs_update_sd(th, inode) ; 1826 reiserfs_update_sd(th, inode);
1740 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET); 1827 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1741 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA); 1828 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1742} 1829}
1743 1830
1831/* Truncate file to the new size. Note, this must be called with a transaction
1832 already started */
1833int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new
1834 size */
1835 struct page *page, /* up to date for last block */
1836 int update_timestamps /* when it is called by
1837 file_release to convert
1838 the tail - no timestamps
1839 should be updated */
1840 )
1841{
1842 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1843 struct item_head *p_le_ih; /* Pointer to an item header. */
1844 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1845 loff_t n_file_size, /* Old file size. */
1846 n_new_file_size; /* New file size. */
1847 int n_deleted; /* Number of deleted or truncated bytes. */
1848 int retval;
1849 int err = 0;
1850
1851 BUG_ON(!th->t_trans_id);
1852 if (!
1853 (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode)
1854 || S_ISLNK(p_s_inode->i_mode)))
1855 return 0;
1856
1857 if (S_ISDIR(p_s_inode->i_mode)) {
1858 // deletion of directory - no need to update timestamps
1859 truncate_directory(th, p_s_inode);
1860 return 0;
1861 }
1744 1862
1863 /* Get new file size. */
1864 n_new_file_size = p_s_inode->i_size;
1745 1865
1866 // FIXME: note, that key type is unimportant here
1867 make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode),
1868 TYPE_DIRECT, 3);
1746 1869
1747/* Truncate file to the new size. Note, this must be called with a transaction 1870 retval =
1748 already started */ 1871 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1749int reiserfs_do_truncate (struct reiserfs_transaction_handle *th, 1872 &s_search_path);
1750 struct inode * p_s_inode, /* ->i_size contains new 1873 if (retval == IO_ERROR) {
1751 size */ 1874 reiserfs_warning(p_s_inode->i_sb,
1752 struct page *page, /* up to date for last block */ 1875 "vs-5657: reiserfs_do_truncate: "
1753 int update_timestamps /* when it is called by 1876 "i/o failure occurred trying to truncate %K",
1754 file_release to convert 1877 &s_item_key);
1755 the tail - no timestamps 1878 err = -EIO;
1756 should be updated */ 1879 goto out;
1757 ) { 1880 }
1758 INITIALIZE_PATH (s_search_path); /* Path to the current object item. */ 1881 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1759 struct item_head * p_le_ih; /* Pointer to an item header. */ 1882 reiserfs_warning(p_s_inode->i_sb,
1760 struct cpu_key s_item_key; /* Key to search for a previous file item. */ 1883 "PAP-5660: reiserfs_do_truncate: "
1761 loff_t n_file_size, /* Old file size. */ 1884 "wrong result %d of search for %K", retval,
1762 n_new_file_size;/* New file size. */ 1885 &s_item_key);
1763 int n_deleted; /* Number of deleted or truncated bytes. */ 1886
1764 int retval; 1887 err = -EIO;
1765 int err = 0; 1888 goto out;
1766 1889 }
1767 BUG_ON (!th->t_trans_id);
1768 if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
1769 return 0;
1770 1890
1771 if (S_ISDIR(p_s_inode->i_mode)) { 1891 s_search_path.pos_in_item--;
1772 // deletion of directory - no need to update timestamps 1892
1773 truncate_directory (th, p_s_inode); 1893 /* Get real file size (total length of all file items) */
1774 return 0; 1894 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1775 } 1895 if (is_statdata_le_ih(p_le_ih))
1776 1896 n_file_size = 0;
1777 /* Get new file size. */ 1897 else {
1778 n_new_file_size = p_s_inode->i_size; 1898 loff_t offset = le_ih_k_offset(p_le_ih);
1779 1899 int bytes =
1780 // FIXME: note, that key type is unimportant here 1900 op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize);
1781 make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3); 1901
1782 1902 /* this may mismatch with real file size: if last direct item
1783 retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path); 1903 had no padding zeros and last unformatted node had no free
1784 if (retval == IO_ERROR) { 1904 space, this file would have this file size */
1785 reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: " 1905 n_file_size = offset + bytes - 1;
1786 "i/o failure occurred trying to truncate %K", &s_item_key); 1906 }
1787 err = -EIO; 1907 /*
1788 goto out; 1908 * are we doing a full truncate or delete, if so
1789 } 1909 * kick in the reada code
1790 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { 1910 */
1791 reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: " 1911 if (n_new_file_size == 0)
1792 "wrong result %d of search for %K", retval, &s_item_key); 1912 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1793 1913
1794 err = -EIO; 1914 if (n_file_size == 0 || n_file_size < n_new_file_size) {
1795 goto out; 1915 goto update_and_out;
1796 }
1797
1798 s_search_path.pos_in_item --;
1799
1800 /* Get real file size (total length of all file items) */
1801 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1802 if ( is_statdata_le_ih (p_le_ih) )
1803 n_file_size = 0;
1804 else {
1805 loff_t offset = le_ih_k_offset (p_le_ih);
1806 int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize);
1807
1808 /* this may mismatch with real file size: if last direct item
1809 had no padding zeros and last unformatted node had no free
1810 space, this file would have this file size */
1811 n_file_size = offset + bytes - 1;
1812 }
1813 /*
1814 * are we doing a full truncate or delete, if so
1815 * kick in the reada code
1816 */
1817 if (n_new_file_size == 0)
1818 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1819
1820 if ( n_file_size == 0 || n_file_size < n_new_file_size ) {
1821 goto update_and_out ;
1822 }
1823
1824 /* Update key to search for the last file item. */
1825 set_cpu_key_k_offset (&s_item_key, n_file_size);
1826
1827 do {
1828 /* Cut or delete file item. */
1829 n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size);
1830 if (n_deleted < 0) {
1831 reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1832 reiserfs_check_path(&s_search_path) ;
1833 return 0;
1834 } 1916 }
1835 1917
1836 RFALSE( n_deleted > n_file_size, 1918 /* Update key to search for the last file item. */
1837 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", 1919 set_cpu_key_k_offset(&s_item_key, n_file_size);
1838 n_deleted, n_file_size, &s_item_key); 1920
1921 do {
1922 /* Cut or delete file item. */
1923 n_deleted =
1924 reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1925 p_s_inode, page, n_new_file_size);
1926 if (n_deleted < 0) {
1927 reiserfs_warning(p_s_inode->i_sb,
1928 "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1929 reiserfs_check_path(&s_search_path);
1930 return 0;
1931 }
1839 1932
1840 /* Change key to search the last file item. */ 1933 RFALSE(n_deleted > n_file_size,
1841 n_file_size -= n_deleted; 1934 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1935 n_deleted, n_file_size, &s_item_key);
1842 1936
1843 set_cpu_key_k_offset (&s_item_key, n_file_size); 1937 /* Change key to search the last file item. */
1938 n_file_size -= n_deleted;
1844 1939
1845 /* While there are bytes to truncate and previous file item is presented in the tree. */ 1940 set_cpu_key_k_offset(&s_item_key, n_file_size);
1846 1941
1847 /* 1942 /* While there are bytes to truncate and previous file item is presented in the tree. */
1848 ** This loop could take a really long time, and could log 1943
1849 ** many more blocks than a transaction can hold. So, we do a polite 1944 /*
1850 ** journal end here, and if the transaction needs ending, we make 1945 ** This loop could take a really long time, and could log
1851 ** sure the file is consistent before ending the current trans 1946 ** many more blocks than a transaction can hold. So, we do a polite
1852 ** and starting a new one 1947 ** journal end here, and if the transaction needs ending, we make
1853 */ 1948 ** sure the file is consistent before ending the current trans
1854 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 1949 ** and starting a new one
1855 int orig_len_alloc = th->t_blocks_allocated ; 1950 */
1856 decrement_counters_in_path(&s_search_path) ; 1951 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1857 1952 int orig_len_alloc = th->t_blocks_allocated;
1858 if (update_timestamps) { 1953 decrement_counters_in_path(&s_search_path);
1859 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; 1954
1860 } 1955 if (update_timestamps) {
1861 reiserfs_update_sd(th, p_s_inode) ; 1956 p_s_inode->i_mtime = p_s_inode->i_ctime =
1862 1957 CURRENT_TIME_SEC;
1863 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ; 1958 }
1864 if (err) 1959 reiserfs_update_sd(th, p_s_inode);
1865 goto out; 1960
1866 err = journal_begin (th, p_s_inode->i_sb, 1961 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc);
1867 JOURNAL_PER_BALANCE_CNT * 6); 1962 if (err)
1868 if (err) 1963 goto out;
1869 goto out; 1964 err = journal_begin(th, p_s_inode->i_sb,
1870 reiserfs_update_inode_transaction(p_s_inode) ; 1965 JOURNAL_PER_BALANCE_CNT * 6);
1966 if (err)
1967 goto out;
1968 reiserfs_update_inode_transaction(p_s_inode);
1969 }
1970 } while (n_file_size > ROUND_UP(n_new_file_size) &&
1971 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1972 &s_search_path) == POSITION_FOUND);
1973
1974 RFALSE(n_file_size > ROUND_UP(n_new_file_size),
1975 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1976 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1977
1978 update_and_out:
1979 if (update_timestamps) {
1980 // this is truncate, not file closing
1981 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1871 } 1982 }
1872 } while ( n_file_size > ROUND_UP (n_new_file_size) && 1983 reiserfs_update_sd(th, p_s_inode);
1873 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ;
1874
1875 RFALSE( n_file_size > ROUND_UP (n_new_file_size),
1876 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1877 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1878
1879update_and_out:
1880 if (update_timestamps) {
1881 // this is truncate, not file closing
1882 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1883 }
1884 reiserfs_update_sd (th, p_s_inode);
1885
1886out:
1887 pathrelse(&s_search_path) ;
1888 return err;
1889}
1890 1984
1985 out:
1986 pathrelse(&s_search_path);
1987 return err;
1988}
1891 1989
1892#ifdef CONFIG_REISERFS_CHECK 1990#ifdef CONFIG_REISERFS_CHECK
1893// this makes sure, that we __append__, not overwrite or add holes 1991// this makes sure, that we __append__, not overwrite or add holes
1894static void check_research_for_paste (struct path * path, 1992static void check_research_for_paste(struct path *path,
1895 const struct cpu_key * p_s_key) 1993 const struct cpu_key *p_s_key)
1896{ 1994{
1897 struct item_head * found_ih = get_ih (path); 1995 struct item_head *found_ih = get_ih(path);
1898 1996
1899 if (is_direct_le_ih (found_ih)) { 1997 if (is_direct_le_ih(found_ih)) {
1900 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != 1998 if (le_ih_k_offset(found_ih) +
1901 cpu_key_k_offset (p_s_key) || 1999 op_bytes_number(found_ih,
1902 op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path)) 2000 get_last_bh(path)->b_size) !=
1903 reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: " 2001 cpu_key_k_offset(p_s_key)
1904 "found direct item %h or position (%d) does not match to key %K", 2002 || op_bytes_number(found_ih,
1905 found_ih, pos_in_item (path), p_s_key); 2003 get_last_bh(path)->b_size) !=
1906 } 2004 pos_in_item(path))
1907 if (is_indirect_le_ih (found_ih)) { 2005 reiserfs_panic(NULL,
1908 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) || 2006 "PAP-5720: check_research_for_paste: "
1909 I_UNFM_NUM (found_ih) != pos_in_item (path) || 2007 "found direct item %h or position (%d) does not match to key %K",
1910 get_ih_free_space (found_ih) != 0) 2008 found_ih, pos_in_item(path), p_s_key);
1911 reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: " 2009 }
1912 "found indirect item (%h) or position (%d) does not match to key (%K)", 2010 if (is_indirect_le_ih(found_ih)) {
1913 found_ih, pos_in_item (path), p_s_key); 2011 if (le_ih_k_offset(found_ih) +
1914 } 2012 op_bytes_number(found_ih,
2013 get_last_bh(path)->b_size) !=
2014 cpu_key_k_offset(p_s_key)
2015 || I_UNFM_NUM(found_ih) != pos_in_item(path)
2016 || get_ih_free_space(found_ih) != 0)
2017 reiserfs_panic(NULL,
2018 "PAP-5730: check_research_for_paste: "
2019 "found indirect item (%h) or position (%d) does not match to key (%K)",
2020 found_ih, pos_in_item(path), p_s_key);
2021 }
1915} 2022}
1916#endif /* config reiserfs check */ 2023#endif /* config reiserfs check */
1917
1918 2024
1919/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ 2025/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1920int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, 2026int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path, /* Path to the pasted item. */
1921 struct path * p_s_search_path, /* Path to the pasted item. */ 2027 const struct cpu_key *p_s_key, /* Key to search for the needed item. */
1922 const struct cpu_key * p_s_key, /* Key to search for the needed item.*/ 2028 struct inode *inode, /* Inode item belongs to */
1923 struct inode * inode, /* Inode item belongs to */ 2029 const char *p_c_body, /* Pointer to the bytes to paste. */
1924 const char * p_c_body, /* Pointer to the bytes to paste. */ 2030 int n_pasted_size)
1925 int n_pasted_size) /* Size of pasted bytes. */ 2031{ /* Size of pasted bytes. */
1926{ 2032 struct tree_balance s_paste_balance;
1927 struct tree_balance s_paste_balance; 2033 int retval;
1928 int retval; 2034 int fs_gen;
1929 int fs_gen; 2035
2036 BUG_ON(!th->t_trans_id);
1930 2037
1931 BUG_ON (!th->t_trans_id); 2038 fs_gen = get_generation(inode->i_sb);
1932
1933 fs_gen = get_generation(inode->i_sb) ;
1934 2039
1935#ifdef REISERQUOTA_DEBUG 2040#ifdef REISERQUOTA_DEBUG
1936 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); 2041 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2042 "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2043 n_pasted_size, inode->i_uid,
2044 key2type(&(p_s_key->on_disk_key)));
1937#endif 2045#endif
1938 2046
1939 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { 2047 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1940 pathrelse(p_s_search_path); 2048 pathrelse(p_s_search_path);
1941 return -EDQUOT; 2049 return -EDQUOT;
1942 } 2050 }
1943 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); 2051 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
2052 n_pasted_size);
1944#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2053#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1945 s_paste_balance.key = p_s_key->on_disk_key; 2054 s_paste_balance.key = p_s_key->on_disk_key;
1946#endif 2055#endif
1947 2056
1948 /* DQUOT_* can schedule, must check before the fix_nodes */ 2057 /* DQUOT_* can schedule, must check before the fix_nodes */
1949 if (fs_changed(fs_gen, inode->i_sb)) { 2058 if (fs_changed(fs_gen, inode->i_sb)) {
1950 goto search_again; 2059 goto search_again;
1951 }
1952
1953 while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
1954REPEAT_SEARCH ) {
1955search_again:
1956 /* file system changed while we were in the fix_nodes */
1957 PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
1958 retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
1959 if (retval == IO_ERROR) {
1960 retval = -EIO ;
1961 goto error_out ;
1962 } 2060 }
1963 if (retval == POSITION_FOUND) { 2061
1964 reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); 2062 while ((retval =
1965 retval = -EEXIST ; 2063 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1966 goto error_out ; 2064 p_c_body)) == REPEAT_SEARCH) {
1967 } 2065 search_again:
1968 2066 /* file system changed while we were in the fix_nodes */
2067 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2068 retval =
2069 search_for_position_by_key(th->t_super, p_s_key,
2070 p_s_search_path);
2071 if (retval == IO_ERROR) {
2072 retval = -EIO;
2073 goto error_out;
2074 }
2075 if (retval == POSITION_FOUND) {
2076 reiserfs_warning(inode->i_sb,
2077 "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists",
2078 p_s_key);
2079 retval = -EEXIST;
2080 goto error_out;
2081 }
1969#ifdef CONFIG_REISERFS_CHECK 2082#ifdef CONFIG_REISERFS_CHECK
1970 check_research_for_paste (p_s_search_path, p_s_key); 2083 check_research_for_paste(p_s_search_path, p_s_key);
1971#endif 2084#endif
1972 } 2085 }
1973 2086
1974 /* Perform balancing after all resources are collected by fix_nodes, and 2087 /* Perform balancing after all resources are collected by fix_nodes, and
1975 accessing them will not risk triggering schedule. */ 2088 accessing them will not risk triggering schedule. */
1976 if ( retval == CARRY_ON ) { 2089 if (retval == CARRY_ON) {
1977 do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE); 2090 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE);
1978 return 0; 2091 return 0;
1979 } 2092 }
1980 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2093 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
1981error_out: 2094 error_out:
1982 /* this also releases the path */ 2095 /* this also releases the path */
1983 unfix_nodes(&s_paste_balance); 2096 unfix_nodes(&s_paste_balance);
1984#ifdef REISERQUOTA_DEBUG 2097#ifdef REISERQUOTA_DEBUG
1985 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); 2098 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2099 "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2100 n_pasted_size, inode->i_uid,
2101 key2type(&(p_s_key->on_disk_key)));
1986#endif 2102#endif
1987 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); 2103 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1988 return retval ; 2104 return retval;
1989} 2105}
1990 2106
1991
1992/* Insert new item into the buffer at the path. */ 2107/* Insert new item into the buffer at the path. */
1993int reiserfs_insert_item(struct reiserfs_transaction_handle *th, 2108int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the inserteded item. */
1994 struct path * p_s_path, /* Path to the inserteded item. */ 2109 const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */
1995 const struct cpu_key * key, 2110 struct inode *inode, const char *p_c_body)
1996 struct item_head * p_s_ih, /* Pointer to the item header to insert.*/ 2111{ /* Pointer to the bytes to insert. */
1997 struct inode * inode, 2112 struct tree_balance s_ins_balance;
1998 const char * p_c_body) /* Pointer to the bytes to insert. */ 2113 int retval;
1999{ 2114 int fs_gen = 0;
2000 struct tree_balance s_ins_balance; 2115 int quota_bytes = 0;
2001 int retval; 2116
2002 int fs_gen = 0 ; 2117 BUG_ON(!th->t_trans_id);
2003 int quota_bytes = 0 ; 2118
2004 2119 if (inode) { /* Do we count quotas for item? */
2005 BUG_ON (!th->t_trans_id); 2120 fs_gen = get_generation(inode->i_sb);
2006 2121 quota_bytes = ih_item_len(p_s_ih);
2007 if (inode) { /* Do we count quotas for item? */ 2122
2008 fs_gen = get_generation(inode->i_sb); 2123 /* hack so the quota code doesn't have to guess if the file has
2009 quota_bytes = ih_item_len(p_s_ih); 2124 ** a tail, links are always tails, so there's no guessing needed
2010 2125 */
2011 /* hack so the quota code doesn't have to guess if the file has 2126 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2012 ** a tail, links are always tails, so there's no guessing needed 2127 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2013 */ 2128 }
2014 if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2015 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
2016 }
2017#ifdef REISERQUOTA_DEBUG 2129#ifdef REISERQUOTA_DEBUG
2018 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); 2130 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2131 "reiserquota insert_item(): allocating %u id=%u type=%c",
2132 quota_bytes, inode->i_uid, head2type(p_s_ih));
2019#endif 2133#endif
2020 /* We can't dirty inode here. It would be immediately written but 2134 /* We can't dirty inode here. It would be immediately written but
2021 * appropriate stat item isn't inserted yet... */ 2135 * appropriate stat item isn't inserted yet... */
2022 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) { 2136 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2023 pathrelse(p_s_path); 2137 pathrelse(p_s_path);
2024 return -EDQUOT; 2138 return -EDQUOT;
2139 }
2025 } 2140 }
2026 } 2141 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
2027 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); 2142 IH_SIZE + ih_item_len(p_s_ih));
2028#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2143#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2029 s_ins_balance.key = key->on_disk_key; 2144 s_ins_balance.key = key->on_disk_key;
2030#endif 2145#endif
2031 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ 2146 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2032 if (inode && fs_changed(fs_gen, inode->i_sb)) { 2147 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2033 goto search_again; 2148 goto search_again;
2034 }
2035
2036 while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
2037search_again:
2038 /* file system changed while we were in the fix_nodes */
2039 PROC_INFO_INC( th -> t_super, insert_item_restarted );
2040 retval = search_item (th->t_super, key, p_s_path);
2041 if (retval == IO_ERROR) {
2042 retval = -EIO;
2043 goto error_out ;
2044 } 2149 }
2045 if (retval == ITEM_FOUND) { 2150
2046 reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: " 2151 while ((retval =
2047 "key %K already exists in the tree", key); 2152 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih,
2048 retval = -EEXIST ; 2153 p_c_body)) == REPEAT_SEARCH) {
2049 goto error_out; 2154 search_again:
2155 /* file system changed while we were in the fix_nodes */
2156 PROC_INFO_INC(th->t_super, insert_item_restarted);
2157 retval = search_item(th->t_super, key, p_s_path);
2158 if (retval == IO_ERROR) {
2159 retval = -EIO;
2160 goto error_out;
2161 }
2162 if (retval == ITEM_FOUND) {
2163 reiserfs_warning(th->t_super,
2164 "PAP-5760: reiserfs_insert_item: "
2165 "key %K already exists in the tree",
2166 key);
2167 retval = -EEXIST;
2168 goto error_out;
2169 }
2050 } 2170 }
2051 }
2052 2171
2053 /* make balancing after all resources will be collected at a time */ 2172 /* make balancing after all resources will be collected at a time */
2054 if ( retval == CARRY_ON ) { 2173 if (retval == CARRY_ON) {
2055 do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT); 2174 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2056 return 0; 2175 return 0;
2057 } 2176 }
2058 2177
2059 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2178 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2060error_out: 2179 error_out:
2061 /* also releases the path */ 2180 /* also releases the path */
2062 unfix_nodes(&s_ins_balance); 2181 unfix_nodes(&s_ins_balance);
2063#ifdef REISERQUOTA_DEBUG 2182#ifdef REISERQUOTA_DEBUG
2064 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); 2183 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2184 "reiserquota insert_item(): freeing %u id=%u type=%c",
2185 quota_bytes, inode->i_uid, head2type(p_s_ih));
2065#endif 2186#endif
2066 if (inode) 2187 if (inode)
2067 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ; 2188 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2068 return retval; 2189 return retval;
2069} 2190}
2070
2071
2072
2073
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b35b87744983..6951c35755be 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -35,83 +35,81 @@ static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING;
35static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; 35static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING;
36static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; 36static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING;
37 37
38int is_reiserfs_3_5 (struct reiserfs_super_block * rs) 38int is_reiserfs_3_5(struct reiserfs_super_block *rs)
39{ 39{
40 return !strncmp (rs->s_v1.s_magic, reiserfs_3_5_magic_string, 40 return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string,
41 strlen (reiserfs_3_5_magic_string)); 41 strlen(reiserfs_3_5_magic_string));
42} 42}
43 43
44 44int is_reiserfs_3_6(struct reiserfs_super_block *rs)
45int is_reiserfs_3_6 (struct reiserfs_super_block * rs)
46{ 45{
47 return !strncmp (rs->s_v1.s_magic, reiserfs_3_6_magic_string, 46 return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string,
48 strlen (reiserfs_3_6_magic_string)); 47 strlen(reiserfs_3_6_magic_string));
49} 48}
50 49
51 50int is_reiserfs_jr(struct reiserfs_super_block *rs)
52int is_reiserfs_jr (struct reiserfs_super_block * rs)
53{ 51{
54 return !strncmp (rs->s_v1.s_magic, reiserfs_jr_magic_string, 52 return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string,
55 strlen (reiserfs_jr_magic_string)); 53 strlen(reiserfs_jr_magic_string));
56} 54}
57 55
58 56static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
59static int is_any_reiserfs_magic_string (struct reiserfs_super_block * rs)
60{ 57{
61 return (is_reiserfs_3_5 (rs) || is_reiserfs_3_6 (rs) || 58 return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) ||
62 is_reiserfs_jr (rs)); 59 is_reiserfs_jr(rs));
63} 60}
64 61
65static int reiserfs_remount (struct super_block * s, int * flags, char * data); 62static int reiserfs_remount(struct super_block *s, int *flags, char *data);
66static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf); 63static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf);
67 64
68static int reiserfs_sync_fs (struct super_block * s, int wait) 65static int reiserfs_sync_fs(struct super_block *s, int wait)
69{ 66{
70 if (!(s->s_flags & MS_RDONLY)) { 67 if (!(s->s_flags & MS_RDONLY)) {
71 struct reiserfs_transaction_handle th; 68 struct reiserfs_transaction_handle th;
72 reiserfs_write_lock(s); 69 reiserfs_write_lock(s);
73 if (!journal_begin(&th, s, 1)) 70 if (!journal_begin(&th, s, 1))
74 if (!journal_end_sync(&th, s, 1)) 71 if (!journal_end_sync(&th, s, 1))
75 reiserfs_flush_old_commits(s); 72 reiserfs_flush_old_commits(s);
76 s->s_dirt = 0; /* Even if it's not true. 73 s->s_dirt = 0; /* Even if it's not true.
77 * We'll loop forever in sync_supers otherwise */ 74 * We'll loop forever in sync_supers otherwise */
78 reiserfs_write_unlock(s); 75 reiserfs_write_unlock(s);
79 } else { 76 } else {
80 s->s_dirt = 0; 77 s->s_dirt = 0;
81 } 78 }
82 return 0; 79 return 0;
83} 80}
84 81
85static void reiserfs_write_super(struct super_block *s) 82static void reiserfs_write_super(struct super_block *s)
86{ 83{
87 reiserfs_sync_fs(s, 1); 84 reiserfs_sync_fs(s, 1);
88} 85}
89 86
90static void reiserfs_write_super_lockfs (struct super_block * s) 87static void reiserfs_write_super_lockfs(struct super_block *s)
91{ 88{
92 struct reiserfs_transaction_handle th ; 89 struct reiserfs_transaction_handle th;
93 reiserfs_write_lock(s); 90 reiserfs_write_lock(s);
94 if (!(s->s_flags & MS_RDONLY)) { 91 if (!(s->s_flags & MS_RDONLY)) {
95 int err = journal_begin(&th, s, 1) ; 92 int err = journal_begin(&th, s, 1);
96 if (err) { 93 if (err) {
97 reiserfs_block_writes(&th) ; 94 reiserfs_block_writes(&th);
98 } else { 95 } else {
99 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 96 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
100 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 97 1);
101 reiserfs_block_writes(&th) ; 98 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
102 journal_end_sync(&th, s, 1) ; 99 reiserfs_block_writes(&th);
103 } 100 journal_end_sync(&th, s, 1);
104 } 101 }
105 s->s_dirt = 0; 102 }
106 reiserfs_write_unlock(s); 103 s->s_dirt = 0;
104 reiserfs_write_unlock(s);
107} 105}
108 106
109static void reiserfs_unlockfs(struct super_block *s) { 107static void reiserfs_unlockfs(struct super_block *s)
110 reiserfs_allow_writes(s) ; 108{
109 reiserfs_allow_writes(s);
111} 110}
112 111
113extern const struct in_core_key MAX_IN_CORE_KEY; 112extern const struct in_core_key MAX_IN_CORE_KEY;
114
115 113
116/* this is used to delete "save link" when there are no items of a 114/* this is used to delete "save link" when there are no items of a
117 file it points to. It can either happen if unlink is completed but 115 file it points to. It can either happen if unlink is completed but
@@ -120,364 +118,387 @@ extern const struct in_core_key MAX_IN_CORE_KEY;
120 protecting unlink is bigger that a key lf "save link" which 118 protecting unlink is bigger that a key lf "save link" which
121 protects truncate), so there left no items to make truncate 119 protects truncate), so there left no items to make truncate
122 completion on */ 120 completion on */
123static int remove_save_link_only (struct super_block * s, struct reiserfs_key * key, int oid_free) 121static int remove_save_link_only(struct super_block *s,
122 struct reiserfs_key *key, int oid_free)
124{ 123{
125 struct reiserfs_transaction_handle th; 124 struct reiserfs_transaction_handle th;
126 int err; 125 int err;
127 126
128 /* we are going to do one balancing */ 127 /* we are going to do one balancing */
129 err = journal_begin (&th, s, JOURNAL_PER_BALANCE_CNT); 128 err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT);
130 if (err) 129 if (err)
131 return err; 130 return err;
132 131
133 reiserfs_delete_solid_item (&th, NULL, key); 132 reiserfs_delete_solid_item(&th, NULL, key);
134 if (oid_free) 133 if (oid_free)
135 /* removals are protected by direct items */ 134 /* removals are protected by direct items */
136 reiserfs_release_objectid (&th, le32_to_cpu (key->k_objectid)); 135 reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid));
137 136
138 return journal_end (&th, s, JOURNAL_PER_BALANCE_CNT); 137 return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT);
139} 138}
140 139
141#ifdef CONFIG_QUOTA 140#ifdef CONFIG_QUOTA
142static int reiserfs_quota_on_mount(struct super_block *, int); 141static int reiserfs_quota_on_mount(struct super_block *, int);
143#endif 142#endif
144 143
145/* look for uncompleted unlinks and truncates and complete them */ 144/* look for uncompleted unlinks and truncates and complete them */
146static int finish_unfinished (struct super_block * s) 145static int finish_unfinished(struct super_block *s)
147{ 146{
148 INITIALIZE_PATH (path); 147 INITIALIZE_PATH(path);
149 struct cpu_key max_cpu_key, obj_key; 148 struct cpu_key max_cpu_key, obj_key;
150 struct reiserfs_key save_link_key; 149 struct reiserfs_key save_link_key;
151 int retval = 0; 150 int retval = 0;
152 struct item_head * ih; 151 struct item_head *ih;
153 struct buffer_head * bh; 152 struct buffer_head *bh;
154 int item_pos; 153 int item_pos;
155 char * item; 154 char *item;
156 int done; 155 int done;
157 struct inode * inode; 156 struct inode *inode;
158 int truncate; 157 int truncate;
159#ifdef CONFIG_QUOTA 158#ifdef CONFIG_QUOTA
160 int i; 159 int i;
161 int ms_active_set; 160 int ms_active_set;
162#endif 161#endif
163 162
164 163 /* compose key to look for "save" links */
165 /* compose key to look for "save" links */ 164 max_cpu_key.version = KEY_FORMAT_3_5;
166 max_cpu_key.version = KEY_FORMAT_3_5; 165 max_cpu_key.on_disk_key.k_dir_id = ~0U;
167 max_cpu_key.on_disk_key.k_dir_id = ~0U; 166 max_cpu_key.on_disk_key.k_objectid = ~0U;
168 max_cpu_key.on_disk_key.k_objectid = ~0U; 167 set_cpu_key_k_offset(&max_cpu_key, ~0U);
169 set_cpu_key_k_offset (&max_cpu_key, ~0U); 168 max_cpu_key.key_length = 3;
170 max_cpu_key.key_length = 3;
171 169
172#ifdef CONFIG_QUOTA 170#ifdef CONFIG_QUOTA
173 /* Needed for iput() to work correctly and not trash data */ 171 /* Needed for iput() to work correctly and not trash data */
174 if (s->s_flags & MS_ACTIVE) { 172 if (s->s_flags & MS_ACTIVE) {
175 ms_active_set = 0; 173 ms_active_set = 0;
176 } else { 174 } else {
177 ms_active_set = 1; 175 ms_active_set = 1;
178 s->s_flags |= MS_ACTIVE; 176 s->s_flags |= MS_ACTIVE;
179 } 177 }
180 /* Turn on quotas so that they are updated correctly */ 178 /* Turn on quotas so that they are updated correctly */
181 for (i = 0; i < MAXQUOTAS; i++) { 179 for (i = 0; i < MAXQUOTAS; i++) {
182 if (REISERFS_SB(s)->s_qf_names[i]) { 180 if (REISERFS_SB(s)->s_qf_names[i]) {
183 int ret = reiserfs_quota_on_mount(s, i); 181 int ret = reiserfs_quota_on_mount(s, i);
184 if (ret < 0) 182 if (ret < 0)
185 reiserfs_warning(s, "reiserfs: cannot turn on journalled quota: error %d", ret); 183 reiserfs_warning(s,
186 } 184 "reiserfs: cannot turn on journalled quota: error %d",
187 } 185 ret);
186 }
187 }
188#endif 188#endif
189 189
190 done = 0; 190 done = 0;
191 REISERFS_SB(s)->s_is_unlinked_ok = 1; 191 REISERFS_SB(s)->s_is_unlinked_ok = 1;
192 while (!retval) { 192 while (!retval) {
193 retval = search_item (s, &max_cpu_key, &path); 193 retval = search_item(s, &max_cpu_key, &path);
194 if (retval != ITEM_NOT_FOUND) { 194 if (retval != ITEM_NOT_FOUND) {
195 reiserfs_warning (s, "vs-2140: finish_unfinished: search_by_key returned %d", 195 reiserfs_warning(s,
196 retval); 196 "vs-2140: finish_unfinished: search_by_key returned %d",
197 break; 197 retval);
198 } 198 break;
199 199 }
200 bh = get_last_bh (&path); 200
201 item_pos = get_item_pos (&path); 201 bh = get_last_bh(&path);
202 if (item_pos != B_NR_ITEMS (bh)) { 202 item_pos = get_item_pos(&path);
203 reiserfs_warning (s, "vs-2060: finish_unfinished: wrong position found"); 203 if (item_pos != B_NR_ITEMS(bh)) {
204 break; 204 reiserfs_warning(s,
205 } 205 "vs-2060: finish_unfinished: wrong position found");
206 item_pos --; 206 break;
207 ih = B_N_PITEM_HEAD (bh, item_pos); 207 }
208 208 item_pos--;
209 if (le32_to_cpu (ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) 209 ih = B_N_PITEM_HEAD(bh, item_pos);
210 /* there are no "save" links anymore */ 210
211 break; 211 if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID)
212 212 /* there are no "save" links anymore */
213 save_link_key = ih->ih_key; 213 break;
214 if (is_indirect_le_ih (ih)) 214
215 truncate = 1; 215 save_link_key = ih->ih_key;
216 else 216 if (is_indirect_le_ih(ih))
217 truncate = 0; 217 truncate = 1;
218 218 else
219 /* reiserfs_iget needs k_dirid and k_objectid only */ 219 truncate = 0;
220 item = B_I_PITEM (bh, ih); 220
221 obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item); 221 /* reiserfs_iget needs k_dirid and k_objectid only */
222 obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid); 222 item = B_I_PITEM(bh, ih);
223 obj_key.on_disk_key.k_offset = 0; 223 obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item);
224 obj_key.on_disk_key.k_type = 0; 224 obj_key.on_disk_key.k_objectid =
225 225 le32_to_cpu(ih->ih_key.k_objectid);
226 pathrelse (&path); 226 obj_key.on_disk_key.k_offset = 0;
227 227 obj_key.on_disk_key.k_type = 0;
228 inode = reiserfs_iget (s, &obj_key); 228
229 if (!inode) { 229 pathrelse(&path);
230 /* the unlink almost completed, it just did not manage to remove 230
231 "save" link and release objectid */ 231 inode = reiserfs_iget(s, &obj_key);
232 reiserfs_warning (s, "vs-2180: finish_unfinished: iget failed for %K", 232 if (!inode) {
233 &obj_key); 233 /* the unlink almost completed, it just did not manage to remove
234 retval = remove_save_link_only (s, &save_link_key, 1); 234 "save" link and release objectid */
235 continue; 235 reiserfs_warning(s,
236 } 236 "vs-2180: finish_unfinished: iget failed for %K",
237 237 &obj_key);
238 if (!truncate && inode->i_nlink) { 238 retval = remove_save_link_only(s, &save_link_key, 1);
239 /* file is not unlinked */ 239 continue;
240 reiserfs_warning (s, "vs-2185: finish_unfinished: file %K is not unlinked", 240 }
241 &obj_key); 241
242 retval = remove_save_link_only (s, &save_link_key, 0); 242 if (!truncate && inode->i_nlink) {
243 continue; 243 /* file is not unlinked */
244 } 244 reiserfs_warning(s,
245 DQUOT_INIT(inode); 245 "vs-2185: finish_unfinished: file %K is not unlinked",
246 246 &obj_key);
247 if (truncate && S_ISDIR (inode->i_mode) ) { 247 retval = remove_save_link_only(s, &save_link_key, 0);
248 /* We got a truncate request for a dir which is impossible. 248 continue;
249 The only imaginable way is to execute unfinished truncate request 249 }
250 then boot into old kernel, remove the file and create dir with 250 DQUOT_INIT(inode);
251 the same key. */ 251
252 reiserfs_warning(s, "green-2101: impossible truncate on a directory %k. Please report", INODE_PKEY (inode)); 252 if (truncate && S_ISDIR(inode->i_mode)) {
253 retval = remove_save_link_only (s, &save_link_key, 0); 253 /* We got a truncate request for a dir which is impossible.
254 truncate = 0; 254 The only imaginable way is to execute unfinished truncate request
255 iput (inode); 255 then boot into old kernel, remove the file and create dir with
256 continue; 256 the same key. */
257 } 257 reiserfs_warning(s,
258 258 "green-2101: impossible truncate on a directory %k. Please report",
259 if (truncate) { 259 INODE_PKEY(inode));
260 REISERFS_I(inode) -> i_flags |= i_link_saved_truncate_mask; 260 retval = remove_save_link_only(s, &save_link_key, 0);
261 /* not completed truncate found. New size was committed together 261 truncate = 0;
262 with "save" link */ 262 iput(inode);
263 reiserfs_info (s, "Truncating %k to %Ld ..", 263 continue;
264 INODE_PKEY (inode), inode->i_size); 264 }
265 reiserfs_truncate_file (inode, 0/*don't update modification time*/); 265
266 retval = remove_save_link (inode, truncate); 266 if (truncate) {
267 } else { 267 REISERFS_I(inode)->i_flags |=
268 REISERFS_I(inode) -> i_flags |= i_link_saved_unlink_mask; 268 i_link_saved_truncate_mask;
269 /* not completed unlink (rmdir) found */ 269 /* not completed truncate found. New size was committed together
270 reiserfs_info (s, "Removing %k..", INODE_PKEY (inode)); 270 with "save" link */
271 /* removal gets completed in iput */ 271 reiserfs_info(s, "Truncating %k to %Ld ..",
272 retval = 0; 272 INODE_PKEY(inode), inode->i_size);
273 } 273 reiserfs_truncate_file(inode,
274 274 0
275 iput (inode); 275 /*don't update modification time */
276 printk ("done\n"); 276 );
277 done ++; 277 retval = remove_save_link(inode, truncate);
278 } 278 } else {
279 REISERFS_SB(s)->s_is_unlinked_ok = 0; 279 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
280 280 /* not completed unlink (rmdir) found */
281 reiserfs_info(s, "Removing %k..", INODE_PKEY(inode));
282 /* removal gets completed in iput */
283 retval = 0;
284 }
285
286 iput(inode);
287 printk("done\n");
288 done++;
289 }
290 REISERFS_SB(s)->s_is_unlinked_ok = 0;
291
281#ifdef CONFIG_QUOTA 292#ifdef CONFIG_QUOTA
282 /* Turn quotas off */ 293 /* Turn quotas off */
283 for (i = 0; i < MAXQUOTAS; i++) { 294 for (i = 0; i < MAXQUOTAS; i++) {
284 if (sb_dqopt(s)->files[i]) 295 if (sb_dqopt(s)->files[i])
285 vfs_quota_off_mount(s, i); 296 vfs_quota_off_mount(s, i);
286 } 297 }
287 if (ms_active_set) 298 if (ms_active_set)
288 /* Restore the flag back */ 299 /* Restore the flag back */
289 s->s_flags &= ~MS_ACTIVE; 300 s->s_flags &= ~MS_ACTIVE;
290#endif 301#endif
291 pathrelse (&path); 302 pathrelse(&path);
292 if (done) 303 if (done)
293 reiserfs_info (s, "There were %d uncompleted unlinks/truncates. " 304 reiserfs_info(s, "There were %d uncompleted unlinks/truncates. "
294 "Completed\n", done); 305 "Completed\n", done);
295 return retval; 306 return retval;
296} 307}
297 308
298/* to protect file being unlinked from getting lost we "safe" link files 309/* to protect file being unlinked from getting lost we "safe" link files
299 being unlinked. This link will be deleted in the same transaction with last 310 being unlinked. This link will be deleted in the same transaction with last
300 item of file. mounting the filesytem we scan all these links and remove 311 item of file. mounting the filesytem we scan all these links and remove
301 files which almost got lost */ 312 files which almost got lost */
302void add_save_link (struct reiserfs_transaction_handle * th, 313void add_save_link(struct reiserfs_transaction_handle *th,
303 struct inode * inode, int truncate) 314 struct inode *inode, int truncate)
304{ 315{
305 INITIALIZE_PATH (path); 316 INITIALIZE_PATH(path);
306 int retval; 317 int retval;
307 struct cpu_key key; 318 struct cpu_key key;
308 struct item_head ih; 319 struct item_head ih;
309 __le32 link; 320 __le32 link;
310 321
311 BUG_ON (!th->t_trans_id); 322 BUG_ON(!th->t_trans_id);
312 323
313 /* file can only get one "save link" of each kind */ 324 /* file can only get one "save link" of each kind */
314 RFALSE( truncate && 325 RFALSE(truncate &&
315 ( REISERFS_I(inode) -> i_flags & i_link_saved_truncate_mask ), 326 (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask),
316 "saved link already exists for truncated inode %lx", 327 "saved link already exists for truncated inode %lx",
317 ( long ) inode -> i_ino ); 328 (long)inode->i_ino);
318 RFALSE( !truncate && 329 RFALSE(!truncate &&
319 ( REISERFS_I(inode) -> i_flags & i_link_saved_unlink_mask ), 330 (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask),
320 "saved link already exists for unlinked inode %lx", 331 "saved link already exists for unlinked inode %lx",
321 ( long ) inode -> i_ino ); 332 (long)inode->i_ino);
322 333
323 /* setup key of "save" link */ 334 /* setup key of "save" link */
324 key.version = KEY_FORMAT_3_5; 335 key.version = KEY_FORMAT_3_5;
325 key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; 336 key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID;
326 key.on_disk_key.k_objectid = inode->i_ino; 337 key.on_disk_key.k_objectid = inode->i_ino;
327 if (!truncate) { 338 if (!truncate) {
328 /* unlink, rmdir, rename */ 339 /* unlink, rmdir, rename */
329 set_cpu_key_k_offset (&key, 1 + inode->i_sb->s_blocksize); 340 set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize);
330 set_cpu_key_k_type (&key, TYPE_DIRECT); 341 set_cpu_key_k_type(&key, TYPE_DIRECT);
331 342
332 /* item head of "safe" link */ 343 /* item head of "safe" link */
333 make_le_item_head (&ih, &key, key.version, 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, 344 make_le_item_head(&ih, &key, key.version,
334 4/*length*/, 0xffff/*free space*/); 345 1 + inode->i_sb->s_blocksize, TYPE_DIRECT,
335 } else { 346 4 /*length */ , 0xffff /*free space */ );
336 /* truncate */ 347 } else {
337 if (S_ISDIR (inode->i_mode)) 348 /* truncate */
338 reiserfs_warning(inode->i_sb, "green-2102: Adding a truncate savelink for a directory %k! Please report", INODE_PKEY(inode)); 349 if (S_ISDIR(inode->i_mode))
339 set_cpu_key_k_offset (&key, 1); 350 reiserfs_warning(inode->i_sb,
340 set_cpu_key_k_type (&key, TYPE_INDIRECT); 351 "green-2102: Adding a truncate savelink for a directory %k! Please report",
341 352 INODE_PKEY(inode));
342 /* item head of "safe" link */ 353 set_cpu_key_k_offset(&key, 1);
343 make_le_item_head (&ih, &key, key.version, 1, TYPE_INDIRECT, 354 set_cpu_key_k_type(&key, TYPE_INDIRECT);
344 4/*length*/, 0/*free space*/); 355
345 } 356 /* item head of "safe" link */
346 key.key_length = 3; 357 make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT,
347 358 4 /*length */ , 0 /*free space */ );
348 /* look for its place in the tree */ 359 }
349 retval = search_item (inode->i_sb, &key, &path); 360 key.key_length = 3;
350 if (retval != ITEM_NOT_FOUND) { 361
351 if ( retval != -ENOSPC ) 362 /* look for its place in the tree */
352 reiserfs_warning (inode->i_sb, "vs-2100: add_save_link:" 363 retval = search_item(inode->i_sb, &key, &path);
353 "search_by_key (%K) returned %d", &key, retval); 364 if (retval != ITEM_NOT_FOUND) {
354 pathrelse (&path); 365 if (retval != -ENOSPC)
355 return; 366 reiserfs_warning(inode->i_sb, "vs-2100: add_save_link:"
356 } 367 "search_by_key (%K) returned %d", &key,
357 368 retval);
358 /* body of "save" link */ 369 pathrelse(&path);
359 link = INODE_PKEY (inode)->k_dir_id; 370 return;
360 371 }
361 /* put "save" link inot tree, don't charge quota to anyone */
362 retval = reiserfs_insert_item (th, &path, &key, &ih, NULL, (char *)&link);
363 if (retval) {
364 if (retval != -ENOSPC)
365 reiserfs_warning (inode->i_sb, "vs-2120: add_save_link: insert_item returned %d",
366 retval);
367 } else {
368 if( truncate )
369 REISERFS_I(inode) -> i_flags |= i_link_saved_truncate_mask;
370 else
371 REISERFS_I(inode) -> i_flags |= i_link_saved_unlink_mask;
372 }
373}
374 372
373 /* body of "save" link */
374 link = INODE_PKEY(inode)->k_dir_id;
375
376 /* put "save" link inot tree, don't charge quota to anyone */
377 retval =
378 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
379 if (retval) {
380 if (retval != -ENOSPC)
381 reiserfs_warning(inode->i_sb,
382 "vs-2120: add_save_link: insert_item returned %d",
383 retval);
384 } else {
385 if (truncate)
386 REISERFS_I(inode)->i_flags |=
387 i_link_saved_truncate_mask;
388 else
389 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
390 }
391}
375 392
376/* this opens transaction unlike add_save_link */ 393/* this opens transaction unlike add_save_link */
377int remove_save_link (struct inode * inode, int truncate) 394int remove_save_link(struct inode *inode, int truncate)
378{ 395{
379 struct reiserfs_transaction_handle th; 396 struct reiserfs_transaction_handle th;
380 struct reiserfs_key key; 397 struct reiserfs_key key;
381 int err; 398 int err;
382 399
383 /* we are going to do one balancing only */ 400 /* we are going to do one balancing only */
384 err = journal_begin (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); 401 err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
385 if (err) 402 if (err)
386 return err; 403 return err;
387 404
388 /* setup key of "save" link */ 405 /* setup key of "save" link */
389 key.k_dir_id = cpu_to_le32 (MAX_KEY_OBJECTID); 406 key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID);
390 key.k_objectid = INODE_PKEY (inode)->k_objectid; 407 key.k_objectid = INODE_PKEY(inode)->k_objectid;
391 if (!truncate) { 408 if (!truncate) {
392 /* unlink, rmdir, rename */ 409 /* unlink, rmdir, rename */
393 set_le_key_k_offset (KEY_FORMAT_3_5, &key, 410 set_le_key_k_offset(KEY_FORMAT_3_5, &key,
394 1 + inode->i_sb->s_blocksize); 411 1 + inode->i_sb->s_blocksize);
395 set_le_key_k_type (KEY_FORMAT_3_5, &key, TYPE_DIRECT); 412 set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT);
396 } else { 413 } else {
397 /* truncate */ 414 /* truncate */
398 set_le_key_k_offset (KEY_FORMAT_3_5, &key, 1); 415 set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1);
399 set_le_key_k_type (KEY_FORMAT_3_5, &key, TYPE_INDIRECT); 416 set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT);
400 } 417 }
401
402 if( ( truncate &&
403 ( REISERFS_I(inode) -> i_flags & i_link_saved_truncate_mask ) ) ||
404 ( !truncate &&
405 ( REISERFS_I(inode) -> i_flags & i_link_saved_unlink_mask ) ) )
406 /* don't take quota bytes from anywhere */
407 reiserfs_delete_solid_item (&th, NULL, &key);
408 if (!truncate) {
409 reiserfs_release_objectid (&th, inode->i_ino);
410 REISERFS_I(inode) -> i_flags &= ~i_link_saved_unlink_mask;
411 } else
412 REISERFS_I(inode) -> i_flags &= ~i_link_saved_truncate_mask;
413
414 return journal_end (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
415}
416 418
419 if ((truncate &&
420 (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) ||
421 (!truncate &&
422 (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask)))
423 /* don't take quota bytes from anywhere */
424 reiserfs_delete_solid_item(&th, NULL, &key);
425 if (!truncate) {
426 reiserfs_release_objectid(&th, inode->i_ino);
427 REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask;
428 } else
429 REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask;
430
431 return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
432}
417 433
418static void reiserfs_put_super (struct super_block * s) 434static void reiserfs_put_super(struct super_block *s)
419{ 435{
420 int i; 436 int i;
421 struct reiserfs_transaction_handle th ; 437 struct reiserfs_transaction_handle th;
422 th.t_trans_id = 0; 438 th.t_trans_id = 0;
423 439
424 if (REISERFS_SB(s)->xattr_root) { 440 if (REISERFS_SB(s)->xattr_root) {
425 d_invalidate (REISERFS_SB(s)->xattr_root); 441 d_invalidate(REISERFS_SB(s)->xattr_root);
426 dput (REISERFS_SB(s)->xattr_root); 442 dput(REISERFS_SB(s)->xattr_root);
427 } 443 }
428 444
429 if (REISERFS_SB(s)->priv_root) { 445 if (REISERFS_SB(s)->priv_root) {
430 d_invalidate (REISERFS_SB(s)->priv_root); 446 d_invalidate(REISERFS_SB(s)->priv_root);
431 dput (REISERFS_SB(s)->priv_root); 447 dput(REISERFS_SB(s)->priv_root);
432 } 448 }
433 449
434 /* change file system state to current state if it was mounted with read-write permissions */ 450 /* change file system state to current state if it was mounted with read-write permissions */
435 if (!(s->s_flags & MS_RDONLY)) { 451 if (!(s->s_flags & MS_RDONLY)) {
436 if (!journal_begin(&th, s, 10)) { 452 if (!journal_begin(&th, s, 10)) {
437 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 453 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
438 set_sb_umount_state( SB_DISK_SUPER_BLOCK(s), REISERFS_SB(s)->s_mount_state ); 454 1);
439 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 455 set_sb_umount_state(SB_DISK_SUPER_BLOCK(s),
440 } 456 REISERFS_SB(s)->s_mount_state);
441 } 457 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
442 458 }
443 /* note, journal_release checks for readonly mount, and can decide not 459 }
444 ** to do a journal_end 460
445 */ 461 /* note, journal_release checks for readonly mount, and can decide not
446 journal_release(&th, s) ; 462 ** to do a journal_end
447 463 */
448 for (i = 0; i < SB_BMAP_NR (s); i ++) 464 journal_release(&th, s);
449 brelse (SB_AP_BITMAP (s)[i].bh); 465
450 466 for (i = 0; i < SB_BMAP_NR(s); i++)
451 vfree (SB_AP_BITMAP (s)); 467 brelse(SB_AP_BITMAP(s)[i].bh);
452 468
453 brelse (SB_BUFFER_WITH_SB (s)); 469 vfree(SB_AP_BITMAP(s));
454 470
455 print_statistics (s); 471 brelse(SB_BUFFER_WITH_SB(s));
456 472
457 if (REISERFS_SB(s)->s_kmallocs != 0) { 473 print_statistics(s);
458 reiserfs_warning (s, "vs-2004: reiserfs_put_super: allocated memory left %d", 474
459 REISERFS_SB(s)->s_kmallocs); 475 if (REISERFS_SB(s)->s_kmallocs != 0) {
460 } 476 reiserfs_warning(s,
461 477 "vs-2004: reiserfs_put_super: allocated memory left %d",
462 if (REISERFS_SB(s)->reserved_blocks != 0) { 478 REISERFS_SB(s)->s_kmallocs);
463 reiserfs_warning (s, "green-2005: reiserfs_put_super: reserved blocks left %d", 479 }
464 REISERFS_SB(s)->reserved_blocks); 480
465 } 481 if (REISERFS_SB(s)->reserved_blocks != 0) {
466 482 reiserfs_warning(s,
467 reiserfs_proc_info_done( s ); 483 "green-2005: reiserfs_put_super: reserved blocks left %d",
468 484 REISERFS_SB(s)->reserved_blocks);
469 kfree(s->s_fs_info); 485 }
470 s->s_fs_info = NULL; 486
471 487 reiserfs_proc_info_done(s);
472 return; 488
489 kfree(s->s_fs_info);
490 s->s_fs_info = NULL;
491
492 return;
473} 493}
474 494
475static kmem_cache_t * reiserfs_inode_cachep; 495static kmem_cache_t *reiserfs_inode_cachep;
476 496
477static struct inode *reiserfs_alloc_inode(struct super_block *sb) 497static struct inode *reiserfs_alloc_inode(struct super_block *sb)
478{ 498{
479 struct reiserfs_inode_info *ei; 499 struct reiserfs_inode_info *ei;
480 ei = (struct reiserfs_inode_info *)kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL); 500 ei = (struct reiserfs_inode_info *)
501 kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL);
481 if (!ei) 502 if (!ei)
482 return NULL; 503 return NULL;
483 return &ei->vfs_inode; 504 return &ei->vfs_inode;
@@ -488,25 +509,26 @@ static void reiserfs_destroy_inode(struct inode *inode)
488 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 509 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
489} 510}
490 511
491static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) 512static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
492{ 513{
493 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *) foo; 514 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
494 515
495 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 516 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
496 SLAB_CTOR_CONSTRUCTOR) { 517 SLAB_CTOR_CONSTRUCTOR) {
497 INIT_LIST_HEAD(&ei->i_prealloc_list) ; 518 INIT_LIST_HEAD(&ei->i_prealloc_list);
498 inode_init_once(&ei->vfs_inode); 519 inode_init_once(&ei->vfs_inode);
499 ei->i_acl_access = NULL; 520 ei->i_acl_access = NULL;
500 ei->i_acl_default = NULL; 521 ei->i_acl_default = NULL;
501 } 522 }
502} 523}
503 524
504static int init_inodecache(void) 525static int init_inodecache(void)
505{ 526{
506 reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", 527 reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache",
507 sizeof(struct reiserfs_inode_info), 528 sizeof(struct
508 0, SLAB_RECLAIM_ACCOUNT, 529 reiserfs_inode_info),
509 init_once, NULL); 530 0, SLAB_RECLAIM_ACCOUNT,
531 init_once, NULL);
510 if (reiserfs_inode_cachep == NULL) 532 if (reiserfs_inode_cachep == NULL)
511 return -ENOMEM; 533 return -ENOMEM;
512 return 0; 534 return 0;
@@ -515,72 +537,76 @@ static int init_inodecache(void)
515static void destroy_inodecache(void) 537static void destroy_inodecache(void)
516{ 538{
517 if (kmem_cache_destroy(reiserfs_inode_cachep)) 539 if (kmem_cache_destroy(reiserfs_inode_cachep))
518 reiserfs_warning (NULL, "reiserfs_inode_cache: not all structures were freed"); 540 reiserfs_warning(NULL,
541 "reiserfs_inode_cache: not all structures were freed");
519} 542}
520 543
521/* we don't mark inodes dirty, we just log them */ 544/* we don't mark inodes dirty, we just log them */
522static void reiserfs_dirty_inode (struct inode * inode) { 545static void reiserfs_dirty_inode(struct inode *inode)
523 struct reiserfs_transaction_handle th ; 546{
524 547 struct reiserfs_transaction_handle th;
525 int err = 0; 548
526 if (inode->i_sb->s_flags & MS_RDONLY) { 549 int err = 0;
527 reiserfs_warning(inode->i_sb, "clm-6006: writing inode %lu on readonly FS", 550 if (inode->i_sb->s_flags & MS_RDONLY) {
528 inode->i_ino) ; 551 reiserfs_warning(inode->i_sb,
529 return ; 552 "clm-6006: writing inode %lu on readonly FS",
530 } 553 inode->i_ino);
531 reiserfs_write_lock(inode->i_sb); 554 return;
532 555 }
533 /* this is really only used for atime updates, so they don't have 556 reiserfs_write_lock(inode->i_sb);
534 ** to be included in O_SYNC or fsync 557
535 */ 558 /* this is really only used for atime updates, so they don't have
536 err = journal_begin(&th, inode->i_sb, 1) ; 559 ** to be included in O_SYNC or fsync
537 if (err) { 560 */
538 reiserfs_write_unlock (inode->i_sb); 561 err = journal_begin(&th, inode->i_sb, 1);
539 return; 562 if (err) {
540 } 563 reiserfs_write_unlock(inode->i_sb);
541 reiserfs_update_sd (&th, inode); 564 return;
542 journal_end(&th, inode->i_sb, 1) ; 565 }
543 reiserfs_write_unlock(inode->i_sb); 566 reiserfs_update_sd(&th, inode);
567 journal_end(&th, inode->i_sb, 1);
568 reiserfs_write_unlock(inode->i_sb);
544} 569}
545 570
546static void reiserfs_clear_inode (struct inode *inode) 571static void reiserfs_clear_inode(struct inode *inode)
547{ 572{
548 struct posix_acl *acl; 573 struct posix_acl *acl;
549 574
550 acl = REISERFS_I(inode)->i_acl_access; 575 acl = REISERFS_I(inode)->i_acl_access;
551 if (acl && !IS_ERR (acl)) 576 if (acl && !IS_ERR(acl))
552 posix_acl_release (acl); 577 posix_acl_release(acl);
553 REISERFS_I(inode)->i_acl_access = NULL; 578 REISERFS_I(inode)->i_acl_access = NULL;
554 579
555 acl = REISERFS_I(inode)->i_acl_default; 580 acl = REISERFS_I(inode)->i_acl_default;
556 if (acl && !IS_ERR (acl)) 581 if (acl && !IS_ERR(acl))
557 posix_acl_release (acl); 582 posix_acl_release(acl);
558 REISERFS_I(inode)->i_acl_default = NULL; 583 REISERFS_I(inode)->i_acl_default = NULL;
559} 584}
560 585
561#ifdef CONFIG_QUOTA 586#ifdef CONFIG_QUOTA
562static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); 587static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
563static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); 588 size_t, loff_t);
589static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
590 loff_t);
564#endif 591#endif
565 592
566static struct super_operations reiserfs_sops = 593static struct super_operations reiserfs_sops = {
567{ 594 .alloc_inode = reiserfs_alloc_inode,
568 .alloc_inode = reiserfs_alloc_inode, 595 .destroy_inode = reiserfs_destroy_inode,
569 .destroy_inode = reiserfs_destroy_inode, 596 .write_inode = reiserfs_write_inode,
570 .write_inode = reiserfs_write_inode, 597 .dirty_inode = reiserfs_dirty_inode,
571 .dirty_inode = reiserfs_dirty_inode, 598 .delete_inode = reiserfs_delete_inode,
572 .delete_inode = reiserfs_delete_inode, 599 .clear_inode = reiserfs_clear_inode,
573 .clear_inode = reiserfs_clear_inode, 600 .put_super = reiserfs_put_super,
574 .put_super = reiserfs_put_super, 601 .write_super = reiserfs_write_super,
575 .write_super = reiserfs_write_super, 602 .sync_fs = reiserfs_sync_fs,
576 .sync_fs = reiserfs_sync_fs, 603 .write_super_lockfs = reiserfs_write_super_lockfs,
577 .write_super_lockfs = reiserfs_write_super_lockfs, 604 .unlockfs = reiserfs_unlockfs,
578 .unlockfs = reiserfs_unlockfs, 605 .statfs = reiserfs_statfs,
579 .statfs = reiserfs_statfs, 606 .remount_fs = reiserfs_remount,
580 .remount_fs = reiserfs_remount,
581#ifdef CONFIG_QUOTA 607#ifdef CONFIG_QUOTA
582 .quota_read = reiserfs_quota_read, 608 .quota_read = reiserfs_quota_read,
583 .quota_write = reiserfs_quota_write, 609 .quota_write = reiserfs_quota_write,
584#endif 610#endif
585}; 611};
586 612
@@ -596,50 +622,48 @@ static int reiserfs_mark_dquot_dirty(struct dquot *);
596static int reiserfs_write_info(struct super_block *, int); 622static int reiserfs_write_info(struct super_block *, int);
597static int reiserfs_quota_on(struct super_block *, int, int, char *); 623static int reiserfs_quota_on(struct super_block *, int, int, char *);
598 624
599static struct dquot_operations reiserfs_quota_operations = 625static struct dquot_operations reiserfs_quota_operations = {
600{ 626 .initialize = reiserfs_dquot_initialize,
601 .initialize = reiserfs_dquot_initialize, 627 .drop = reiserfs_dquot_drop,
602 .drop = reiserfs_dquot_drop, 628 .alloc_space = dquot_alloc_space,
603 .alloc_space = dquot_alloc_space, 629 .alloc_inode = dquot_alloc_inode,
604 .alloc_inode = dquot_alloc_inode, 630 .free_space = dquot_free_space,
605 .free_space = dquot_free_space, 631 .free_inode = dquot_free_inode,
606 .free_inode = dquot_free_inode, 632 .transfer = dquot_transfer,
607 .transfer = dquot_transfer, 633 .write_dquot = reiserfs_write_dquot,
608 .write_dquot = reiserfs_write_dquot, 634 .acquire_dquot = reiserfs_acquire_dquot,
609 .acquire_dquot = reiserfs_acquire_dquot, 635 .release_dquot = reiserfs_release_dquot,
610 .release_dquot = reiserfs_release_dquot, 636 .mark_dirty = reiserfs_mark_dquot_dirty,
611 .mark_dirty = reiserfs_mark_dquot_dirty, 637 .write_info = reiserfs_write_info,
612 .write_info = reiserfs_write_info,
613}; 638};
614 639
615static struct quotactl_ops reiserfs_qctl_operations = 640static struct quotactl_ops reiserfs_qctl_operations = {
616{ 641 .quota_on = reiserfs_quota_on,
617 .quota_on = reiserfs_quota_on, 642 .quota_off = vfs_quota_off,
618 .quota_off = vfs_quota_off, 643 .quota_sync = vfs_quota_sync,
619 .quota_sync = vfs_quota_sync, 644 .get_info = vfs_get_dqinfo,
620 .get_info = vfs_get_dqinfo, 645 .set_info = vfs_set_dqinfo,
621 .set_info = vfs_set_dqinfo, 646 .get_dqblk = vfs_get_dqblk,
622 .get_dqblk = vfs_get_dqblk, 647 .set_dqblk = vfs_set_dqblk,
623 .set_dqblk = vfs_set_dqblk,
624}; 648};
625#endif 649#endif
626 650
627static struct export_operations reiserfs_export_ops = { 651static struct export_operations reiserfs_export_ops = {
628 .encode_fh = reiserfs_encode_fh, 652 .encode_fh = reiserfs_encode_fh,
629 .decode_fh = reiserfs_decode_fh, 653 .decode_fh = reiserfs_decode_fh,
630 .get_parent = reiserfs_get_parent, 654 .get_parent = reiserfs_get_parent,
631 .get_dentry = reiserfs_get_dentry, 655 .get_dentry = reiserfs_get_dentry,
632} ; 656};
633 657
634/* this struct is used in reiserfs_getopt () for containing the value for those 658/* this struct is used in reiserfs_getopt () for containing the value for those
635 mount options that have values rather than being toggles. */ 659 mount options that have values rather than being toggles. */
636typedef struct { 660typedef struct {
637 char * value; 661 char *value;
638 int setmask; /* bitmask which is to set on mount_options bitmask when this 662 int setmask; /* bitmask which is to set on mount_options bitmask when this
639 value is found, 0 is no bits are to be changed. */ 663 value is found, 0 is no bits are to be changed. */
640 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 664 int clrmask; /* bitmask which is to clear on mount_options bitmask when this
641 value is found, 0 is no bits are to be changed. This is 665 value is found, 0 is no bits are to be changed. This is
642 applied BEFORE setmask */ 666 applied BEFORE setmask */
643} arg_desc_t; 667} arg_desc_t;
644 668
645/* Set this bit in arg_required to allow empty arguments */ 669/* Set this bit in arg_required to allow empty arguments */
@@ -648,67 +672,70 @@ typedef struct {
648/* this struct is used in reiserfs_getopt() for describing the set of reiserfs 672/* this struct is used in reiserfs_getopt() for describing the set of reiserfs
649 mount options */ 673 mount options */
650typedef struct { 674typedef struct {
651 char * option_name; 675 char *option_name;
652 int arg_required; /* 0 if argument is not required, not 0 otherwise */ 676 int arg_required; /* 0 if argument is not required, not 0 otherwise */
653 const arg_desc_t * values; /* list of values accepted by an option */ 677 const arg_desc_t *values; /* list of values accepted by an option */
654 int setmask; /* bitmask which is to set on mount_options bitmask when this 678 int setmask; /* bitmask which is to set on mount_options bitmask when this
655 value is found, 0 is no bits are to be changed. */ 679 value is found, 0 is no bits are to be changed. */
656 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 680 int clrmask; /* bitmask which is to clear on mount_options bitmask when this
657 value is found, 0 is no bits are to be changed. This is 681 value is found, 0 is no bits are to be changed. This is
658 applied BEFORE setmask */ 682 applied BEFORE setmask */
659} opt_desc_t; 683} opt_desc_t;
660 684
661/* possible values for -o data= */ 685/* possible values for -o data= */
662static const arg_desc_t logging_mode[] = { 686static const arg_desc_t logging_mode[] = {
663 {"ordered", 1<<REISERFS_DATA_ORDERED, (1<<REISERFS_DATA_LOG|1<<REISERFS_DATA_WRITEBACK)}, 687 {"ordered", 1 << REISERFS_DATA_ORDERED,
664 {"journal", 1<<REISERFS_DATA_LOG, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_WRITEBACK)}, 688 (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)},
665 {"writeback", 1<<REISERFS_DATA_WRITEBACK, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_LOG)}, 689 {"journal", 1 << REISERFS_DATA_LOG,
666 {NULL, 0} 690 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)},
691 {"writeback", 1 << REISERFS_DATA_WRITEBACK,
692 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)},
693 {NULL, 0}
667}; 694};
668 695
669/* possible values for -o barrier= */ 696/* possible values for -o barrier= */
670static const arg_desc_t barrier_mode[] = { 697static const arg_desc_t barrier_mode[] = {
671 {"none", 1<<REISERFS_BARRIER_NONE, 1<<REISERFS_BARRIER_FLUSH}, 698 {"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH},
672 {"flush", 1<<REISERFS_BARRIER_FLUSH, 1<<REISERFS_BARRIER_NONE}, 699 {"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE},
673 {NULL, 0} 700 {NULL, 0}
674}; 701};
675 702
676/* possible values for "-o block-allocator=" and bits which are to be set in 703/* possible values for "-o block-allocator=" and bits which are to be set in
677 s_mount_opt of reiserfs specific part of in-core super block */ 704 s_mount_opt of reiserfs specific part of in-core super block */
678static const arg_desc_t balloc[] = { 705static const arg_desc_t balloc[] = {
679 {"noborder", 1<<REISERFS_NO_BORDER, 0}, 706 {"noborder", 1 << REISERFS_NO_BORDER, 0},
680 {"border", 0, 1<<REISERFS_NO_BORDER}, 707 {"border", 0, 1 << REISERFS_NO_BORDER},
681 {"no_unhashed_relocation", 1<<REISERFS_NO_UNHASHED_RELOCATION, 0}, 708 {"no_unhashed_relocation", 1 << REISERFS_NO_UNHASHED_RELOCATION, 0},
682 {"hashed_relocation", 1<<REISERFS_HASHED_RELOCATION, 0}, 709 {"hashed_relocation", 1 << REISERFS_HASHED_RELOCATION, 0},
683 {"test4", 1<<REISERFS_TEST4, 0}, 710 {"test4", 1 << REISERFS_TEST4, 0},
684 {"notest4", 0, 1<<REISERFS_TEST4}, 711 {"notest4", 0, 1 << REISERFS_TEST4},
685 {NULL, 0, 0} 712 {NULL, 0, 0}
686}; 713};
687 714
688static const arg_desc_t tails[] = { 715static const arg_desc_t tails[] = {
689 {"on", 1<<REISERFS_LARGETAIL, 1<<REISERFS_SMALLTAIL}, 716 {"on", 1 << REISERFS_LARGETAIL, 1 << REISERFS_SMALLTAIL},
690 {"off", 0, (1<<REISERFS_LARGETAIL)|(1<<REISERFS_SMALLTAIL)}, 717 {"off", 0, (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
691 {"small", 1<<REISERFS_SMALLTAIL, 1<<REISERFS_LARGETAIL}, 718 {"small", 1 << REISERFS_SMALLTAIL, 1 << REISERFS_LARGETAIL},
692 {NULL, 0, 0} 719 {NULL, 0, 0}
693}; 720};
694 721
695static const arg_desc_t error_actions[] = { 722static const arg_desc_t error_actions[] = {
696 {"panic", 1 << REISERFS_ERROR_PANIC, 723 {"panic", 1 << REISERFS_ERROR_PANIC,
697 (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)}, 724 (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)},
698 {"ro-remount", 1 << REISERFS_ERROR_RO, 725 {"ro-remount", 1 << REISERFS_ERROR_RO,
699 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)}, 726 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)},
700#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG 727#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG
701 {"continue", 1 << REISERFS_ERROR_CONTINUE, 728 {"continue", 1 << REISERFS_ERROR_CONTINUE,
702 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)}, 729 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)},
703#endif 730#endif
704 {NULL, 0, 0}, 731 {NULL, 0, 0},
705}; 732};
706 733
707int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 128k. 734int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 128k.
708 There might be broken applications that are 735 There might be broken applications that are
709 confused by this. Use nolargeio mount option 736 confused by this. Use nolargeio mount option
710 to get usual i/o size = PAGE_SIZE. 737 to get usual i/o size = PAGE_SIZE.
711 */ 738 */
712 739
713/* proceed only one option from a list *cur - string containing of mount options 740/* proceed only one option from a list *cur - string containing of mount options
714 opts - array of options which are accepted 741 opts - array of options which are accepted
@@ -716,476 +743,530 @@ int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 12
716 in the input - pointer to the argument is stored here 743 in the input - pointer to the argument is stored here
717 bit_flags - if option requires to set a certain bit - it is set here 744 bit_flags - if option requires to set a certain bit - it is set here
718 return -1 if unknown option is found, opt->arg_required otherwise */ 745 return -1 if unknown option is found, opt->arg_required otherwise */
719static int reiserfs_getopt ( struct super_block * s, char ** cur, opt_desc_t * opts, char ** opt_arg, 746static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
720 unsigned long * bit_flags) 747 char **opt_arg, unsigned long *bit_flags)
721{ 748{
722 char * p; 749 char *p;
723 /* foo=bar, 750 /* foo=bar,
724 ^ ^ ^ 751 ^ ^ ^
725 | | +-- option_end 752 | | +-- option_end
726 | +-- arg_start 753 | +-- arg_start
727 +-- option_start 754 +-- option_start
728 */ 755 */
729 const opt_desc_t * opt; 756 const opt_desc_t *opt;
730 const arg_desc_t * arg; 757 const arg_desc_t *arg;
731 758
732 759 p = *cur;
733 p = *cur; 760
734 761 /* assume argument cannot contain commas */
735 /* assume argument cannot contain commas */ 762 *cur = strchr(p, ',');
736 *cur = strchr (p, ','); 763 if (*cur) {
737 if (*cur) { 764 *(*cur) = '\0';
738 *(*cur) = '\0'; 765 (*cur)++;
739 (*cur) ++; 766 }
740 } 767
741 768 if (!strncmp(p, "alloc=", 6)) {
742 if ( !strncmp (p, "alloc=", 6) ) { 769 /* Ugly special case, probably we should redo options parser so that
743 /* Ugly special case, probably we should redo options parser so that 770 it can understand several arguments for some options, also so that
744 it can understand several arguments for some options, also so that 771 it can fill several bitfields with option values. */
745 it can fill several bitfields with option values. */ 772 if (reiserfs_parse_alloc_options(s, p + 6)) {
746 if ( reiserfs_parse_alloc_options( s, p + 6) ) { 773 return -1;
747 return -1; 774 } else {
748 } else { 775 return 0;
749 return 0; 776 }
750 } 777 }
751 } 778
752 779 /* for every option in the list */
753 780 for (opt = opts; opt->option_name; opt++) {
754 /* for every option in the list */ 781 if (!strncmp(p, opt->option_name, strlen(opt->option_name))) {
755 for (opt = opts; opt->option_name; opt ++) { 782 if (bit_flags) {
756 if (!strncmp (p, opt->option_name, strlen (opt->option_name))) { 783 if (opt->clrmask ==
757 if (bit_flags) { 784 (1 << REISERFS_UNSUPPORTED_OPT))
758 if (opt->clrmask == (1 << REISERFS_UNSUPPORTED_OPT)) 785 reiserfs_warning(s, "%s not supported.",
759 reiserfs_warning (s, "%s not supported.", p); 786 p);
760 else 787 else
761 *bit_flags &= ~opt->clrmask; 788 *bit_flags &= ~opt->clrmask;
762 if (opt->setmask == (1 << REISERFS_UNSUPPORTED_OPT)) 789 if (opt->setmask ==
763 reiserfs_warning (s, "%s not supported.", p); 790 (1 << REISERFS_UNSUPPORTED_OPT))
764 else 791 reiserfs_warning(s, "%s not supported.",
765 *bit_flags |= opt->setmask; 792 p);
766 } 793 else
767 break; 794 *bit_flags |= opt->setmask;
768 } 795 }
769 } 796 break;
770 if (!opt->option_name) { 797 }
771 reiserfs_warning (s, "unknown mount option \"%s\"", p); 798 }
772 return -1; 799 if (!opt->option_name) {
773 } 800 reiserfs_warning(s, "unknown mount option \"%s\"", p);
774 801 return -1;
775 p += strlen (opt->option_name); 802 }
776 switch (*p) { 803
777 case '=': 804 p += strlen(opt->option_name);
778 if (!opt->arg_required) { 805 switch (*p) {
779 reiserfs_warning (s, "the option \"%s\" does not require an argument", 806 case '=':
780 opt->option_name); 807 if (!opt->arg_required) {
781 return -1; 808 reiserfs_warning(s,
782 } 809 "the option \"%s\" does not require an argument",
783 break; 810 opt->option_name);
784 811 return -1;
785 case 0: 812 }
786 if (opt->arg_required) { 813 break;
787 reiserfs_warning (s, "the option \"%s\" requires an argument", opt->option_name); 814
788 return -1; 815 case 0:
789 } 816 if (opt->arg_required) {
790 break; 817 reiserfs_warning(s,
791 default: 818 "the option \"%s\" requires an argument",
792 reiserfs_warning (s, "head of option \"%s\" is only correct", opt->option_name); 819 opt->option_name);
793 return -1; 820 return -1;
794 } 821 }
795 822 break;
796 /* move to the argument, or to next option if argument is not required */ 823 default:
797 p ++; 824 reiserfs_warning(s, "head of option \"%s\" is only correct",
798 825 opt->option_name);
799 if ( opt->arg_required && !(opt->arg_required & (1<<REISERFS_OPT_ALLOWEMPTY)) && !strlen (p) ) { 826 return -1;
800 /* this catches "option=," if not allowed */ 827 }
801 reiserfs_warning (s, "empty argument for \"%s\"", opt->option_name); 828
829 /* move to the argument, or to next option if argument is not required */
830 p++;
831
832 if (opt->arg_required
833 && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY))
834 && !strlen(p)) {
835 /* this catches "option=," if not allowed */
836 reiserfs_warning(s, "empty argument for \"%s\"",
837 opt->option_name);
838 return -1;
839 }
840
841 if (!opt->values) {
842 /* *=NULLopt_arg contains pointer to argument */
843 *opt_arg = p;
844 return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY);
845 }
846
847 /* values possible for this option are listed in opt->values */
848 for (arg = opt->values; arg->value; arg++) {
849 if (!strcmp(p, arg->value)) {
850 if (bit_flags) {
851 *bit_flags &= ~arg->clrmask;
852 *bit_flags |= arg->setmask;
853 }
854 return opt->arg_required;
855 }
856 }
857
858 reiserfs_warning(s, "bad value \"%s\" for option \"%s\"", p,
859 opt->option_name);
802 return -1; 860 return -1;
803 }
804
805 if (!opt->values) {
806 /* *=NULLopt_arg contains pointer to argument */
807 *opt_arg = p;
808 return opt->arg_required & ~(1<<REISERFS_OPT_ALLOWEMPTY);
809 }
810
811 /* values possible for this option are listed in opt->values */
812 for (arg = opt->values; arg->value; arg ++) {
813 if (!strcmp (p, arg->value)) {
814 if (bit_flags) {
815 *bit_flags &= ~arg->clrmask;
816 *bit_flags |= arg->setmask;
817 }
818 return opt->arg_required;
819 }
820 }
821
822 reiserfs_warning (s, "bad value \"%s\" for option \"%s\"", p, opt->option_name);
823 return -1;
824} 861}
825 862
826/* returns 0 if something is wrong in option string, 1 - otherwise */ 863/* returns 0 if something is wrong in option string, 1 - otherwise */
827static int reiserfs_parse_options (struct super_block * s, char * options, /* string given via mount's -o */ 864static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */
828 unsigned long * mount_options, 865 unsigned long *mount_options,
829 /* after the parsing phase, contains the 866 /* after the parsing phase, contains the
830 collection of bitflags defining what 867 collection of bitflags defining what
831 mount options were selected. */ 868 mount options were selected. */
832 unsigned long * blocks, /* strtol-ed from NNN of resize=NNN */ 869 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
833 char ** jdev_name, 870 char **jdev_name,
834 unsigned int * commit_max_age) 871 unsigned int *commit_max_age)
835{ 872{
836 int c; 873 int c;
837 char * arg = NULL; 874 char *arg = NULL;
838 char * pos; 875 char *pos;
839 opt_desc_t opts[] = { 876 opt_desc_t opts[] = {
840 /* Compatibility stuff, so that -o notail for old setups still work */ 877 /* Compatibility stuff, so that -o notail for old setups still work */
841 {"tails", .arg_required = 't', .values = tails}, 878 {"tails",.arg_required = 't',.values = tails},
842 {"notail", .clrmask = (1<<REISERFS_LARGETAIL)|(1<<REISERFS_SMALLTAIL)}, 879 {"notail",.clrmask =
843 {"conv", .setmask = 1<<REISERFS_CONVERT}, 880 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
844 {"attrs", .setmask = 1<<REISERFS_ATTRS}, 881 {"conv",.setmask = 1 << REISERFS_CONVERT},
845 {"noattrs", .clrmask = 1<<REISERFS_ATTRS}, 882 {"attrs",.setmask = 1 << REISERFS_ATTRS},
883 {"noattrs",.clrmask = 1 << REISERFS_ATTRS},
846#ifdef CONFIG_REISERFS_FS_XATTR 884#ifdef CONFIG_REISERFS_FS_XATTR
847 {"user_xattr", .setmask = 1<<REISERFS_XATTRS_USER}, 885 {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
848 {"nouser_xattr",.clrmask = 1<<REISERFS_XATTRS_USER}, 886 {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
849#else 887#else
850 {"user_xattr", .setmask = 1<<REISERFS_UNSUPPORTED_OPT}, 888 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
851 {"nouser_xattr",.clrmask = 1<<REISERFS_UNSUPPORTED_OPT}, 889 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
852#endif 890#endif
853#ifdef CONFIG_REISERFS_FS_POSIX_ACL 891#ifdef CONFIG_REISERFS_FS_POSIX_ACL
854 {"acl", .setmask = 1<<REISERFS_POSIXACL}, 892 {"acl",.setmask = 1 << REISERFS_POSIXACL},
855 {"noacl", .clrmask = 1<<REISERFS_POSIXACL}, 893 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
856#else 894#else
857 {"acl", .setmask = 1<<REISERFS_UNSUPPORTED_OPT}, 895 {"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
858 {"noacl", .clrmask = 1<<REISERFS_UNSUPPORTED_OPT}, 896 {"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
859#endif 897#endif
860 {"nolog",}, /* This is unsupported */ 898 {"nolog",}, /* This is unsupported */
861 {"replayonly", .setmask = 1<<REPLAYONLY}, 899 {"replayonly",.setmask = 1 << REPLAYONLY},
862 {"block-allocator", .arg_required = 'a', .values = balloc}, 900 {"block-allocator",.arg_required = 'a',.values = balloc},
863 {"data", .arg_required = 'd', .values = logging_mode}, 901 {"data",.arg_required = 'd',.values = logging_mode},
864 {"barrier", .arg_required = 'b', .values = barrier_mode}, 902 {"barrier",.arg_required = 'b',.values = barrier_mode},
865 {"resize", .arg_required = 'r', .values = NULL}, 903 {"resize",.arg_required = 'r',.values = NULL},
866 {"jdev", .arg_required = 'j', .values = NULL}, 904 {"jdev",.arg_required = 'j',.values = NULL},
867 {"nolargeio", .arg_required = 'w', .values = NULL}, 905 {"nolargeio",.arg_required = 'w',.values = NULL},
868 {"commit", .arg_required = 'c', .values = NULL}, 906 {"commit",.arg_required = 'c',.values = NULL},
869 {"usrquota",}, 907 {"usrquota",.setmask = 1 << REISERFS_QUOTA},
870 {"grpquota",}, 908 {"grpquota",.setmask = 1 << REISERFS_QUOTA},
871 {"errors", .arg_required = 'e', .values = error_actions}, 909 {"noquota",.clrmask = 1 << REISERFS_QUOTA},
872 {"usrjquota", .arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, 910 {"errors",.arg_required = 'e',.values = error_actions},
873 {"grpjquota", .arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, 911 {"usrjquota",.arg_required =
874 {"jqfmt", .arg_required = 'f', .values = NULL}, 912 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
875 {NULL,} 913 {"grpjquota",.arg_required =
876 }; 914 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
877 915 {"jqfmt",.arg_required = 'f',.values = NULL},
878 *blocks = 0; 916 {NULL,}
879 if (!options || !*options) 917 };
880 /* use default configuration: create tails, journaling on, no 918
881 conversion to newest format */ 919 *blocks = 0;
882 return 1; 920 if (!options || !*options)
883 921 /* use default configuration: create tails, journaling on, no
884 for (pos = options; pos; ) { 922 conversion to newest format */
885 c = reiserfs_getopt (s, &pos, opts, &arg, mount_options); 923 return 1;
886 if (c == -1) 924
887 /* wrong option is given */ 925 for (pos = options; pos;) {
888 return 0; 926 c = reiserfs_getopt(s, &pos, opts, &arg, mount_options);
889 927 if (c == -1)
890 if (c == 'r') { 928 /* wrong option is given */
891 char * p;
892
893 p = NULL;
894 /* "resize=NNN" or "resize=auto" */
895
896 if (!strcmp(arg, "auto")) {
897 /* From JFS code, to auto-get the size.*/
898 *blocks = s->s_bdev->bd_inode->i_size >> s->s_blocksize_bits;
899 } else {
900 *blocks = simple_strtoul (arg, &p, 0);
901 if (*p != '\0') {
902 /* NNN does not look like a number */
903 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
904 return 0; 929 return 0;
905 }
906 }
907 }
908 930
909 if ( c == 'c' ) { 931 if (c == 'r') {
910 char *p = NULL; 932 char *p;
911 unsigned long val = simple_strtoul (arg, &p, 0); 933
912 /* commit=NNN (time in seconds) */ 934 p = NULL;
913 if ( *p != '\0' || val >= (unsigned int)-1) { 935 /* "resize=NNN" or "resize=auto" */
914 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); 936
915 return 0; 937 if (!strcmp(arg, "auto")) {
938 /* From JFS code, to auto-get the size. */
939 *blocks =
940 s->s_bdev->bd_inode->i_size >> s->
941 s_blocksize_bits;
942 } else {
943 *blocks = simple_strtoul(arg, &p, 0);
944 if (*p != '\0') {
945 /* NNN does not look like a number */
946 reiserfs_warning(s,
947 "reiserfs_parse_options: bad value %s",
948 arg);
949 return 0;
950 }
951 }
916 } 952 }
917 *commit_max_age = (unsigned int)val;
918 }
919 953
920 if ( c == 'w' ) { 954 if (c == 'c') {
921 char *p=NULL; 955 char *p = NULL;
922 int val = simple_strtoul (arg, &p, 0); 956 unsigned long val = simple_strtoul(arg, &p, 0);
923 957 /* commit=NNN (time in seconds) */
924 if ( *p != '\0') { 958 if (*p != '\0' || val >= (unsigned int)-1) {
925 reiserfs_warning (s, "reiserfs_parse_options: non-numeric value %s for nolargeio option", arg); 959 reiserfs_warning(s,
926 return 0; 960 "reiserfs_parse_options: bad value %s",
961 arg);
962 return 0;
963 }
964 *commit_max_age = (unsigned int)val;
927 } 965 }
928 if ( val )
929 reiserfs_default_io_size = PAGE_SIZE;
930 else
931 reiserfs_default_io_size = 128 * 1024;
932 }
933 966
934 if (c == 'j') { 967 if (c == 'w') {
935 if (arg && *arg && jdev_name) { 968 char *p = NULL;
936 if ( *jdev_name ) { //Hm, already assigned? 969 int val = simple_strtoul(arg, &p, 0);
937 reiserfs_warning (s, "reiserfs_parse_options: journal device was already specified to be %s", *jdev_name); 970
938 return 0; 971 if (*p != '\0') {
972 reiserfs_warning(s,
973 "reiserfs_parse_options: non-numeric value %s for nolargeio option",
974 arg);
975 return 0;
976 }
977 if (val)
978 reiserfs_default_io_size = PAGE_SIZE;
979 else
980 reiserfs_default_io_size = 128 * 1024;
939 } 981 }
940 *jdev_name = arg;
941 }
942 }
943 982
944#ifdef CONFIG_QUOTA 983 if (c == 'j') {
945 if (c == 'u' || c == 'g') { 984 if (arg && *arg && jdev_name) {
946 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; 985 if (*jdev_name) { //Hm, already assigned?
947 986 reiserfs_warning(s,
948 if (sb_any_quota_enabled(s)) { 987 "reiserfs_parse_options: journal device was already specified to be %s",
949 reiserfs_warning(s, "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); 988 *jdev_name);
950 return 0; 989 return 0;
951 } 990 }
952 if (*arg) { /* Some filename specified? */ 991 *jdev_name = arg;
953 if (REISERFS_SB(s)->s_qf_names[qtype] && strcmp(REISERFS_SB(s)->s_qf_names[qtype], arg)) { 992 }
954 reiserfs_warning(s, "reiserfs_parse_options: %s quota file already specified.", QTYPE2NAME(qtype));
955 return 0;
956 } 993 }
957 if (strchr(arg, '/')) { 994#ifdef CONFIG_QUOTA
958 reiserfs_warning(s, "reiserfs_parse_options: quotafile must be on filesystem root."); 995 if (c == 'u' || c == 'g') {
959 return 0; 996 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
997
998 if (sb_any_quota_enabled(s)) {
999 reiserfs_warning(s,
1000 "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
1001 return 0;
1002 }
1003 if (*arg) { /* Some filename specified? */
1004 if (REISERFS_SB(s)->s_qf_names[qtype]
1005 && strcmp(REISERFS_SB(s)->s_qf_names[qtype],
1006 arg)) {
1007 reiserfs_warning(s,
1008 "reiserfs_parse_options: %s quota file already specified.",
1009 QTYPE2NAME(qtype));
1010 return 0;
1011 }
1012 if (strchr(arg, '/')) {
1013 reiserfs_warning(s,
1014 "reiserfs_parse_options: quotafile must be on filesystem root.");
1015 return 0;
1016 }
1017 REISERFS_SB(s)->s_qf_names[qtype] =
1018 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1019 if (!REISERFS_SB(s)->s_qf_names[qtype]) {
1020 reiserfs_warning(s,
1021 "reiserfs_parse_options: not enough memory for storing quotafile name.");
1022 return 0;
1023 }
1024 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
1025 *mount_options |= 1 << REISERFS_QUOTA;
1026 } else {
1027 if (REISERFS_SB(s)->s_qf_names[qtype]) {
1028 kfree(REISERFS_SB(s)->
1029 s_qf_names[qtype]);
1030 REISERFS_SB(s)->s_qf_names[qtype] =
1031 NULL;
1032 }
1033 }
960 } 1034 }
961 REISERFS_SB(s)->s_qf_names[qtype] = kmalloc(strlen(arg)+1, GFP_KERNEL); 1035 if (c == 'f') {
962 if (!REISERFS_SB(s)->s_qf_names[qtype]) { 1036 if (!strcmp(arg, "vfsold"))
963 reiserfs_warning(s, "reiserfs_parse_options: not enough memory for storing quotafile name."); 1037 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
964 return 0; 1038 else if (!strcmp(arg, "vfsv0"))
1039 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
1040 else {
1041 reiserfs_warning(s,
1042 "reiserfs_parse_options: unknown quota format specified.");
1043 return 0;
1044 }
965 } 1045 }
966 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 1046#else
967 } 1047 if (c == 'u' || c == 'g' || c == 'f') {
968 else { 1048 reiserfs_warning(s,
969 if (REISERFS_SB(s)->s_qf_names[qtype]) { 1049 "reiserfs_parse_options: journalled quota options not supported.");
970 kfree(REISERFS_SB(s)->s_qf_names[qtype]); 1050 return 0;
971 REISERFS_SB(s)->s_qf_names[qtype] = NULL;
972 } 1051 }
973 } 1052#endif
974 } 1053 }
975 if (c == 'f') { 1054
976 if (!strcmp(arg, "vfsold")) 1055#ifdef CONFIG_QUOTA
977 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; 1056 if (!REISERFS_SB(s)->s_jquota_fmt
978 else if (!strcmp(arg, "vfsv0")) 1057 && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
979 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; 1058 || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
980 else { 1059 reiserfs_warning(s,
981 reiserfs_warning(s, "reiserfs_parse_options: unknown quota format specified."); 1060 "reiserfs_parse_options: journalled quota format not specified.");
982 return 0; 1061 return 0;
983 }
984 } 1062 }
985#else 1063 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
986 if (c == 'u' || c == 'g' || c == 'f') { 1064 if (!(*mount_options & (1 << REISERFS_QUOTA))
987 reiserfs_warning(s, "reiserfs_parse_options: journalled quota options not supported."); 1065 && sb_any_quota_enabled(s)) {
988 return 0; 1066 reiserfs_warning(s,
1067 "reiserfs_parse_options: quota options must be present when quota is turned on.");
1068 return 0;
989 } 1069 }
990#endif 1070#endif
991 } 1071
992 1072 return 1;
993#ifdef CONFIG_QUOTA
994 if (!REISERFS_SB(s)->s_jquota_fmt && (REISERFS_SB(s)->s_qf_names[USRQUOTA] || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
995 reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
996 return 0;
997 }
998#endif
999 return 1;
1000} 1073}
1001 1074
1002static void switch_data_mode(struct super_block *s, unsigned long mode) { 1075static void switch_data_mode(struct super_block *s, unsigned long mode)
1003 REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | 1076{
1004 (1 << REISERFS_DATA_ORDERED) | 1077 REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) |
1005 (1 << REISERFS_DATA_WRITEBACK)); 1078 (1 << REISERFS_DATA_ORDERED) |
1006 REISERFS_SB(s)->s_mount_opt |= (1 << mode); 1079 (1 << REISERFS_DATA_WRITEBACK));
1080 REISERFS_SB(s)->s_mount_opt |= (1 << mode);
1007} 1081}
1008 1082
1009static void handle_data_mode(struct super_block *s, unsigned long mount_options) 1083static void handle_data_mode(struct super_block *s, unsigned long mount_options)
1010{ 1084{
1011 if (mount_options & (1 << REISERFS_DATA_LOG)) { 1085 if (mount_options & (1 << REISERFS_DATA_LOG)) {
1012 if (!reiserfs_data_log(s)) { 1086 if (!reiserfs_data_log(s)) {
1013 switch_data_mode(s, REISERFS_DATA_LOG); 1087 switch_data_mode(s, REISERFS_DATA_LOG);
1014 reiserfs_info (s, "switching to journaled data mode\n"); 1088 reiserfs_info(s, "switching to journaled data mode\n");
1015 } 1089 }
1016 } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { 1090 } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) {
1017 if (!reiserfs_data_ordered(s)) { 1091 if (!reiserfs_data_ordered(s)) {
1018 switch_data_mode(s, REISERFS_DATA_ORDERED); 1092 switch_data_mode(s, REISERFS_DATA_ORDERED);
1019 reiserfs_info (s, "switching to ordered data mode\n"); 1093 reiserfs_info(s, "switching to ordered data mode\n");
1020 } 1094 }
1021 } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { 1095 } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) {
1022 if (!reiserfs_data_writeback(s)) { 1096 if (!reiserfs_data_writeback(s)) {
1023 switch_data_mode(s, REISERFS_DATA_WRITEBACK); 1097 switch_data_mode(s, REISERFS_DATA_WRITEBACK);
1024 reiserfs_info (s, "switching to writeback data mode\n"); 1098 reiserfs_info(s, "switching to writeback data mode\n");
1025 } 1099 }
1026 } 1100 }
1027} 1101}
1028 1102
1029static void handle_barrier_mode(struct super_block *s, unsigned long bits) { 1103static void handle_barrier_mode(struct super_block *s, unsigned long bits)
1030 int flush = (1 << REISERFS_BARRIER_FLUSH); 1104{
1031 int none = (1 << REISERFS_BARRIER_NONE); 1105 int flush = (1 << REISERFS_BARRIER_FLUSH);
1032 int all_barrier = flush | none; 1106 int none = (1 << REISERFS_BARRIER_NONE);
1033 1107 int all_barrier = flush | none;
1034 if (bits & all_barrier) { 1108
1035 REISERFS_SB(s)->s_mount_opt &= ~all_barrier; 1109 if (bits & all_barrier) {
1036 if (bits & flush) { 1110 REISERFS_SB(s)->s_mount_opt &= ~all_barrier;
1037 REISERFS_SB(s)->s_mount_opt |= flush; 1111 if (bits & flush) {
1038 printk("reiserfs: enabling write barrier flush mode\n"); 1112 REISERFS_SB(s)->s_mount_opt |= flush;
1039 } else if (bits & none) { 1113 printk("reiserfs: enabling write barrier flush mode\n");
1040 REISERFS_SB(s)->s_mount_opt |= none; 1114 } else if (bits & none) {
1041 printk("reiserfs: write barriers turned off\n"); 1115 REISERFS_SB(s)->s_mount_opt |= none;
1042 } 1116 printk("reiserfs: write barriers turned off\n");
1043 } 1117 }
1118 }
1044} 1119}
1045 1120
1046static void handle_attrs( struct super_block *s ) 1121static void handle_attrs(struct super_block *s)
1047{ 1122{
1048 struct reiserfs_super_block * rs; 1123 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
1049 1124
1050 if( reiserfs_attrs( s ) ) { 1125 if (reiserfs_attrs(s)) {
1051 rs = SB_DISK_SUPER_BLOCK (s); 1126 if (old_format_only(s)) {
1052 if( old_format_only(s) ) { 1127 reiserfs_warning(s,
1053 reiserfs_warning(s, "reiserfs: cannot support attributes on 3.5.x disk format" ); 1128 "reiserfs: cannot support attributes on 3.5.x disk format");
1054 REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); 1129 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1055 return; 1130 return;
1056 } 1131 }
1057 if( !( le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared ) ) { 1132 if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) {
1058 reiserfs_warning(s, "reiserfs: cannot support attributes until flag is set in super-block" ); 1133 reiserfs_warning(s,
1059 REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); 1134 "reiserfs: cannot support attributes until flag is set in super-block");
1135 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1060 } 1136 }
1137 } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) {
1138 REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS;
1061 } 1139 }
1062} 1140}
1063 1141
1064static int reiserfs_remount (struct super_block * s, int * mount_flags, char * arg) 1142static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1065{ 1143{
1066 struct reiserfs_super_block * rs; 1144 struct reiserfs_super_block *rs;
1067 struct reiserfs_transaction_handle th ; 1145 struct reiserfs_transaction_handle th;
1068 unsigned long blocks; 1146 unsigned long blocks;
1069 unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; 1147 unsigned long mount_options = REISERFS_SB(s)->s_mount_opt;
1070 unsigned long safe_mask = 0; 1148 unsigned long safe_mask = 0;
1071 unsigned int commit_max_age = (unsigned int)-1; 1149 unsigned int commit_max_age = (unsigned int)-1;
1072 struct reiserfs_journal *journal = SB_JOURNAL(s); 1150 struct reiserfs_journal *journal = SB_JOURNAL(s);
1073 int err; 1151 int err;
1074#ifdef CONFIG_QUOTA 1152#ifdef CONFIG_QUOTA
1075 int i; 1153 int i;
1076#endif 1154#endif
1077 1155
1078 rs = SB_DISK_SUPER_BLOCK (s); 1156 rs = SB_DISK_SUPER_BLOCK(s);
1079 1157
1080 if (!reiserfs_parse_options(s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { 1158 if (!reiserfs_parse_options
1159 (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
1081#ifdef CONFIG_QUOTA 1160#ifdef CONFIG_QUOTA
1082 for (i = 0; i < MAXQUOTAS; i++) 1161 for (i = 0; i < MAXQUOTAS; i++)
1083 if (REISERFS_SB(s)->s_qf_names[i]) { 1162 if (REISERFS_SB(s)->s_qf_names[i]) {
1084 kfree(REISERFS_SB(s)->s_qf_names[i]); 1163 kfree(REISERFS_SB(s)->s_qf_names[i]);
1085 REISERFS_SB(s)->s_qf_names[i] = NULL; 1164 REISERFS_SB(s)->s_qf_names[i] = NULL;
1086 } 1165 }
1087#endif 1166#endif
1088 return -EINVAL; 1167 return -EINVAL;
1089 } 1168 }
1090 1169
1091 handle_attrs(s); 1170 handle_attrs(s);
1092 1171
1093 /* Add options that are safe here */ 1172 /* Add options that are safe here */
1094 safe_mask |= 1 << REISERFS_SMALLTAIL; 1173 safe_mask |= 1 << REISERFS_SMALLTAIL;
1095 safe_mask |= 1 << REISERFS_LARGETAIL; 1174 safe_mask |= 1 << REISERFS_LARGETAIL;
1096 safe_mask |= 1 << REISERFS_NO_BORDER; 1175 safe_mask |= 1 << REISERFS_NO_BORDER;
1097 safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; 1176 safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION;
1098 safe_mask |= 1 << REISERFS_HASHED_RELOCATION; 1177 safe_mask |= 1 << REISERFS_HASHED_RELOCATION;
1099 safe_mask |= 1 << REISERFS_TEST4; 1178 safe_mask |= 1 << REISERFS_TEST4;
1100 safe_mask |= 1 << REISERFS_ATTRS; 1179 safe_mask |= 1 << REISERFS_ATTRS;
1101 safe_mask |= 1 << REISERFS_XATTRS_USER; 1180 safe_mask |= 1 << REISERFS_XATTRS_USER;
1102 safe_mask |= 1 << REISERFS_POSIXACL; 1181 safe_mask |= 1 << REISERFS_POSIXACL;
1103 safe_mask |= 1 << REISERFS_BARRIER_FLUSH; 1182 safe_mask |= 1 << REISERFS_BARRIER_FLUSH;
1104 safe_mask |= 1 << REISERFS_BARRIER_NONE; 1183 safe_mask |= 1 << REISERFS_BARRIER_NONE;
1105 safe_mask |= 1 << REISERFS_ERROR_RO; 1184 safe_mask |= 1 << REISERFS_ERROR_RO;
1106 safe_mask |= 1 << REISERFS_ERROR_CONTINUE; 1185 safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
1107 safe_mask |= 1 << REISERFS_ERROR_PANIC; 1186 safe_mask |= 1 << REISERFS_ERROR_PANIC;
1108 1187 safe_mask |= 1 << REISERFS_QUOTA;
1109 /* Update the bitmask, taking care to keep 1188
1110 * the bits we're not allowed to change here */ 1189 /* Update the bitmask, taking care to keep
1111 REISERFS_SB(s)->s_mount_opt = (REISERFS_SB(s)->s_mount_opt & ~safe_mask) | (mount_options & safe_mask); 1190 * the bits we're not allowed to change here */
1112 1191 REISERFS_SB(s)->s_mount_opt =
1113 if(commit_max_age != 0 && commit_max_age != (unsigned int)-1) { 1192 (REISERFS_SB(s)->
1114 journal->j_max_commit_age = commit_max_age; 1193 s_mount_opt & ~safe_mask) | (mount_options & safe_mask);
1115 journal->j_max_trans_age = commit_max_age; 1194
1116 } 1195 if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) {
1117 else if(commit_max_age == 0) 1196 journal->j_max_commit_age = commit_max_age;
1118 { 1197 journal->j_max_trans_age = commit_max_age;
1119 /* 0 means restore defaults. */ 1198 } else if (commit_max_age == 0) {
1120 journal->j_max_commit_age = journal->j_default_max_commit_age; 1199 /* 0 means restore defaults. */
1121 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 1200 journal->j_max_commit_age = journal->j_default_max_commit_age;
1122 } 1201 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
1123 1202 }
1124 if(blocks) { 1203
1125 int rc = reiserfs_resize(s, blocks); 1204 if (blocks) {
1126 if (rc != 0) 1205 int rc = reiserfs_resize(s, blocks);
1127 return rc; 1206 if (rc != 0)
1128 } 1207 return rc;
1129 1208 }
1130 if (*mount_flags & MS_RDONLY) { 1209
1131 reiserfs_xattr_init (s, *mount_flags); 1210 if (*mount_flags & MS_RDONLY) {
1132 /* remount read-only */ 1211 reiserfs_xattr_init(s, *mount_flags);
1133 if (s->s_flags & MS_RDONLY) 1212 /* remount read-only */
1134 /* it is read-only already */ 1213 if (s->s_flags & MS_RDONLY)
1135 return 0; 1214 /* it is read-only already */
1136 /* try to remount file system with read-only permissions */ 1215 return 0;
1137 if (sb_umount_state(rs) == REISERFS_VALID_FS || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { 1216 /* try to remount file system with read-only permissions */
1138 return 0; 1217 if (sb_umount_state(rs) == REISERFS_VALID_FS
1139 } 1218 || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) {
1140 1219 return 0;
1141 err = journal_begin(&th, s, 10) ; 1220 }
1142 if (err) 1221
1143 return err; 1222 err = journal_begin(&th, s, 10);
1144 1223 if (err)
1145 /* Mounting a rw partition read-only. */ 1224 return err;
1146 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 1225
1147 set_sb_umount_state( rs, REISERFS_SB(s)->s_mount_state ); 1226 /* Mounting a rw partition read-only. */
1148 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 1227 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1149 } else { 1228 set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state);
1150 /* remount read-write */ 1229 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1151 if (!(s->s_flags & MS_RDONLY)) { 1230 } else {
1152 reiserfs_xattr_init (s, *mount_flags); 1231 /* remount read-write */
1153 return 0; /* We are read-write already */ 1232 if (!(s->s_flags & MS_RDONLY)) {
1154 } 1233 reiserfs_xattr_init(s, *mount_flags);
1155 1234 return 0; /* We are read-write already */
1156 if (reiserfs_is_journal_aborted (journal)) 1235 }
1157 return journal->j_errno; 1236
1158 1237 if (reiserfs_is_journal_aborted(journal))
1159 handle_data_mode(s, mount_options); 1238 return journal->j_errno;
1160 handle_barrier_mode(s, mount_options); 1239
1161 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs) ; 1240 handle_data_mode(s, mount_options);
1162 s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */ 1241 handle_barrier_mode(s, mount_options);
1163 err = journal_begin(&th, s, 10) ; 1242 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1164 if (err) 1243 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
1165 return err; 1244 err = journal_begin(&th, s, 10);
1166 1245 if (err)
1167 /* Mount a partition which is read-only, read-write */ 1246 return err;
1168 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 1247
1169 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); 1248 /* Mount a partition which is read-only, read-write */
1170 s->s_flags &= ~MS_RDONLY; 1249 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1171 set_sb_umount_state( rs, REISERFS_ERROR_FS ); 1250 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1172 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ 1251 s->s_flags &= ~MS_RDONLY;
1173 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 1252 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1174 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS ; 1253 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
1175 } 1254 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1176 /* this will force a full flush of all journal lists */ 1255 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
1177 SB_JOURNAL(s)->j_must_wait = 1 ; 1256 }
1178 err = journal_end(&th, s, 10) ; 1257 /* this will force a full flush of all journal lists */
1179 if (err) 1258 SB_JOURNAL(s)->j_must_wait = 1;
1180 return err; 1259 err = journal_end(&th, s, 10);
1181 s->s_dirt = 0; 1260 if (err)
1182 1261 return err;
1183 if (!( *mount_flags & MS_RDONLY ) ) { 1262 s->s_dirt = 0;
1184 finish_unfinished( s ); 1263
1185 reiserfs_xattr_init (s, *mount_flags); 1264 if (!(*mount_flags & MS_RDONLY)) {
1186 } 1265 finish_unfinished(s);
1187 1266 reiserfs_xattr_init(s, *mount_flags);
1188 return 0; 1267 }
1268
1269 return 0;
1189} 1270}
1190 1271
1191/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk. 1272/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk.
@@ -1204,791 +1285,879 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
1204 * free blocks at all. 1285 * free blocks at all.
1205 */ 1286 */
1206 1287
1207static void load_bitmap_info_data (struct super_block *sb, 1288static void load_bitmap_info_data(struct super_block *sb,
1208 struct reiserfs_bitmap_info *bi) 1289 struct reiserfs_bitmap_info *bi)
1209{ 1290{
1210 unsigned long *cur = (unsigned long *)bi->bh->b_data; 1291 unsigned long *cur = (unsigned long *)bi->bh->b_data;
1211 1292
1212 while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { 1293 while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) {
1213 1294
1214 /* No need to scan if all 0's or all 1's. 1295 /* No need to scan if all 0's or all 1's.
1215 * Since we're only counting 0's, we can simply ignore all 1's */ 1296 * Since we're only counting 0's, we can simply ignore all 1's */
1216 if (*cur == 0) { 1297 if (*cur == 0) {
1217 if (bi->first_zero_hint == 0) { 1298 if (bi->first_zero_hint == 0) {
1218 bi->first_zero_hint = ((char *)cur - bi->bh->b_data) << 3; 1299 bi->first_zero_hint =
1219 } 1300 ((char *)cur - bi->bh->b_data) << 3;
1220 bi->free_count += sizeof(unsigned long)*8; 1301 }
1221 } else if (*cur != ~0L) { 1302 bi->free_count += sizeof(unsigned long) * 8;
1222 int b; 1303 } else if (*cur != ~0L) {
1223 for (b = 0; b < sizeof(unsigned long)*8; b++) { 1304 int b;
1224 if (!reiserfs_test_le_bit (b, cur)) { 1305 for (b = 0; b < sizeof(unsigned long) * 8; b++) {
1225 bi->free_count ++; 1306 if (!reiserfs_test_le_bit(b, cur)) {
1226 if (bi->first_zero_hint == 0) 1307 bi->free_count++;
1227 bi->first_zero_hint = 1308 if (bi->first_zero_hint == 0)
1228 (((char *)cur - bi->bh->b_data) << 3) + b; 1309 bi->first_zero_hint =
1229 } 1310 (((char *)cur -
1311 bi->bh->b_data) << 3) + b;
1312 }
1313 }
1230 } 1314 }
1231 } 1315 cur++;
1232 cur ++; 1316 }
1233 }
1234 1317
1235#ifdef CONFIG_REISERFS_CHECK 1318#ifdef CONFIG_REISERFS_CHECK
1236// This outputs a lot of unneded info on big FSes 1319// This outputs a lot of unneded info on big FSes
1237// reiserfs_warning ("bitmap loaded from block %d: %d free blocks", 1320// reiserfs_warning ("bitmap loaded from block %d: %d free blocks",
1238// bi->bh->b_blocknr, bi->free_count); 1321// bi->bh->b_blocknr, bi->free_count);
1239#endif 1322#endif
1240} 1323}
1241 1324
1242static int read_bitmaps (struct super_block * s) 1325static int read_bitmaps(struct super_block *s)
1243{ 1326{
1244 int i, bmap_nr; 1327 int i, bmap_nr;
1328
1329 SB_AP_BITMAP(s) =
1330 vmalloc(sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
1331 if (SB_AP_BITMAP(s) == 0)
1332 return 1;
1333 memset(SB_AP_BITMAP(s), 0,
1334 sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
1335 for (i = 0, bmap_nr =
1336 REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1;
1337 i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) {
1338 SB_AP_BITMAP(s)[i].bh = sb_getblk(s, bmap_nr);
1339 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh))
1340 ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh);
1341 }
1342 for (i = 0; i < SB_BMAP_NR(s); i++) {
1343 wait_on_buffer(SB_AP_BITMAP(s)[i].bh);
1344 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
1345 reiserfs_warning(s, "sh-2029: reiserfs read_bitmaps: "
1346 "bitmap block (#%lu) reading failed",
1347 SB_AP_BITMAP(s)[i].bh->b_blocknr);
1348 for (i = 0; i < SB_BMAP_NR(s); i++)
1349 brelse(SB_AP_BITMAP(s)[i].bh);
1350 vfree(SB_AP_BITMAP(s));
1351 SB_AP_BITMAP(s) = NULL;
1352 return 1;
1353 }
1354 load_bitmap_info_data(s, SB_AP_BITMAP(s) + i);
1355 }
1356 return 0;
1357}
1245 1358
1246 SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 1359static int read_old_bitmaps(struct super_block *s)
1247 if (SB_AP_BITMAP (s) == 0) 1360{
1248 return 1; 1361 int i;
1249 memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 1362 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
1250 for (i = 0, bmap_nr = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; 1363 int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */
1251 i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { 1364
1252 SB_AP_BITMAP (s)[i].bh = sb_getblk(s, bmap_nr); 1365 /* read true bitmap */
1253 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) 1366 SB_AP_BITMAP(s) =
1254 ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); 1367 vmalloc(sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs));
1255 } 1368 if (SB_AP_BITMAP(s) == 0)
1256 for (i = 0; i < SB_BMAP_NR(s); i++) { 1369 return 1;
1257 wait_on_buffer(SB_AP_BITMAP (s)[i].bh); 1370
1258 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { 1371 memset(SB_AP_BITMAP(s), 0,
1259 reiserfs_warning(s,"sh-2029: reiserfs read_bitmaps: " 1372 sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs));
1260 "bitmap block (#%lu) reading failed", 1373
1261 SB_AP_BITMAP(s)[i].bh->b_blocknr); 1374 for (i = 0; i < sb_bmap_nr(rs); i++) {
1262 for (i = 0; i < SB_BMAP_NR(s); i++) 1375 SB_AP_BITMAP(s)[i].bh = sb_bread(s, bmp1 + i);
1263 brelse(SB_AP_BITMAP(s)[i].bh); 1376 if (!SB_AP_BITMAP(s)[i].bh)
1264 vfree(SB_AP_BITMAP(s)); 1377 return 1;
1265 SB_AP_BITMAP(s) = NULL; 1378 load_bitmap_info_data(s, SB_AP_BITMAP(s) + i);
1266 return 1;
1267 } 1379 }
1268 load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); 1380
1269 } 1381 return 0;
1270 return 0;
1271} 1382}
1272 1383
1273static int read_old_bitmaps (struct super_block * s) 1384static int read_super_block(struct super_block *s, int offset)
1274{ 1385{
1275 int i ; 1386 struct buffer_head *bh;
1276 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); 1387 struct reiserfs_super_block *rs;
1277 int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ 1388 int fs_blocksize;
1278 1389
1279 /* read true bitmap */ 1390 bh = sb_bread(s, offset / s->s_blocksize);
1280 SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); 1391 if (!bh) {
1281 if (SB_AP_BITMAP (s) == 0) 1392 reiserfs_warning(s, "sh-2006: read_super_block: "
1282 return 1; 1393 "bread failed (dev %s, block %lu, size %lu)",
1394 reiserfs_bdevname(s), offset / s->s_blocksize,
1395 s->s_blocksize);
1396 return 1;
1397 }
1283 1398
1284 memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); 1399 rs = (struct reiserfs_super_block *)bh->b_data;
1400 if (!is_any_reiserfs_magic_string(rs)) {
1401 brelse(bh);
1402 return 1;
1403 }
1404 //
1405 // ok, reiserfs signature (old or new) found in at the given offset
1406 //
1407 fs_blocksize = sb_blocksize(rs);
1408 brelse(bh);
1409 sb_set_blocksize(s, fs_blocksize);
1285 1410
1286 for (i = 0; i < sb_bmap_nr(rs); i ++) { 1411 bh = sb_bread(s, offset / s->s_blocksize);
1287 SB_AP_BITMAP (s)[i].bh = sb_bread (s, bmp1 + i); 1412 if (!bh) {
1288 if (!SB_AP_BITMAP (s)[i].bh) 1413 reiserfs_warning(s, "sh-2007: read_super_block: "
1289 return 1; 1414 "bread failed (dev %s, block %lu, size %lu)\n",
1290 load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); 1415 reiserfs_bdevname(s), offset / s->s_blocksize,
1291 } 1416 s->s_blocksize);
1417 return 1;
1418 }
1292 1419
1293 return 0; 1420 rs = (struct reiserfs_super_block *)bh->b_data;
1294} 1421 if (sb_blocksize(rs) != s->s_blocksize) {
1422 reiserfs_warning(s, "sh-2011: read_super_block: "
1423 "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n",
1424 reiserfs_bdevname(s),
1425 (unsigned long long)bh->b_blocknr,
1426 s->s_blocksize);
1427 brelse(bh);
1428 return 1;
1429 }
1295 1430
1296static int read_super_block (struct super_block * s, int offset) 1431 if (rs->s_v1.s_root_block == cpu_to_le32(-1)) {
1297{ 1432 brelse(bh);
1298 struct buffer_head * bh; 1433 reiserfs_warning(s,
1299 struct reiserfs_super_block * rs; 1434 "Unfinished reiserfsck --rebuild-tree run detected. Please run\n"
1300 int fs_blocksize; 1435 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n"
1301 1436 "get newer reiserfsprogs package");
1302 1437 return 1;
1303 bh = sb_bread (s, offset / s->s_blocksize);
1304 if (!bh) {
1305 reiserfs_warning (s, "sh-2006: read_super_block: "
1306 "bread failed (dev %s, block %lu, size %lu)",
1307 reiserfs_bdevname (s), offset / s->s_blocksize, s->s_blocksize);
1308 return 1;
1309 }
1310
1311 rs = (struct reiserfs_super_block *)bh->b_data;
1312 if (!is_any_reiserfs_magic_string (rs)) {
1313 brelse (bh);
1314 return 1;
1315 }
1316
1317 //
1318 // ok, reiserfs signature (old or new) found in at the given offset
1319 //
1320 fs_blocksize = sb_blocksize(rs);
1321 brelse (bh);
1322 sb_set_blocksize (s, fs_blocksize);
1323
1324 bh = sb_bread (s, offset / s->s_blocksize);
1325 if (!bh) {
1326 reiserfs_warning (s, "sh-2007: read_super_block: "
1327 "bread failed (dev %s, block %lu, size %lu)\n",
1328 reiserfs_bdevname (s), offset / s->s_blocksize, s->s_blocksize);
1329 return 1;
1330 }
1331
1332 rs = (struct reiserfs_super_block *)bh->b_data;
1333 if (sb_blocksize(rs) != s->s_blocksize) {
1334 reiserfs_warning (s, "sh-2011: read_super_block: "
1335 "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n",
1336 reiserfs_bdevname (s), (unsigned long long)bh->b_blocknr, s->s_blocksize);
1337 brelse (bh);
1338 return 1;
1339 }
1340
1341 if ( rs->s_v1.s_root_block == cpu_to_le32(-1) ) {
1342 brelse(bh) ;
1343 reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n"
1344 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n"
1345 "get newer reiserfsprogs package");
1346 return 1;
1347 }
1348
1349 SB_BUFFER_WITH_SB (s) = bh;
1350 SB_DISK_SUPER_BLOCK (s) = rs;
1351
1352 if (is_reiserfs_jr (rs)) {
1353 /* magic is of non-standard journal filesystem, look at s_version to
1354 find which format is in use */
1355 if (sb_version(rs) == REISERFS_VERSION_2)
1356 reiserfs_warning (s, "read_super_block: found reiserfs format \"3.6\""
1357 " with non-standard journal");
1358 else if (sb_version(rs) == REISERFS_VERSION_1)
1359 reiserfs_warning (s, "read_super_block: found reiserfs format \"3.5\""
1360 " with non-standard journal");
1361 else {
1362 reiserfs_warning (s, "sh-2012: read_super_block: found unknown "
1363 "format \"%u\" of reiserfs with non-standard magic",
1364 sb_version(rs));
1365 return 1;
1366 } 1438 }
1367 }
1368 else
1369 /* s_version of standard format may contain incorrect information,
1370 so we just look at the magic string */
1371 reiserfs_info (s, "found reiserfs format \"%s\" with standard journal\n",
1372 is_reiserfs_3_5 (rs) ? "3.5" : "3.6");
1373 1439
1374 s->s_op = &reiserfs_sops; 1440 SB_BUFFER_WITH_SB(s) = bh;
1375 s->s_export_op = &reiserfs_export_ops; 1441 SB_DISK_SUPER_BLOCK(s) = rs;
1442
1443 if (is_reiserfs_jr(rs)) {
1444 /* magic is of non-standard journal filesystem, look at s_version to
1445 find which format is in use */
1446 if (sb_version(rs) == REISERFS_VERSION_2)
1447 reiserfs_warning(s,
1448 "read_super_block: found reiserfs format \"3.6\""
1449 " with non-standard journal");
1450 else if (sb_version(rs) == REISERFS_VERSION_1)
1451 reiserfs_warning(s,
1452 "read_super_block: found reiserfs format \"3.5\""
1453 " with non-standard journal");
1454 else {
1455 reiserfs_warning(s,
1456 "sh-2012: read_super_block: found unknown "
1457 "format \"%u\" of reiserfs with non-standard magic",
1458 sb_version(rs));
1459 return 1;
1460 }
1461 } else
1462 /* s_version of standard format may contain incorrect information,
1463 so we just look at the magic string */
1464 reiserfs_info(s,
1465 "found reiserfs format \"%s\" with standard journal\n",
1466 is_reiserfs_3_5(rs) ? "3.5" : "3.6");
1467
1468 s->s_op = &reiserfs_sops;
1469 s->s_export_op = &reiserfs_export_ops;
1376#ifdef CONFIG_QUOTA 1470#ifdef CONFIG_QUOTA
1377 s->s_qcop = &reiserfs_qctl_operations; 1471 s->s_qcop = &reiserfs_qctl_operations;
1378 s->dq_op = &reiserfs_quota_operations; 1472 s->dq_op = &reiserfs_quota_operations;
1379#endif 1473#endif
1380 1474
1381 /* new format is limited by the 32 bit wide i_blocks field, want to 1475 /* new format is limited by the 32 bit wide i_blocks field, want to
1382 ** be one full block below that. 1476 ** be one full block below that.
1383 */ 1477 */
1384 s->s_maxbytes = (512LL << 32) - s->s_blocksize ; 1478 s->s_maxbytes = (512LL << 32) - s->s_blocksize;
1385 return 0; 1479 return 0;
1386} 1480}
1387 1481
1388
1389
1390/* after journal replay, reread all bitmap and super blocks */ 1482/* after journal replay, reread all bitmap and super blocks */
1391static int reread_meta_blocks(struct super_block *s) { 1483static int reread_meta_blocks(struct super_block *s)
1392 int i ; 1484{
1393 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))) ; 1485 int i;
1394 wait_on_buffer(SB_BUFFER_WITH_SB(s)) ; 1486 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1395 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1487 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1396 reiserfs_warning (s, "reread_meta_blocks, error reading the super") ; 1488 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1397 return 1 ; 1489 reiserfs_warning(s,
1398 } 1490 "reread_meta_blocks, error reading the super");
1399 1491 return 1;
1400 for (i = 0; i < SB_BMAP_NR(s) ; i++) { 1492 }
1401 ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)) ;
1402 wait_on_buffer(SB_AP_BITMAP(s)[i].bh) ;
1403 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
1404 reiserfs_warning (s, "reread_meta_blocks, error reading bitmap block number %d at %llu",
1405 i, (unsigned long long)SB_AP_BITMAP(s)[i].bh->b_blocknr) ;
1406 return 1 ;
1407 }
1408 }
1409 return 0 ;
1410 1493
1411} 1494 for (i = 0; i < SB_BMAP_NR(s); i++) {
1495 ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh));
1496 wait_on_buffer(SB_AP_BITMAP(s)[i].bh);
1497 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
1498 reiserfs_warning(s,
1499 "reread_meta_blocks, error reading bitmap block number %d at %llu",
1500 i,
1501 (unsigned long long)SB_AP_BITMAP(s)[i].
1502 bh->b_blocknr);
1503 return 1;
1504 }
1505 }
1506 return 0;
1412 1507
1508}
1413 1509
1414///////////////////////////////////////////////////// 1510/////////////////////////////////////////////////////
1415// hash detection stuff 1511// hash detection stuff
1416 1512
1417
1418// if root directory is empty - we set default - Yura's - hash and 1513// if root directory is empty - we set default - Yura's - hash and
1419// warn about it 1514// warn about it
1420// FIXME: we look for only one name in a directory. If tea and yura 1515// FIXME: we look for only one name in a directory. If tea and yura
1421// bith have the same value - we ask user to send report to the 1516// bith have the same value - we ask user to send report to the
1422// mailing list 1517// mailing list
1423static __u32 find_hash_out (struct super_block * s) 1518static __u32 find_hash_out(struct super_block *s)
1424{ 1519{
1425 int retval; 1520 int retval;
1426 struct inode * inode; 1521 struct inode *inode;
1427 struct cpu_key key; 1522 struct cpu_key key;
1428 INITIALIZE_PATH (path); 1523 INITIALIZE_PATH(path);
1429 struct reiserfs_dir_entry de; 1524 struct reiserfs_dir_entry de;
1430 __u32 hash = DEFAULT_HASH; 1525 __u32 hash = DEFAULT_HASH;
1431 1526
1432 inode = s->s_root->d_inode; 1527 inode = s->s_root->d_inode;
1433 1528
1434 do { // Some serious "goto"-hater was there ;) 1529 do { // Some serious "goto"-hater was there ;)
1435 u32 teahash, r5hash, yurahash; 1530 u32 teahash, r5hash, yurahash;
1436 1531
1437 make_cpu_key (&key, inode, ~0, TYPE_DIRENTRY, 3); 1532 make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
1438 retval = search_by_entry_key (s, &key, &path, &de); 1533 retval = search_by_entry_key(s, &key, &path, &de);
1439 if (retval == IO_ERROR) { 1534 if (retval == IO_ERROR) {
1440 pathrelse (&path); 1535 pathrelse(&path);
1441 return UNSET_HASH ; 1536 return UNSET_HASH;
1442 } 1537 }
1443 if (retval == NAME_NOT_FOUND) 1538 if (retval == NAME_NOT_FOUND)
1444 de.de_entry_num --; 1539 de.de_entry_num--;
1445 set_de_name_and_namelen (&de); 1540 set_de_name_and_namelen(&de);
1446 if (deh_offset( &(de.de_deh[de.de_entry_num]) ) == DOT_DOT_OFFSET) { 1541 if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) {
1447 /* allow override in this case */ 1542 /* allow override in this case */
1448 if (reiserfs_rupasov_hash(s)) { 1543 if (reiserfs_rupasov_hash(s)) {
1449 hash = YURA_HASH ; 1544 hash = YURA_HASH;
1450 } 1545 }
1451 reiserfs_warning(s,"FS seems to be empty, autodetect " 1546 reiserfs_warning(s, "FS seems to be empty, autodetect "
1452 "is using the default hash"); 1547 "is using the default hash");
1453 break; 1548 break;
1454 } 1549 }
1455 r5hash=GET_HASH_VALUE (r5_hash (de.de_name, de.de_namelen)); 1550 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
1456 teahash=GET_HASH_VALUE (keyed_hash (de.de_name, de.de_namelen)); 1551 teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
1457 yurahash=GET_HASH_VALUE (yura_hash (de.de_name, de.de_namelen)); 1552 yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
1458 if ( ( (teahash == r5hash) && (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) ) || 1553 if (((teahash == r5hash)
1459 ( (teahash == yurahash) && (yurahash == GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])))) ) || 1554 &&
1460 ( (r5hash == yurahash) && (yurahash == GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])))) ) ) { 1555 (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])))
1461 reiserfs_warning(s,"Unable to automatically detect hash function. " 1556 == r5hash)) || ((teahash == yurahash)
1462 "Please mount with -o hash={tea,rupasov,r5}", 1557 && (yurahash ==
1463 reiserfs_bdevname (s)); 1558 GET_HASH_VALUE(deh_offset
1464 hash = UNSET_HASH; 1559 (&
1465 break; 1560 (de.
1466 } 1561 de_deh[de.
1467 if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == yurahash) 1562 de_entry_num])))))
1468 hash = YURA_HASH; 1563 || ((r5hash == yurahash)
1469 else if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == teahash) 1564 && (yurahash ==
1470 hash = TEA_HASH; 1565 GET_HASH_VALUE(deh_offset
1471 else if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == r5hash) 1566 (&(de.de_deh[de.de_entry_num])))))) {
1472 hash = R5_HASH; 1567 reiserfs_warning(s,
1473 else { 1568 "Unable to automatically detect hash function. "
1474 reiserfs_warning (s,"Unrecognised hash function"); 1569 "Please mount with -o hash={tea,rupasov,r5}",
1475 hash = UNSET_HASH; 1570 reiserfs_bdevname(s));
1476 } 1571 hash = UNSET_HASH;
1477 } while (0); 1572 break;
1478 1573 }
1479 pathrelse (&path); 1574 if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) ==
1480 return hash; 1575 yurahash)
1576 hash = YURA_HASH;
1577 else if (GET_HASH_VALUE
1578 (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash)
1579 hash = TEA_HASH;
1580 else if (GET_HASH_VALUE
1581 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash)
1582 hash = R5_HASH;
1583 else {
1584 reiserfs_warning(s, "Unrecognised hash function");
1585 hash = UNSET_HASH;
1586 }
1587 } while (0);
1588
1589 pathrelse(&path);
1590 return hash;
1481} 1591}
1482 1592
1483// finds out which hash names are sorted with 1593// finds out which hash names are sorted with
1484static int what_hash (struct super_block * s) 1594static int what_hash(struct super_block *s)
1485{ 1595{
1486 __u32 code; 1596 __u32 code;
1487 1597
1488 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); 1598 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s));
1489 1599
1490 /* reiserfs_hash_detect() == true if any of the hash mount options 1600 /* reiserfs_hash_detect() == true if any of the hash mount options
1491 ** were used. We must check them to make sure the user isn't 1601 ** were used. We must check them to make sure the user isn't
1492 ** using a bad hash value 1602 ** using a bad hash value
1493 */ 1603 */
1494 if (code == UNSET_HASH || reiserfs_hash_detect(s)) 1604 if (code == UNSET_HASH || reiserfs_hash_detect(s))
1495 code = find_hash_out (s); 1605 code = find_hash_out(s);
1496 1606
1497 if (code != UNSET_HASH && reiserfs_hash_detect(s)) { 1607 if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
1498 /* detection has found the hash, and we must check against the 1608 /* detection has found the hash, and we must check against the
1499 ** mount options 1609 ** mount options
1500 */ 1610 */
1501 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { 1611 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
1502 reiserfs_warning (s, "Error, %s hash detected, " 1612 reiserfs_warning(s, "Error, %s hash detected, "
1503 "unable to force rupasov hash", reiserfs_hashname(code)) ; 1613 "unable to force rupasov hash",
1504 code = UNSET_HASH ; 1614 reiserfs_hashname(code));
1505 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { 1615 code = UNSET_HASH;
1506 reiserfs_warning (s, "Error, %s hash detected, " 1616 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) {
1507 "unable to force tea hash", reiserfs_hashname(code)) ; 1617 reiserfs_warning(s, "Error, %s hash detected, "
1508 code = UNSET_HASH ; 1618 "unable to force tea hash",
1509 } else if (reiserfs_r5_hash(s) && code != R5_HASH) { 1619 reiserfs_hashname(code));
1510 reiserfs_warning (s, "Error, %s hash detected, " 1620 code = UNSET_HASH;
1511 "unable to force r5 hash", reiserfs_hashname(code)) ; 1621 } else if (reiserfs_r5_hash(s) && code != R5_HASH) {
1512 code = UNSET_HASH ; 1622 reiserfs_warning(s, "Error, %s hash detected, "
1513 } 1623 "unable to force r5 hash",
1514 } else { 1624 reiserfs_hashname(code));
1515 /* find_hash_out was not called or could not determine the hash */ 1625 code = UNSET_HASH;
1516 if (reiserfs_rupasov_hash(s)) { 1626 }
1517 code = YURA_HASH ; 1627 } else {
1518 } else if (reiserfs_tea_hash(s)) { 1628 /* find_hash_out was not called or could not determine the hash */
1519 code = TEA_HASH ; 1629 if (reiserfs_rupasov_hash(s)) {
1520 } else if (reiserfs_r5_hash(s)) { 1630 code = YURA_HASH;
1521 code = R5_HASH ; 1631 } else if (reiserfs_tea_hash(s)) {
1522 } 1632 code = TEA_HASH;
1523 } 1633 } else if (reiserfs_r5_hash(s)) {
1524 1634 code = R5_HASH;
1525 /* if we are mounted RW, and we have a new valid hash code, update 1635 }
1526 ** the super 1636 }
1527 */ 1637
1528 if (code != UNSET_HASH && 1638 /* if we are mounted RW, and we have a new valid hash code, update
1529 !(s->s_flags & MS_RDONLY) && 1639 ** the super
1530 code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { 1640 */
1531 set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); 1641 if (code != UNSET_HASH &&
1532 } 1642 !(s->s_flags & MS_RDONLY) &&
1533 return code; 1643 code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) {
1644 set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code);
1645 }
1646 return code;
1534} 1647}
1535 1648
1536// return pointer to appropriate function 1649// return pointer to appropriate function
1537static hashf_t hash_function (struct super_block * s) 1650static hashf_t hash_function(struct super_block *s)
1538{ 1651{
1539 switch (what_hash (s)) { 1652 switch (what_hash(s)) {
1540 case TEA_HASH: 1653 case TEA_HASH:
1541 reiserfs_info (s, "Using tea hash to sort names\n"); 1654 reiserfs_info(s, "Using tea hash to sort names\n");
1542 return keyed_hash; 1655 return keyed_hash;
1543 case YURA_HASH: 1656 case YURA_HASH:
1544 reiserfs_info (s, "Using rupasov hash to sort names\n"); 1657 reiserfs_info(s, "Using rupasov hash to sort names\n");
1545 return yura_hash; 1658 return yura_hash;
1546 case R5_HASH: 1659 case R5_HASH:
1547 reiserfs_info (s, "Using r5 hash to sort names\n"); 1660 reiserfs_info(s, "Using r5 hash to sort names\n");
1548 return r5_hash; 1661 return r5_hash;
1549 } 1662 }
1550 return NULL; 1663 return NULL;
1551} 1664}
1552 1665
1553// this is used to set up correct value for old partitions 1666// this is used to set up correct value for old partitions
1554static int function2code (hashf_t func) 1667static int function2code(hashf_t func)
1555{ 1668{
1556 if (func == keyed_hash) 1669 if (func == keyed_hash)
1557 return TEA_HASH; 1670 return TEA_HASH;
1558 if (func == yura_hash) 1671 if (func == yura_hash)
1559 return YURA_HASH; 1672 return YURA_HASH;
1560 if (func == r5_hash) 1673 if (func == r5_hash)
1561 return R5_HASH; 1674 return R5_HASH;
1562 1675
1563 BUG() ; // should never happen 1676 BUG(); // should never happen
1564 1677
1565 return 0; 1678 return 0;
1566} 1679}
1567 1680
1568#define SWARN(silent, s, ...) \ 1681#define SWARN(silent, s, ...) \
1569 if (!(silent)) \ 1682 if (!(silent)) \
1570 reiserfs_warning (s, __VA_ARGS__) 1683 reiserfs_warning (s, __VA_ARGS__)
1571 1684
1572static int reiserfs_fill_super (struct super_block * s, void * data, int silent) 1685static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1573{ 1686{
1574 struct inode *root_inode; 1687 struct inode *root_inode;
1575 int j; 1688 int j;
1576 struct reiserfs_transaction_handle th ; 1689 struct reiserfs_transaction_handle th;
1577 int old_format = 0; 1690 int old_format = 0;
1578 unsigned long blocks; 1691 unsigned long blocks;
1579 unsigned int commit_max_age = 0; 1692 unsigned int commit_max_age = 0;
1580 int jinit_done = 0 ; 1693 int jinit_done = 0;
1581 struct reiserfs_iget_args args ; 1694 struct reiserfs_iget_args args;
1582 struct reiserfs_super_block * rs; 1695 struct reiserfs_super_block *rs;
1583 char *jdev_name; 1696 char *jdev_name;
1584 struct reiserfs_sb_info *sbi; 1697 struct reiserfs_sb_info *sbi;
1585 int errval = -EINVAL; 1698 int errval = -EINVAL;
1586 1699
1587 sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1700 sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1588 if (!sbi) { 1701 if (!sbi) {
1589 errval = -ENOMEM; 1702 errval = -ENOMEM;
1590 goto error; 1703 goto error;
1591 } 1704 }
1592 s->s_fs_info = sbi; 1705 s->s_fs_info = sbi;
1593 memset (sbi, 0, sizeof (struct reiserfs_sb_info)); 1706 memset(sbi, 0, sizeof(struct reiserfs_sb_info));
1594 /* Set default values for options: non-aggressive tails, RO on errors */ 1707 /* Set default values for options: non-aggressive tails, RO on errors */
1595 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1708 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
1596 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); 1709 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
1597 /* no preallocation minimum, be smart in 1710 /* no preallocation minimum, be smart in
1598 reiserfs_file_write instead */ 1711 reiserfs_file_write instead */
1599 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1712 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
1600 /* Preallocate by 16 blocks (17-1) at once */ 1713 /* Preallocate by 16 blocks (17-1) at once */
1601 REISERFS_SB(s)->s_alloc_options.preallocsize = 17; 1714 REISERFS_SB(s)->s_alloc_options.preallocsize = 17;
1602 /* Initialize the rwsem for xattr dir */ 1715 /* Initialize the rwsem for xattr dir */
1603 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); 1716 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem);
1604 1717
1605 /* setup default block allocator options */ 1718 /* setup default block allocator options */
1606 reiserfs_init_alloc_options(s); 1719 reiserfs_init_alloc_options(s);
1607 1720
1608 jdev_name = NULL; 1721 jdev_name = NULL;
1609 if (reiserfs_parse_options (s, (char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name, &commit_max_age) == 0) { 1722 if (reiserfs_parse_options
1610 goto error; 1723 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1611 } 1724 &commit_max_age) == 0) {
1612 1725 goto error;
1613 if (blocks) { 1726 }
1614 SWARN (silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1727
1615 "for remount only"); 1728 if (blocks) {
1616 goto error; 1729 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
1617 } 1730 "for remount only");
1618 1731 goto error;
1619 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ 1732 }
1620 if (!read_super_block (s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) 1733
1621 old_format = 1; 1734 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */
1622 /* try new format (64-th 1k block), which can contain reiserfs super block */ 1735 if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
1623 else if (read_super_block (s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1736 old_format = 1;
1624 SWARN(silent, s, "sh-2021: reiserfs_fill_super: can not find reiserfs on %s", reiserfs_bdevname (s)); 1737 /* try new format (64-th 1k block), which can contain reiserfs super block */
1625 goto error; 1738 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1626 } 1739 SWARN(silent, s,
1627 1740 "sh-2021: reiserfs_fill_super: can not find reiserfs on %s",
1628 rs = SB_DISK_SUPER_BLOCK (s); 1741 reiserfs_bdevname(s));
1629 /* Let's do basic sanity check to verify that underlying device is not 1742 goto error;
1630 smaller than the filesystem. If the check fails then abort and scream, 1743 }
1631 because bad stuff will happen otherwise. */ 1744
1632 if ( s->s_bdev && s->s_bdev->bd_inode && i_size_read(s->s_bdev->bd_inode) < sb_block_count(rs)*sb_blocksize(rs)) { 1745 rs = SB_DISK_SUPER_BLOCK(s);
1633 SWARN (silent, s, "Filesystem on %s cannot be mounted because it is bigger than the device", reiserfs_bdevname(s)); 1746 /* Let's do basic sanity check to verify that underlying device is not
1634 SWARN(silent, s, "You may need to run fsck or increase size of your LVM partition"); 1747 smaller than the filesystem. If the check fails then abort and scream,
1635 SWARN(silent, s, "Or may be you forgot to reboot after fdisk when it told you to"); 1748 because bad stuff will happen otherwise. */
1636 goto error; 1749 if (s->s_bdev && s->s_bdev->bd_inode
1637 } 1750 && i_size_read(s->s_bdev->bd_inode) <
1638 1751 sb_block_count(rs) * sb_blocksize(rs)) {
1639 sbi->s_mount_state = SB_REISERFS_STATE(s); 1752 SWARN(silent, s,
1640 sbi->s_mount_state = REISERFS_VALID_FS ; 1753 "Filesystem on %s cannot be mounted because it is bigger than the device",
1641 1754 reiserfs_bdevname(s));
1642 if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) { 1755 SWARN(silent, s,
1643 SWARN(silent, s, "jmacd-8: reiserfs_fill_super: unable to read bitmap"); 1756 "You may need to run fsck or increase size of your LVM partition");
1644 goto error; 1757 SWARN(silent, s,
1645 } 1758 "Or may be you forgot to reboot after fdisk when it told you to");
1759 goto error;
1760 }
1761
1762 sbi->s_mount_state = SB_REISERFS_STATE(s);
1763 sbi->s_mount_state = REISERFS_VALID_FS;
1764
1765 if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) {
1766 SWARN(silent, s,
1767 "jmacd-8: reiserfs_fill_super: unable to read bitmap");
1768 goto error;
1769 }
1646#ifdef CONFIG_REISERFS_CHECK 1770#ifdef CONFIG_REISERFS_CHECK
1647 SWARN (silent, s, "CONFIG_REISERFS_CHECK is set ON"); 1771 SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
1648 SWARN (silent, s, "- it is slow mode for debugging."); 1772 SWARN(silent, s, "- it is slow mode for debugging.");
1649#endif 1773#endif
1650 1774
1651 /* make data=ordered the default */ 1775 /* make data=ordered the default */
1652 if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && 1776 if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) &&
1653 !reiserfs_data_writeback(s)) 1777 !reiserfs_data_writeback(s)) {
1654 { 1778 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED);
1655 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); 1779 }
1656 } 1780
1657 1781 if (reiserfs_data_log(s)) {
1658 if (reiserfs_data_log(s)) { 1782 reiserfs_info(s, "using journaled data mode\n");
1659 reiserfs_info (s, "using journaled data mode\n"); 1783 } else if (reiserfs_data_ordered(s)) {
1660 } else if (reiserfs_data_ordered(s)) { 1784 reiserfs_info(s, "using ordered data mode\n");
1661 reiserfs_info (s, "using ordered data mode\n"); 1785 } else {
1662 } else { 1786 reiserfs_info(s, "using writeback data mode\n");
1663 reiserfs_info (s, "using writeback data mode\n"); 1787 }
1664 } 1788 if (reiserfs_barrier_flush(s)) {
1665 if (reiserfs_barrier_flush(s)) { 1789 printk("reiserfs: using flush barriers\n");
1666 printk("reiserfs: using flush barriers\n"); 1790 }
1667 } 1791 // set_device_ro(s->s_dev, 1) ;
1668 1792 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1669 // set_device_ro(s->s_dev, 1) ; 1793 SWARN(silent, s,
1670 if( journal_init(s, jdev_name, old_format, commit_max_age) ) { 1794 "sh-2022: reiserfs_fill_super: unable to initialize journal space");
1671 SWARN(silent, s, "sh-2022: reiserfs_fill_super: unable to initialize journal space") ; 1795 goto error;
1672 goto error ; 1796 } else {
1673 } else { 1797 jinit_done = 1; /* once this is set, journal_release must be called
1674 jinit_done = 1 ; /* once this is set, journal_release must be called 1798 ** if we error out of the mount
1675 ** if we error out of the mount 1799 */
1676 */ 1800 }
1677 } 1801 if (reread_meta_blocks(s)) {
1678 if (reread_meta_blocks(s)) { 1802 SWARN(silent, s,
1679 SWARN(silent, s, "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init") ; 1803 "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init");
1680 goto error ; 1804 goto error;
1681 } 1805 }
1682 1806
1683 if (replay_only (s)) 1807 if (replay_only(s))
1684 goto error; 1808 goto error;
1685 1809
1686 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { 1810 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
1687 SWARN(silent, s, "clm-7000: Detected readonly device, marking FS readonly") ; 1811 SWARN(silent, s,
1688 s->s_flags |= MS_RDONLY ; 1812 "clm-7000: Detected readonly device, marking FS readonly");
1689 } 1813 s->s_flags |= MS_RDONLY;
1690 args.objectid = REISERFS_ROOT_OBJECTID ; 1814 }
1691 args.dirid = REISERFS_ROOT_PARENT_OBJECTID ; 1815 args.objectid = REISERFS_ROOT_OBJECTID;
1692 root_inode = iget5_locked (s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); 1816 args.dirid = REISERFS_ROOT_PARENT_OBJECTID;
1693 if (!root_inode) { 1817 root_inode =
1694 SWARN(silent, s, "jmacd-10: reiserfs_fill_super: get root inode failed"); 1818 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
1695 goto error; 1819 reiserfs_init_locked_inode, (void *)(&args));
1696 } 1820 if (!root_inode) {
1697 1821 SWARN(silent, s,
1698 if (root_inode->i_state & I_NEW) { 1822 "jmacd-10: reiserfs_fill_super: get root inode failed");
1699 reiserfs_read_locked_inode(root_inode, &args); 1823 goto error;
1700 unlock_new_inode(root_inode); 1824 }
1701 } 1825
1702 1826 if (root_inode->i_state & I_NEW) {
1703 s->s_root = d_alloc_root(root_inode); 1827 reiserfs_read_locked_inode(root_inode, &args);
1704 if (!s->s_root) { 1828 unlock_new_inode(root_inode);
1705 iput(root_inode); 1829 }
1706 goto error; 1830
1707 } 1831 s->s_root = d_alloc_root(root_inode);
1708 1832 if (!s->s_root) {
1709 // define and initialize hash function 1833 iput(root_inode);
1710 sbi->s_hash_function = hash_function (s); 1834 goto error;
1711 if (sbi->s_hash_function == NULL) { 1835 }
1712 dput(s->s_root) ; 1836 // define and initialize hash function
1713 s->s_root = NULL ; 1837 sbi->s_hash_function = hash_function(s);
1714 goto error ; 1838 if (sbi->s_hash_function == NULL) {
1715 } 1839 dput(s->s_root);
1716 1840 s->s_root = NULL;
1717 if (is_reiserfs_3_5 (rs) || (is_reiserfs_jr (rs) && SB_VERSION (s) == REISERFS_VERSION_1)) 1841 goto error;
1718 set_bit(REISERFS_3_5, &(sbi->s_properties)); 1842 }
1719 else 1843
1720 set_bit(REISERFS_3_6, &(sbi->s_properties)); 1844 if (is_reiserfs_3_5(rs)
1721 1845 || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1))
1722 if (!(s->s_flags & MS_RDONLY)) { 1846 set_bit(REISERFS_3_5, &(sbi->s_properties));
1723 1847 else
1724 errval = journal_begin(&th, s, 1) ; 1848 set_bit(REISERFS_3_6, &(sbi->s_properties));
1725 if (errval) { 1849
1726 dput (s->s_root); 1850 if (!(s->s_flags & MS_RDONLY)) {
1727 s->s_root = NULL; 1851
1728 goto error; 1852 errval = journal_begin(&th, s, 1);
1729 } 1853 if (errval) {
1730 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 1854 dput(s->s_root);
1731 1855 s->s_root = NULL;
1732 set_sb_umount_state( rs, REISERFS_ERROR_FS ); 1856 goto error;
1733 set_sb_fs_state (rs, 0); 1857 }
1734 1858 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1735 if (old_format_only(s)) { 1859
1736 /* filesystem of format 3.5 either with standard or non-standard 1860 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1737 journal */ 1861 set_sb_fs_state(rs, 0);
1738 if (convert_reiserfs (s)) { 1862
1739 /* and -o conv is given */ 1863 if (old_format_only(s)) {
1740 if(!silent) 1864 /* filesystem of format 3.5 either with standard or non-standard
1741 reiserfs_info (s,"converting 3.5 filesystem to the 3.6 format") ; 1865 journal */
1742 1866 if (convert_reiserfs(s)) {
1743 if (is_reiserfs_3_5 (rs)) 1867 /* and -o conv is given */
1744 /* put magic string of 3.6 format. 2.2 will not be able to 1868 if (!silent)
1745 mount this filesystem anymore */ 1869 reiserfs_info(s,
1746 memcpy (rs->s_v1.s_magic, reiserfs_3_6_magic_string, 1870 "converting 3.5 filesystem to the 3.6 format");
1747 sizeof (reiserfs_3_6_magic_string)); 1871
1748 1872 if (is_reiserfs_3_5(rs))
1749 set_sb_version(rs,REISERFS_VERSION_2); 1873 /* put magic string of 3.6 format. 2.2 will not be able to
1750 reiserfs_convert_objectid_map_v1(s) ; 1874 mount this filesystem anymore */
1751 set_bit(REISERFS_3_6, &(sbi->s_properties)); 1875 memcpy(rs->s_v1.s_magic,
1752 clear_bit(REISERFS_3_5, &(sbi->s_properties)); 1876 reiserfs_3_6_magic_string,
1753 } else if (!silent){ 1877 sizeof
1754 reiserfs_info (s, "using 3.5.x disk format\n") ; 1878 (reiserfs_3_6_magic_string));
1755 } 1879
1756 } 1880 set_sb_version(rs, REISERFS_VERSION_2);
1757 1881 reiserfs_convert_objectid_map_v1(s);
1758 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 1882 set_bit(REISERFS_3_6, &(sbi->s_properties));
1759 errval = journal_end(&th, s, 1) ; 1883 clear_bit(REISERFS_3_5, &(sbi->s_properties));
1760 if (errval) { 1884 } else if (!silent) {
1761 dput (s->s_root); 1885 reiserfs_info(s, "using 3.5.x disk format\n");
1762 s->s_root = NULL; 1886 }
1763 goto error; 1887 }
1764 } 1888
1765 1889 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1766 if ((errval = reiserfs_xattr_init (s, s->s_flags))) { 1890 errval = journal_end(&th, s, 1);
1767 dput (s->s_root); 1891 if (errval) {
1768 s->s_root = NULL; 1892 dput(s->s_root);
1769 goto error; 1893 s->s_root = NULL;
1770 } 1894 goto error;
1771 1895 }
1772 /* look for files which were to be removed in previous session */ 1896
1773 finish_unfinished (s); 1897 if ((errval = reiserfs_xattr_init(s, s->s_flags))) {
1774 } else { 1898 dput(s->s_root);
1775 if ( old_format_only(s) && !silent) { 1899 s->s_root = NULL;
1776 reiserfs_info (s, "using 3.5.x disk format\n") ; 1900 goto error;
1777 } 1901 }
1778 1902
1779 if ((errval = reiserfs_xattr_init (s, s->s_flags))) { 1903 /* look for files which were to be removed in previous session */
1780 dput (s->s_root); 1904 finish_unfinished(s);
1781 s->s_root = NULL; 1905 } else {
1782 goto error; 1906 if (old_format_only(s) && !silent) {
1783 } 1907 reiserfs_info(s, "using 3.5.x disk format\n");
1784 } 1908 }
1785 // mark hash in super block: it could be unset. overwrite should be ok 1909
1786 set_sb_hash_function_code( rs, function2code(sbi->s_hash_function ) ); 1910 if ((errval = reiserfs_xattr_init(s, s->s_flags))) {
1787 1911 dput(s->s_root);
1788 handle_attrs( s ); 1912 s->s_root = NULL;
1789 1913 goto error;
1790 reiserfs_proc_info_init( s ); 1914 }
1791 1915 }
1792 init_waitqueue_head (&(sbi->s_wait)); 1916 // mark hash in super block: it could be unset. overwrite should be ok
1793 spin_lock_init(&sbi->bitmap_lock); 1917 set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
1794 1918
1795 return (0); 1919 handle_attrs(s);
1796 1920
1797 error: 1921 reiserfs_proc_info_init(s);
1798 if (jinit_done) { /* kill the commit thread, free journal ram */ 1922
1799 journal_release_error(NULL, s) ; 1923 init_waitqueue_head(&(sbi->s_wait));
1800 } 1924 spin_lock_init(&sbi->bitmap_lock);
1801 if (SB_DISK_SUPER_BLOCK (s)) { 1925
1802 for (j = 0; j < SB_BMAP_NR (s); j ++) { 1926 return (0);
1803 if (SB_AP_BITMAP (s)) 1927
1804 brelse (SB_AP_BITMAP (s)[j].bh); 1928 error:
1805 } 1929 if (jinit_done) { /* kill the commit thread, free journal ram */
1806 if (SB_AP_BITMAP (s)) 1930 journal_release_error(NULL, s);
1807 vfree (SB_AP_BITMAP (s)); 1931 }
1808 } 1932 if (SB_DISK_SUPER_BLOCK(s)) {
1809 if (SB_BUFFER_WITH_SB (s)) 1933 for (j = 0; j < SB_BMAP_NR(s); j++) {
1810 brelse(SB_BUFFER_WITH_SB (s)); 1934 if (SB_AP_BITMAP(s))
1935 brelse(SB_AP_BITMAP(s)[j].bh);
1936 }
1937 if (SB_AP_BITMAP(s))
1938 vfree(SB_AP_BITMAP(s));
1939 }
1940 if (SB_BUFFER_WITH_SB(s))
1941 brelse(SB_BUFFER_WITH_SB(s));
1811#ifdef CONFIG_QUOTA 1942#ifdef CONFIG_QUOTA
1812 for (j = 0; j < MAXQUOTAS; j++) { 1943 for (j = 0; j < MAXQUOTAS; j++) {
1813 if (sbi->s_qf_names[j]) 1944 if (sbi->s_qf_names[j])
1814 kfree(sbi->s_qf_names[j]); 1945 kfree(sbi->s_qf_names[j]);
1815 } 1946 }
1816#endif 1947#endif
1817 if (sbi != NULL) { 1948 if (sbi != NULL) {
1818 kfree(sbi); 1949 kfree(sbi);
1819 } 1950 }
1820 1951
1821 s->s_fs_info = NULL; 1952 s->s_fs_info = NULL;
1822 return errval; 1953 return errval;
1823} 1954}
1824 1955
1825 1956static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf)
1826static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
1827{ 1957{
1828 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 1958 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
1829 1959
1830 buf->f_namelen = (REISERFS_MAX_NAME (s->s_blocksize)); 1960 buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize));
1831 buf->f_bfree = sb_free_blocks(rs); 1961 buf->f_bfree = sb_free_blocks(rs);
1832 buf->f_bavail = buf->f_bfree; 1962 buf->f_bavail = buf->f_bfree;
1833 buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; 1963 buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
1834 buf->f_bsize = s->s_blocksize; 1964 buf->f_bsize = s->s_blocksize;
1835 /* changed to accommodate gcc folks.*/ 1965 /* changed to accommodate gcc folks. */
1836 buf->f_type = REISERFS_SUPER_MAGIC; 1966 buf->f_type = REISERFS_SUPER_MAGIC;
1837 return 0; 1967 return 0;
1838} 1968}
1839 1969
1840#ifdef CONFIG_QUOTA 1970#ifdef CONFIG_QUOTA
1841static int reiserfs_dquot_initialize(struct inode *inode, int type) 1971static int reiserfs_dquot_initialize(struct inode *inode, int type)
1842{ 1972{
1843 struct reiserfs_transaction_handle th; 1973 struct reiserfs_transaction_handle th;
1844 int ret; 1974 int ret, err;
1845 1975
1846 /* We may create quota structure so we need to reserve enough blocks */ 1976 /* We may create quota structure so we need to reserve enough blocks */
1847 reiserfs_write_lock(inode->i_sb); 1977 reiserfs_write_lock(inode->i_sb);
1848 journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 1978 ret =
1849 ret = dquot_initialize(inode, type); 1979 journal_begin(&th, inode->i_sb,
1850 journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 1980 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
1851 reiserfs_write_unlock(inode->i_sb); 1981 if (ret)
1852 return ret; 1982 goto out;
1983 ret = dquot_initialize(inode, type);
1984 err =
1985 journal_end(&th, inode->i_sb,
1986 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
1987 if (!ret && err)
1988 ret = err;
1989 out:
1990 reiserfs_write_unlock(inode->i_sb);
1991 return ret;
1853} 1992}
1854 1993
1855static int reiserfs_dquot_drop(struct inode *inode) 1994static int reiserfs_dquot_drop(struct inode *inode)
1856{ 1995{
1857 struct reiserfs_transaction_handle th; 1996 struct reiserfs_transaction_handle th;
1858 int ret; 1997 int ret, err;
1859 1998
1860 /* We may delete quota structure so we need to reserve enough blocks */ 1999 /* We may delete quota structure so we need to reserve enough blocks */
1861 reiserfs_write_lock(inode->i_sb); 2000 reiserfs_write_lock(inode->i_sb);
1862 journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 2001 ret =
1863 ret = dquot_drop(inode); 2002 journal_begin(&th, inode->i_sb,
1864 journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 2003 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1865 reiserfs_write_unlock(inode->i_sb); 2004 if (ret)
1866 return ret; 2005 goto out;
2006 ret = dquot_drop(inode);
2007 err =
2008 journal_end(&th, inode->i_sb,
2009 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
2010 if (!ret && err)
2011 ret = err;
2012 out:
2013 reiserfs_write_unlock(inode->i_sb);
2014 return ret;
1867} 2015}
1868 2016
1869static int reiserfs_write_dquot(struct dquot *dquot) 2017static int reiserfs_write_dquot(struct dquot *dquot)
1870{ 2018{
1871 struct reiserfs_transaction_handle th; 2019 struct reiserfs_transaction_handle th;
1872 int ret; 2020 int ret, err;
1873 2021
1874 reiserfs_write_lock(dquot->dq_sb); 2022 reiserfs_write_lock(dquot->dq_sb);
1875 journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS); 2023 ret =
1876 ret = dquot_commit(dquot); 2024 journal_begin(&th, dquot->dq_sb,
1877 journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS); 2025 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1878 reiserfs_write_unlock(dquot->dq_sb); 2026 if (ret)
1879 return ret; 2027 goto out;
2028 ret = dquot_commit(dquot);
2029 err =
2030 journal_end(&th, dquot->dq_sb,
2031 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2032 if (!ret && err)
2033 ret = err;
2034 out:
2035 reiserfs_write_unlock(dquot->dq_sb);
2036 return ret;
1880} 2037}
1881 2038
1882static int reiserfs_acquire_dquot(struct dquot *dquot) 2039static int reiserfs_acquire_dquot(struct dquot *dquot)
1883{ 2040{
1884 struct reiserfs_transaction_handle th; 2041 struct reiserfs_transaction_handle th;
1885 int ret; 2042 int ret, err;
1886 2043
1887 reiserfs_write_lock(dquot->dq_sb); 2044 reiserfs_write_lock(dquot->dq_sb);
1888 journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 2045 ret =
1889 ret = dquot_acquire(dquot); 2046 journal_begin(&th, dquot->dq_sb,
1890 journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 2047 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1891 reiserfs_write_unlock(dquot->dq_sb); 2048 if (ret)
1892 return ret; 2049 goto out;
2050 ret = dquot_acquire(dquot);
2051 err =
2052 journal_end(&th, dquot->dq_sb,
2053 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2054 if (!ret && err)
2055 ret = err;
2056 out:
2057 reiserfs_write_unlock(dquot->dq_sb);
2058 return ret;
1893} 2059}
1894 2060
1895static int reiserfs_release_dquot(struct dquot *dquot) 2061static int reiserfs_release_dquot(struct dquot *dquot)
1896{ 2062{
1897 struct reiserfs_transaction_handle th; 2063 struct reiserfs_transaction_handle th;
1898 int ret; 2064 int ret, err;
1899 2065
1900 reiserfs_write_lock(dquot->dq_sb); 2066 reiserfs_write_lock(dquot->dq_sb);
1901 journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 2067 ret =
1902 ret = dquot_release(dquot); 2068 journal_begin(&th, dquot->dq_sb,
1903 journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 2069 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1904 reiserfs_write_unlock(dquot->dq_sb); 2070 if (ret)
1905 return ret; 2071 goto out;
2072 ret = dquot_release(dquot);
2073 err =
2074 journal_end(&th, dquot->dq_sb,
2075 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2076 if (!ret && err)
2077 ret = err;
2078 out:
2079 reiserfs_write_unlock(dquot->dq_sb);
2080 return ret;
1906} 2081}
1907 2082
1908static int reiserfs_mark_dquot_dirty(struct dquot *dquot) 2083static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1909{ 2084{
1910 /* Are we journalling quotas? */ 2085 /* Are we journalling quotas? */
1911 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2086 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
1912 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2087 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
1913 dquot_mark_dquot_dirty(dquot); 2088 dquot_mark_dquot_dirty(dquot);
1914 return reiserfs_write_dquot(dquot); 2089 return reiserfs_write_dquot(dquot);
1915 } 2090 } else
1916 else 2091 return dquot_mark_dquot_dirty(dquot);
1917 return dquot_mark_dquot_dirty(dquot);
1918} 2092}
1919 2093
1920static int reiserfs_write_info(struct super_block *sb, int type) 2094static int reiserfs_write_info(struct super_block *sb, int type)
1921{ 2095{
1922 struct reiserfs_transaction_handle th; 2096 struct reiserfs_transaction_handle th;
1923 int ret; 2097 int ret, err;
1924 2098
1925 /* Data block + inode block */ 2099 /* Data block + inode block */
1926 reiserfs_write_lock(sb); 2100 reiserfs_write_lock(sb);
1927 journal_begin(&th, sb, 2); 2101 ret = journal_begin(&th, sb, 2);
1928 ret = dquot_commit_info(sb, type); 2102 if (ret)
1929 journal_end(&th, sb, 2); 2103 goto out;
1930 reiserfs_write_unlock(sb); 2104 ret = dquot_commit_info(sb, type);
1931 return ret; 2105 err = journal_end(&th, sb, 2);
2106 if (!ret && err)
2107 ret = err;
2108 out:
2109 reiserfs_write_unlock(sb);
2110 return ret;
1932} 2111}
1933 2112
1934/* 2113/*
1935 * Turn on quotas during mount time - we need to find 2114 * Turn on quotas during mount time - we need to find the quota file and such...
1936 * the quota file and such...
1937 */ 2115 */
1938static int reiserfs_quota_on_mount(struct super_block *sb, int type) 2116static int reiserfs_quota_on_mount(struct super_block *sb, int type)
1939{ 2117{
1940 int err; 2118 return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
1941 struct dentry *dentry; 2119 REISERFS_SB(sb)->s_jquota_fmt, type);
1942 struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type],
1943 .hash = 0,
1944 .len = strlen(REISERFS_SB(sb)->s_qf_names[type])};
1945
1946 dentry = lookup_hash(&name, sb->s_root);
1947 if (IS_ERR(dentry))
1948 return PTR_ERR(dentry);
1949 err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry);
1950 /* Now invalidate and put the dentry - quota got its own reference
1951 * to inode and dentry has at least wrong hash so we had better
1952 * throw it away */
1953 d_invalidate(dentry);
1954 dput(dentry);
1955 return err;
1956} 2120}
1957 2121
1958/* 2122/*
1959 * Standard function to be called on quota_on 2123 * Standard function to be called on quota_on
1960 */ 2124 */
1961static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, char *path) 2125static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2126 char *path)
1962{ 2127{
1963 int err; 2128 int err;
1964 struct nameidata nd; 2129 struct nameidata nd;
1965 2130
1966 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2131 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
1967 if (err) 2132 return -EINVAL;
1968 return err; 2133 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
1969 /* Quotafile not on the same filesystem? */ 2134 if (err)
1970 if (nd.mnt->mnt_sb != sb) { 2135 return err;
1971 path_release(&nd); 2136 /* Quotafile not on the same filesystem? */
1972 return -EXDEV; 2137 if (nd.mnt->mnt_sb != sb) {
1973 } 2138 path_release(&nd);
1974 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2139 return -EXDEV;
1975 if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) { 2140 }
1976 reiserfs_warning(sb, "reiserfs: Quota file must have tail packing disabled."); 2141 /* We must not pack tails for quota files on reiserfs for quota IO to work */
1977 path_release(&nd); 2142 if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) {
1978 return -EINVAL; 2143 reiserfs_warning(sb,
1979 } 2144 "reiserfs: Quota file must have tail packing disabled.");
1980 /* Not journalling quota? No more tests needed... */ 2145 path_release(&nd);
1981 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2146 return -EINVAL;
1982 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2147 }
2148 /* Not journalling quota? No more tests needed... */
2149 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
2150 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
2151 path_release(&nd);
2152 return vfs_quota_on(sb, type, format_id, path);
2153 }
2154 /* Quotafile not of fs root? */
2155 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
2156 reiserfs_warning(sb,
2157 "reiserfs: Quota file not on filesystem root. "
2158 "Journalled quota will not work.");
1983 path_release(&nd); 2159 path_release(&nd);
1984 return vfs_quota_on(sb, type, format_id, path); 2160 return vfs_quota_on(sb, type, format_id, path);
1985 }
1986 /* Quotafile not of fs root? */
1987 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
1988 reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. "
1989 "Journalled quota will not work.");
1990 path_release(&nd);
1991 return vfs_quota_on(sb, type, format_id, path);
1992} 2161}
1993 2162
1994/* Read data from quotafile - avoid pagecache and such because we cannot afford 2163/* Read data from quotafile - avoid pagecache and such because we cannot afford
@@ -1998,42 +2167,44 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
1998static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2167static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
1999 size_t len, loff_t off) 2168 size_t len, loff_t off)
2000{ 2169{
2001 struct inode *inode = sb_dqopt(sb)->files[type]; 2170 struct inode *inode = sb_dqopt(sb)->files[type];
2002 unsigned long blk = off >> sb->s_blocksize_bits; 2171 unsigned long blk = off >> sb->s_blocksize_bits;
2003 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; 2172 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
2004 size_t toread; 2173 size_t toread;
2005 struct buffer_head tmp_bh, *bh; 2174 struct buffer_head tmp_bh, *bh;
2006 loff_t i_size = i_size_read(inode); 2175 loff_t i_size = i_size_read(inode);
2007 2176
2008 if (off > i_size) 2177 if (off > i_size)
2009 return 0; 2178 return 0;
2010 if (off+len > i_size) 2179 if (off + len > i_size)
2011 len = i_size-off; 2180 len = i_size - off;
2012 toread = len; 2181 toread = len;
2013 while (toread > 0) { 2182 while (toread > 0) {
2014 tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; 2183 tocopy =
2015 tmp_bh.b_state = 0; 2184 sb->s_blocksize - offset <
2016 /* Quota files are without tails so we can safely use this function */ 2185 toread ? sb->s_blocksize - offset : toread;
2017 reiserfs_write_lock(sb); 2186 tmp_bh.b_state = 0;
2018 err = reiserfs_get_block(inode, blk, &tmp_bh, 0); 2187 /* Quota files are without tails so we can safely use this function */
2019 reiserfs_write_unlock(sb); 2188 reiserfs_write_lock(sb);
2020 if (err) 2189 err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
2021 return err; 2190 reiserfs_write_unlock(sb);
2022 if (!buffer_mapped(&tmp_bh)) /* A hole? */ 2191 if (err)
2023 memset(data, 0, tocopy); 2192 return err;
2024 else { 2193 if (!buffer_mapped(&tmp_bh)) /* A hole? */
2025 bh = sb_bread(sb, tmp_bh.b_blocknr); 2194 memset(data, 0, tocopy);
2026 if (!bh) 2195 else {
2027 return -EIO; 2196 bh = sb_bread(sb, tmp_bh.b_blocknr);
2028 memcpy(data, bh->b_data+offset, tocopy); 2197 if (!bh)
2029 brelse(bh); 2198 return -EIO;
2030 } 2199 memcpy(data, bh->b_data + offset, tocopy);
2031 offset = 0; 2200 brelse(bh);
2032 toread -= tocopy; 2201 }
2033 data += tocopy; 2202 offset = 0;
2034 blk++; 2203 toread -= tocopy;
2035 } 2204 data += tocopy;
2036 return len; 2205 blk++;
2206 }
2207 return len;
2037} 2208}
2038 2209
2039/* Write to quotafile (we know the transaction is already started and has 2210/* Write to quotafile (we know the transaction is already started and has
@@ -2041,117 +2212,116 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2041static ssize_t reiserfs_quota_write(struct super_block *sb, int type, 2212static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2042 const char *data, size_t len, loff_t off) 2213 const char *data, size_t len, loff_t off)
2043{ 2214{
2044 struct inode *inode = sb_dqopt(sb)->files[type]; 2215 struct inode *inode = sb_dqopt(sb)->files[type];
2045 unsigned long blk = off >> sb->s_blocksize_bits; 2216 unsigned long blk = off >> sb->s_blocksize_bits;
2046 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; 2217 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
2047 int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; 2218 int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL;
2048 size_t towrite = len; 2219 size_t towrite = len;
2049 struct buffer_head tmp_bh, *bh; 2220 struct buffer_head tmp_bh, *bh;
2050 2221
2051 down(&inode->i_sem); 2222 down(&inode->i_sem);
2052 while (towrite > 0) { 2223 while (towrite > 0) {
2053 tocopy = sb->s_blocksize - offset < towrite ? 2224 tocopy = sb->s_blocksize - offset < towrite ?
2054 sb->s_blocksize - offset : towrite; 2225 sb->s_blocksize - offset : towrite;
2055 tmp_bh.b_state = 0; 2226 tmp_bh.b_state = 0;
2056 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); 2227 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
2057 if (err) 2228 if (err)
2058 goto out; 2229 goto out;
2059 if (offset || tocopy != sb->s_blocksize) 2230 if (offset || tocopy != sb->s_blocksize)
2060 bh = sb_bread(sb, tmp_bh.b_blocknr); 2231 bh = sb_bread(sb, tmp_bh.b_blocknr);
2061 else 2232 else
2062 bh = sb_getblk(sb, tmp_bh.b_blocknr); 2233 bh = sb_getblk(sb, tmp_bh.b_blocknr);
2063 if (!bh) { 2234 if (!bh) {
2064 err = -EIO; 2235 err = -EIO;
2065 goto out; 2236 goto out;
2066 } 2237 }
2067 lock_buffer(bh); 2238 lock_buffer(bh);
2068 memcpy(bh->b_data+offset, data, tocopy); 2239 memcpy(bh->b_data + offset, data, tocopy);
2069 flush_dcache_page(bh->b_page); 2240 flush_dcache_page(bh->b_page);
2070 set_buffer_uptodate(bh); 2241 set_buffer_uptodate(bh);
2071 unlock_buffer(bh); 2242 unlock_buffer(bh);
2072 reiserfs_prepare_for_journal(sb, bh, 1); 2243 reiserfs_prepare_for_journal(sb, bh, 1);
2073 journal_mark_dirty(current->journal_info, sb, bh); 2244 journal_mark_dirty(current->journal_info, sb, bh);
2074 if (!journal_quota) 2245 if (!journal_quota)
2075 reiserfs_add_ordered_list(inode, bh); 2246 reiserfs_add_ordered_list(inode, bh);
2076 brelse(bh); 2247 brelse(bh);
2077 offset = 0; 2248 offset = 0;
2078 towrite -= tocopy; 2249 towrite -= tocopy;
2079 data += tocopy; 2250 data += tocopy;
2080 blk++; 2251 blk++;
2081 } 2252 }
2082out: 2253 out:
2083 if (len == towrite) 2254 if (len == towrite)
2084 return err; 2255 return err;
2085 if (inode->i_size < off+len-towrite) 2256 if (inode->i_size < off + len - towrite)
2086 i_size_write(inode, off+len-towrite); 2257 i_size_write(inode, off + len - towrite);
2087 inode->i_version++; 2258 inode->i_version++;
2088 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2259 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2089 mark_inode_dirty(inode); 2260 mark_inode_dirty(inode);
2090 up(&inode->i_sem); 2261 up(&inode->i_sem);
2091 return len - towrite; 2262 return len - towrite;
2092} 2263}
2093 2264
2094#endif 2265#endif
2095 2266
2096static struct super_block* 2267static struct super_block *get_super_block(struct file_system_type *fs_type,
2097get_super_block (struct file_system_type *fs_type, int flags, 2268 int flags, const char *dev_name,
2098 const char *dev_name, void *data) 2269 void *data)
2099{ 2270{
2100 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); 2271 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
2101} 2272}
2102 2273
2103static int __init 2274static int __init init_reiserfs_fs(void)
2104init_reiserfs_fs ( void )
2105{ 2275{
2106 int ret; 2276 int ret;
2107 2277
2108 if ((ret = init_inodecache ())) { 2278 if ((ret = init_inodecache())) {
2109 return ret; 2279 return ret;
2110 } 2280 }
2111 2281
2112 if ((ret = reiserfs_xattr_register_handlers ())) 2282 if ((ret = reiserfs_xattr_register_handlers()))
2113 goto failed_reiserfs_xattr_register_handlers; 2283 goto failed_reiserfs_xattr_register_handlers;
2114 2284
2115 reiserfs_proc_info_global_init (); 2285 reiserfs_proc_info_global_init();
2116 reiserfs_proc_register_global ("version", reiserfs_global_version_in_proc); 2286 reiserfs_proc_register_global("version",
2287 reiserfs_global_version_in_proc);
2117 2288
2118 ret = register_filesystem (& reiserfs_fs_type); 2289 ret = register_filesystem(&reiserfs_fs_type);
2119 2290
2120 if (ret == 0) { 2291 if (ret == 0) {
2121 return 0; 2292 return 0;
2122 } 2293 }
2123 2294
2124 reiserfs_xattr_unregister_handlers (); 2295 reiserfs_xattr_unregister_handlers();
2125 2296
2126failed_reiserfs_xattr_register_handlers: 2297 failed_reiserfs_xattr_register_handlers:
2127 reiserfs_proc_unregister_global ("version"); 2298 reiserfs_proc_unregister_global("version");
2128 reiserfs_proc_info_global_done (); 2299 reiserfs_proc_info_global_done();
2129 destroy_inodecache (); 2300 destroy_inodecache();
2130 2301
2131 return ret; 2302 return ret;
2132} 2303}
2133 2304
2134static void __exit 2305static void __exit exit_reiserfs_fs(void)
2135exit_reiserfs_fs ( void )
2136{ 2306{
2137 reiserfs_xattr_unregister_handlers (); 2307 reiserfs_xattr_unregister_handlers();
2138 reiserfs_proc_unregister_global ("version"); 2308 reiserfs_proc_unregister_global("version");
2139 reiserfs_proc_info_global_done (); 2309 reiserfs_proc_info_global_done();
2140 unregister_filesystem (& reiserfs_fs_type); 2310 unregister_filesystem(&reiserfs_fs_type);
2141 destroy_inodecache (); 2311 destroy_inodecache();
2142} 2312}
2143 2313
2144struct file_system_type reiserfs_fs_type = { 2314struct file_system_type reiserfs_fs_type = {
2145 .owner = THIS_MODULE, 2315 .owner = THIS_MODULE,
2146 .name = "reiserfs", 2316 .name = "reiserfs",
2147 .get_sb = get_super_block, 2317 .get_sb = get_super_block,
2148 .kill_sb = kill_block_super, 2318 .kill_sb = kill_block_super,
2149 .fs_flags = FS_REQUIRES_DEV, 2319 .fs_flags = FS_REQUIRES_DEV,
2150}; 2320};
2151 2321
2152MODULE_DESCRIPTION ("ReiserFS journaled filesystem"); 2322MODULE_DESCRIPTION("ReiserFS journaled filesystem");
2153MODULE_AUTHOR ("Hans Reiser <reiser@namesys.com>"); 2323MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
2154MODULE_LICENSE ("GPL"); 2324MODULE_LICENSE("GPL");
2155 2325
2156module_init (init_reiserfs_fs); 2326module_init(init_reiserfs_fs);
2157module_exit (exit_reiserfs_fs); 2327module_exit(exit_reiserfs_fs);
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index 6191909d5165..c92e124f628e 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -11,156 +11,159 @@
11/* access to tail : when one is going to read tail it must make sure, that is not running. 11/* access to tail : when one is going to read tail it must make sure, that is not running.
12 direct2indirect and indirect2direct can not run concurrently */ 12 direct2indirect and indirect2direct can not run concurrently */
13 13
14
15/* Converts direct items to an unformatted node. Panics if file has no 14/* Converts direct items to an unformatted node. Panics if file has no
16 tail. -ENOSPC if no disk space for conversion */ 15 tail. -ENOSPC if no disk space for conversion */
17/* path points to first direct item of the file regarless of how many of 16/* path points to first direct item of the file regarless of how many of
18 them are there */ 17 them are there */
19int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inode, 18int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
20 struct path * path, struct buffer_head * unbh, 19 struct path *path, struct buffer_head *unbh,
21 loff_t tail_offset) 20 loff_t tail_offset)
22{ 21{
23 struct super_block * sb = inode->i_sb; 22 struct super_block *sb = inode->i_sb;
24 struct buffer_head *up_to_date_bh ; 23 struct buffer_head *up_to_date_bh;
25 struct item_head * p_le_ih = PATH_PITEM_HEAD (path); 24 struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
26 unsigned long total_tail = 0 ; 25 unsigned long total_tail = 0;
27 struct cpu_key end_key; /* Key to search for the last byte of the 26 struct cpu_key end_key; /* Key to search for the last byte of the
28 converted item. */ 27 converted item. */
29 struct item_head ind_ih; /* new indirect item to be inserted or 28 struct item_head ind_ih; /* new indirect item to be inserted or
30 key of unfm pointer to be pasted */ 29 key of unfm pointer to be pasted */
31 int n_blk_size, 30 int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */
32 n_retval; /* returned value for reiserfs_insert_item and clones */ 31 unp_t unfm_ptr; /* Handle on an unformatted node
33 unp_t unfm_ptr; /* Handle on an unformatted node 32 that will be inserted in the
34 that will be inserted in the 33 tree. */
35 tree. */ 34
36 35 BUG_ON(!th->t_trans_id);
37 BUG_ON (!th->t_trans_id); 36
38 37 REISERFS_SB(sb)->s_direct2indirect++;
39 REISERFS_SB(sb)->s_direct2indirect ++; 38
40 39 n_blk_size = sb->s_blocksize;
41 n_blk_size = sb->s_blocksize; 40
42 41 /* and key to search for append or insert pointer to the new
43 /* and key to search for append or insert pointer to the new 42 unformatted node. */
44 unformatted node. */ 43 copy_item_head(&ind_ih, p_le_ih);
45 copy_item_head (&ind_ih, p_le_ih); 44 set_le_ih_k_offset(&ind_ih, tail_offset);
46 set_le_ih_k_offset (&ind_ih, tail_offset); 45 set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);
47 set_le_ih_k_type (&ind_ih, TYPE_INDIRECT); 46
48 47 /* Set the key to search for the place for new unfm pointer */
49 /* Set the key to search for the place for new unfm pointer */ 48 make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4);
50 make_cpu_key (&end_key, inode, tail_offset, TYPE_INDIRECT, 4); 49
51 50 // FIXME: we could avoid this
52 // FIXME: we could avoid this 51 if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) {
53 if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) { 52 reiserfs_warning(sb, "PAP-14030: direct2indirect: "
54 reiserfs_warning (sb, "PAP-14030: direct2indirect: " 53 "pasted or inserted byte exists in the tree %K. "
55 "pasted or inserted byte exists in the tree %K. " 54 "Use fsck to repair.", &end_key);
56 "Use fsck to repair.", &end_key); 55 pathrelse(path);
57 pathrelse(path); 56 return -EIO;
58 return -EIO; 57 }
59 } 58
60 59 p_le_ih = PATH_PITEM_HEAD(path);
61 p_le_ih = PATH_PITEM_HEAD (path); 60
62 61 unfm_ptr = cpu_to_le32(unbh->b_blocknr);
63 unfm_ptr = cpu_to_le32 (unbh->b_blocknr); 62
64 63 if (is_statdata_le_ih(p_le_ih)) {
65 if ( is_statdata_le_ih (p_le_ih) ) { 64 /* Insert new indirect item. */
66 /* Insert new indirect item. */ 65 set_ih_free_space(&ind_ih, 0); /* delete at nearest future */
67 set_ih_free_space (&ind_ih, 0); /* delete at nearest future */ 66 put_ih_item_len(&ind_ih, UNFM_P_SIZE);
68 put_ih_item_len( &ind_ih, UNFM_P_SIZE ); 67 PATH_LAST_POSITION(path)++;
69 PATH_LAST_POSITION (path)++; 68 n_retval =
70 n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, inode, 69 reiserfs_insert_item(th, path, &end_key, &ind_ih, inode,
71 (char *)&unfm_ptr); 70 (char *)&unfm_ptr);
72 } else {
73 /* Paste into last indirect item of an object. */
74 n_retval = reiserfs_paste_into_item(th, path, &end_key, inode,
75 (char *)&unfm_ptr, UNFM_P_SIZE);
76 }
77 if ( n_retval ) {
78 return n_retval;
79 }
80
81 // note: from here there are two keys which have matching first
82 // three key components. They only differ by the fourth one.
83
84
85 /* Set the key to search for the direct items of the file */
86 make_cpu_key (&end_key, inode, max_reiserfs_offset (inode), TYPE_DIRECT, 4);
87
88 /* Move bytes from the direct items to the new unformatted node
89 and delete them. */
90 while (1) {
91 int tail_size;
92
93 /* end_key.k_offset is set so, that we will always have found
94 last item of the file */
95 if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND )
96 reiserfs_panic (sb, "PAP-14050: direct2indirect: "
97 "direct item (%K) not found", &end_key);
98 p_le_ih = PATH_PITEM_HEAD (path);
99 RFALSE( !is_direct_le_ih (p_le_ih),
100 "vs-14055: direct item expected(%K), found %h",
101 &end_key, p_le_ih);
102 tail_size = (le_ih_k_offset (p_le_ih) & (n_blk_size - 1))
103 + ih_item_len(p_le_ih) - 1;
104
105 /* we only send the unbh pointer if the buffer is not up to date.
106 ** this avoids overwriting good data from writepage() with old data
107 ** from the disk or buffer cache
108 ** Special case: unbh->b_page will be NULL if we are coming through
109 ** DIRECT_IO handler here.
110 */
111 if (!unbh->b_page || buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) {
112 up_to_date_bh = NULL ;
113 } else { 71 } else {
114 up_to_date_bh = unbh ; 72 /* Paste into last indirect item of an object. */
73 n_retval = reiserfs_paste_into_item(th, path, &end_key, inode,
74 (char *)&unfm_ptr,
75 UNFM_P_SIZE);
115 } 76 }
116 n_retval = reiserfs_delete_item (th, path, &end_key, inode, 77 if (n_retval) {
117 up_to_date_bh) ; 78 return n_retval;
118 79 }
119 total_tail += n_retval ; 80 // note: from here there are two keys which have matching first
120 if (tail_size == n_retval) 81 // three key components. They only differ by the fourth one.
121 // done: file does not have direct items anymore 82
122 break; 83 /* Set the key to search for the direct items of the file */
123 84 make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT,
124 } 85 4);
125 /* if we've copied bytes from disk into the page, we need to zero 86
126 ** out the unused part of the block (it was not up to date before) 87 /* Move bytes from the direct items to the new unformatted node
127 */ 88 and delete them. */
128 if (up_to_date_bh) { 89 while (1) {
129 unsigned pgoff = (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 90 int tail_size;
130 char *kaddr=kmap_atomic(up_to_date_bh->b_page, KM_USER0); 91
131 memset(kaddr + pgoff, 0, n_blk_size - total_tail) ; 92 /* end_key.k_offset is set so, that we will always have found
132 kunmap_atomic(kaddr, KM_USER0); 93 last item of the file */
133 } 94 if (search_for_position_by_key(sb, &end_key, path) ==
134 95 POSITION_FOUND)
135 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 96 reiserfs_panic(sb,
136 97 "PAP-14050: direct2indirect: "
137 return 0; 98 "direct item (%K) not found", &end_key);
138} 99 p_le_ih = PATH_PITEM_HEAD(path);
100 RFALSE(!is_direct_le_ih(p_le_ih),
101 "vs-14055: direct item expected(%K), found %h",
102 &end_key, p_le_ih);
103 tail_size = (le_ih_k_offset(p_le_ih) & (n_blk_size - 1))
104 + ih_item_len(p_le_ih) - 1;
105
106 /* we only send the unbh pointer if the buffer is not up to date.
107 ** this avoids overwriting good data from writepage() with old data
108 ** from the disk or buffer cache
109 ** Special case: unbh->b_page will be NULL if we are coming through
110 ** DIRECT_IO handler here.
111 */
112 if (!unbh->b_page || buffer_uptodate(unbh)
113 || PageUptodate(unbh->b_page)) {
114 up_to_date_bh = NULL;
115 } else {
116 up_to_date_bh = unbh;
117 }
118 n_retval = reiserfs_delete_item(th, path, &end_key, inode,
119 up_to_date_bh);
120
121 total_tail += n_retval;
122 if (tail_size == n_retval)
123 // done: file does not have direct items anymore
124 break;
139 125
126 }
127 /* if we've copied bytes from disk into the page, we need to zero
128 ** out the unused part of the block (it was not up to date before)
129 */
130 if (up_to_date_bh) {
131 unsigned pgoff =
132 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
133 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0);
134 memset(kaddr + pgoff, 0, n_blk_size - total_tail);
135 kunmap_atomic(kaddr, KM_USER0);
136 }
137
138 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
139
140 return 0;
141}
140 142
141/* stolen from fs/buffer.c */ 143/* stolen from fs/buffer.c */
142void reiserfs_unmap_buffer(struct buffer_head *bh) { 144void reiserfs_unmap_buffer(struct buffer_head *bh)
143 lock_buffer(bh) ; 145{
144 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { 146 lock_buffer(bh);
145 BUG() ; 147 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
146 } 148 BUG();
147 clear_buffer_dirty(bh) ; 149 }
148 /* Remove the buffer from whatever list it belongs to. We are mostly 150 clear_buffer_dirty(bh);
149 interested in removing it from per-sb j_dirty_buffers list, to avoid 151 /* Remove the buffer from whatever list it belongs to. We are mostly
150 BUG() on attempt to write not mapped buffer */ 152 interested in removing it from per-sb j_dirty_buffers list, to avoid
151 if ( (!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { 153 BUG() on attempt to write not mapped buffer */
152 struct inode *inode = bh->b_page->mapping->host; 154 if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
153 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 155 struct inode *inode = bh->b_page->mapping->host;
154 spin_lock(&j->j_dirty_buffers_lock); 156 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
155 list_del_init(&bh->b_assoc_buffers); 157 spin_lock(&j->j_dirty_buffers_lock);
156 reiserfs_free_jh(bh); 158 list_del_init(&bh->b_assoc_buffers);
157 spin_unlock(&j->j_dirty_buffers_lock); 159 reiserfs_free_jh(bh);
158 } 160 spin_unlock(&j->j_dirty_buffers_lock);
159 clear_buffer_mapped(bh) ; 161 }
160 clear_buffer_req(bh) ; 162 clear_buffer_mapped(bh);
161 clear_buffer_new(bh); 163 clear_buffer_req(bh);
162 bh->b_bdev = NULL; 164 clear_buffer_new(bh);
163 unlock_buffer(bh) ; 165 bh->b_bdev = NULL;
166 unlock_buffer(bh);
164} 167}
165 168
166/* this first locks inode (neither reads nor sync are permitted), 169/* this first locks inode (neither reads nor sync are permitted),
@@ -169,108 +172,108 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) {
169 what we expect from it (number of cut bytes). But when tail remains 172 what we expect from it (number of cut bytes). But when tail remains
170 in the unformatted node, we set mode to SKIP_BALANCING and unlock 173 in the unformatted node, we set mode to SKIP_BALANCING and unlock
171 inode */ 174 inode */
172int indirect2direct (struct reiserfs_transaction_handle *th, 175int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct path *p_s_path, /* path to the indirect item. */
173 struct inode * p_s_inode, 176 const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */
174 struct page *page, 177 loff_t n_new_file_size, /* New file size. */
175 struct path * p_s_path, /* path to the indirect item. */ 178 char *p_c_mode)
176 const struct cpu_key * p_s_item_key, /* Key to look for unformatted node pointer to be cut. */
177 loff_t n_new_file_size, /* New file size. */
178 char * p_c_mode)
179{ 179{
180 struct super_block * p_s_sb = p_s_inode->i_sb; 180 struct super_block *p_s_sb = p_s_inode->i_sb;
181 struct item_head s_ih; 181 struct item_head s_ih;
182 unsigned long n_block_size = p_s_sb->s_blocksize; 182 unsigned long n_block_size = p_s_sb->s_blocksize;
183 char * tail; 183 char *tail;
184 int tail_len, round_tail_len; 184 int tail_len, round_tail_len;
185 loff_t pos, pos1; /* position of first byte of the tail */ 185 loff_t pos, pos1; /* position of first byte of the tail */
186 struct cpu_key key; 186 struct cpu_key key;
187 187
188 BUG_ON (!th->t_trans_id); 188 BUG_ON(!th->t_trans_id);
189 189
190 REISERFS_SB(p_s_sb)->s_indirect2direct ++; 190 REISERFS_SB(p_s_sb)->s_indirect2direct++;
191 191
192 *p_c_mode = M_SKIP_BALANCING; 192 *p_c_mode = M_SKIP_BALANCING;
193 193
194 /* store item head path points to. */ 194 /* store item head path points to. */
195 copy_item_head (&s_ih, PATH_PITEM_HEAD(p_s_path));
196
197 tail_len = (n_new_file_size & (n_block_size - 1));
198 if (get_inode_sd_version (p_s_inode) == STAT_DATA_V2)
199 round_tail_len = ROUND_UP (tail_len);
200 else
201 round_tail_len = tail_len;
202
203 pos = le_ih_k_offset (&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize;
204 pos1 = pos;
205
206 // we are protected by i_sem. The tail can not disapper, not
207 // append can be done either
208 // we are in truncate or packing tail in file_release
209
210 tail = (char *)kmap(page) ; /* this can schedule */
211
212 if (path_changed (&s_ih, p_s_path)) {
213 /* re-search indirect item */
214 if ( search_for_position_by_key (p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND )
215 reiserfs_panic(p_s_sb, "PAP-5520: indirect2direct: "
216 "item to be converted %K does not exist", p_s_item_key);
217 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 195 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
196
197 tail_len = (n_new_file_size & (n_block_size - 1));
198 if (get_inode_sd_version(p_s_inode) == STAT_DATA_V2)
199 round_tail_len = ROUND_UP(tail_len);
200 else
201 round_tail_len = tail_len;
202
203 pos =
204 le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE -
205 1) * p_s_sb->s_blocksize;
206 pos1 = pos;
207
208 // we are protected by i_sem. The tail can not disapper, not
209 // append can be done either
210 // we are in truncate or packing tail in file_release
211
212 tail = (char *)kmap(page); /* this can schedule */
213
214 if (path_changed(&s_ih, p_s_path)) {
215 /* re-search indirect item */
216 if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path)
217 == POSITION_NOT_FOUND)
218 reiserfs_panic(p_s_sb,
219 "PAP-5520: indirect2direct: "
220 "item to be converted %K does not exist",
221 p_s_item_key);
222 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
218#ifdef CONFIG_REISERFS_CHECK 223#ifdef CONFIG_REISERFS_CHECK
219 pos = le_ih_k_offset (&s_ih) - 1 + 224 pos = le_ih_k_offset(&s_ih) - 1 +
220 (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; 225 (ih_item_len(&s_ih) / UNFM_P_SIZE -
221 if (pos != pos1) 226 1) * p_s_sb->s_blocksize;
222 reiserfs_panic (p_s_sb, "vs-5530: indirect2direct: " 227 if (pos != pos1)
223 "tail position changed while we were reading it"); 228 reiserfs_panic(p_s_sb, "vs-5530: indirect2direct: "
229 "tail position changed while we were reading it");
224#endif 230#endif
225 } 231 }
226
227
228 /* Set direct item header to insert. */
229 make_le_item_head (&s_ih, NULL, get_inode_item_key_version (p_s_inode), pos1 + 1,
230 TYPE_DIRECT, round_tail_len, 0xffff/*ih_free_space*/);
231
232 /* we want a pointer to the first byte of the tail in the page.
233 ** the page was locked and this part of the page was up to date when
234 ** indirect2direct was called, so we know the bytes are still valid
235 */
236 tail = tail + (pos & (PAGE_CACHE_SIZE - 1)) ;
237
238 PATH_LAST_POSITION(p_s_path)++;
239
240 key = *p_s_item_key;
241 set_cpu_key_k_type (&key, TYPE_DIRECT);
242 key.key_length = 4;
243 /* Insert tail as new direct item in the tree */
244 if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode,
245 tail ? tail : NULL) < 0 ) {
246 /* No disk memory. So we can not convert last unformatted node
247 to the direct item. In this case we used to adjust
248 indirect items's ih_free_space. Now ih_free_space is not
249 used, it would be ideal to write zeros to corresponding
250 unformatted node. For now i_size is considered as guard for
251 going out of file size */
252 kunmap(page) ;
253 return n_block_size - round_tail_len;
254 }
255 kunmap(page) ;
256
257 /* make sure to get the i_blocks changes from reiserfs_insert_item */
258 reiserfs_update_sd(th, p_s_inode);
259 232
260 // note: we have now the same as in above direct2indirect 233 /* Set direct item header to insert. */
261 // conversion: there are two keys which have matching first three 234 make_le_item_head(&s_ih, NULL, get_inode_item_key_version(p_s_inode),
262 // key components. They only differ by the fouhth one. 235 pos1 + 1, TYPE_DIRECT, round_tail_len,
236 0xffff /*ih_free_space */ );
237
238 /* we want a pointer to the first byte of the tail in the page.
239 ** the page was locked and this part of the page was up to date when
240 ** indirect2direct was called, so we know the bytes are still valid
241 */
242 tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
243
244 PATH_LAST_POSITION(p_s_path)++;
245
246 key = *p_s_item_key;
247 set_cpu_key_k_type(&key, TYPE_DIRECT);
248 key.key_length = 4;
249 /* Insert tail as new direct item in the tree */
250 if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode,
251 tail ? tail : NULL) < 0) {
252 /* No disk memory. So we can not convert last unformatted node
253 to the direct item. In this case we used to adjust
254 indirect items's ih_free_space. Now ih_free_space is not
255 used, it would be ideal to write zeros to corresponding
256 unformatted node. For now i_size is considered as guard for
257 going out of file size */
258 kunmap(page);
259 return n_block_size - round_tail_len;
260 }
261 kunmap(page);
263 262
264 /* We have inserted new direct item and must remove last 263 /* make sure to get the i_blocks changes from reiserfs_insert_item */
265 unformatted node. */ 264 reiserfs_update_sd(th, p_s_inode);
266 *p_c_mode = M_CUT;
267 265
268 /* we store position of first direct item in the in-core inode */ 266 // note: we have now the same as in above direct2indirect
269 //mark_file_with_tail (p_s_inode, pos1 + 1); 267 // conversion: there are two keys which have matching first three
270 REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; 268 // key components. They only differ by the fouhth one.
271
272 return n_block_size - round_tail_len;
273}
274 269
270 /* We have inserted new direct item and must remove last
271 unformatted node. */
272 *p_c_mode = M_CUT;
275 273
274 /* we store position of first direct item in the in-core inode */
275 //mark_file_with_tail (p_s_inode, pos1 + 1);
276 REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1;
276 277
278 return n_block_size - round_tail_len;
279}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 45582fe8b466..87ac9dc8b381 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -39,7 +39,6 @@
39#include <linux/xattr.h> 39#include <linux/xattr.h>
40#include <linux/reiserfs_xattr.h> 40#include <linux/reiserfs_xattr.h>
41#include <linux/reiserfs_acl.h> 41#include <linux/reiserfs_acl.h>
42#include <linux/mbcache.h>
43#include <asm/uaccess.h> 42#include <asm/uaccess.h>
44#include <asm/checksum.h> 43#include <asm/checksum.h>
45#include <linux/smp_lock.h> 44#include <linux/smp_lock.h>
@@ -51,67 +50,68 @@
51#define PRIVROOT_NAME ".reiserfs_priv" 50#define PRIVROOT_NAME ".reiserfs_priv"
52#define XAROOT_NAME "xattrs" 51#define XAROOT_NAME "xattrs"
53 52
54static struct reiserfs_xattr_handler *find_xattr_handler_prefix (const char *prefix); 53static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
54 *prefix);
55 55
56static struct dentry * 56static struct dentry *create_xa_root(struct super_block *sb)
57create_xa_root (struct super_block *sb)
58{ 57{
59 struct dentry *privroot = dget (REISERFS_SB(sb)->priv_root); 58 struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root);
60 struct dentry *xaroot; 59 struct dentry *xaroot;
61 60
62 /* This needs to be created at mount-time */ 61 /* This needs to be created at mount-time */
63 if (!privroot) 62 if (!privroot)
64 return ERR_PTR(-EOPNOTSUPP); 63 return ERR_PTR(-EOPNOTSUPP);
65 64
66 xaroot = lookup_one_len (XAROOT_NAME, privroot, strlen (XAROOT_NAME)); 65 xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
67 if (IS_ERR (xaroot)) { 66 if (IS_ERR(xaroot)) {
68 goto out; 67 goto out;
69 } else if (!xaroot->d_inode) { 68 } else if (!xaroot->d_inode) {
70 int err; 69 int err;
71 down (&privroot->d_inode->i_sem); 70 down(&privroot->d_inode->i_sem);
72 err = privroot->d_inode->i_op->mkdir (privroot->d_inode, xaroot, 0700); 71 err =
73 up (&privroot->d_inode->i_sem); 72 privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot,
74 73 0700);
75 if (err) { 74 up(&privroot->d_inode->i_sem);
76 dput (xaroot); 75
77 dput (privroot); 76 if (err) {
78 return ERR_PTR (err); 77 dput(xaroot);
79 } 78 dput(privroot);
80 REISERFS_SB(sb)->xattr_root = dget (xaroot); 79 return ERR_PTR(err);
81 } 80 }
82 81 REISERFS_SB(sb)->xattr_root = dget(xaroot);
83out: 82 }
84 dput (privroot); 83
85 return xaroot; 84 out:
85 dput(privroot);
86 return xaroot;
86} 87}
87 88
88/* This will return a dentry, or error, refering to the xa root directory. 89/* This will return a dentry, or error, refering to the xa root directory.
89 * If the xa root doesn't exist yet, the dentry will be returned without 90 * If the xa root doesn't exist yet, the dentry will be returned without
90 * an associated inode. This dentry can be used with ->mkdir to create 91 * an associated inode. This dentry can be used with ->mkdir to create
91 * the xa directory. */ 92 * the xa directory. */
92static struct dentry * 93static struct dentry *__get_xa_root(struct super_block *s)
93__get_xa_root (struct super_block *s)
94{ 94{
95 struct dentry *privroot = dget (REISERFS_SB(s)->priv_root); 95 struct dentry *privroot = dget(REISERFS_SB(s)->priv_root);
96 struct dentry *xaroot = NULL; 96 struct dentry *xaroot = NULL;
97 97
98 if (IS_ERR (privroot) || !privroot) 98 if (IS_ERR(privroot) || !privroot)
99 return privroot; 99 return privroot;
100 100
101 xaroot = lookup_one_len (XAROOT_NAME, privroot, strlen (XAROOT_NAME)); 101 xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
102 if (IS_ERR (xaroot)) { 102 if (IS_ERR(xaroot)) {
103 goto out; 103 goto out;
104 } else if (!xaroot->d_inode) { 104 } else if (!xaroot->d_inode) {
105 dput (xaroot); 105 dput(xaroot);
106 xaroot = NULL; 106 xaroot = NULL;
107 goto out; 107 goto out;
108 } 108 }
109 109
110 REISERFS_SB(s)->xattr_root = dget (xaroot); 110 REISERFS_SB(s)->xattr_root = dget(xaroot);
111 111
112out: 112 out:
113 dput (privroot); 113 dput(privroot);
114 return xaroot; 114 return xaroot;
115} 115}
116 116
117/* Returns the dentry (or NULL) referring to the root of the extended 117/* Returns the dentry (or NULL) referring to the root of the extended
@@ -119,147 +119,145 @@ out:
119 * Otherwise, we attempt to retreive it from disk. It may also return 119 * Otherwise, we attempt to retreive it from disk. It may also return
120 * a pointer-encoded error. 120 * a pointer-encoded error.
121 */ 121 */
122static inline struct dentry * 122static inline struct dentry *get_xa_root(struct super_block *s)
123get_xa_root (struct super_block *s)
124{ 123{
125 struct dentry *dentry = dget (REISERFS_SB(s)->xattr_root); 124 struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root);
126 125
127 if (!dentry) 126 if (!dentry)
128 dentry = __get_xa_root (s); 127 dentry = __get_xa_root(s);
129 128
130 return dentry; 129 return dentry;
131} 130}
132 131
133/* Opens the directory corresponding to the inode's extended attribute store. 132/* Opens the directory corresponding to the inode's extended attribute store.
134 * If flags allow, the tree to the directory may be created. If creation is 133 * If flags allow, the tree to the directory may be created. If creation is
135 * prohibited, -ENODATA is returned. */ 134 * prohibited, -ENODATA is returned. */
136static struct dentry * 135static struct dentry *open_xa_dir(const struct inode *inode, int flags)
137open_xa_dir (const struct inode *inode, int flags)
138{ 136{
139 struct dentry *xaroot, *xadir; 137 struct dentry *xaroot, *xadir;
140 char namebuf[17]; 138 char namebuf[17];
141 139
142 xaroot = get_xa_root (inode->i_sb); 140 xaroot = get_xa_root(inode->i_sb);
143 if (IS_ERR (xaroot)) { 141 if (IS_ERR(xaroot)) {
144 return xaroot; 142 return xaroot;
145 } else if (!xaroot) { 143 } else if (!xaroot) {
146 if (flags == 0 || flags & XATTR_CREATE) { 144 if (flags == 0 || flags & XATTR_CREATE) {
147 xaroot = create_xa_root (inode->i_sb); 145 xaroot = create_xa_root(inode->i_sb);
148 if (IS_ERR (xaroot)) 146 if (IS_ERR(xaroot))
149 return xaroot; 147 return xaroot;
150 } 148 }
151 if (!xaroot) 149 if (!xaroot)
152 return ERR_PTR (-ENODATA); 150 return ERR_PTR(-ENODATA);
153 } 151 }
154 152
155 /* ok, we have xaroot open */ 153 /* ok, we have xaroot open */
156 154
157 snprintf (namebuf, sizeof (namebuf), "%X.%X", 155 snprintf(namebuf, sizeof(namebuf), "%X.%X",
158 le32_to_cpu (INODE_PKEY (inode)->k_objectid), 156 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
159 inode->i_generation); 157 inode->i_generation);
160 xadir = lookup_one_len (namebuf, xaroot, strlen (namebuf)); 158 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
161 if (IS_ERR (xadir)) { 159 if (IS_ERR(xadir)) {
162 dput (xaroot); 160 dput(xaroot);
163 return xadir; 161 return xadir;
164 } 162 }
165 163
166 if (!xadir->d_inode) { 164 if (!xadir->d_inode) {
167 int err; 165 int err;
168 if (flags == 0 || flags & XATTR_CREATE) { 166 if (flags == 0 || flags & XATTR_CREATE) {
169 /* Although there is nothing else trying to create this directory, 167 /* Although there is nothing else trying to create this directory,
170 * another directory with the same hash may be created, so we need 168 * another directory with the same hash may be created, so we need
171 * to protect against that */ 169 * to protect against that */
172 err = xaroot->d_inode->i_op->mkdir (xaroot->d_inode, xadir, 0700); 170 err =
173 if (err) { 171 xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir,
174 dput (xaroot); 172 0700);
175 dput (xadir); 173 if (err) {
176 return ERR_PTR (err); 174 dput(xaroot);
177 } 175 dput(xadir);
178 } 176 return ERR_PTR(err);
179 if (!xadir->d_inode) { 177 }
180 dput (xaroot); 178 }
181 dput (xadir); 179 if (!xadir->d_inode) {
182 return ERR_PTR (-ENODATA); 180 dput(xaroot);
183 } 181 dput(xadir);
184 } 182 return ERR_PTR(-ENODATA);
185 183 }
186 dput (xaroot); 184 }
187 return xadir; 185
186 dput(xaroot);
187 return xadir;
188} 188}
189 189
190/* Returns a dentry corresponding to a specific extended attribute file 190/* Returns a dentry corresponding to a specific extended attribute file
191 * for the inode. If flags allow, the file is created. Otherwise, a 191 * for the inode. If flags allow, the file is created. Otherwise, a
192 * valid or negative dentry, or an error is returned. */ 192 * valid or negative dentry, or an error is returned. */
193static struct dentry * 193static struct dentry *get_xa_file_dentry(const struct inode *inode,
194get_xa_file_dentry (const struct inode *inode, const char *name, int flags) 194 const char *name, int flags)
195{ 195{
196 struct dentry *xadir, *xafile; 196 struct dentry *xadir, *xafile;
197 int err = 0; 197 int err = 0;
198 198
199 xadir = open_xa_dir (inode, flags); 199 xadir = open_xa_dir(inode, flags);
200 if (IS_ERR (xadir)) { 200 if (IS_ERR(xadir)) {
201 return ERR_PTR (PTR_ERR (xadir)); 201 return ERR_PTR(PTR_ERR(xadir));
202 } else if (xadir && !xadir->d_inode) { 202 } else if (xadir && !xadir->d_inode) {
203 dput (xadir); 203 dput(xadir);
204 return ERR_PTR (-ENODATA); 204 return ERR_PTR(-ENODATA);
205 } 205 }
206 206
207 xafile = lookup_one_len (name, xadir, strlen (name)); 207 xafile = lookup_one_len(name, xadir, strlen(name));
208 if (IS_ERR (xafile)) { 208 if (IS_ERR(xafile)) {
209 dput (xadir); 209 dput(xadir);
210 return ERR_PTR (PTR_ERR (xafile)); 210 return ERR_PTR(PTR_ERR(xafile));
211 } 211 }
212 212
213 if (xafile->d_inode) { /* file exists */ 213 if (xafile->d_inode) { /* file exists */
214 if (flags & XATTR_CREATE) { 214 if (flags & XATTR_CREATE) {
215 err = -EEXIST; 215 err = -EEXIST;
216 dput (xafile); 216 dput(xafile);
217 goto out; 217 goto out;
218 } 218 }
219 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { 219 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
220 goto out; 220 goto out;
221 } else { 221 } else {
222 /* inode->i_sem is down, so nothing else can try to create 222 /* inode->i_sem is down, so nothing else can try to create
223 * the same xattr */ 223 * the same xattr */
224 err = xadir->d_inode->i_op->create (xadir->d_inode, xafile, 224 err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
225 0700|S_IFREG, NULL); 225 0700 | S_IFREG, NULL);
226 226
227 if (err) { 227 if (err) {
228 dput (xafile); 228 dput(xafile);
229 goto out; 229 goto out;
230 } 230 }
231 } 231 }
232
233out:
234 dput (xadir);
235 if (err)
236 xafile = ERR_PTR (err);
237 return xafile;
238}
239 232
233 out:
234 dput(xadir);
235 if (err)
236 xafile = ERR_PTR(err);
237 return xafile;
238}
240 239
241/* Opens a file pointer to the attribute associated with inode */ 240/* Opens a file pointer to the attribute associated with inode */
242static struct file * 241static struct file *open_xa_file(const struct inode *inode, const char *name,
243open_xa_file (const struct inode *inode, const char *name, int flags) 242 int flags)
244{ 243{
245 struct dentry *xafile; 244 struct dentry *xafile;
246 struct file *fp; 245 struct file *fp;
247 246
248 xafile = get_xa_file_dentry (inode, name, flags); 247 xafile = get_xa_file_dentry(inode, name, flags);
249 if (IS_ERR (xafile)) 248 if (IS_ERR(xafile))
250 return ERR_PTR (PTR_ERR (xafile)); 249 return ERR_PTR(PTR_ERR(xafile));
251 else if (!xafile->d_inode) { 250 else if (!xafile->d_inode) {
252 dput (xafile); 251 dput(xafile);
253 return ERR_PTR (-ENODATA); 252 return ERR_PTR(-ENODATA);
254 } 253 }
255 254
256 fp = dentry_open (xafile, NULL, O_RDWR); 255 fp = dentry_open(xafile, NULL, O_RDWR);
257 /* dentry_open dputs the dentry if it fails */ 256 /* dentry_open dputs the dentry if it fails */
258 257
259 return fp; 258 return fp;
260} 259}
261 260
262
263/* 261/*
264 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but 262 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but
265 * we need to drop the path before calling the filldir struct. That 263 * we need to drop the path before calling the filldir struct. That
@@ -273,139 +271,146 @@ open_xa_file (const struct inode *inode, const char *name, int flags)
273 * we're called with i_sem held, so there are no worries about the directory 271 * we're called with i_sem held, so there are no worries about the directory
274 * changing underneath us. 272 * changing underneath us.
275 */ 273 */
276static int __xattr_readdir(struct file * filp, void * dirent, filldir_t filldir) 274static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
277{ 275{
278 struct inode *inode = filp->f_dentry->d_inode; 276 struct inode *inode = filp->f_dentry->d_inode;
279 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 277 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
280 INITIALIZE_PATH (path_to_entry); 278 INITIALIZE_PATH(path_to_entry);
281 struct buffer_head * bh; 279 struct buffer_head *bh;
282 int entry_num; 280 int entry_num;
283 struct item_head * ih, tmp_ih; 281 struct item_head *ih, tmp_ih;
284 int search_res; 282 int search_res;
285 char * local_buf; 283 char *local_buf;
286 loff_t next_pos; 284 loff_t next_pos;
287 char small_buf[32] ; /* avoid kmalloc if we can */ 285 char small_buf[32]; /* avoid kmalloc if we can */
288 struct reiserfs_de_head *deh; 286 struct reiserfs_de_head *deh;
289 int d_reclen; 287 int d_reclen;
290 char * d_name; 288 char *d_name;
291 off_t d_off; 289 off_t d_off;
292 ino_t d_ino; 290 ino_t d_ino;
293 struct reiserfs_dir_entry de; 291 struct reiserfs_dir_entry de;
294 292
295 293 /* form key for search the next directory entry using f_pos field of
296 /* form key for search the next directory entry using f_pos field of 294 file structure */
297 file structure */ 295 next_pos = max_reiserfs_offset(inode);
298 next_pos = max_reiserfs_offset(inode); 296
299 297 while (1) {
300 while (1) { 298 research:
301research: 299 if (next_pos <= DOT_DOT_OFFSET)
302 if (next_pos <= DOT_DOT_OFFSET) 300 break;
303 break; 301 make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3);
304 make_cpu_key (&pos_key, inode, next_pos, TYPE_DIRENTRY, 3); 302
305 303 search_res =
306 search_res = search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, &de); 304 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
307 if (search_res == IO_ERROR) { 305 &de);
308 // FIXME: we could just skip part of directory which could 306 if (search_res == IO_ERROR) {
309 // not be read 307 // FIXME: we could just skip part of directory which could
310 pathrelse(&path_to_entry); 308 // not be read
311 return -EIO; 309 pathrelse(&path_to_entry);
312 } 310 return -EIO;
313 311 }
314 if (search_res == NAME_NOT_FOUND)
315 de.de_entry_num--;
316 312
317 set_de_name_and_namelen(&de); 313 if (search_res == NAME_NOT_FOUND)
318 entry_num = de.de_entry_num; 314 de.de_entry_num--;
319 deh = &(de.de_deh[entry_num]);
320 315
321 bh = de.de_bh; 316 set_de_name_and_namelen(&de);
322 ih = de.de_ih; 317 entry_num = de.de_entry_num;
318 deh = &(de.de_deh[entry_num]);
323 319
324 if (!is_direntry_le_ih(ih)) { 320 bh = de.de_bh;
325 reiserfs_warning(inode->i_sb, "not direntry %h", ih); 321 ih = de.de_ih;
326 break;
327 }
328 copy_item_head(&tmp_ih, ih);
329 322
330 /* we must have found item, that is item of this directory, */ 323 if (!is_direntry_le_ih(ih)) {
331 RFALSE( COMP_SHORT_KEYS (&(ih->ih_key), &pos_key), 324 reiserfs_warning(inode->i_sb, "not direntry %h", ih);
332 "vs-9000: found item %h does not match to dir we readdir %K", 325 break;
333 ih, &pos_key); 326 }
327 copy_item_head(&tmp_ih, ih);
334 328
335 if (deh_offset(deh) <= DOT_DOT_OFFSET) { 329 /* we must have found item, that is item of this directory, */
336 break; 330 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
337 } 331 "vs-9000: found item %h does not match to dir we readdir %K",
332 ih, &pos_key);
338 333
339 /* look for the previous entry in the directory */ 334 if (deh_offset(deh) <= DOT_DOT_OFFSET) {
340 next_pos = deh_offset (deh) - 1; 335 break;
336 }
341 337
342 if (!de_visible (deh)) 338 /* look for the previous entry in the directory */
343 /* it is hidden entry */ 339 next_pos = deh_offset(deh) - 1;
344 continue;
345 340
346 d_reclen = entry_length(bh, ih, entry_num); 341 if (!de_visible(deh))
347 d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh); 342 /* it is hidden entry */
348 d_off = deh_offset (deh); 343 continue;
349 d_ino = deh_objectid (deh);
350 344
351 if (!d_name[d_reclen - 1]) 345 d_reclen = entry_length(bh, ih, entry_num);
352 d_reclen = strlen (d_name); 346 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
347 d_off = deh_offset(deh);
348 d_ino = deh_objectid(deh);
353 349
354 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)){ 350 if (!d_name[d_reclen - 1])
355 /* too big to send back to VFS */ 351 d_reclen = strlen(d_name);
356 continue ;
357 }
358 352
359 /* Ignore the .reiserfs_priv entry */ 353 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) {
360 if (reiserfs_xattrs (inode->i_sb) && 354 /* too big to send back to VFS */
361 !old_format_only(inode->i_sb) && 355 continue;
362 deh_objectid (deh) == le32_to_cpu (INODE_PKEY(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->k_objectid)) 356 }
363 continue;
364 357
365 if (d_reclen <= 32) { 358 /* Ignore the .reiserfs_priv entry */
366 local_buf = small_buf ; 359 if (reiserfs_xattrs(inode->i_sb) &&
367 } else { 360 !old_format_only(inode->i_sb) &&
368 local_buf = reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb) ; 361 deh_objectid(deh) ==
369 if (!local_buf) { 362 le32_to_cpu(INODE_PKEY
370 pathrelse (&path_to_entry); 363 (REISERFS_SB(inode->i_sb)->priv_root->d_inode)->
371 return -ENOMEM ; 364 k_objectid))
372 } 365 continue;
373 if (item_moved (&tmp_ih, &path_to_entry)) { 366
374 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 367 if (d_reclen <= 32) {
375 368 local_buf = small_buf;
376 /* sigh, must retry. Do this same offset again */ 369 } else {
377 next_pos = d_off; 370 local_buf =
378 goto research; 371 reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb);
379 } 372 if (!local_buf) {
380 } 373 pathrelse(&path_to_entry);
374 return -ENOMEM;
375 }
376 if (item_moved(&tmp_ih, &path_to_entry)) {
377 reiserfs_kfree(local_buf, d_reclen,
378 inode->i_sb);
379
380 /* sigh, must retry. Do this same offset again */
381 next_pos = d_off;
382 goto research;
383 }
384 }
381 385
382 // Note, that we copy name to user space via temporary 386 // Note, that we copy name to user space via temporary
383 // buffer (local_buf) because filldir will block if 387 // buffer (local_buf) because filldir will block if
384 // user space buffer is swapped out. At that time 388 // user space buffer is swapped out. At that time
385 // entry can move to somewhere else 389 // entry can move to somewhere else
386 memcpy (local_buf, d_name, d_reclen); 390 memcpy(local_buf, d_name, d_reclen);
387 391
388 /* the filldir function might need to start transactions, 392 /* the filldir function might need to start transactions,
389 * or do who knows what. Release the path now that we've 393 * or do who knows what. Release the path now that we've
390 * copied all the important stuff out of the deh 394 * copied all the important stuff out of the deh
391 */ 395 */
392 pathrelse (&path_to_entry); 396 pathrelse(&path_to_entry);
393 397
394 if (filldir (dirent, local_buf, d_reclen, d_off, d_ino, 398 if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
395 DT_UNKNOWN) < 0) { 399 DT_UNKNOWN) < 0) {
396 if (local_buf != small_buf) { 400 if (local_buf != small_buf) {
397 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 401 reiserfs_kfree(local_buf, d_reclen,
398 } 402 inode->i_sb);
399 goto end; 403 }
400 } 404 goto end;
401 if (local_buf != small_buf) { 405 }
402 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 406 if (local_buf != small_buf) {
403 } 407 reiserfs_kfree(local_buf, d_reclen, inode->i_sb);
404 } /* while */ 408 }
409 } /* while */
405 410
406end: 411 end:
407 pathrelse (&path_to_entry); 412 pathrelse(&path_to_entry);
408 return 0; 413 return 0;
409} 414}
410 415
411/* 416/*
@@ -417,63 +422,59 @@ end:
417static 422static
418int xattr_readdir(struct file *file, filldir_t filler, void *buf) 423int xattr_readdir(struct file *file, filldir_t filler, void *buf)
419{ 424{
420 struct inode *inode = file->f_dentry->d_inode; 425 struct inode *inode = file->f_dentry->d_inode;
421 int res = -ENOTDIR; 426 int res = -ENOTDIR;
422 if (!file->f_op || !file->f_op->readdir) 427 if (!file->f_op || !file->f_op->readdir)
423 goto out; 428 goto out;
424 down(&inode->i_sem); 429 down(&inode->i_sem);
425// down(&inode->i_zombie); 430// down(&inode->i_zombie);
426 res = -ENOENT; 431 res = -ENOENT;
427 if (!IS_DEADDIR(inode)) { 432 if (!IS_DEADDIR(inode)) {
428 lock_kernel(); 433 lock_kernel();
429 res = __xattr_readdir(file, buf, filler); 434 res = __xattr_readdir(file, buf, filler);
430 unlock_kernel(); 435 unlock_kernel();
431 } 436 }
432// up(&inode->i_zombie); 437// up(&inode->i_zombie);
433 up(&inode->i_sem); 438 up(&inode->i_sem);
434out: 439 out:
435 return res; 440 return res;
436} 441}
437 442
438
439/* Internal operations on file data */ 443/* Internal operations on file data */
440static inline void 444static inline void reiserfs_put_page(struct page *page)
441reiserfs_put_page(struct page *page)
442{ 445{
443 kunmap(page); 446 kunmap(page);
444 page_cache_release(page); 447 page_cache_release(page);
445} 448}
446 449
447static struct page * 450static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
448reiserfs_get_page(struct inode *dir, unsigned long n)
449{ 451{
450 struct address_space *mapping = dir->i_mapping; 452 struct address_space *mapping = dir->i_mapping;
451 struct page *page; 453 struct page *page;
452 /* We can deadlock if we try to free dentries, 454 /* We can deadlock if we try to free dentries,
453 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 455 and an unlink/rmdir has just occured - GFP_NOFS avoids this */
454 mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS; 456 mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS;
455 page = read_cache_page (mapping, n, 457 page = read_cache_page(mapping, n,
456 (filler_t*)mapping->a_ops->readpage, NULL); 458 (filler_t *) mapping->a_ops->readpage, NULL);
457 if (!IS_ERR(page)) { 459 if (!IS_ERR(page)) {
458 wait_on_page_locked(page); 460 wait_on_page_locked(page);
459 kmap(page); 461 kmap(page);
460 if (!PageUptodate(page)) 462 if (!PageUptodate(page))
461 goto fail; 463 goto fail;
462 464
463 if (PageError(page)) 465 if (PageError(page))
464 goto fail; 466 goto fail;
465 } 467 }
466 return page; 468 return page;
467 469
468fail: 470 fail:
469 reiserfs_put_page(page); 471 reiserfs_put_page(page);
470 return ERR_PTR(-EIO); 472 return ERR_PTR(-EIO);
471} 473}
472 474
473static inline __u32 475static inline __u32 xattr_hash(const char *msg, int len)
474xattr_hash (const char *msg, int len)
475{ 476{
476 return csum_partial (msg, len, 0); 477 return csum_partial(msg, len, 0);
477} 478}
478 479
479/* Generic extended attribute operations that can be used by xa plugins */ 480/* Generic extended attribute operations that can be used by xa plugins */
@@ -482,294 +483,300 @@ xattr_hash (const char *msg, int len)
482 * inode->i_sem: down 483 * inode->i_sem: down
483 */ 484 */
484int 485int
485reiserfs_xattr_set (struct inode *inode, const char *name, const void *buffer, 486reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
486 size_t buffer_size, int flags) 487 size_t buffer_size, int flags)
487{ 488{
488 int err = 0; 489 int err = 0;
489 struct file *fp; 490 struct file *fp;
490 struct page *page; 491 struct page *page;
491 char *data; 492 char *data;
492 struct address_space *mapping; 493 struct address_space *mapping;
493 size_t file_pos = 0; 494 size_t file_pos = 0;
494 size_t buffer_pos = 0; 495 size_t buffer_pos = 0;
495 struct inode *xinode; 496 struct inode *xinode;
496 struct iattr newattrs; 497 struct iattr newattrs;
497 __u32 xahash = 0; 498 __u32 xahash = 0;
498 499
499 if (IS_RDONLY (inode)) 500 if (IS_RDONLY(inode))
500 return -EROFS; 501 return -EROFS;
501 502
502 if (IS_IMMUTABLE (inode) || IS_APPEND (inode)) 503 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
503 return -EPERM; 504 return -EPERM;
504 505
505 if (get_inode_sd_version (inode) == STAT_DATA_V1) 506 if (get_inode_sd_version(inode) == STAT_DATA_V1)
506 return -EOPNOTSUPP; 507 return -EOPNOTSUPP;
507 508
508 /* Empty xattrs are ok, they're just empty files, no hash */ 509 /* Empty xattrs are ok, they're just empty files, no hash */
509 if (buffer && buffer_size) 510 if (buffer && buffer_size)
510 xahash = xattr_hash (buffer, buffer_size); 511 xahash = xattr_hash(buffer, buffer_size);
511 512
512open_file: 513 open_file:
513 fp = open_xa_file (inode, name, flags); 514 fp = open_xa_file(inode, name, flags);
514 if (IS_ERR (fp)) { 515 if (IS_ERR(fp)) {
515 err = PTR_ERR (fp); 516 err = PTR_ERR(fp);
516 goto out; 517 goto out;
517 } 518 }
518 519
519 xinode = fp->f_dentry->d_inode; 520 xinode = fp->f_dentry->d_inode;
520 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 521 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
521 522
522 /* we need to copy it off.. */ 523 /* we need to copy it off.. */
523 if (xinode->i_nlink > 1) { 524 if (xinode->i_nlink > 1) {
524 fput(fp); 525 fput(fp);
525 err = reiserfs_xattr_del (inode, name); 526 err = reiserfs_xattr_del(inode, name);
526 if (err < 0) 527 if (err < 0)
527 goto out; 528 goto out;
528 /* We just killed the old one, we're not replacing anymore */ 529 /* We just killed the old one, we're not replacing anymore */
529 if (flags & XATTR_REPLACE) 530 if (flags & XATTR_REPLACE)
530 flags &= ~XATTR_REPLACE; 531 flags &= ~XATTR_REPLACE;
531 goto open_file; 532 goto open_file;
532 } 533 }
533 534
534 /* Resize it so we're ok to write there */ 535 /* Resize it so we're ok to write there */
535 newattrs.ia_size = buffer_size; 536 newattrs.ia_size = buffer_size;
536 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 537 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
537 down (&xinode->i_sem); 538 down(&xinode->i_sem);
538 err = notify_change(fp->f_dentry, &newattrs); 539 err = notify_change(fp->f_dentry, &newattrs);
539 if (err) 540 if (err)
540 goto out_filp; 541 goto out_filp;
541 542
542 mapping = xinode->i_mapping; 543 mapping = xinode->i_mapping;
543 while (buffer_pos < buffer_size || buffer_pos == 0) { 544 while (buffer_pos < buffer_size || buffer_pos == 0) {
544 size_t chunk; 545 size_t chunk;
545 size_t skip = 0; 546 size_t skip = 0;
546 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); 547 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
547 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) 548 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
548 chunk = PAGE_CACHE_SIZE; 549 chunk = PAGE_CACHE_SIZE;
549 else 550 else
550 chunk = buffer_size - buffer_pos; 551 chunk = buffer_size - buffer_pos;
551 552
552 page = reiserfs_get_page (xinode, file_pos >> PAGE_CACHE_SHIFT); 553 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT);
553 if (IS_ERR (page)) { 554 if (IS_ERR(page)) {
554 err = PTR_ERR (page); 555 err = PTR_ERR(page);
555 goto out_filp; 556 goto out_filp;
556 } 557 }
557 558
558 lock_page (page); 559 lock_page(page);
559 data = page_address (page); 560 data = page_address(page);
560 561
561 if (file_pos == 0) { 562 if (file_pos == 0) {
562 struct reiserfs_xattr_header *rxh; 563 struct reiserfs_xattr_header *rxh;
563 skip = file_pos = sizeof (struct reiserfs_xattr_header); 564 skip = file_pos = sizeof(struct reiserfs_xattr_header);
564 if (chunk + skip > PAGE_CACHE_SIZE) 565 if (chunk + skip > PAGE_CACHE_SIZE)
565 chunk = PAGE_CACHE_SIZE - skip; 566 chunk = PAGE_CACHE_SIZE - skip;
566 rxh = (struct reiserfs_xattr_header *)data; 567 rxh = (struct reiserfs_xattr_header *)data;
567 rxh->h_magic = cpu_to_le32 (REISERFS_XATTR_MAGIC); 568 rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
568 rxh->h_hash = cpu_to_le32 (xahash); 569 rxh->h_hash = cpu_to_le32(xahash);
569 } 570 }
570 571
571 err = mapping->a_ops->prepare_write (fp, page, page_offset, 572 err = mapping->a_ops->prepare_write(fp, page, page_offset,
572 page_offset + chunk + skip); 573 page_offset + chunk + skip);
573 if (!err) { 574 if (!err) {
574 if (buffer) 575 if (buffer)
575 memcpy (data + skip, buffer + buffer_pos, chunk); 576 memcpy(data + skip, buffer + buffer_pos, chunk);
576 err = mapping->a_ops->commit_write (fp, page, page_offset, 577 err =
577 page_offset + chunk + skip); 578 mapping->a_ops->commit_write(fp, page, page_offset,
579 page_offset + chunk +
580 skip);
581 }
582 unlock_page(page);
583 reiserfs_put_page(page);
584 buffer_pos += chunk;
585 file_pos += chunk;
586 skip = 0;
587 if (err || buffer_size == 0 || !buffer)
588 break;
589 }
590
591 /* We can't mark the inode dirty if it's not hashed. This is the case
592 * when we're inheriting the default ACL. If we dirty it, the inode
593 * gets marked dirty, but won't (ever) make it onto the dirty list until
594 * it's synced explicitly to clear I_DIRTY. This is bad. */
595 if (!hlist_unhashed(&inode->i_hash)) {
596 inode->i_ctime = CURRENT_TIME_SEC;
597 mark_inode_dirty(inode);
578 } 598 }
579 unlock_page (page); 599
580 reiserfs_put_page (page); 600 out_filp:
581 buffer_pos += chunk; 601 up(&xinode->i_sem);
582 file_pos += chunk; 602 fput(fp);
583 skip = 0; 603
584 if (err || buffer_size == 0 || !buffer) 604 out:
585 break; 605 return err;
586 }
587
588 /* We can't mark the inode dirty if it's not hashed. This is the case
589 * when we're inheriting the default ACL. If we dirty it, the inode
590 * gets marked dirty, but won't (ever) make it onto the dirty list until
591 * it's synced explicitly to clear I_DIRTY. This is bad. */
592 if (!hlist_unhashed(&inode->i_hash)) {
593 inode->i_ctime = CURRENT_TIME_SEC;
594 mark_inode_dirty (inode);
595 }
596
597out_filp:
598 up (&xinode->i_sem);
599 fput(fp);
600
601out:
602 return err;
603} 606}
604 607
605/* 608/*
606 * inode->i_sem: down 609 * inode->i_sem: down
607 */ 610 */
608int 611int
609reiserfs_xattr_get (const struct inode *inode, const char *name, void *buffer, 612reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
610 size_t buffer_size) 613 size_t buffer_size)
611{ 614{
612 ssize_t err = 0; 615 ssize_t err = 0;
613 struct file *fp; 616 struct file *fp;
614 size_t isize; 617 size_t isize;
615 size_t file_pos = 0; 618 size_t file_pos = 0;
616 size_t buffer_pos = 0; 619 size_t buffer_pos = 0;
617 struct page *page; 620 struct page *page;
618 struct inode *xinode; 621 struct inode *xinode;
619 __u32 hash = 0; 622 __u32 hash = 0;
620 623
621 if (name == NULL) 624 if (name == NULL)
622 return -EINVAL; 625 return -EINVAL;
623 626
624 /* We can't have xattrs attached to v1 items since they don't have 627 /* We can't have xattrs attached to v1 items since they don't have
625 * generation numbers */ 628 * generation numbers */
626 if (get_inode_sd_version (inode) == STAT_DATA_V1) 629 if (get_inode_sd_version(inode) == STAT_DATA_V1)
627 return -EOPNOTSUPP; 630 return -EOPNOTSUPP;
628 631
629 fp = open_xa_file (inode, name, FL_READONLY); 632 fp = open_xa_file(inode, name, FL_READONLY);
630 if (IS_ERR (fp)) { 633 if (IS_ERR(fp)) {
631 err = PTR_ERR (fp); 634 err = PTR_ERR(fp);
632 goto out; 635 goto out;
633 } 636 }
634 637
635 xinode = fp->f_dentry->d_inode; 638 xinode = fp->f_dentry->d_inode;
636 isize = xinode->i_size; 639 isize = xinode->i_size;
637 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 640 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
638 641
639 /* Just return the size needed */ 642 /* Just return the size needed */
640 if (buffer == NULL) { 643 if (buffer == NULL) {
641 err = isize - sizeof (struct reiserfs_xattr_header); 644 err = isize - sizeof(struct reiserfs_xattr_header);
642 goto out_dput; 645 goto out_dput;
643 } 646 }
644 647
645 if (buffer_size < isize - sizeof (struct reiserfs_xattr_header)) { 648 if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
646 err = -ERANGE; 649 err = -ERANGE;
647 goto out_dput; 650 goto out_dput;
648 } 651 }
649 652
650 while (file_pos < isize) { 653 while (file_pos < isize) {
651 size_t chunk; 654 size_t chunk;
652 char *data; 655 char *data;
653 size_t skip = 0; 656 size_t skip = 0;
654 if (isize - file_pos > PAGE_CACHE_SIZE) 657 if (isize - file_pos > PAGE_CACHE_SIZE)
655 chunk = PAGE_CACHE_SIZE; 658 chunk = PAGE_CACHE_SIZE;
656 else 659 else
657 chunk = isize - file_pos; 660 chunk = isize - file_pos;
658 661
659 page = reiserfs_get_page (xinode, file_pos >> PAGE_CACHE_SHIFT); 662 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT);
660 if (IS_ERR (page)) { 663 if (IS_ERR(page)) {
661 err = PTR_ERR (page); 664 err = PTR_ERR(page);
662 goto out_dput; 665 goto out_dput;
663 } 666 }
664 667
665 lock_page (page); 668 lock_page(page);
666 data = page_address (page); 669 data = page_address(page);
667 if (file_pos == 0) { 670 if (file_pos == 0) {
668 struct reiserfs_xattr_header *rxh = 671 struct reiserfs_xattr_header *rxh =
669 (struct reiserfs_xattr_header *)data; 672 (struct reiserfs_xattr_header *)data;
670 skip = file_pos = sizeof (struct reiserfs_xattr_header); 673 skip = file_pos = sizeof(struct reiserfs_xattr_header);
671 chunk -= skip; 674 chunk -= skip;
672 /* Magic doesn't match up.. */ 675 /* Magic doesn't match up.. */
673 if (rxh->h_magic != cpu_to_le32 (REISERFS_XATTR_MAGIC)) { 676 if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
674 unlock_page (page); 677 unlock_page(page);
675 reiserfs_put_page (page); 678 reiserfs_put_page(page);
676 reiserfs_warning (inode->i_sb, "Invalid magic for xattr (%s) " 679 reiserfs_warning(inode->i_sb,
677 "associated with %k", name, 680 "Invalid magic for xattr (%s) "
678 INODE_PKEY (inode)); 681 "associated with %k", name,
679 err = -EIO; 682 INODE_PKEY(inode));
680 goto out_dput; 683 err = -EIO;
681 } 684 goto out_dput;
682 hash = le32_to_cpu (rxh->h_hash); 685 }
683 } 686 hash = le32_to_cpu(rxh->h_hash);
684 memcpy (buffer + buffer_pos, data + skip, chunk); 687 }
685 unlock_page (page); 688 memcpy(buffer + buffer_pos, data + skip, chunk);
686 reiserfs_put_page (page); 689 unlock_page(page);
687 file_pos += chunk; 690 reiserfs_put_page(page);
688 buffer_pos += chunk; 691 file_pos += chunk;
689 skip = 0; 692 buffer_pos += chunk;
690 } 693 skip = 0;
691 err = isize - sizeof (struct reiserfs_xattr_header); 694 }
692 695 err = isize - sizeof(struct reiserfs_xattr_header);
693 if (xattr_hash (buffer, isize - sizeof (struct reiserfs_xattr_header)) != hash) { 696
694 reiserfs_warning (inode->i_sb, "Invalid hash for xattr (%s) associated " 697 if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
695 "with %k", name, INODE_PKEY (inode)); 698 hash) {
696 err = -EIO; 699 reiserfs_warning(inode->i_sb,
697 } 700 "Invalid hash for xattr (%s) associated "
698 701 "with %k", name, INODE_PKEY(inode));
699out_dput: 702 err = -EIO;
700 fput(fp); 703 }
701 704
702out: 705 out_dput:
703 return err; 706 fput(fp);
707
708 out:
709 return err;
704} 710}
705 711
706static int 712static int
707__reiserfs_xattr_del (struct dentry *xadir, const char *name, int namelen) 713__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen)
708{ 714{
709 struct dentry *dentry; 715 struct dentry *dentry;
710 struct inode *dir = xadir->d_inode; 716 struct inode *dir = xadir->d_inode;
711 int err = 0; 717 int err = 0;
712 718
713 dentry = lookup_one_len (name, xadir, namelen); 719 dentry = lookup_one_len(name, xadir, namelen);
714 if (IS_ERR (dentry)) { 720 if (IS_ERR(dentry)) {
715 err = PTR_ERR (dentry); 721 err = PTR_ERR(dentry);
716 goto out; 722 goto out;
717 } else if (!dentry->d_inode) { 723 } else if (!dentry->d_inode) {
718 err = -ENODATA; 724 err = -ENODATA;
719 goto out_file; 725 goto out_file;
720 } 726 }
721 727
722 /* Skip directories.. */ 728 /* Skip directories.. */
723 if (S_ISDIR (dentry->d_inode->i_mode)) 729 if (S_ISDIR(dentry->d_inode->i_mode))
724 goto out_file; 730 goto out_file;
725 731
726 if (!is_reiserfs_priv_object (dentry->d_inode)) { 732 if (!is_reiserfs_priv_object(dentry->d_inode)) {
727 reiserfs_warning (dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have " 733 reiserfs_warning(dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have "
728 "priv flag set [parent is %sset].", 734 "priv flag set [parent is %sset].",
729 le32_to_cpu (INODE_PKEY (dentry->d_inode)->k_objectid), 735 le32_to_cpu(INODE_PKEY(dentry->d_inode)->
730 xadir->d_name.len, xadir->d_name.name, namelen, name, 736 k_objectid), xadir->d_name.len,
731 is_reiserfs_priv_object (xadir->d_inode) ? "" : "not "); 737 xadir->d_name.name, namelen, name,
732 dput (dentry); 738 is_reiserfs_priv_object(xadir->
733 return -EIO; 739 d_inode) ? "" :
734 } 740 "not ");
735 741 dput(dentry);
736 err = dir->i_op->unlink (dir, dentry); 742 return -EIO;
737 if (!err) 743 }
738 d_delete (dentry);
739
740out_file:
741 dput (dentry);
742
743out:
744 return err;
745}
746 744
745 err = dir->i_op->unlink(dir, dentry);
746 if (!err)
747 d_delete(dentry);
747 748
748int 749 out_file:
749reiserfs_xattr_del (struct inode *inode, const char *name) 750 dput(dentry);
751
752 out:
753 return err;
754}
755
756int reiserfs_xattr_del(struct inode *inode, const char *name)
750{ 757{
751 struct dentry *dir; 758 struct dentry *dir;
752 int err; 759 int err;
753 760
754 if (IS_RDONLY (inode)) 761 if (IS_RDONLY(inode))
755 return -EROFS; 762 return -EROFS;
756 763
757 dir = open_xa_dir (inode, FL_READONLY); 764 dir = open_xa_dir(inode, FL_READONLY);
758 if (IS_ERR (dir)) { 765 if (IS_ERR(dir)) {
759 err = PTR_ERR (dir); 766 err = PTR_ERR(dir);
760 goto out; 767 goto out;
761 } 768 }
762 769
763 err = __reiserfs_xattr_del (dir, name, strlen (name)); 770 err = __reiserfs_xattr_del(dir, name, strlen(name));
764 dput (dir); 771 dput(dir);
765 772
766 if (!err) { 773 if (!err) {
767 inode->i_ctime = CURRENT_TIME_SEC; 774 inode->i_ctime = CURRENT_TIME_SEC;
768 mark_inode_dirty (inode); 775 mark_inode_dirty(inode);
769 } 776 }
770 777
771out: 778 out:
772 return err; 779 return err;
773} 780}
774 781
775/* The following are side effects of other operations that aren't explicitly 782/* The following are side effects of other operations that aren't explicitly
@@ -777,167 +784,163 @@ out:
777 * or ownership changes, object deletions, etc. */ 784 * or ownership changes, object deletions, etc. */
778 785
779static int 786static int
780reiserfs_delete_xattrs_filler (void *buf, const char *name, int namelen, 787reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
781 loff_t offset, ino_t ino, unsigned int d_type) 788 loff_t offset, ino_t ino, unsigned int d_type)
782{ 789{
783 struct dentry *xadir = (struct dentry *)buf; 790 struct dentry *xadir = (struct dentry *)buf;
784 791
785 return __reiserfs_xattr_del (xadir, name, namelen); 792 return __reiserfs_xattr_del(xadir, name, namelen);
786 793
787} 794}
788 795
789/* This is called w/ inode->i_sem downed */ 796/* This is called w/ inode->i_sem downed */
790int 797int reiserfs_delete_xattrs(struct inode *inode)
791reiserfs_delete_xattrs (struct inode *inode)
792{ 798{
793 struct file *fp; 799 struct file *fp;
794 struct dentry *dir, *root; 800 struct dentry *dir, *root;
795 int err = 0; 801 int err = 0;
796 802
797 /* Skip out, an xattr has no xattrs associated with it */ 803 /* Skip out, an xattr has no xattrs associated with it */
798 if (is_reiserfs_priv_object (inode) || 804 if (is_reiserfs_priv_object(inode) ||
799 get_inode_sd_version (inode) == STAT_DATA_V1 || 805 get_inode_sd_version(inode) == STAT_DATA_V1 ||
800 !reiserfs_xattrs(inode->i_sb)) 806 !reiserfs_xattrs(inode->i_sb)) {
801 { 807 return 0;
802 return 0; 808 }
803 } 809 reiserfs_read_lock_xattrs(inode->i_sb);
804 reiserfs_read_lock_xattrs (inode->i_sb); 810 dir = open_xa_dir(inode, FL_READONLY);
805 dir = open_xa_dir (inode, FL_READONLY); 811 reiserfs_read_unlock_xattrs(inode->i_sb);
806 reiserfs_read_unlock_xattrs (inode->i_sb); 812 if (IS_ERR(dir)) {
807 if (IS_ERR (dir)) { 813 err = PTR_ERR(dir);
808 err = PTR_ERR (dir); 814 goto out;
809 goto out; 815 } else if (!dir->d_inode) {
810 } else if (!dir->d_inode) { 816 dput(dir);
811 dput (dir); 817 return 0;
812 return 0; 818 }
813 } 819
814 820 fp = dentry_open(dir, NULL, O_RDWR);
815 fp = dentry_open (dir, NULL, O_RDWR); 821 if (IS_ERR(fp)) {
816 if (IS_ERR (fp)) { 822 err = PTR_ERR(fp);
817 err = PTR_ERR (fp); 823 /* dentry_open dputs the dentry if it fails */
818 /* dentry_open dputs the dentry if it fails */ 824 goto out;
819 goto out; 825 }
820 } 826
821 827 lock_kernel();
822 lock_kernel (); 828 err = xattr_readdir(fp, reiserfs_delete_xattrs_filler, dir);
823 err = xattr_readdir (fp, reiserfs_delete_xattrs_filler, dir); 829 if (err) {
824 if (err) { 830 unlock_kernel();
825 unlock_kernel (); 831 goto out_dir;
826 goto out_dir; 832 }
827 } 833
828 834 /* Leftovers besides . and .. -- that's not good. */
829 /* Leftovers besides . and .. -- that's not good. */ 835 if (dir->d_inode->i_nlink <= 2) {
830 if (dir->d_inode->i_nlink <= 2) { 836 root = get_xa_root(inode->i_sb);
831 root = get_xa_root (inode->i_sb); 837 reiserfs_write_lock_xattrs(inode->i_sb);
832 reiserfs_write_lock_xattrs (inode->i_sb); 838 err = vfs_rmdir(root->d_inode, dir);
833 err = vfs_rmdir (root->d_inode, dir); 839 reiserfs_write_unlock_xattrs(inode->i_sb);
834 reiserfs_write_unlock_xattrs (inode->i_sb); 840 dput(root);
835 dput (root); 841 } else {
836 } else { 842 reiserfs_warning(inode->i_sb,
837 reiserfs_warning (inode->i_sb, 843 "Couldn't remove all entries in directory");
838 "Couldn't remove all entries in directory"); 844 }
839 } 845 unlock_kernel();
840 unlock_kernel (); 846
841 847 out_dir:
842out_dir: 848 fput(fp);
843 fput(fp); 849
844 850 out:
845out: 851 if (!err)
846 if (!err) 852 REISERFS_I(inode)->i_flags =
847 REISERFS_I(inode)->i_flags = REISERFS_I(inode)->i_flags & ~i_has_xattr_dir; 853 REISERFS_I(inode)->i_flags & ~i_has_xattr_dir;
848 return err; 854 return err;
849} 855}
850 856
851struct reiserfs_chown_buf { 857struct reiserfs_chown_buf {
852 struct inode *inode; 858 struct inode *inode;
853 struct dentry *xadir; 859 struct dentry *xadir;
854 struct iattr *attrs; 860 struct iattr *attrs;
855}; 861};
856 862
857/* XXX: If there is a better way to do this, I'd love to hear about it */ 863/* XXX: If there is a better way to do this, I'd love to hear about it */
858static int 864static int
859reiserfs_chown_xattrs_filler (void *buf, const char *name, int namelen, 865reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen,
860 loff_t offset, ino_t ino, unsigned int d_type) 866 loff_t offset, ino_t ino, unsigned int d_type)
861{ 867{
862 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; 868 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf;
863 struct dentry *xafile, *xadir = chown_buf->xadir; 869 struct dentry *xafile, *xadir = chown_buf->xadir;
864 struct iattr *attrs = chown_buf->attrs; 870 struct iattr *attrs = chown_buf->attrs;
865 int err = 0; 871 int err = 0;
866 872
867 xafile = lookup_one_len (name, xadir, namelen); 873 xafile = lookup_one_len(name, xadir, namelen);
868 if (IS_ERR (xafile)) 874 if (IS_ERR(xafile))
869 return PTR_ERR (xafile); 875 return PTR_ERR(xafile);
870 else if (!xafile->d_inode) { 876 else if (!xafile->d_inode) {
871 dput (xafile); 877 dput(xafile);
872 return -ENODATA; 878 return -ENODATA;
873 } 879 }
874 880
875 if (!S_ISDIR (xafile->d_inode->i_mode)) 881 if (!S_ISDIR(xafile->d_inode->i_mode))
876 err = notify_change (xafile, attrs); 882 err = notify_change(xafile, attrs);
877 dput (xafile); 883 dput(xafile);
878 884
879 return err; 885 return err;
880} 886}
881 887
882int 888int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
883reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs)
884{ 889{
885 struct file *fp; 890 struct file *fp;
886 struct dentry *dir; 891 struct dentry *dir;
887 int err = 0; 892 int err = 0;
888 struct reiserfs_chown_buf buf; 893 struct reiserfs_chown_buf buf;
889 unsigned int ia_valid = attrs->ia_valid; 894 unsigned int ia_valid = attrs->ia_valid;
890 895
891 /* Skip out, an xattr has no xattrs associated with it */ 896 /* Skip out, an xattr has no xattrs associated with it */
892 if (is_reiserfs_priv_object (inode) || 897 if (is_reiserfs_priv_object(inode) ||
893 get_inode_sd_version (inode) == STAT_DATA_V1 || 898 get_inode_sd_version(inode) == STAT_DATA_V1 ||
894 !reiserfs_xattrs(inode->i_sb)) 899 !reiserfs_xattrs(inode->i_sb)) {
895 { 900 return 0;
896 return 0; 901 }
897 } 902 reiserfs_read_lock_xattrs(inode->i_sb);
898 reiserfs_read_lock_xattrs (inode->i_sb); 903 dir = open_xa_dir(inode, FL_READONLY);
899 dir = open_xa_dir (inode, FL_READONLY); 904 reiserfs_read_unlock_xattrs(inode->i_sb);
900 reiserfs_read_unlock_xattrs (inode->i_sb); 905 if (IS_ERR(dir)) {
901 if (IS_ERR (dir)) { 906 if (PTR_ERR(dir) != -ENODATA)
902 if (PTR_ERR (dir) != -ENODATA) 907 err = PTR_ERR(dir);
903 err = PTR_ERR (dir); 908 goto out;
904 goto out; 909 } else if (!dir->d_inode) {
905 } else if (!dir->d_inode) { 910 dput(dir);
906 dput (dir); 911 goto out;
907 goto out; 912 }
908 } 913
909 914 fp = dentry_open(dir, NULL, O_RDWR);
910 fp = dentry_open (dir, NULL, O_RDWR); 915 if (IS_ERR(fp)) {
911 if (IS_ERR (fp)) { 916 err = PTR_ERR(fp);
912 err = PTR_ERR (fp); 917 /* dentry_open dputs the dentry if it fails */
913 /* dentry_open dputs the dentry if it fails */ 918 goto out;
914 goto out; 919 }
915 }
916
917 lock_kernel ();
918
919 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME);
920 buf.xadir = dir;
921 buf.attrs = attrs;
922 buf.inode = inode;
923
924 err = xattr_readdir (fp, reiserfs_chown_xattrs_filler, &buf);
925 if (err) {
926 unlock_kernel ();
927 goto out_dir;
928 }
929
930 err = notify_change (dir, attrs);
931 unlock_kernel ();
932
933out_dir:
934 fput(fp);
935
936out:
937 attrs->ia_valid = ia_valid;
938 return err;
939}
940 920
921 lock_kernel();
922
923 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME);
924 buf.xadir = dir;
925 buf.attrs = attrs;
926 buf.inode = inode;
927
928 err = xattr_readdir(fp, reiserfs_chown_xattrs_filler, &buf);
929 if (err) {
930 unlock_kernel();
931 goto out_dir;
932 }
933
934 err = notify_change(dir, attrs);
935 unlock_kernel();
936
937 out_dir:
938 fput(fp);
939
940 out:
941 attrs->ia_valid = ia_valid;
942 return err;
943}
941 944
942/* Actual operations that are exported to VFS-land */ 945/* Actual operations that are exported to VFS-land */
943 946
@@ -946,61 +949,60 @@ out:
946 * Preliminary locking: we down dentry->d_inode->i_sem 949 * Preliminary locking: we down dentry->d_inode->i_sem
947 */ 950 */
948ssize_t 951ssize_t
949reiserfs_getxattr (struct dentry *dentry, const char *name, void *buffer, 952reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
950 size_t size) 953 size_t size)
951{ 954{
952 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 955 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
953 int err; 956 int err;
954 957
955 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 958 if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
956 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 959 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
957 return -EOPNOTSUPP; 960 return -EOPNOTSUPP;
958 961
959 reiserfs_read_lock_xattr_i (dentry->d_inode); 962 reiserfs_read_lock_xattr_i(dentry->d_inode);
960 reiserfs_read_lock_xattrs (dentry->d_sb); 963 reiserfs_read_lock_xattrs(dentry->d_sb);
961 err = xah->get (dentry->d_inode, name, buffer, size); 964 err = xah->get(dentry->d_inode, name, buffer, size);
962 reiserfs_read_unlock_xattrs (dentry->d_sb); 965 reiserfs_read_unlock_xattrs(dentry->d_sb);
963 reiserfs_read_unlock_xattr_i (dentry->d_inode); 966 reiserfs_read_unlock_xattr_i(dentry->d_inode);
964 return err; 967 return err;
965} 968}
966 969
967
968/* 970/*
969 * Inode operation setxattr() 971 * Inode operation setxattr()
970 * 972 *
971 * dentry->d_inode->i_sem down 973 * dentry->d_inode->i_sem down
972 */ 974 */
973int 975int
974reiserfs_setxattr (struct dentry *dentry, const char *name, const void *value, 976reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
975 size_t size, int flags) 977 size_t size, int flags)
976{ 978{
977 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 979 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
978 int err; 980 int err;
979 int lock; 981 int lock;
980 982
981 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 983 if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
982 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 984 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
983 return -EOPNOTSUPP; 985 return -EOPNOTSUPP;
984 986
985 if (IS_RDONLY (dentry->d_inode)) 987 if (IS_RDONLY(dentry->d_inode))
986 return -EROFS; 988 return -EROFS;
987 989
988 if (IS_IMMUTABLE (dentry->d_inode) || IS_APPEND (dentry->d_inode)) 990 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
989 return -EROFS; 991 return -EROFS;
990 992
991 reiserfs_write_lock_xattr_i (dentry->d_inode); 993 reiserfs_write_lock_xattr_i(dentry->d_inode);
992 lock = !has_xattr_dir (dentry->d_inode); 994 lock = !has_xattr_dir(dentry->d_inode);
993 if (lock) 995 if (lock)
994 reiserfs_write_lock_xattrs (dentry->d_sb); 996 reiserfs_write_lock_xattrs(dentry->d_sb);
995 else 997 else
996 reiserfs_read_lock_xattrs (dentry->d_sb); 998 reiserfs_read_lock_xattrs(dentry->d_sb);
997 err = xah->set (dentry->d_inode, name, value, size, flags); 999 err = xah->set(dentry->d_inode, name, value, size, flags);
998 if (lock) 1000 if (lock)
999 reiserfs_write_unlock_xattrs (dentry->d_sb); 1001 reiserfs_write_unlock_xattrs(dentry->d_sb);
1000 else 1002 else
1001 reiserfs_read_unlock_xattrs (dentry->d_sb); 1003 reiserfs_read_unlock_xattrs(dentry->d_sb);
1002 reiserfs_write_unlock_xattr_i (dentry->d_inode); 1004 reiserfs_write_unlock_xattr_i(dentry->d_inode);
1003 return err; 1005 return err;
1004} 1006}
1005 1007
1006/* 1008/*
@@ -1008,344 +1010,343 @@ reiserfs_setxattr (struct dentry *dentry, const char *name, const void *value,
1008 * 1010 *
1009 * dentry->d_inode->i_sem down 1011 * dentry->d_inode->i_sem down
1010 */ 1012 */
1011int 1013int reiserfs_removexattr(struct dentry *dentry, const char *name)
1012reiserfs_removexattr (struct dentry *dentry, const char *name)
1013{ 1014{
1014 int err; 1015 int err;
1015 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 1016 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
1016 1017
1017 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 1018 if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
1018 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 1019 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
1019 return -EOPNOTSUPP; 1020 return -EOPNOTSUPP;
1020 1021
1021 if (IS_RDONLY (dentry->d_inode)) 1022 if (IS_RDONLY(dentry->d_inode))
1022 return -EROFS; 1023 return -EROFS;
1023 1024
1024 if (IS_IMMUTABLE (dentry->d_inode) || IS_APPEND (dentry->d_inode)) 1025 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
1025 return -EPERM; 1026 return -EPERM;
1026 1027
1027 reiserfs_write_lock_xattr_i (dentry->d_inode); 1028 reiserfs_write_lock_xattr_i(dentry->d_inode);
1028 reiserfs_read_lock_xattrs (dentry->d_sb); 1029 reiserfs_read_lock_xattrs(dentry->d_sb);
1029 1030
1030 /* Deletion pre-operation */ 1031 /* Deletion pre-operation */
1031 if (xah->del) { 1032 if (xah->del) {
1032 err = xah->del (dentry->d_inode, name); 1033 err = xah->del(dentry->d_inode, name);
1033 if (err) 1034 if (err)
1034 goto out; 1035 goto out;
1035 } 1036 }
1036 1037
1037 err = reiserfs_xattr_del (dentry->d_inode, name); 1038 err = reiserfs_xattr_del(dentry->d_inode, name);
1038 1039
1039 dentry->d_inode->i_ctime = CURRENT_TIME_SEC; 1040 dentry->d_inode->i_ctime = CURRENT_TIME_SEC;
1040 mark_inode_dirty (dentry->d_inode); 1041 mark_inode_dirty(dentry->d_inode);
1041 1042
1042out: 1043 out:
1043 reiserfs_read_unlock_xattrs (dentry->d_sb); 1044 reiserfs_read_unlock_xattrs(dentry->d_sb);
1044 reiserfs_write_unlock_xattr_i (dentry->d_inode); 1045 reiserfs_write_unlock_xattr_i(dentry->d_inode);
1045 return err; 1046 return err;
1046} 1047}
1047 1048
1048
1049/* This is what filldir will use: 1049/* This is what filldir will use:
1050 * r_pos will always contain the amount of space required for the entire 1050 * r_pos will always contain the amount of space required for the entire
1051 * list. If r_pos becomes larger than r_size, we need more space and we 1051 * list. If r_pos becomes larger than r_size, we need more space and we
1052 * return an error indicating this. If r_pos is less than r_size, then we've 1052 * return an error indicating this. If r_pos is less than r_size, then we've
1053 * filled the buffer successfully and we return success */ 1053 * filled the buffer successfully and we return success */
1054struct reiserfs_listxattr_buf { 1054struct reiserfs_listxattr_buf {
1055 int r_pos; 1055 int r_pos;
1056 int r_size; 1056 int r_size;
1057 char *r_buf; 1057 char *r_buf;
1058 struct inode *r_inode; 1058 struct inode *r_inode;
1059}; 1059};
1060 1060
1061static int 1061static int
1062reiserfs_listxattr_filler (void *buf, const char *name, int namelen, 1062reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
1063 loff_t offset, ino_t ino, unsigned int d_type) 1063 loff_t offset, ino_t ino, unsigned int d_type)
1064{ 1064{
1065 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; 1065 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf;
1066 int len = 0; 1066 int len = 0;
1067 if (name[0] != '.' || (namelen != 1 && (name[1] != '.' || namelen != 2))) { 1067 if (name[0] != '.'
1068 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 1068 || (namelen != 1 && (name[1] != '.' || namelen != 2))) {
1069 if (!xah) return 0; /* Unsupported xattr name, skip it */ 1069 struct reiserfs_xattr_handler *xah =
1070 1070 find_xattr_handler_prefix(name);
1071 /* We call ->list() twice because the operation isn't required to just 1071 if (!xah)
1072 * return the name back - we want to make sure we have enough space */ 1072 return 0; /* Unsupported xattr name, skip it */
1073 len += xah->list (b->r_inode, name, namelen, NULL); 1073
1074 1074 /* We call ->list() twice because the operation isn't required to just
1075 if (len) { 1075 * return the name back - we want to make sure we have enough space */
1076 if (b->r_pos + len + 1 <= b->r_size) { 1076 len += xah->list(b->r_inode, name, namelen, NULL);
1077 char *p = b->r_buf + b->r_pos; 1077
1078 p += xah->list (b->r_inode, name, namelen, p); 1078 if (len) {
1079 *p++ = '\0'; 1079 if (b->r_pos + len + 1 <= b->r_size) {
1080 } 1080 char *p = b->r_buf + b->r_pos;
1081 b->r_pos += len + 1; 1081 p += xah->list(b->r_inode, name, namelen, p);
1082 } 1082 *p++ = '\0';
1083 } 1083 }
1084 1084 b->r_pos += len + 1;
1085 return 0; 1085 }
1086 }
1087
1088 return 0;
1086} 1089}
1090
1087/* 1091/*
1088 * Inode operation listxattr() 1092 * Inode operation listxattr()
1089 * 1093 *
1090 * Preliminary locking: we down dentry->d_inode->i_sem 1094 * Preliminary locking: we down dentry->d_inode->i_sem
1091 */ 1095 */
1092ssize_t 1096ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1093reiserfs_listxattr (struct dentry *dentry, char *buffer, size_t size)
1094{ 1097{
1095 struct file *fp; 1098 struct file *fp;
1096 struct dentry *dir; 1099 struct dentry *dir;
1097 int err = 0; 1100 int err = 0;
1098 struct reiserfs_listxattr_buf buf; 1101 struct reiserfs_listxattr_buf buf;
1099 1102
1100 if (!dentry->d_inode) 1103 if (!dentry->d_inode)
1101 return -EINVAL; 1104 return -EINVAL;
1102 1105
1103 if (!reiserfs_xattrs(dentry->d_sb) || 1106 if (!reiserfs_xattrs(dentry->d_sb) ||
1104 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 1107 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
1105 return -EOPNOTSUPP; 1108 return -EOPNOTSUPP;
1106 1109
1107 reiserfs_read_lock_xattr_i (dentry->d_inode); 1110 reiserfs_read_lock_xattr_i(dentry->d_inode);
1108 reiserfs_read_lock_xattrs (dentry->d_sb); 1111 reiserfs_read_lock_xattrs(dentry->d_sb);
1109 dir = open_xa_dir (dentry->d_inode, FL_READONLY); 1112 dir = open_xa_dir(dentry->d_inode, FL_READONLY);
1110 reiserfs_read_unlock_xattrs (dentry->d_sb); 1113 reiserfs_read_unlock_xattrs(dentry->d_sb);
1111 if (IS_ERR (dir)) { 1114 if (IS_ERR(dir)) {
1112 err = PTR_ERR (dir); 1115 err = PTR_ERR(dir);
1113 if (err == -ENODATA) 1116 if (err == -ENODATA)
1114 err = 0; /* Not an error if there aren't any xattrs */ 1117 err = 0; /* Not an error if there aren't any xattrs */
1115 goto out; 1118 goto out;
1116 } 1119 }
1117 1120
1118 fp = dentry_open (dir, NULL, O_RDWR); 1121 fp = dentry_open(dir, NULL, O_RDWR);
1119 if (IS_ERR (fp)) { 1122 if (IS_ERR(fp)) {
1120 err = PTR_ERR (fp); 1123 err = PTR_ERR(fp);
1121 /* dentry_open dputs the dentry if it fails */ 1124 /* dentry_open dputs the dentry if it fails */
1122 goto out; 1125 goto out;
1123 } 1126 }
1124 1127
1125 buf.r_buf = buffer; 1128 buf.r_buf = buffer;
1126 buf.r_size = buffer ? size : 0; 1129 buf.r_size = buffer ? size : 0;
1127 buf.r_pos = 0; 1130 buf.r_pos = 0;
1128 buf.r_inode = dentry->d_inode; 1131 buf.r_inode = dentry->d_inode;
1129 1132
1130 REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; 1133 REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir;
1131 1134
1132 err = xattr_readdir (fp, reiserfs_listxattr_filler, &buf); 1135 err = xattr_readdir(fp, reiserfs_listxattr_filler, &buf);
1133 if (err) 1136 if (err)
1134 goto out_dir; 1137 goto out_dir;
1135 1138
1136 if (buf.r_pos > buf.r_size && buffer != NULL) 1139 if (buf.r_pos > buf.r_size && buffer != NULL)
1137 err = -ERANGE; 1140 err = -ERANGE;
1138 else 1141 else
1139 err = buf.r_pos; 1142 err = buf.r_pos;
1140 1143
1141out_dir: 1144 out_dir:
1142 fput(fp); 1145 fput(fp);
1143 1146
1144out: 1147 out:
1145 reiserfs_read_unlock_xattr_i (dentry->d_inode); 1148 reiserfs_read_unlock_xattr_i(dentry->d_inode);
1146 return err; 1149 return err;
1147} 1150}
1148 1151
1149/* This is the implementation for the xattr plugin infrastructure */ 1152/* This is the implementation for the xattr plugin infrastructure */
1150static struct list_head xattr_handlers = LIST_HEAD_INIT (xattr_handlers); 1153static struct list_head xattr_handlers = LIST_HEAD_INIT(xattr_handlers);
1151static DEFINE_RWLOCK(handler_lock); 1154static DEFINE_RWLOCK(handler_lock);
1152 1155
1153static struct reiserfs_xattr_handler * 1156static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
1154find_xattr_handler_prefix (const char *prefix) 1157 *prefix)
1155{ 1158{
1156 struct reiserfs_xattr_handler *xah = NULL; 1159 struct reiserfs_xattr_handler *xah = NULL;
1157 struct list_head *p; 1160 struct list_head *p;
1158 1161
1159 read_lock (&handler_lock); 1162 read_lock(&handler_lock);
1160 list_for_each (p, &xattr_handlers) { 1163 list_for_each(p, &xattr_handlers) {
1161 xah = list_entry (p, struct reiserfs_xattr_handler, handlers); 1164 xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1162 if (strncmp (xah->prefix, prefix, strlen (xah->prefix)) == 0) 1165 if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0)
1163 break; 1166 break;
1164 xah = NULL; 1167 xah = NULL;
1165 } 1168 }
1166 1169
1167 read_unlock (&handler_lock); 1170 read_unlock(&handler_lock);
1168 return xah; 1171 return xah;
1169} 1172}
1170 1173
1171static void 1174static void __unregister_handlers(void)
1172__unregister_handlers (void)
1173{ 1175{
1174 struct reiserfs_xattr_handler *xah; 1176 struct reiserfs_xattr_handler *xah;
1175 struct list_head *p, *tmp; 1177 struct list_head *p, *tmp;
1176 1178
1177 list_for_each_safe (p, tmp, &xattr_handlers) { 1179 list_for_each_safe(p, tmp, &xattr_handlers) {
1178 xah = list_entry (p, struct reiserfs_xattr_handler, handlers); 1180 xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1179 if (xah->exit) 1181 if (xah->exit)
1180 xah->exit(); 1182 xah->exit();
1181 1183
1182 list_del_init (p); 1184 list_del_init(p);
1183 } 1185 }
1184 INIT_LIST_HEAD (&xattr_handlers); 1186 INIT_LIST_HEAD(&xattr_handlers);
1185} 1187}
1186 1188
1187int __init 1189int __init reiserfs_xattr_register_handlers(void)
1188reiserfs_xattr_register_handlers (void)
1189{ 1190{
1190 int err = 0; 1191 int err = 0;
1191 struct reiserfs_xattr_handler *xah; 1192 struct reiserfs_xattr_handler *xah;
1192 struct list_head *p; 1193 struct list_head *p;
1193 1194
1194 write_lock (&handler_lock); 1195 write_lock(&handler_lock);
1195 1196
1196 /* If we're already initialized, nothing to do */ 1197 /* If we're already initialized, nothing to do */
1197 if (!list_empty (&xattr_handlers)) { 1198 if (!list_empty(&xattr_handlers)) {
1198 write_unlock (&handler_lock); 1199 write_unlock(&handler_lock);
1199 return 0; 1200 return 0;
1200 } 1201 }
1201 1202
1202 /* Add the handlers */ 1203 /* Add the handlers */
1203 list_add_tail (&user_handler.handlers, &xattr_handlers); 1204 list_add_tail(&user_handler.handlers, &xattr_handlers);
1204 list_add_tail (&trusted_handler.handlers, &xattr_handlers); 1205 list_add_tail(&trusted_handler.handlers, &xattr_handlers);
1205#ifdef CONFIG_REISERFS_FS_SECURITY 1206#ifdef CONFIG_REISERFS_FS_SECURITY
1206 list_add_tail (&security_handler.handlers, &xattr_handlers); 1207 list_add_tail(&security_handler.handlers, &xattr_handlers);
1207#endif 1208#endif
1208#ifdef CONFIG_REISERFS_FS_POSIX_ACL 1209#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1209 list_add_tail (&posix_acl_access_handler.handlers, &xattr_handlers); 1210 list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers);
1210 list_add_tail (&posix_acl_default_handler.handlers, &xattr_handlers); 1211 list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers);
1211#endif 1212#endif
1212 1213
1213 /* Run initializers, if available */ 1214 /* Run initializers, if available */
1214 list_for_each (p, &xattr_handlers) { 1215 list_for_each(p, &xattr_handlers) {
1215 xah = list_entry (p, struct reiserfs_xattr_handler, handlers); 1216 xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1216 if (xah->init) { 1217 if (xah->init) {
1217 err = xah->init (); 1218 err = xah->init();
1218 if (err) { 1219 if (err) {
1219 list_del_init (p); 1220 list_del_init(p);
1220 break; 1221 break;
1221 } 1222 }
1222 } 1223 }
1223 } 1224 }
1224 1225
1225 /* Clean up other handlers, if any failed */ 1226 /* Clean up other handlers, if any failed */
1226 if (err) 1227 if (err)
1227 __unregister_handlers (); 1228 __unregister_handlers();
1228 1229
1229 write_unlock (&handler_lock); 1230 write_unlock(&handler_lock);
1230 return err; 1231 return err;
1231} 1232}
1232 1233
1233void 1234void reiserfs_xattr_unregister_handlers(void)
1234reiserfs_xattr_unregister_handlers (void)
1235{ 1235{
1236 write_lock (&handler_lock); 1236 write_lock(&handler_lock);
1237 __unregister_handlers (); 1237 __unregister_handlers();
1238 write_unlock (&handler_lock); 1238 write_unlock(&handler_lock);
1239} 1239}
1240 1240
1241/* This will catch lookups from the fs root to .reiserfs_priv */ 1241/* This will catch lookups from the fs root to .reiserfs_priv */
1242static int 1242static int
1243xattr_lookup_poison (struct dentry *dentry, struct qstr *q1, struct qstr *name) 1243xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
1244{ 1244{
1245 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; 1245 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
1246 if (name->len == priv_root->d_name.len && 1246 if (name->len == priv_root->d_name.len &&
1247 name->hash == priv_root->d_name.hash && 1247 name->hash == priv_root->d_name.hash &&
1248 !memcmp (name->name, priv_root->d_name.name, name->len)) { 1248 !memcmp(name->name, priv_root->d_name.name, name->len)) {
1249 return -ENOENT; 1249 return -ENOENT;
1250 } else if (q1->len == name->len && 1250 } else if (q1->len == name->len &&
1251 !memcmp(q1->name, name->name, name->len)) 1251 !memcmp(q1->name, name->name, name->len))
1252 return 0; 1252 return 0;
1253 return 1; 1253 return 1;
1254} 1254}
1255 1255
1256static struct dentry_operations xattr_lookup_poison_ops = { 1256static struct dentry_operations xattr_lookup_poison_ops = {
1257 .d_compare = xattr_lookup_poison, 1257 .d_compare = xattr_lookup_poison,
1258}; 1258};
1259 1259
1260
1261/* We need to take a copy of the mount flags since things like 1260/* We need to take a copy of the mount flags since things like
1262 * MS_RDONLY don't get set until *after* we're called. 1261 * MS_RDONLY don't get set until *after* we're called.
1263 * mount_flags != mount_options */ 1262 * mount_flags != mount_options */
1264int 1263int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1265reiserfs_xattr_init (struct super_block *s, int mount_flags)
1266{ 1264{
1267 int err = 0; 1265 int err = 0;
1268 1266
1269 /* We need generation numbers to ensure that the oid mapping is correct 1267 /* We need generation numbers to ensure that the oid mapping is correct
1270 * v3.5 filesystems don't have them. */ 1268 * v3.5 filesystems don't have them. */
1271 if (!old_format_only (s)) { 1269 if (!old_format_only(s)) {
1272 set_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); 1270 set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1273 } else if (reiserfs_xattrs_optional (s)) { 1271 } else if (reiserfs_xattrs_optional(s)) {
1274 /* Old format filesystem, but optional xattrs have been enabled 1272 /* Old format filesystem, but optional xattrs have been enabled
1275 * at mount time. Error out. */ 1273 * at mount time. Error out. */
1276 reiserfs_warning (s, "xattrs/ACLs not supported on pre v3.6 " 1274 reiserfs_warning(s, "xattrs/ACLs not supported on pre v3.6 "
1277 "format filesystem. Failing mount."); 1275 "format filesystem. Failing mount.");
1278 err = -EOPNOTSUPP; 1276 err = -EOPNOTSUPP;
1279 goto error; 1277 goto error;
1280 } else { 1278 } else {
1281 /* Old format filesystem, but no optional xattrs have been enabled. This 1279 /* Old format filesystem, but no optional xattrs have been enabled. This
1282 * means we silently disable xattrs on the filesystem. */ 1280 * means we silently disable xattrs on the filesystem. */
1283 clear_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); 1281 clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1284 } 1282 }
1285 1283
1286 /* If we don't have the privroot located yet - go find it */ 1284 /* If we don't have the privroot located yet - go find it */
1287 if (reiserfs_xattrs (s) && !REISERFS_SB(s)->priv_root) { 1285 if (reiserfs_xattrs(s) && !REISERFS_SB(s)->priv_root) {
1288 struct dentry *dentry; 1286 struct dentry *dentry;
1289 dentry = lookup_one_len (PRIVROOT_NAME, s->s_root, 1287 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
1290 strlen (PRIVROOT_NAME)); 1288 strlen(PRIVROOT_NAME));
1291 if (!IS_ERR (dentry)) { 1289 if (!IS_ERR(dentry)) {
1292 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { 1290 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
1293 struct inode *inode = dentry->d_parent->d_inode; 1291 struct inode *inode = dentry->d_parent->d_inode;
1294 down (&inode->i_sem); 1292 down(&inode->i_sem);
1295 err = inode->i_op->mkdir (inode, dentry, 0700); 1293 err = inode->i_op->mkdir(inode, dentry, 0700);
1296 up (&inode->i_sem); 1294 up(&inode->i_sem);
1297 if (err) { 1295 if (err) {
1298 dput (dentry); 1296 dput(dentry);
1299 dentry = NULL; 1297 dentry = NULL;
1300 } 1298 }
1301 1299
1302 if (dentry && dentry->d_inode) 1300 if (dentry && dentry->d_inode)
1303 reiserfs_warning (s, "Created %s on %s - reserved for " 1301 reiserfs_warning(s,
1304 "xattr storage.", PRIVROOT_NAME, 1302 "Created %s on %s - reserved for "
1305 reiserfs_bdevname (inode->i_sb)); 1303 "xattr storage.",
1306 } else if (!dentry->d_inode) { 1304 PRIVROOT_NAME,
1307 dput (dentry); 1305 reiserfs_bdevname
1308 dentry = NULL; 1306 (inode->i_sb));
1309 } 1307 } else if (!dentry->d_inode) {
1310 } else 1308 dput(dentry);
1311 err = PTR_ERR (dentry); 1309 dentry = NULL;
1312 1310 }
1313 if (!err && dentry) { 1311 } else
1314 s->s_root->d_op = &xattr_lookup_poison_ops; 1312 err = PTR_ERR(dentry);
1315 reiserfs_mark_inode_private (dentry->d_inode); 1313
1316 REISERFS_SB(s)->priv_root = dentry; 1314 if (!err && dentry) {
1317 } else if (!(mount_flags & MS_RDONLY)) { /* xattrs are unavailable */ 1315 s->s_root->d_op = &xattr_lookup_poison_ops;
1318 /* If we're read-only it just means that the dir hasn't been 1316 reiserfs_mark_inode_private(dentry->d_inode);
1319 * created. Not an error -- just no xattrs on the fs. We'll 1317 REISERFS_SB(s)->priv_root = dentry;
1320 * check again if we go read-write */ 1318 } else if (!(mount_flags & MS_RDONLY)) { /* xattrs are unavailable */
1321 reiserfs_warning (s, "xattrs/ACLs enabled and couldn't " 1319 /* If we're read-only it just means that the dir hasn't been
1322 "find/create .reiserfs_priv. Failing mount."); 1320 * created. Not an error -- just no xattrs on the fs. We'll
1323 err = -EOPNOTSUPP; 1321 * check again if we go read-write */
1324 } 1322 reiserfs_warning(s, "xattrs/ACLs enabled and couldn't "
1325 } 1323 "find/create .reiserfs_priv. Failing mount.");
1326 1324 err = -EOPNOTSUPP;
1327error: 1325 }
1328 /* This is only nonzero if there was an error initializing the xattr 1326 }
1329 * directory or if there is a condition where we don't support them. */ 1327
1330 if (err) { 1328 error:
1331 clear_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); 1329 /* This is only nonzero if there was an error initializing the xattr
1332 clear_bit (REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); 1330 * directory or if there is a condition where we don't support them. */
1333 clear_bit (REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); 1331 if (err) {
1334 } 1332 clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1335 1333 clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
1336 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ 1334 clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
1337 s->s_flags = s->s_flags & ~MS_POSIXACL; 1335 }
1338 if (reiserfs_posixacl (s)) 1336
1339 s->s_flags |= MS_POSIXACL; 1337 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
1340 1338 s->s_flags = s->s_flags & ~MS_POSIXACL;
1341 return err; 1339 if (reiserfs_posixacl(s))
1340 s->s_flags |= MS_POSIXACL;
1341
1342 return err;
1342} 1343}
1343 1344
1344static int 1345static int
1345__reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, 1346__reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
1346 int need_lock) 1347 int need_lock)
1347{ 1348{
1348 umode_t mode = inode->i_mode; 1349 umode_t mode = inode->i_mode;
1349 1350
1350 if (mask & MAY_WRITE) { 1351 if (mask & MAY_WRITE) {
1351 /* 1352 /*
@@ -1363,50 +1364,50 @@ __reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd,
1363 } 1364 }
1364 1365
1365 /* We don't do permission checks on the internal objects. 1366 /* We don't do permission checks on the internal objects.
1366 * Permissions are determined by the "owning" object. */ 1367 * Permissions are determined by the "owning" object. */
1367 if (is_reiserfs_priv_object (inode)) 1368 if (is_reiserfs_priv_object(inode))
1368 return 0; 1369 return 0;
1369 1370
1370 if (current->fsuid == inode->i_uid) { 1371 if (current->fsuid == inode->i_uid) {
1371 mode >>= 6; 1372 mode >>= 6;
1372#ifdef CONFIG_REISERFS_FS_POSIX_ACL 1373#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1373 } else if (reiserfs_posixacl(inode->i_sb) && 1374 } else if (reiserfs_posixacl(inode->i_sb) &&
1374 get_inode_sd_version (inode) != STAT_DATA_V1) { 1375 get_inode_sd_version(inode) != STAT_DATA_V1) {
1375 struct posix_acl *acl; 1376 struct posix_acl *acl;
1376 1377
1377 /* ACL can't contain additional permissions if 1378 /* ACL can't contain additional permissions if
1378 the ACL_MASK entry is 0 */ 1379 the ACL_MASK entry is 0 */
1379 if (!(mode & S_IRWXG)) 1380 if (!(mode & S_IRWXG))
1380 goto check_groups; 1381 goto check_groups;
1381 1382
1382 if (need_lock) { 1383 if (need_lock) {
1383 reiserfs_read_lock_xattr_i (inode); 1384 reiserfs_read_lock_xattr_i(inode);
1384 reiserfs_read_lock_xattrs (inode->i_sb); 1385 reiserfs_read_lock_xattrs(inode->i_sb);
1386 }
1387 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
1388 if (need_lock) {
1389 reiserfs_read_unlock_xattrs(inode->i_sb);
1390 reiserfs_read_unlock_xattr_i(inode);
1385 } 1391 }
1386 acl = reiserfs_get_acl (inode, ACL_TYPE_ACCESS); 1392 if (IS_ERR(acl)) {
1387 if (need_lock) { 1393 if (PTR_ERR(acl) == -ENODATA)
1388 reiserfs_read_unlock_xattrs (inode->i_sb); 1394 goto check_groups;
1389 reiserfs_read_unlock_xattr_i (inode); 1395 return PTR_ERR(acl);
1390 } 1396 }
1391 if (IS_ERR (acl)) { 1397
1392 if (PTR_ERR (acl) == -ENODATA) 1398 if (acl) {
1393 goto check_groups; 1399 int err = posix_acl_permission(inode, acl, mask);
1394 return PTR_ERR (acl); 1400 posix_acl_release(acl);
1395 } 1401 if (err == -EACCES) {
1396 1402 goto check_capabilities;
1397 if (acl) { 1403 }
1398 int err = posix_acl_permission (inode, acl, mask); 1404 return err;
1399 posix_acl_release (acl);
1400 if (err == -EACCES) {
1401 goto check_capabilities;
1402 }
1403 return err;
1404 } else { 1405 } else {
1405 goto check_groups; 1406 goto check_groups;
1406 } 1407 }
1407#endif 1408#endif
1408 } else { 1409 } else {
1409check_groups: 1410 check_groups:
1410 if (in_group_p(inode->i_gid)) 1411 if (in_group_p(inode->i_gid))
1411 mode >>= 3; 1412 mode >>= 3;
1412 } 1413 }
@@ -1414,10 +1415,10 @@ check_groups:
1414 /* 1415 /*
1415 * If the DACs are ok we don't need any capability check. 1416 * If the DACs are ok we don't need any capability check.
1416 */ 1417 */
1417 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 1418 if (((mode & mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == mask))
1418 return 0; 1419 return 0;
1419 1420
1420check_capabilities: 1421 check_capabilities:
1421 /* 1422 /*
1422 * Read/write DACs are always overridable. 1423 * Read/write DACs are always overridable.
1423 * Executable DACs are overridable if at least one exec bit is set. 1424 * Executable DACs are overridable if at least one exec bit is set.
@@ -1437,14 +1438,13 @@ check_capabilities:
1437 return -EACCES; 1438 return -EACCES;
1438} 1439}
1439 1440
1440int 1441int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1441reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd)
1442{ 1442{
1443 return __reiserfs_permission (inode, mask, nd, 1); 1443 return __reiserfs_permission(inode, mask, nd, 1);
1444} 1444}
1445 1445
1446int 1446int
1447reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd) 1447reiserfs_permission_locked(struct inode *inode, int mask, struct nameidata *nd)
1448{ 1448{
1449 return __reiserfs_permission (inode, mask, nd, 0); 1449 return __reiserfs_permission(inode, mask, nd, 0);
1450} 1450}
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index e302071903a1..6703efa3c430 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -4,12 +4,13 @@
4#include <linux/errno.h> 4#include <linux/errno.h>
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include <linux/xattr_acl.h> 7#include <linux/posix_acl_xattr.h>
8#include <linux/reiserfs_xattr.h> 8#include <linux/reiserfs_xattr.h>
9#include <linux/reiserfs_acl.h> 9#include <linux/reiserfs_acl.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11 11
12static int reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl); 12static int reiserfs_set_acl(struct inode *inode, int type,
13 struct posix_acl *acl);
13 14
14static int 15static int
15xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) 16xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
@@ -34,14 +35,13 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
34 } else 35 } else
35 acl = NULL; 36 acl = NULL;
36 37
37 error = reiserfs_set_acl (inode, type, acl); 38 error = reiserfs_set_acl(inode, type, acl);
38 39
39release_and_out: 40 release_and_out:
40 posix_acl_release(acl); 41 posix_acl_release(acl);
41 return error; 42 return error;
42} 43}
43 44
44
45static int 45static int
46xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 46xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
47{ 47{
@@ -51,7 +51,7 @@ xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
51 if (!reiserfs_posixacl(inode->i_sb)) 51 if (!reiserfs_posixacl(inode->i_sb))
52 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
53 53
54 acl = reiserfs_get_acl (inode, type); 54 acl = reiserfs_get_acl(inode, type);
55 if (IS_ERR(acl)) 55 if (IS_ERR(acl))
56 return PTR_ERR(acl); 56 return PTR_ERR(acl);
57 if (acl == NULL) 57 if (acl == NULL)
@@ -62,12 +62,10 @@ xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
62 return error; 62 return error;
63} 63}
64 64
65
66/* 65/*
67 * Convert from filesystem to in-memory representation. 66 * Convert from filesystem to in-memory representation.
68 */ 67 */
69static struct posix_acl * 68static struct posix_acl *posix_acl_from_disk(const void *value, size_t size)
70posix_acl_from_disk(const void *value, size_t size)
71{ 69{
72 const char *end = (char *)value + size; 70 const char *end = (char *)value + size;
73 int n, count; 71 int n, count;
@@ -76,8 +74,8 @@ posix_acl_from_disk(const void *value, size_t size)
76 if (!value) 74 if (!value)
77 return NULL; 75 return NULL;
78 if (size < sizeof(reiserfs_acl_header)) 76 if (size < sizeof(reiserfs_acl_header))
79 return ERR_PTR(-EINVAL); 77 return ERR_PTR(-EINVAL);
80 if (((reiserfs_acl_header *)value)->a_version != 78 if (((reiserfs_acl_header *) value)->a_version !=
81 cpu_to_le32(REISERFS_ACL_VERSION)) 79 cpu_to_le32(REISERFS_ACL_VERSION))
82 return ERR_PTR(-EINVAL); 80 return ERR_PTR(-EINVAL);
83 value = (char *)value + sizeof(reiserfs_acl_header); 81 value = (char *)value + sizeof(reiserfs_acl_header);
@@ -89,41 +87,39 @@ posix_acl_from_disk(const void *value, size_t size)
89 acl = posix_acl_alloc(count, GFP_NOFS); 87 acl = posix_acl_alloc(count, GFP_NOFS);
90 if (!acl) 88 if (!acl)
91 return ERR_PTR(-ENOMEM); 89 return ERR_PTR(-ENOMEM);
92 for (n=0; n < count; n++) { 90 for (n = 0; n < count; n++) {
93 reiserfs_acl_entry *entry = 91 reiserfs_acl_entry *entry = (reiserfs_acl_entry *) value;
94 (reiserfs_acl_entry *)value;
95 if ((char *)value + sizeof(reiserfs_acl_entry_short) > end) 92 if ((char *)value + sizeof(reiserfs_acl_entry_short) > end)
96 goto fail; 93 goto fail;
97 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 94 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
98 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 95 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
99 switch(acl->a_entries[n].e_tag) { 96 switch (acl->a_entries[n].e_tag) {
100 case ACL_USER_OBJ: 97 case ACL_USER_OBJ:
101 case ACL_GROUP_OBJ: 98 case ACL_GROUP_OBJ:
102 case ACL_MASK: 99 case ACL_MASK:
103 case ACL_OTHER: 100 case ACL_OTHER:
104 value = (char *)value + 101 value = (char *)value +
105 sizeof(reiserfs_acl_entry_short); 102 sizeof(reiserfs_acl_entry_short);
106 acl->a_entries[n].e_id = ACL_UNDEFINED_ID; 103 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
107 break; 104 break;
108 105
109 case ACL_USER: 106 case ACL_USER:
110 case ACL_GROUP: 107 case ACL_GROUP:
111 value = (char *)value + sizeof(reiserfs_acl_entry); 108 value = (char *)value + sizeof(reiserfs_acl_entry);
112 if ((char *)value > end) 109 if ((char *)value > end)
113 goto fail;
114 acl->a_entries[n].e_id =
115 le32_to_cpu(entry->e_id);
116 break;
117
118 default:
119 goto fail; 110 goto fail;
111 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
112 break;
113
114 default:
115 goto fail;
120 } 116 }
121 } 117 }
122 if (value != end) 118 if (value != end)
123 goto fail; 119 goto fail;
124 return acl; 120 return acl;
125 121
126fail: 122 fail:
127 posix_acl_release(acl); 123 posix_acl_release(acl);
128 return ERR_PTR(-EINVAL); 124 return ERR_PTR(-EINVAL);
129} 125}
@@ -131,46 +127,46 @@ fail:
131/* 127/*
132 * Convert from in-memory to filesystem representation. 128 * Convert from in-memory to filesystem representation.
133 */ 129 */
134static void * 130static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
135posix_acl_to_disk(const struct posix_acl *acl, size_t *size)
136{ 131{
137 reiserfs_acl_header *ext_acl; 132 reiserfs_acl_header *ext_acl;
138 char *e; 133 char *e;
139 int n; 134 int n;
140 135
141 *size = reiserfs_acl_size(acl->a_count); 136 *size = reiserfs_acl_size(acl->a_count);
142 ext_acl = (reiserfs_acl_header *)kmalloc(sizeof(reiserfs_acl_header) + 137 ext_acl = (reiserfs_acl_header *) kmalloc(sizeof(reiserfs_acl_header) +
143 acl->a_count * sizeof(reiserfs_acl_entry), GFP_NOFS); 138 acl->a_count *
139 sizeof(reiserfs_acl_entry),
140 GFP_NOFS);
144 if (!ext_acl) 141 if (!ext_acl)
145 return ERR_PTR(-ENOMEM); 142 return ERR_PTR(-ENOMEM);
146 ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); 143 ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION);
147 e = (char *)ext_acl + sizeof(reiserfs_acl_header); 144 e = (char *)ext_acl + sizeof(reiserfs_acl_header);
148 for (n=0; n < acl->a_count; n++) { 145 for (n = 0; n < acl->a_count; n++) {
149 reiserfs_acl_entry *entry = (reiserfs_acl_entry *)e; 146 reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e;
150 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 147 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
151 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 148 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
152 switch(acl->a_entries[n].e_tag) { 149 switch (acl->a_entries[n].e_tag) {
153 case ACL_USER: 150 case ACL_USER:
154 case ACL_GROUP: 151 case ACL_GROUP:
155 entry->e_id = 152 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
156 cpu_to_le32(acl->a_entries[n].e_id); 153 e += sizeof(reiserfs_acl_entry);
157 e += sizeof(reiserfs_acl_entry); 154 break;
158 break; 155
159 156 case ACL_USER_OBJ:
160 case ACL_USER_OBJ: 157 case ACL_GROUP_OBJ:
161 case ACL_GROUP_OBJ: 158 case ACL_MASK:
162 case ACL_MASK: 159 case ACL_OTHER:
163 case ACL_OTHER: 160 e += sizeof(reiserfs_acl_entry_short);
164 e += sizeof(reiserfs_acl_entry_short); 161 break;
165 break; 162
166 163 default:
167 default: 164 goto fail;
168 goto fail;
169 } 165 }
170 } 166 }
171 return (char *)ext_acl; 167 return (char *)ext_acl;
172 168
173fail: 169 fail:
174 kfree(ext_acl); 170 kfree(ext_acl);
175 return ERR_PTR(-EINVAL); 171 return ERR_PTR(-EINVAL);
176} 172}
@@ -181,59 +177,58 @@ fail:
181 * inode->i_sem: down 177 * inode->i_sem: down
182 * BKL held [before 2.5.x] 178 * BKL held [before 2.5.x]
183 */ 179 */
184struct posix_acl * 180struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
185reiserfs_get_acl(struct inode *inode, int type)
186{ 181{
187 char *name, *value; 182 char *name, *value;
188 struct posix_acl *acl, **p_acl; 183 struct posix_acl *acl, **p_acl;
189 size_t size; 184 size_t size;
190 int retval; 185 int retval;
191 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 186 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
192 187
193 switch (type) { 188 switch (type) {
194 case ACL_TYPE_ACCESS: 189 case ACL_TYPE_ACCESS:
195 name = XATTR_NAME_ACL_ACCESS; 190 name = POSIX_ACL_XATTR_ACCESS;
196 p_acl = &reiserfs_i->i_acl_access; 191 p_acl = &reiserfs_i->i_acl_access;
197 break; 192 break;
198 case ACL_TYPE_DEFAULT: 193 case ACL_TYPE_DEFAULT:
199 name = XATTR_NAME_ACL_DEFAULT; 194 name = POSIX_ACL_XATTR_DEFAULT;
200 p_acl = &reiserfs_i->i_acl_default; 195 p_acl = &reiserfs_i->i_acl_default;
201 break; 196 break;
202 default: 197 default:
203 return ERR_PTR (-EINVAL); 198 return ERR_PTR(-EINVAL);
204 } 199 }
205 200
206 if (IS_ERR (*p_acl)) { 201 if (IS_ERR(*p_acl)) {
207 if (PTR_ERR (*p_acl) == -ENODATA) 202 if (PTR_ERR(*p_acl) == -ENODATA)
208 return NULL; 203 return NULL;
209 } else if (*p_acl != NULL) 204 } else if (*p_acl != NULL)
210 return posix_acl_dup (*p_acl); 205 return posix_acl_dup(*p_acl);
211 206
212 size = reiserfs_xattr_get (inode, name, NULL, 0); 207 size = reiserfs_xattr_get(inode, name, NULL, 0);
213 if ((int)size < 0) { 208 if ((int)size < 0) {
214 if (size == -ENODATA || size == -ENOSYS) { 209 if (size == -ENODATA || size == -ENOSYS) {
215 *p_acl = ERR_PTR (-ENODATA); 210 *p_acl = ERR_PTR(-ENODATA);
216 return NULL; 211 return NULL;
217 } 212 }
218 return ERR_PTR (size); 213 return ERR_PTR(size);
219 } 214 }
220 215
221 value = kmalloc (size, GFP_NOFS); 216 value = kmalloc(size, GFP_NOFS);
222 if (!value) 217 if (!value)
223 return ERR_PTR (-ENOMEM); 218 return ERR_PTR(-ENOMEM);
224 219
225 retval = reiserfs_xattr_get(inode, name, value, size); 220 retval = reiserfs_xattr_get(inode, name, value, size);
226 if (retval == -ENODATA || retval == -ENOSYS) { 221 if (retval == -ENODATA || retval == -ENOSYS) {
227 /* This shouldn't actually happen as it should have 222 /* This shouldn't actually happen as it should have
228 been caught above.. but just in case */ 223 been caught above.. but just in case */
229 acl = NULL; 224 acl = NULL;
230 *p_acl = ERR_PTR (-ENODATA); 225 *p_acl = ERR_PTR(-ENODATA);
231 } else if (retval < 0) { 226 } else if (retval < 0) {
232 acl = ERR_PTR(retval); 227 acl = ERR_PTR(retval);
233 } else { 228 } else {
234 acl = posix_acl_from_disk(value, retval); 229 acl = posix_acl_from_disk(value, retval);
235 *p_acl = posix_acl_dup (acl); 230 *p_acl = posix_acl_dup(acl);
236 } 231 }
237 232
238 kfree(value); 233 kfree(value);
239 return acl; 234 return acl;
@@ -248,72 +243,72 @@ reiserfs_get_acl(struct inode *inode, int type)
248static int 243static int
249reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) 244reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
250{ 245{
251 char *name; 246 char *name;
252 void *value = NULL; 247 void *value = NULL;
253 struct posix_acl **p_acl; 248 struct posix_acl **p_acl;
254 size_t size; 249 size_t size;
255 int error; 250 int error;
256 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 251 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
257 252
258 if (S_ISLNK(inode->i_mode)) 253 if (S_ISLNK(inode->i_mode))
259 return -EOPNOTSUPP; 254 return -EOPNOTSUPP;
260 255
261 switch (type) { 256 switch (type) {
262 case ACL_TYPE_ACCESS: 257 case ACL_TYPE_ACCESS:
263 name = XATTR_NAME_ACL_ACCESS; 258 name = POSIX_ACL_XATTR_ACCESS;
264 p_acl = &reiserfs_i->i_acl_access; 259 p_acl = &reiserfs_i->i_acl_access;
265 if (acl) { 260 if (acl) {
266 mode_t mode = inode->i_mode; 261 mode_t mode = inode->i_mode;
267 error = posix_acl_equiv_mode (acl, &mode); 262 error = posix_acl_equiv_mode(acl, &mode);
268 if (error < 0) 263 if (error < 0)
269 return error; 264 return error;
270 else { 265 else {
271 inode->i_mode = mode; 266 inode->i_mode = mode;
272 if (error == 0) 267 if (error == 0)
273 acl = NULL; 268 acl = NULL;
274 } 269 }
275 } 270 }
276 break; 271 break;
277 case ACL_TYPE_DEFAULT: 272 case ACL_TYPE_DEFAULT:
278 name = XATTR_NAME_ACL_DEFAULT; 273 name = POSIX_ACL_XATTR_DEFAULT;
279 p_acl = &reiserfs_i->i_acl_default; 274 p_acl = &reiserfs_i->i_acl_default;
280 if (!S_ISDIR (inode->i_mode)) 275 if (!S_ISDIR(inode->i_mode))
281 return acl ? -EACCES : 0; 276 return acl ? -EACCES : 0;
282 break; 277 break;
283 default: 278 default:
284 return -EINVAL; 279 return -EINVAL;
285 } 280 }
286 281
287 if (acl) { 282 if (acl) {
288 value = posix_acl_to_disk(acl, &size); 283 value = posix_acl_to_disk(acl, &size);
289 if (IS_ERR(value)) 284 if (IS_ERR(value))
290 return (int)PTR_ERR(value); 285 return (int)PTR_ERR(value);
291 error = reiserfs_xattr_set(inode, name, value, size, 0); 286 error = reiserfs_xattr_set(inode, name, value, size, 0);
292 } else { 287 } else {
293 error = reiserfs_xattr_del (inode, name); 288 error = reiserfs_xattr_del(inode, name);
294 if (error == -ENODATA) { 289 if (error == -ENODATA) {
295 /* This may seem odd here, but it means that the ACL was set 290 /* This may seem odd here, but it means that the ACL was set
296 * with a value representable with mode bits. If there was 291 * with a value representable with mode bits. If there was
297 * an ACL before, reiserfs_xattr_del already dirtied the inode. 292 * an ACL before, reiserfs_xattr_del already dirtied the inode.
298 */ 293 */
299 mark_inode_dirty (inode); 294 mark_inode_dirty(inode);
300 error = 0; 295 error = 0;
301 } 296 }
302 } 297 }
303 298
304 if (value) 299 if (value)
305 kfree(value); 300 kfree(value);
306 301
307 if (!error) { 302 if (!error) {
308 /* Release the old one */ 303 /* Release the old one */
309 if (!IS_ERR (*p_acl) && *p_acl) 304 if (!IS_ERR(*p_acl) && *p_acl)
310 posix_acl_release (*p_acl); 305 posix_acl_release(*p_acl);
311 306
312 if (acl == NULL) 307 if (acl == NULL)
313 *p_acl = ERR_PTR (-ENODATA); 308 *p_acl = ERR_PTR(-ENODATA);
314 else 309 else
315 *p_acl = posix_acl_dup (acl); 310 *p_acl = posix_acl_dup(acl);
316 } 311 }
317 312
318 return error; 313 return error;
319} 314}
@@ -321,196 +316,194 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
321/* dir->i_sem: down, 316/* dir->i_sem: down,
322 * inode is new and not released into the wild yet */ 317 * inode is new and not released into the wild yet */
323int 318int
324reiserfs_inherit_default_acl (struct inode *dir, struct dentry *dentry, struct inode *inode) 319reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
320 struct inode *inode)
325{ 321{
326 struct posix_acl *acl; 322 struct posix_acl *acl;
327 int err = 0; 323 int err = 0;
328 324
329 /* ACLs only get applied to files and directories */ 325 /* ACLs only get applied to files and directories */
330 if (S_ISLNK (inode->i_mode)) 326 if (S_ISLNK(inode->i_mode))
331 return 0; 327 return 0;
332 328
333 /* ACLs can only be used on "new" objects, so if it's an old object 329 /* ACLs can only be used on "new" objects, so if it's an old object
334 * there is nothing to inherit from */ 330 * there is nothing to inherit from */
335 if (get_inode_sd_version (dir) == STAT_DATA_V1) 331 if (get_inode_sd_version(dir) == STAT_DATA_V1)
336 goto apply_umask; 332 goto apply_umask;
337 333
338 /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This 334 /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This
339 * would be useless since permissions are ignored, and a pain because 335 * would be useless since permissions are ignored, and a pain because
340 * it introduces locking cycles */ 336 * it introduces locking cycles */
341 if (is_reiserfs_priv_object (dir)) { 337 if (is_reiserfs_priv_object(dir)) {
342 reiserfs_mark_inode_private (inode); 338 reiserfs_mark_inode_private(inode);
343 goto apply_umask; 339 goto apply_umask;
344 } 340 }
345 341
346 acl = reiserfs_get_acl (dir, ACL_TYPE_DEFAULT); 342 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
347 if (IS_ERR (acl)) { 343 if (IS_ERR(acl)) {
348 if (PTR_ERR (acl) == -ENODATA) 344 if (PTR_ERR(acl) == -ENODATA)
349 goto apply_umask; 345 goto apply_umask;
350 return PTR_ERR (acl); 346 return PTR_ERR(acl);
351 } 347 }
352 348
353 if (acl) { 349 if (acl) {
354 struct posix_acl *acl_copy; 350 struct posix_acl *acl_copy;
355 mode_t mode = inode->i_mode; 351 mode_t mode = inode->i_mode;
356 int need_acl; 352 int need_acl;
357 353
358 /* Copy the default ACL to the default ACL of a new directory */ 354 /* Copy the default ACL to the default ACL of a new directory */
359 if (S_ISDIR (inode->i_mode)) { 355 if (S_ISDIR(inode->i_mode)) {
360 err = reiserfs_set_acl (inode, ACL_TYPE_DEFAULT, acl); 356 err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
361 if (err) 357 if (err)
362 goto cleanup; 358 goto cleanup;
363 } 359 }
364 360
365 /* Now we reconcile the new ACL and the mode, 361 /* Now we reconcile the new ACL and the mode,
366 potentially modifying both */ 362 potentially modifying both */
367 acl_copy = posix_acl_clone (acl, GFP_NOFS); 363 acl_copy = posix_acl_clone(acl, GFP_NOFS);
368 if (!acl_copy) { 364 if (!acl_copy) {
369 err = -ENOMEM; 365 err = -ENOMEM;
370 goto cleanup; 366 goto cleanup;
371 } 367 }
372 368
373 369 need_acl = posix_acl_create_masq(acl_copy, &mode);
374 need_acl = posix_acl_create_masq (acl_copy, &mode); 370 if (need_acl >= 0) {
375 if (need_acl >= 0) { 371 if (mode != inode->i_mode) {
376 if (mode != inode->i_mode) { 372 inode->i_mode = mode;
377 inode->i_mode = mode; 373 }
378 } 374
379 375 /* If we need an ACL.. */
380 /* If we need an ACL.. */ 376 if (need_acl > 0) {
381 if (need_acl > 0) { 377 err =
382 err = reiserfs_set_acl (inode, ACL_TYPE_ACCESS, acl_copy); 378 reiserfs_set_acl(inode, ACL_TYPE_ACCESS,
383 if (err) 379 acl_copy);
384 goto cleanup_copy; 380 if (err)
385 } 381 goto cleanup_copy;
386 } 382 }
387cleanup_copy: 383 }
388 posix_acl_release (acl_copy); 384 cleanup_copy:
389cleanup: 385 posix_acl_release(acl_copy);
390 posix_acl_release (acl); 386 cleanup:
391 } else { 387 posix_acl_release(acl);
392apply_umask: 388 } else {
393 /* no ACL, apply umask */ 389 apply_umask:
394 inode->i_mode &= ~current->fs->umask; 390 /* no ACL, apply umask */
395 } 391 inode->i_mode &= ~current->fs->umask;
396 392 }
397 return err; 393
394 return err;
398} 395}
399 396
400/* Looks up and caches the result of the default ACL. 397/* Looks up and caches the result of the default ACL.
401 * We do this so that we don't need to carry the xattr_sem into 398 * We do this so that we don't need to carry the xattr_sem into
402 * reiserfs_new_inode if we don't need to */ 399 * reiserfs_new_inode if we don't need to */
403int 400int reiserfs_cache_default_acl(struct inode *inode)
404reiserfs_cache_default_acl (struct inode *inode)
405{ 401{
406 int ret = 0; 402 int ret = 0;
407 if (reiserfs_posixacl (inode->i_sb) && 403 if (reiserfs_posixacl(inode->i_sb) && !is_reiserfs_priv_object(inode)) {
408 !is_reiserfs_priv_object (inode)) { 404 struct posix_acl *acl;
409 struct posix_acl *acl; 405 reiserfs_read_lock_xattr_i(inode);
410 reiserfs_read_lock_xattr_i (inode); 406 reiserfs_read_lock_xattrs(inode->i_sb);
411 reiserfs_read_lock_xattrs (inode->i_sb); 407 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
412 acl = reiserfs_get_acl (inode, ACL_TYPE_DEFAULT); 408 reiserfs_read_unlock_xattrs(inode->i_sb);
413 reiserfs_read_unlock_xattrs (inode->i_sb); 409 reiserfs_read_unlock_xattr_i(inode);
414 reiserfs_read_unlock_xattr_i (inode); 410 ret = acl ? 1 : 0;
415 ret = acl ? 1 : 0; 411 posix_acl_release(acl);
416 posix_acl_release (acl); 412 }
417 } 413
418 414 return ret;
419 return ret;
420} 415}
421 416
422int 417int reiserfs_acl_chmod(struct inode *inode)
423reiserfs_acl_chmod (struct inode *inode)
424{ 418{
425 struct posix_acl *acl, *clone; 419 struct posix_acl *acl, *clone;
426 int error; 420 int error;
427 421
428 if (S_ISLNK(inode->i_mode)) 422 if (S_ISLNK(inode->i_mode))
429 return -EOPNOTSUPP; 423 return -EOPNOTSUPP;
430 424
431 if (get_inode_sd_version (inode) == STAT_DATA_V1 || 425 if (get_inode_sd_version(inode) == STAT_DATA_V1 ||
432 !reiserfs_posixacl(inode->i_sb)) 426 !reiserfs_posixacl(inode->i_sb)) {
433 { 427 return 0;
434 return 0;
435 } 428 }
436 429
437 reiserfs_read_lock_xattrs (inode->i_sb); 430 reiserfs_read_lock_xattrs(inode->i_sb);
438 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 431 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
439 reiserfs_read_unlock_xattrs (inode->i_sb); 432 reiserfs_read_unlock_xattrs(inode->i_sb);
440 if (!acl) 433 if (!acl)
441 return 0; 434 return 0;
442 if (IS_ERR(acl)) 435 if (IS_ERR(acl))
443 return PTR_ERR(acl); 436 return PTR_ERR(acl);
444 clone = posix_acl_clone(acl, GFP_NOFS); 437 clone = posix_acl_clone(acl, GFP_NOFS);
445 posix_acl_release(acl); 438 posix_acl_release(acl);
446 if (!clone) 439 if (!clone)
447 return -ENOMEM; 440 return -ENOMEM;
448 error = posix_acl_chmod_masq(clone, inode->i_mode); 441 error = posix_acl_chmod_masq(clone, inode->i_mode);
449 if (!error) { 442 if (!error) {
450 int lock = !has_xattr_dir (inode); 443 int lock = !has_xattr_dir(inode);
451 reiserfs_write_lock_xattr_i (inode); 444 reiserfs_write_lock_xattr_i(inode);
452 if (lock) 445 if (lock)
453 reiserfs_write_lock_xattrs (inode->i_sb); 446 reiserfs_write_lock_xattrs(inode->i_sb);
454 else 447 else
455 reiserfs_read_lock_xattrs (inode->i_sb); 448 reiserfs_read_lock_xattrs(inode->i_sb);
456 error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); 449 error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
457 if (lock) 450 if (lock)
458 reiserfs_write_unlock_xattrs (inode->i_sb); 451 reiserfs_write_unlock_xattrs(inode->i_sb);
459 else 452 else
460 reiserfs_read_unlock_xattrs (inode->i_sb); 453 reiserfs_read_unlock_xattrs(inode->i_sb);
461 reiserfs_write_unlock_xattr_i (inode); 454 reiserfs_write_unlock_xattr_i(inode);
462 } 455 }
463 posix_acl_release(clone); 456 posix_acl_release(clone);
464 return error; 457 return error;
465} 458}
466 459
467static int 460static int
468posix_acl_access_get(struct inode *inode, const char *name, 461posix_acl_access_get(struct inode *inode, const char *name,
469 void *buffer, size_t size) 462 void *buffer, size_t size)
470{ 463{
471 if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) 464 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
472 return -EINVAL; 465 return -EINVAL;
473 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); 466 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
474} 467}
475 468
476static int 469static int
477posix_acl_access_set(struct inode *inode, const char *name, 470posix_acl_access_set(struct inode *inode, const char *name,
478 const void *value, size_t size, int flags) 471 const void *value, size_t size, int flags)
479{ 472{
480 if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) 473 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
481 return -EINVAL; 474 return -EINVAL;
482 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); 475 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
483} 476}
484 477
485static int 478static int posix_acl_access_del(struct inode *inode, const char *name)
486posix_acl_access_del (struct inode *inode, const char *name)
487{ 479{
488 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 480 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
489 struct posix_acl **acl = &reiserfs_i->i_acl_access; 481 struct posix_acl **acl = &reiserfs_i->i_acl_access;
490 if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) 482 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
491 return -EINVAL; 483 return -EINVAL;
492 if (!IS_ERR (*acl) && *acl) { 484 if (!IS_ERR(*acl) && *acl) {
493 posix_acl_release (*acl); 485 posix_acl_release(*acl);
494 *acl = ERR_PTR (-ENODATA); 486 *acl = ERR_PTR(-ENODATA);
495 } 487 }
496 488
497 return 0; 489 return 0;
498} 490}
499 491
500static int 492static int
501posix_acl_access_list (struct inode *inode, const char *name, int namelen, char *out) 493posix_acl_access_list(struct inode *inode, const char *name, int namelen,
494 char *out)
502{ 495{
503 int len = namelen; 496 int len = namelen;
504 if (!reiserfs_posixacl (inode->i_sb)) 497 if (!reiserfs_posixacl(inode->i_sb))
505 return 0; 498 return 0;
506 if (out) 499 if (out)
507 memcpy (out, name, len); 500 memcpy(out, name, len);
508 501
509 return len; 502 return len;
510} 503}
511 504
512struct reiserfs_xattr_handler posix_acl_access_handler = { 505struct reiserfs_xattr_handler posix_acl_access_handler = {
513 .prefix = XATTR_NAME_ACL_ACCESS, 506 .prefix = POSIX_ACL_XATTR_ACCESS,
514 .get = posix_acl_access_get, 507 .get = posix_acl_access_get,
515 .set = posix_acl_access_set, 508 .set = posix_acl_access_set,
516 .del = posix_acl_access_del, 509 .del = posix_acl_access_del,
@@ -518,52 +511,52 @@ struct reiserfs_xattr_handler posix_acl_access_handler = {
518}; 511};
519 512
520static int 513static int
521posix_acl_default_get (struct inode *inode, const char *name, 514posix_acl_default_get(struct inode *inode, const char *name,
522 void *buffer, size_t size) 515 void *buffer, size_t size)
523{ 516{
524 if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) 517 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
525 return -EINVAL; 518 return -EINVAL;
526 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); 519 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
527} 520}
528 521
529static int 522static int
530posix_acl_default_set(struct inode *inode, const char *name, 523posix_acl_default_set(struct inode *inode, const char *name,
531 const void *value, size_t size, int flags) 524 const void *value, size_t size, int flags)
532{ 525{
533 if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) 526 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
534 return -EINVAL; 527 return -EINVAL;
535 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); 528 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
536} 529}
537 530
538static int 531static int posix_acl_default_del(struct inode *inode, const char *name)
539posix_acl_default_del (struct inode *inode, const char *name)
540{ 532{
541 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 533 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
542 struct posix_acl **acl = &reiserfs_i->i_acl_default; 534 struct posix_acl **acl = &reiserfs_i->i_acl_default;
543 if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) 535 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
544 return -EINVAL; 536 return -EINVAL;
545 if (!IS_ERR (*acl) && *acl) { 537 if (!IS_ERR(*acl) && *acl) {
546 posix_acl_release (*acl); 538 posix_acl_release(*acl);
547 *acl = ERR_PTR (-ENODATA); 539 *acl = ERR_PTR(-ENODATA);
548 } 540 }
549 541
550 return 0; 542 return 0;
551} 543}
552 544
553static int 545static int
554posix_acl_default_list (struct inode *inode, const char *name, int namelen, char *out) 546posix_acl_default_list(struct inode *inode, const char *name, int namelen,
547 char *out)
555{ 548{
556 int len = namelen; 549 int len = namelen;
557 if (!reiserfs_posixacl (inode->i_sb)) 550 if (!reiserfs_posixacl(inode->i_sb))
558 return 0; 551 return 0;
559 if (out) 552 if (out)
560 memcpy (out, name, len); 553 memcpy(out, name, len);
561 554
562 return len; 555 return len;
563} 556}
564 557
565struct reiserfs_xattr_handler posix_acl_default_handler = { 558struct reiserfs_xattr_handler posix_acl_default_handler = {
566 .prefix = XATTR_NAME_ACL_DEFAULT, 559 .prefix = POSIX_ACL_XATTR_DEFAULT,
567 .get = posix_acl_default_get, 560 .get = posix_acl_default_get,
568 .set = posix_acl_default_set, 561 .set = posix_acl_default_set,
569 .del = posix_acl_default_del, 562 .del = posix_acl_default_del,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index e044d5117117..5e90a95ad60b 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -9,57 +9,55 @@
9#define XATTR_SECURITY_PREFIX "security." 9#define XATTR_SECURITY_PREFIX "security."
10 10
11static int 11static int
12security_get (struct inode *inode, const char *name, void *buffer, size_t size) 12security_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 13{
14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
15 return -EINVAL; 15 return -EINVAL;
16 16
17 if (is_reiserfs_priv_object(inode)) 17 if (is_reiserfs_priv_object(inode))
18 return -EPERM; 18 return -EPERM;
19 19
20 return reiserfs_xattr_get (inode, name, buffer, size); 20 return reiserfs_xattr_get(inode, name, buffer, size);
21} 21}
22 22
23static int 23static int
24security_set (struct inode *inode, const char *name, const void *buffer, 24security_set(struct inode *inode, const char *name, const void *buffer,
25 size_t size, int flags) 25 size_t size, int flags)
26{ 26{
27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
28 return -EINVAL; 28 return -EINVAL;
29 29
30 if (is_reiserfs_priv_object(inode)) 30 if (is_reiserfs_priv_object(inode))
31 return -EPERM; 31 return -EPERM;
32 32
33 return reiserfs_xattr_set (inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(inode, name, buffer, size, flags);
34} 34}
35 35
36static int 36static int security_del(struct inode *inode, const char *name)
37security_del (struct inode *inode, const char *name)
38{ 37{
39 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 38 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
40 return -EINVAL; 39 return -EINVAL;
41 40
42 if (is_reiserfs_priv_object(inode)) 41 if (is_reiserfs_priv_object(inode))
43 return -EPERM; 42 return -EPERM;
44 43
45 return 0; 44 return 0;
46} 45}
47 46
48static int 47static int
49security_list (struct inode *inode, const char *name, int namelen, char *out) 48security_list(struct inode *inode, const char *name, int namelen, char *out)
50{ 49{
51 int len = namelen; 50 int len = namelen;
52 51
53 if (is_reiserfs_priv_object(inode)) 52 if (is_reiserfs_priv_object(inode))
54 return 0; 53 return 0;
55 54
56 if (out) 55 if (out)
57 memcpy (out, name, len); 56 memcpy(out, name, len);
58 57
59 return len; 58 return len;
60} 59}
61 60
62
63struct reiserfs_xattr_handler security_handler = { 61struct reiserfs_xattr_handler security_handler = {
64 .prefix = XATTR_SECURITY_PREFIX, 62 .prefix = XATTR_SECURITY_PREFIX,
65 .get = security_get, 63 .get = security_get,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 43762197fb0a..2501f7e66ab9 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -9,69 +9,67 @@
9#define XATTR_TRUSTED_PREFIX "trusted." 9#define XATTR_TRUSTED_PREFIX "trusted."
10 10
11static int 11static int
12trusted_get (struct inode *inode, const char *name, void *buffer, size_t size) 12trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 13{
14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
15 return -EINVAL; 15 return -EINVAL;
16 16
17 if (!reiserfs_xattrs (inode->i_sb)) 17 if (!reiserfs_xattrs(inode->i_sb))
18 return -EOPNOTSUPP; 18 return -EOPNOTSUPP;
19 19
20 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 20 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
21 return -EPERM; 21 return -EPERM;
22 22
23 return reiserfs_xattr_get (inode, name, buffer, size); 23 return reiserfs_xattr_get(inode, name, buffer, size);
24} 24}
25 25
26static int 26static int
27trusted_set (struct inode *inode, const char *name, const void *buffer, 27trusted_set(struct inode *inode, const char *name, const void *buffer,
28 size_t size, int flags) 28 size_t size, int flags)
29{ 29{
30 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 30 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
31 return -EINVAL; 31 return -EINVAL;
32 32
33 if (!reiserfs_xattrs (inode->i_sb)) 33 if (!reiserfs_xattrs(inode->i_sb))
34 return -EOPNOTSUPP; 34 return -EOPNOTSUPP;
35 35
36 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 36 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
37 return -EPERM; 37 return -EPERM;
38 38
39 return reiserfs_xattr_set (inode, name, buffer, size, flags); 39 return reiserfs_xattr_set(inode, name, buffer, size, flags);
40} 40}
41 41
42static int 42static int trusted_del(struct inode *inode, const char *name)
43trusted_del (struct inode *inode, const char *name)
44{ 43{
45 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 44 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
46 return -EINVAL; 45 return -EINVAL;
47 46
48 if (!reiserfs_xattrs (inode->i_sb)) 47 if (!reiserfs_xattrs(inode->i_sb))
49 return -EOPNOTSUPP; 48 return -EOPNOTSUPP;
50 49
51 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 50 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
52 return -EPERM; 51 return -EPERM;
53 52
54 return 0; 53 return 0;
55} 54}
56 55
57static int 56static int
58trusted_list (struct inode *inode, const char *name, int namelen, char *out) 57trusted_list(struct inode *inode, const char *name, int namelen, char *out)
59{ 58{
60 int len = namelen; 59 int len = namelen;
61 60
62 if (!reiserfs_xattrs (inode->i_sb)) 61 if (!reiserfs_xattrs(inode->i_sb))
63 return 0; 62 return 0;
64 63
65 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 64 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
66 return 0; 65 return 0;
67 66
68 if (out) 67 if (out)
69 memcpy (out, name, len); 68 memcpy(out, name, len);
70 69
71 return len; 70 return len;
72} 71}
73 72
74
75struct reiserfs_xattr_handler trusted_handler = { 73struct reiserfs_xattr_handler trusted_handler = {
76 .prefix = XATTR_TRUSTED_PREFIX, 74 .prefix = XATTR_TRUSTED_PREFIX,
77 .get = trusted_get, 75 .get = trusted_get,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 0772806466a8..51458048ca66 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -13,81 +13,80 @@
13#define XATTR_USER_PREFIX "user." 13#define XATTR_USER_PREFIX "user."
14 14
15static int 15static int
16user_get (struct inode *inode, const char *name, void *buffer, size_t size) 16user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 17{
18 18
19 int error; 19 int error;
20 20
21 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 21 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
22 return -EINVAL; 22 return -EINVAL;
23 23
24 if (!reiserfs_xattrs_user (inode->i_sb)) 24 if (!reiserfs_xattrs_user(inode->i_sb))
25 return -EOPNOTSUPP; 25 return -EOPNOTSUPP;
26 26
27 error = reiserfs_permission_locked (inode, MAY_READ, NULL); 27 error = reiserfs_permission_locked(inode, MAY_READ, NULL);
28 if (error) 28 if (error)
29 return error; 29 return error;
30 30
31 return reiserfs_xattr_get (inode, name, buffer, size); 31 return reiserfs_xattr_get(inode, name, buffer, size);
32} 32}
33 33
34static int 34static int
35user_set (struct inode *inode, const char *name, const void *buffer, 35user_set(struct inode *inode, const char *name, const void *buffer,
36 size_t size, int flags) 36 size_t size, int flags)
37{ 37{
38 38
39 int error; 39 int error;
40 40
41 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 41 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
42 return -EINVAL; 42 return -EINVAL;
43 43
44 if (!reiserfs_xattrs_user (inode->i_sb)) 44 if (!reiserfs_xattrs_user(inode->i_sb))
45 return -EOPNOTSUPP; 45 return -EOPNOTSUPP;
46 46
47 if (!S_ISREG (inode->i_mode) && 47 if (!S_ISREG(inode->i_mode) &&
48 (!S_ISDIR (inode->i_mode) || inode->i_mode & S_ISVTX)) 48 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
49 return -EPERM; 49 return -EPERM;
50 50
51 error = reiserfs_permission_locked (inode, MAY_WRITE, NULL); 51 error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
52 if (error) 52 if (error)
53 return error; 53 return error;
54 54
55 return reiserfs_xattr_set (inode, name, buffer, size, flags); 55 return reiserfs_xattr_set(inode, name, buffer, size, flags);
56} 56}
57 57
58static int 58static int user_del(struct inode *inode, const char *name)
59user_del (struct inode *inode, const char *name)
60{ 59{
61 int error; 60 int error;
62 61
63 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 62 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
64 return -EINVAL; 63 return -EINVAL;
65 64
66 if (!reiserfs_xattrs_user (inode->i_sb)) 65 if (!reiserfs_xattrs_user(inode->i_sb))
67 return -EOPNOTSUPP; 66 return -EOPNOTSUPP;
68 67
69 if (!S_ISREG (inode->i_mode) && 68 if (!S_ISREG(inode->i_mode) &&
70 (!S_ISDIR (inode->i_mode) || inode->i_mode & S_ISVTX)) 69 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
71 return -EPERM; 70 return -EPERM;
72 71
73 error = reiserfs_permission_locked (inode, MAY_WRITE, NULL); 72 error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
74 if (error) 73 if (error)
75 return error; 74 return error;
76 75
77 return 0; 76 return 0;
78} 77}
79 78
80static int 79static int
81user_list (struct inode *inode, const char *name, int namelen, char *out) 80user_list(struct inode *inode, const char *name, int namelen, char *out)
82{ 81{
83 int len = namelen; 82 int len = namelen;
84 if (!reiserfs_xattrs_user (inode->i_sb)) 83 if (!reiserfs_xattrs_user(inode->i_sb))
85 return 0; 84 return 0;
86 85
87 if (out) 86 if (out)
88 memcpy (out, name, len); 87 memcpy(out, name, len);
89 88
90 return len; 89 return len;
91} 90}
92 91
93struct reiserfs_xattr_handler user_handler = { 92struct reiserfs_xattr_handler user_handler = {
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
index 8b069e06433d..0c64bc3a0127 100644
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -34,7 +34,7 @@ int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
34 return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname); 34 return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
35} 35}
36 36
37static int smb_follow_link(struct dentry *dentry, struct nameidata *nd) 37static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
38{ 38{
39 char *link = __getname(); 39 char *link = __getname();
40 DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry)); 40 DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
@@ -52,10 +52,10 @@ static int smb_follow_link(struct dentry *dentry, struct nameidata *nd)
52 } 52 }
53 } 53 }
54 nd_set_link(nd, link); 54 nd_set_link(nd, link);
55 return 0; 55 return NULL;
56} 56}
57 57
58static void smb_put_link(struct dentry *dentry, struct nameidata *nd) 58static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
59{ 59{
60 char *s = nd_get_link(nd); 60 char *s = nd_get_link(nd);
61 if (!IS_ERR(s)) 61 if (!IS_ERR(s))
diff --git a/fs/super.c b/fs/super.c
index 573bcc81bb82..6e57ee252e14 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
341 */ 341 */
342void sync_supers(void) 342void sync_supers(void)
343{ 343{
344 struct super_block * sb; 344 struct super_block *sb;
345restart: 345
346 spin_lock(&sb_lock); 346 spin_lock(&sb_lock);
347 sb = sb_entry(super_blocks.next); 347restart:
348 while (sb != sb_entry(&super_blocks)) 348 list_for_each_entry(sb, &super_blocks, s_list) {
349 if (sb->s_dirt) { 349 if (sb->s_dirt) {
350 sb->s_count++; 350 sb->s_count++;
351 spin_unlock(&sb_lock); 351 spin_unlock(&sb_lock);
352 down_read(&sb->s_umount); 352 down_read(&sb->s_umount);
353 write_super(sb); 353 write_super(sb);
354 drop_super(sb); 354 up_read(&sb->s_umount);
355 goto restart; 355 spin_lock(&sb_lock);
356 } else 356 if (__put_super_and_need_restart(sb))
357 sb = sb_entry(sb->s_list.next); 357 goto restart;
358 }
359 }
358 spin_unlock(&sb_lock); 360 spin_unlock(&sb_lock);
359} 361}
360 362
@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
381 383
382 down(&mutex); /* Could be down_interruptible */ 384 down(&mutex); /* Could be down_interruptible */
383 spin_lock(&sb_lock); 385 spin_lock(&sb_lock);
384 for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); 386 list_for_each_entry(sb, &super_blocks, s_list) {
385 sb = sb_entry(sb->s_list.next)) {
386 if (!sb->s_op->sync_fs) 387 if (!sb->s_op->sync_fs)
387 continue; 388 continue;
388 if (sb->s_flags & MS_RDONLY) 389 if (sb->s_flags & MS_RDONLY)
389 continue; 390 continue;
390 sb->s_need_sync_fs = 1; 391 sb->s_need_sync_fs = 1;
391 } 392 }
392 spin_unlock(&sb_lock);
393 393
394restart: 394restart:
395 spin_lock(&sb_lock); 395 list_for_each_entry(sb, &super_blocks, s_list) {
396 for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
397 sb = sb_entry(sb->s_list.next)) {
398 if (!sb->s_need_sync_fs) 396 if (!sb->s_need_sync_fs)
399 continue; 397 continue;
400 sb->s_need_sync_fs = 0; 398 sb->s_need_sync_fs = 0;
@@ -405,8 +403,11 @@ restart:
405 down_read(&sb->s_umount); 403 down_read(&sb->s_umount);
406 if (sb->s_root && (wait || sb->s_dirt)) 404 if (sb->s_root && (wait || sb->s_dirt))
407 sb->s_op->sync_fs(sb, wait); 405 sb->s_op->sync_fs(sb, wait);
408 drop_super(sb); 406 up_read(&sb->s_umount);
409 goto restart; 407 /* restart only when sb is no longer on the list */
408 spin_lock(&sb_lock);
409 if (__put_super_and_need_restart(sb))
410 goto restart;
410 } 411 }
411 spin_unlock(&sb_lock); 412 spin_unlock(&sb_lock);
412 up(&mutex); 413 up(&mutex);
@@ -422,21 +423,25 @@ restart:
422 423
423struct super_block * get_super(struct block_device *bdev) 424struct super_block * get_super(struct block_device *bdev)
424{ 425{
425 struct list_head *p; 426 struct super_block *sb;
427
426 if (!bdev) 428 if (!bdev)
427 return NULL; 429 return NULL;
428rescan: 430
429 spin_lock(&sb_lock); 431 spin_lock(&sb_lock);
430 list_for_each(p, &super_blocks) { 432rescan:
431 struct super_block *s = sb_entry(p); 433 list_for_each_entry(sb, &super_blocks, s_list) {
432 if (s->s_bdev == bdev) { 434 if (sb->s_bdev == bdev) {
433 s->s_count++; 435 sb->s_count++;
434 spin_unlock(&sb_lock); 436 spin_unlock(&sb_lock);
435 down_read(&s->s_umount); 437 down_read(&sb->s_umount);
436 if (s->s_root) 438 if (sb->s_root)
437 return s; 439 return sb;
438 drop_super(s); 440 up_read(&sb->s_umount);
439 goto rescan; 441 /* restart only when sb is no longer on the list */
442 spin_lock(&sb_lock);
443 if (__put_super_and_need_restart(sb))
444 goto rescan;
440 } 445 }
441 } 446 }
442 spin_unlock(&sb_lock); 447 spin_unlock(&sb_lock);
@@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super);
447 452
448struct super_block * user_get_super(dev_t dev) 453struct super_block * user_get_super(dev_t dev)
449{ 454{
450 struct list_head *p; 455 struct super_block *sb;
451 456
452rescan:
453 spin_lock(&sb_lock); 457 spin_lock(&sb_lock);
454 list_for_each(p, &super_blocks) { 458rescan:
455 struct super_block *s = sb_entry(p); 459 list_for_each_entry(sb, &super_blocks, s_list) {
456 if (s->s_dev == dev) { 460 if (sb->s_dev == dev) {
457 s->s_count++; 461 sb->s_count++;
458 spin_unlock(&sb_lock); 462 spin_unlock(&sb_lock);
459 down_read(&s->s_umount); 463 down_read(&sb->s_umount);
460 if (s->s_root) 464 if (sb->s_root)
461 return s; 465 return sb;
462 drop_super(s); 466 up_read(&sb->s_umount);
463 goto rescan; 467 /* restart only when sb is no longer on the list */
468 spin_lock(&sb_lock);
469 if (__put_super_and_need_restart(sb))
470 goto rescan;
464 } 471 }
465 } 472 }
466 spin_unlock(&sb_lock); 473 spin_unlock(&sb_lock);
@@ -833,7 +840,6 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
833 mnt->mnt_root = dget(sb->s_root); 840 mnt->mnt_root = dget(sb->s_root);
834 mnt->mnt_mountpoint = sb->s_root; 841 mnt->mnt_mountpoint = sb->s_root;
835 mnt->mnt_parent = mnt; 842 mnt->mnt_parent = mnt;
836 mnt->mnt_namespace = current->namespace;
837 up_write(&sb->s_umount); 843 up_write(&sb->s_umount);
838 free_secdata(secdata); 844 free_secdata(secdata);
839 put_filesystem(type); 845 put_filesystem(type);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 37d7a6875d86..59734ba1ee60 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -8,6 +8,7 @@
8#include <linux/mount.h> 8#include <linux/mount.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/namei.h>
11#include "sysfs.h" 12#include "sysfs.h"
12 13
13DECLARE_RWSEM(sysfs_rename_sem); 14DECLARE_RWSEM(sysfs_rename_sem);
@@ -99,7 +100,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
99 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 100 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
100 101
101 down(&p->d_inode->i_sem); 102 down(&p->d_inode->i_sem);
102 *d = sysfs_get_dentry(p,n); 103 *d = lookup_one_len(n, p, strlen(n));
103 if (!IS_ERR(*d)) { 104 if (!IS_ERR(*d)) {
104 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); 105 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
105 if (!error) { 106 if (!error) {
@@ -315,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
315 316
316 down(&parent->d_inode->i_sem); 317 down(&parent->d_inode->i_sem);
317 318
318 new_dentry = sysfs_get_dentry(parent, new_name); 319 new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
319 if (!IS_ERR(new_dentry)) { 320 if (!IS_ERR(new_dentry)) {
320 if (!new_dentry->d_inode) { 321 if (!new_dentry->d_inode) {
321 error = kobject_set_name(kobj, "%s", new_name); 322 error = kobject_set_name(kobj, "%s", new_name);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 849aac115460..4013d7905e84 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -3,8 +3,9 @@
3 */ 3 */
4 4
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/dnotify.h> 6#include <linux/fsnotify.h>
7#include <linux/kobject.h> 7#include <linux/kobject.h>
8#include <linux/namei.h>
8#include <asm/uaccess.h> 9#include <asm/uaccess.h>
9#include <asm/semaphore.h> 10#include <asm/semaphore.h>
10 11
@@ -13,7 +14,7 @@
13#define to_subsys(k) container_of(k,struct subsystem,kset.kobj) 14#define to_subsys(k) container_of(k,struct subsystem,kset.kobj)
14#define to_sattr(a) container_of(a,struct subsys_attribute,attr) 15#define to_sattr(a) container_of(a,struct subsys_attribute,attr)
15 16
16/** 17/*
17 * Subsystem file operations. 18 * Subsystem file operations.
18 * These operations allow subsystems to have files that can be 19 * These operations allow subsystems to have files that can be
19 * read/written. 20 * read/written.
@@ -191,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
191 192
192/** 193/**
193 * flush_write_buffer - push buffer to kobject. 194 * flush_write_buffer - push buffer to kobject.
194 * @file: file pointer. 195 * @dentry: dentry to the attribute
195 * @buffer: data buffer for file. 196 * @buffer: data buffer for file.
197 * @count: number of bytes
196 * 198 *
197 * Get the correct pointers for the kobject and the attribute we're 199 * Get the correct pointers for the kobject and the attribute we're
198 * dealing with, then call the store() method for the attribute, 200 * dealing with, then call the store() method for the attribute,
@@ -389,9 +391,6 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
389 * sysfs_update_file - update the modified timestamp on an object attribute. 391 * sysfs_update_file - update the modified timestamp on an object attribute.
390 * @kobj: object we're acting for. 392 * @kobj: object we're acting for.
391 * @attr: attribute descriptor. 393 * @attr: attribute descriptor.
392 *
393 * Also call dnotify for the dentry, which lots of userspace programs
394 * use.
395 */ 394 */
396int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) 395int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
397{ 396{
@@ -400,13 +399,13 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
400 int res = -ENOENT; 399 int res = -ENOENT;
401 400
402 down(&dir->d_inode->i_sem); 401 down(&dir->d_inode->i_sem);
403 victim = sysfs_get_dentry(dir, attr->name); 402 victim = lookup_one_len(attr->name, dir, strlen(attr->name));
404 if (!IS_ERR(victim)) { 403 if (!IS_ERR(victim)) {
405 /* make sure dentry is really there */ 404 /* make sure dentry is really there */
406 if (victim->d_inode && 405 if (victim->d_inode &&
407 (victim->d_parent->d_inode == dir->d_inode)) { 406 (victim->d_parent->d_inode == dir->d_inode)) {
408 victim->d_inode->i_mtime = CURRENT_TIME; 407 victim->d_inode->i_mtime = CURRENT_TIME;
409 dnotify_parent(victim, DN_MODIFY); 408 fsnotify_modify(victim);
410 409
411 /** 410 /**
412 * Drop reference from initial sysfs_get_dentry(). 411 * Drop reference from initial sysfs_get_dentry().
@@ -438,22 +437,24 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
438{ 437{
439 struct dentry *dir = kobj->dentry; 438 struct dentry *dir = kobj->dentry;
440 struct dentry *victim; 439 struct dentry *victim;
441 struct sysfs_dirent *sd; 440 struct inode * inode;
442 umode_t umode = (mode & S_IALLUGO) | S_IFREG; 441 struct iattr newattrs;
443 int res = -ENOENT; 442 int res = -ENOENT;
444 443
445 down(&dir->d_inode->i_sem); 444 down(&dir->d_inode->i_sem);
446 victim = sysfs_get_dentry(dir, attr->name); 445 victim = lookup_one_len(attr->name, dir, strlen(attr->name));
447 if (!IS_ERR(victim)) { 446 if (!IS_ERR(victim)) {
448 if (victim->d_inode && 447 if (victim->d_inode &&
449 (victim->d_parent->d_inode == dir->d_inode)) { 448 (victim->d_parent->d_inode == dir->d_inode)) {
450 sd = victim->d_fsdata; 449 inode = victim->d_inode;
451 attr->mode = mode; 450 down(&inode->i_sem);
452 sd->s_mode = umode; 451 newattrs.ia_mode = (mode & S_IALLUGO) |
453 victim->d_inode->i_mode = umode; 452 (inode->i_mode & ~S_IALLUGO);
454 dput(victim); 453 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
455 res = 0; 454 res = notify_change(victim, &newattrs);
455 up(&inode->i_sem);
456 } 456 }
457 dput(victim);
457 } 458 }
458 up(&dir->d_inode->i_sem); 459 up(&dir->d_inode->i_sem);
459 460
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f11ac5ea7021..122145b0895c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -11,6 +11,7 @@
11#include <linux/kobject.h> 11#include <linux/kobject.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/dcache.h> 13#include <linux/dcache.h>
14#include <linux/namei.h>
14#include <linux/err.h> 15#include <linux/err.h>
15#include "sysfs.h" 16#include "sysfs.h"
16 17
@@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj,
68 struct dentry * dir; 69 struct dentry * dir;
69 70
70 if (grp->name) 71 if (grp->name)
71 dir = sysfs_get_dentry(kobj->dentry,grp->name); 72 dir = lookup_one_len(grp->name, kobj->dentry,
73 strlen(grp->name));
72 else 74 else
73 dir = dget(kobj->dentry); 75 dir = dget(kobj->dentry);
74 76
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 565cac1d4200..970a33f03299 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -85,7 +85,7 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
85 85
86 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 86 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
87 mode &= ~S_ISGID; 87 mode &= ~S_ISGID;
88 sd_iattr->ia_mode = mode; 88 sd_iattr->ia_mode = sd->s_mode = mode;
89 } 89 }
90 90
91 return error; 91 return error;
@@ -166,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
166 return error; 166 return error;
167} 167}
168 168
169struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name)
170{
171 struct qstr qstr;
172
173 qstr.name = name;
174 qstr.len = strlen(name);
175 qstr.hash = full_name_hash(name,qstr.len);
176 return lookup_hash(&qstr,parent);
177}
178
179/* 169/*
180 * Get the name for corresponding element represented by the given sysfs_dirent 170 * Get the name for corresponding element represented by the given sysfs_dirent
181 */ 171 */
@@ -238,6 +228,10 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name)
238 struct sysfs_dirent * sd; 228 struct sysfs_dirent * sd;
239 struct sysfs_dirent * parent_sd = dir->d_fsdata; 229 struct sysfs_dirent * parent_sd = dir->d_fsdata;
240 230
231 if (dir->d_inode == NULL)
232 /* no inode means this hasn't been made visible yet */
233 return;
234
241 down(&dir->d_inode->i_sem); 235 down(&dir->d_inode->i_sem);
242 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 236 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
243 if (!sd->s_element) 237 if (!sd->s_element)
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index fae57c83a722..de402fa915f2 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -151,17 +151,17 @@ static int sysfs_getlink(struct dentry *dentry, char * path)
151 151
152} 152}
153 153
154static int sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) 154static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
155{ 155{
156 int error = -ENOMEM; 156 int error = -ENOMEM;
157 unsigned long page = get_zeroed_page(GFP_KERNEL); 157 unsigned long page = get_zeroed_page(GFP_KERNEL);
158 if (page) 158 if (page)
159 error = sysfs_getlink(dentry, (char *) page); 159 error = sysfs_getlink(dentry, (char *) page);
160 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); 160 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
161 return 0; 161 return NULL;
162} 162}
163 163
164static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd) 164static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
165{ 165{
166 char *page = nd_get_link(nd); 166 char *page = nd_get_link(nd);
167 if (!IS_ERR(page)) 167 if (!IS_ERR(page))
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 29da6f5f07c8..3f8953e0e5d0 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -7,7 +7,6 @@ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
7 7
8extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, 8extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
9 umode_t, int); 9 umode_t, int);
10extern struct dentry * sysfs_get_dentry(struct dentry *, const char *);
11 10
12extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 11extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); 12extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
index ed637db2dcb1..b85ce61d635c 100644
--- a/fs/sysv/symlink.c
+++ b/fs/sysv/symlink.c
@@ -8,10 +8,10 @@
8#include "sysv.h" 8#include "sysv.h"
9#include <linux/namei.h> 9#include <linux/namei.h>
10 10
11static int sysv_follow_link(struct dentry *dentry, struct nameidata *nd) 11static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
12{ 12{
13 nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data); 13 nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data);
14 return 0; 14 return NULL;
15} 15}
16 16
17struct inode_operations sysv_fast_symlink_inode_operations = { 17struct inode_operations sysv_fast_symlink_inode_operations = {
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 3f6dc7112bc6..ac191ed7df0a 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -159,14 +159,12 @@ udf_find_entry(struct inode *dir, struct dentry *dentry,
159 char *nameptr; 159 char *nameptr;
160 uint8_t lfi; 160 uint8_t lfi;
161 uint16_t liu; 161 uint16_t liu;
162 loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; 162 loff_t size;
163 kernel_lb_addr bloc, eloc; 163 kernel_lb_addr bloc, eloc;
164 uint32_t extoffset, elen, offset; 164 uint32_t extoffset, elen, offset;
165 struct buffer_head *bh = NULL; 165 struct buffer_head *bh = NULL;
166 166
167 if (!dir) 167 size = (udf_ext0_offset(dir) + dir->i_size) >> 2;
168 return NULL;
169
170 f_pos = (udf_ext0_offset(dir) >> 2); 168 f_pos = (udf_ext0_offset(dir) >> 2);
171 169
172 fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; 170 fibh->soffset = fibh->eoffset = (f_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2;
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index a0e49149098f..337512ed5781 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -29,11 +29,11 @@
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/ufs_fs.h> 30#include <linux/ufs_fs.h>
31 31
32static int ufs_follow_link(struct dentry *dentry, struct nameidata *nd) 32static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
33{ 33{
34 struct ufs_inode_info *p = UFS_I(dentry->d_inode); 34 struct ufs_inode_info *p = UFS_I(dentry->d_inode);
35 nd_set_link(nd, (char*)p->i_u1.i_symlink); 35 nd_set_link(nd, (char*)p->i_u1.i_symlink);
36 return 0; 36 return NULL;
37} 37}
38 38
39struct inode_operations ufs_fast_symlink_inode_operations = { 39struct inode_operations ufs_fast_symlink_inode_operations = {
diff --git a/fs/xattr.c b/fs/xattr.c
index 93dee70a1dbe..6acd5c63da91 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -16,6 +16,7 @@
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/fsnotify.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20 21
21/* 22/*
@@ -57,8 +58,10 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
57 if (error) 58 if (error)
58 goto out; 59 goto out;
59 error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags); 60 error = d->d_inode->i_op->setxattr(d, kname, kvalue, size, flags);
60 if (!error) 61 if (!error) {
62 fsnotify_xattr(d);
61 security_inode_post_setxattr(d, kname, kvalue, size, flags); 63 security_inode_post_setxattr(d, kname, kvalue, size, flags);
64 }
62out: 65out:
63 up(&d->d_inode->i_sem); 66 up(&d->d_inode->i_sem);
64 } 67 }
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 93ce257cd149..a3a4b5aaf5d9 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,11 +149,12 @@ linvfs_unwritten_convert(
149 */ 149 */
150STATIC void 150STATIC void
151linvfs_unwritten_convert_direct( 151linvfs_unwritten_convert_direct(
152 struct inode *inode, 152 struct kiocb *iocb,
153 loff_t offset, 153 loff_t offset,
154 ssize_t size, 154 ssize_t size,
155 void *private) 155 void *private)
156{ 156{
157 struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
157 ASSERT(!private || inode == (struct inode *)private); 158 ASSERT(!private || inode == (struct inode *)private);
158 159
159 /* private indicates an unwritten extent lay beneath this IO */ 160 /* private indicates an unwritten extent lay beneath this IO */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index c60e69431e11..df0cba239dd5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1771,9 +1771,9 @@ xfsbufd(
1771 1771
1772 INIT_LIST_HEAD(&tmp); 1772 INIT_LIST_HEAD(&tmp);
1773 do { 1773 do {
1774 if (unlikely(current->flags & PF_FREEZE)) { 1774 if (unlikely(freezing(current))) {
1775 xfsbufd_force_sleep = 1; 1775 xfsbufd_force_sleep = 1;
1776 refrigerator(PF_FREEZE); 1776 refrigerator();
1777 } else { 1777 } else {
1778 xfsbufd_force_sleep = 0; 1778 xfsbufd_force_sleep = 0;
1779 } 1779 }
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 407e99359391..f252605514eb 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -374,7 +374,7 @@ linvfs_rename(
374 * we need to be very careful about how much stack we use. 374 * we need to be very careful about how much stack we use.
375 * uio is kmalloced for this reason... 375 * uio is kmalloced for this reason...
376 */ 376 */
377STATIC int 377STATIC void *
378linvfs_follow_link( 378linvfs_follow_link(
379 struct dentry *dentry, 379 struct dentry *dentry,
380 struct nameidata *nd) 380 struct nameidata *nd)
@@ -391,14 +391,14 @@ linvfs_follow_link(
391 link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL); 391 link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
392 if (!link) { 392 if (!link) {
393 nd_set_link(nd, ERR_PTR(-ENOMEM)); 393 nd_set_link(nd, ERR_PTR(-ENOMEM));
394 return 0; 394 return NULL;
395 } 395 }
396 396
397 uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL); 397 uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
398 if (!uio) { 398 if (!uio) {
399 kfree(link); 399 kfree(link);
400 nd_set_link(nd, ERR_PTR(-ENOMEM)); 400 nd_set_link(nd, ERR_PTR(-ENOMEM));
401 return 0; 401 return NULL;
402 } 402 }
403 403
404 vp = LINVFS_GET_VP(dentry->d_inode); 404 vp = LINVFS_GET_VP(dentry->d_inode);
@@ -422,10 +422,10 @@ linvfs_follow_link(
422 kfree(uio); 422 kfree(uio);
423 423
424 nd_set_link(nd, link); 424 nd_set_link(nd, link);
425 return 0; 425 return NULL;
426} 426}
427 427
428static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd) 428static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
429{ 429{
430 char *s = nd_get_link(nd); 430 char *s = nd_get_link(nd);
431 if (!IS_ERR(s)) 431 if (!IS_ERR(s))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 5fe9af38aa20..f6dd7de25927 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -483,7 +483,7 @@ xfssyncd(
483 set_current_state(TASK_INTERRUPTIBLE); 483 set_current_state(TASK_INTERRUPTIBLE);
484 timeleft = schedule_timeout(timeleft); 484 timeleft = schedule_timeout(timeleft);
485 /* swsusp */ 485 /* swsusp */
486 try_to_freeze(PF_FREEZE); 486 try_to_freeze();
487 if (vfsp->vfs_flag & VFS_UMOUNT) 487 if (vfsp->vfs_flag & VFS_UMOUNT)
488 break; 488 break;
489 489