aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig89
-rw-r--r--fs/afs/netdevices.c5
-rw-r--r--fs/binfmt_elf.c14
-rw-r--r--fs/bio.c50
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/compat_ioctl.c692
-rw-r--r--fs/debugfs/file.c36
-rw-r--r--fs/direct-io.c13
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lock.c142
-rw-r--r--fs/dlm/lock.h3
-rw-r--r--fs/dlm/lockspace.c3
-rw-r--r--fs/dlm/lowcomms.c23
-rw-r--r--fs/dlm/member.c41
-rw-r--r--fs/dlm/midcomms.c17
-rw-r--r--fs/dlm/rcom.c36
-rw-r--r--fs/dlm/rcom.h5
-rw-r--r--fs/dlm/recoverd.c11
-rw-r--r--fs/dlm/requestqueue.c58
-rw-r--r--fs/dlm/requestqueue.h4
-rw-r--r--fs/ecryptfs/netlink.c16
-rw-r--r--fs/fs-writeback.c1
-rw-r--r--fs/gfs2/bmap.c35
-rw-r--r--fs/gfs2/daemon.c24
-rw-r--r--fs/gfs2/daemon.h1
-rw-r--r--fs/gfs2/dir.c3
-rw-r--r--fs/gfs2/eaops.c8
-rw-r--r--fs/gfs2/eaops.h4
-rw-r--r--fs/gfs2/glock.c293
-rw-r--r--fs/gfs2/glock.h5
-rw-r--r--fs/gfs2/glops.c24
-rw-r--r--fs/gfs2/incore.h31
-rw-r--r--fs/gfs2/inode.c78
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h1
-rw-r--r--fs/gfs2/locking/dlm/plock.c11
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c2
-rw-r--r--fs/gfs2/locking/dlm/thread.c20
-rw-r--r--fs/gfs2/locking/nolock/main.c1
-rw-r--r--fs/gfs2/log.c230
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c470
-rw-r--r--fs/gfs2/main.c3
-rw-r--r--fs/gfs2/meta_io.c136
-rw-r--r--fs/gfs2/meta_io.h6
-rw-r--r--fs/gfs2/mount.c5
-rw-r--r--fs/gfs2/ops_address.c146
-rw-r--r--fs/gfs2/ops_export.c2
-rw-r--r--fs/gfs2/ops_file.c13
-rw-r--r--fs/gfs2/ops_fstype.c40
-rw-r--r--fs/gfs2/ops_inode.c38
-rw-r--r--fs/gfs2/ops_super.c14
-rw-r--r--fs/gfs2/quota.c13
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/gfs2/rgrp.c39
-rw-r--r--fs/gfs2/super.c6
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/gfs2/trans.c22
-rw-r--r--fs/gfs2/trans.h2
-rw-r--r--fs/inode.c24
-rw-r--r--fs/jbd/transaction.c9
-rw-r--r--fs/jffs2/Makefile1
-rw-r--r--fs/jffs2/acl.c23
-rw-r--r--fs/jffs2/acl.h4
-rw-r--r--fs/jffs2/background.c4
-rw-r--r--fs/jffs2/build.c10
-rw-r--r--fs/jffs2/compr.c422
-rw-r--r--fs/jffs2/compr.h54
-rw-r--r--fs/jffs2/compr_lzo.c108
-rw-r--r--fs/jffs2/compr_rtime.c2
-rw-r--r--fs/jffs2/compr_rubin.c4
-rw-r--r--fs/jffs2/compr_zlib.c6
-rw-r--r--fs/jffs2/dir.c37
-rw-r--r--fs/jffs2/erase.c57
-rw-r--r--fs/jffs2/fs.c32
-rw-r--r--fs/jffs2/gc.c23
-rw-r--r--fs/jffs2/jffs2_fs_sb.h5
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/nodemgmt.c25
-rw-r--r--fs/jffs2/os-linux.h5
-rw-r--r--fs/jffs2/readinode.c8
-rw-r--r--fs/jffs2/scan.c19
-rw-r--r--fs/jffs2/security.c6
-rw-r--r--fs/jffs2/summary.c30
-rw-r--r--fs/jffs2/summary.h6
-rw-r--r--fs/jffs2/wbuf.c81
-rw-r--r--fs/jffs2/write.c13
-rw-r--r--fs/jffs2/xattr.h2
-rw-r--r--fs/jffs2/xattr_user.c4
-rw-r--r--fs/jfs/jfs_dtree.c2
-rw-r--r--fs/jfs/jfs_incore.h2
-rw-r--r--fs/jfs/jfs_logmgr.c13
-rw-r--r--fs/jfs/jfs_logmgr.h2
-rw-r--r--fs/jfs/jfs_metapage.c15
-rw-r--r--fs/lockd/mon.c3
-rw-r--r--fs/lockd/xdr.c8
-rw-r--r--fs/lockd/xdr4.c8
-rw-r--r--fs/mpage.c12
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/client.c49
-rw-r--r--fs/nfs/delegation.c6
-rw-r--r--fs/nfs/dir.c263
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c105
-rw-r--r--fs/nfs/inode.c273
-rw-r--r--fs/nfs/internal.h50
-rw-r--r--fs/nfs/nfs2xdr.c20
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c17
-rw-r--r--fs/nfs/nfs3xdr.c25
-rw-r--r--fs/nfs/nfs4proc.c85
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c72
-rw-r--r--fs/nfs/nfsroot.c3
-rw-r--r--fs/nfs/proc.c5
-rw-r--r--fs/nfs/read.c9
-rw-r--r--fs/nfs/super.c393
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfs/write.c199
-rw-r--r--fs/nfsd/nfs3xdr.c59
-rw-r--r--fs/nfsd/nfs4callback.c89
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c200
-rw-r--r--fs/nfsd/nfs4xdr.c38
-rw-r--r--fs/nfsd/nfsctl.c7
-rw-r--r--fs/nfsd/nfssvc.c8
-rw-r--r--fs/nfsd/nfsxdr.c4
-rw-r--r--fs/nfsd/vfs.c43
-rw-r--r--fs/ntfs/ChangeLog12
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/aops.c22
-rw-r--r--fs/ntfs/attrib.c8
-rw-r--r--fs/ntfs/file.c36
-rw-r--r--fs/ntfs/inode.c3
-rw-r--r--fs/ntfs/logfile.c143
-rw-r--r--fs/ntfs/runlist.c4
-rw-r--r--fs/ocfs2/alloc.c482
-rw-r--r--fs/ocfs2/alloc.h7
-rw-r--r--fs/ocfs2/aops.c309
-rw-r--r--fs/ocfs2/aops.h6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c7
-rw-r--r--fs/ocfs2/cluster/masklog.c3
-rw-r--r--fs/ocfs2/dir.c1423
-rw-r--r--fs/ocfs2/dir.h48
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/dlmglue.h4
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/extent_map.c6
-rw-r--r--fs/ocfs2/file.c298
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/inode.c7
-rw-r--r--fs/ocfs2/inode.h1
-rw-r--r--fs/ocfs2/journal.c120
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/namei.c552
-rw-r--r--fs/ocfs2/namei.h19
-rw-r--r--fs/ocfs2/ocfs2.h7
-rw-r--r--fs/ocfs2/ocfs2_fs.h64
-rw-r--r--fs/ocfs2/super.c62
-rw-r--r--fs/ocfs2/sysfile.c10
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/partitions/sun.c4
-rw-r--r--fs/pipe.c9
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c17
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/internal.h5
-rw-r--r--fs/proc/proc_misc.c15
-rw-r--r--fs/proc/proc_net.c200
-rw-r--r--fs/proc/root.c8
-rw-r--r--fs/seq_file.c33
-rw-r--r--fs/smbfs/smbiod.c2
-rw-r--r--fs/sysfs/bin.c36
-rw-r--r--fs/sysfs/dir.c754
-rw-r--r--fs/sysfs/file.c248
-rw-r--r--fs/sysfs/group.c2
-rw-r--r--fs/sysfs/inode.c103
-rw-r--r--fs/sysfs/mount.c26
-rw-r--r--fs/sysfs/symlink.c34
-rw-r--r--fs/sysfs/sysfs.h184
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c7
184 files changed, 6394 insertions, 5141 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index f9eed6d79066..815d201d8600 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1225,6 +1225,14 @@ config JFFS2_FS_WRITEBUFFER
1225 - NOR flash with transparent ECC 1225 - NOR flash with transparent ECC
1226 - DataFlash 1226 - DataFlash
1227 1227
1228config JFFS2_FS_WBUF_VERIFY
1229 bool "Verify JFFS2 write-buffer reads"
1230 depends on JFFS2_FS_WRITEBUFFER
1231 default n
1232 help
1233 This causes JFFS2 to read back every page written through the
1234 write-buffer, and check for errors.
1235
1228config JFFS2_SUMMARY 1236config JFFS2_SUMMARY
1229 bool "JFFS2 summary support (EXPERIMENTAL)" 1237 bool "JFFS2 summary support (EXPERIMENTAL)"
1230 depends on JFFS2_FS && EXPERIMENTAL 1238 depends on JFFS2_FS && EXPERIMENTAL
@@ -1295,52 +1303,71 @@ config JFFS2_ZLIB
1295 select ZLIB_DEFLATE 1303 select ZLIB_DEFLATE
1296 depends on JFFS2_FS 1304 depends on JFFS2_FS
1297 default y 1305 default y
1298 help 1306 help
1299 Zlib is designed to be a free, general-purpose, legally unencumbered, 1307 Zlib is designed to be a free, general-purpose, legally unencumbered,
1300 lossless data-compression library for use on virtually any computer 1308 lossless data-compression library for use on virtually any computer
1301 hardware and operating system. See <http://www.gzip.org/zlib/> for 1309 hardware and operating system. See <http://www.gzip.org/zlib/> for
1302 further information. 1310 further information.
1311
1312 Say 'Y' if unsure.
1313
1314config JFFS2_LZO
1315 bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
1316 select LZO_COMPRESS
1317 select LZO_DECOMPRESS
1318 depends on JFFS2_FS
1319 default n
1320 help
1321 minilzo-based compression. Generally works better than Zlib.
1303 1322
1304 Say 'Y' if unsure. 1323 This feature was added in July, 2007. Say 'N' if you need
1324 compatibility with older bootloaders or kernels.
1305 1325
1306config JFFS2_RTIME 1326config JFFS2_RTIME
1307 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS 1327 bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
1308 depends on JFFS2_FS 1328 depends on JFFS2_FS
1309 default y 1329 default y
1310 help 1330 help
1311 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure. 1331 Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
1312 1332
1313config JFFS2_RUBIN 1333config JFFS2_RUBIN
1314 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS 1334 bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
1315 depends on JFFS2_FS 1335 depends on JFFS2_FS
1316 default n 1336 default n
1317 help 1337 help
1318 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure. 1338 RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
1319 1339
1320choice 1340choice
1321 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS 1341 prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
1322 default JFFS2_CMODE_PRIORITY 1342 default JFFS2_CMODE_PRIORITY
1323 depends on JFFS2_FS 1343 depends on JFFS2_FS
1324 help 1344 help
1325 You can set here the default compression mode of JFFS2 from 1345 You can set here the default compression mode of JFFS2 from
1326 the available compression modes. Don't touch if unsure. 1346 the available compression modes. Don't touch if unsure.
1327 1347
1328config JFFS2_CMODE_NONE 1348config JFFS2_CMODE_NONE
1329 bool "no compression" 1349 bool "no compression"
1330 help 1350 help
1331 Uses no compression. 1351 Uses no compression.
1332 1352
1333config JFFS2_CMODE_PRIORITY 1353config JFFS2_CMODE_PRIORITY
1334 bool "priority" 1354 bool "priority"
1335 help 1355 help
1336 Tries the compressors in a predefined order and chooses the first 1356 Tries the compressors in a predefined order and chooses the first
1337 successful one. 1357 successful one.
1338 1358
1339config JFFS2_CMODE_SIZE 1359config JFFS2_CMODE_SIZE
1340 bool "size (EXPERIMENTAL)" 1360 bool "size (EXPERIMENTAL)"
1341 help 1361 help
1342 Tries all compressors and chooses the one which has the smallest 1362 Tries all compressors and chooses the one which has the smallest
1343 result. 1363 result.
1364
1365config JFFS2_CMODE_FAVOURLZO
1366 bool "Favour LZO"
1367 help
1368 Tries all compressors and chooses the one which has the smallest
1369 result but gives some preference to LZO (which has faster
1370 decompression) at the expense of size.
1344 1371
1345endchoice 1372endchoice
1346 1373
@@ -1728,6 +1755,14 @@ config SUNRPC
1728config SUNRPC_GSS 1755config SUNRPC_GSS
1729 tristate 1756 tristate
1730 1757
1758config SUNRPC_XPRT_RDMA
1759 tristate "RDMA transport for sunrpc (EXPERIMENTAL)"
1760 depends on SUNRPC && INFINIBAND && EXPERIMENTAL
1761 default m
1762 help
1763 Adds a client RPC transport for supporting kernel NFS over RDMA
1764 mounts, including Infiniband and iWARP. Experimental.
1765
1731config SUNRPC_BIND34 1766config SUNRPC_BIND34
1732 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" 1767 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
1733 depends on SUNRPC && EXPERIMENTAL 1768 depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
index fc27d4b52e5f..49f189423063 100644
--- a/fs/afs/netdevices.c
+++ b/fs/afs/netdevices.c
@@ -8,6 +8,7 @@
8#include <linux/inetdevice.h> 8#include <linux/inetdevice.h>
9#include <linux/netdevice.h> 9#include <linux/netdevice.h>
10#include <linux/if_arp.h> 10#include <linux/if_arp.h>
11#include <net/net_namespace.h>
11#include "internal.h" 12#include "internal.h"
12 13
13/* 14/*
@@ -23,7 +24,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen)
23 BUG(); 24 BUG();
24 25
25 rtnl_lock(); 26 rtnl_lock();
26 dev = __dev_getfirstbyhwtype(ARPHRD_ETHER); 27 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
27 if (dev) { 28 if (dev) {
28 memcpy(mac, dev->dev_addr, maclen); 29 memcpy(mac, dev->dev_addr, maclen);
29 ret = 0; 30 ret = 0;
@@ -47,7 +48,7 @@ int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
47 ASSERT(maxbufs > 0); 48 ASSERT(maxbufs > 0);
48 49
49 rtnl_lock(); 50 rtnl_lock();
50 for_each_netdev(dev) { 51 for_each_netdev(&init_net, dev) {
51 if (dev->type == ARPHRD_LOOPBACK && !wantloopback) 52 if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
52 continue; 53 continue;
53 idev = __in_dev_get_rtnl(dev); 54 idev = __in_dev_get_rtnl(dev);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 4482a0673b15..b1013f34085d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1514,9 +1514,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1514 int thread_status_size = 0; 1514 int thread_status_size = 0;
1515 elf_addr_t *auxv; 1515 elf_addr_t *auxv;
1516 unsigned long mm_flags; 1516 unsigned long mm_flags;
1517#ifdef ELF_CORE_WRITE_EXTRA_NOTES
1518 int extra_notes_size;
1519#endif
1520 1517
1521 /* 1518 /*
1522 * We no longer stop all VM operations. 1519 * We no longer stop all VM operations.
@@ -1645,10 +1642,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1645 1642
1646 sz += thread_status_size; 1643 sz += thread_status_size;
1647 1644
1648#ifdef ELF_CORE_WRITE_EXTRA_NOTES 1645 sz += elf_coredump_extra_notes_size();
1649 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1650 sz += extra_notes_size;
1651#endif
1652 1646
1653 fill_elf_note_phdr(&phdr, sz, offset); 1647 fill_elf_note_phdr(&phdr, sz, offset);
1654 offset += sz; 1648 offset += sz;
@@ -1698,10 +1692,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1698 if (!writenote(notes + i, file, &foffset)) 1692 if (!writenote(notes + i, file, &foffset))
1699 goto end_coredump; 1693 goto end_coredump;
1700 1694
1701#ifdef ELF_CORE_WRITE_EXTRA_NOTES 1695 if (elf_coredump_extra_notes_write(file, &foffset))
1702 ELF_CORE_WRITE_EXTRA_NOTES; 1696 goto end_coredump;
1703 foffset += extra_notes_size;
1704#endif
1705 1697
1706 /* write out the thread status notes section */ 1698 /* write out the thread status notes section */
1707 list_for_each(t, &thread_list) { 1699 list_for_each(t, &thread_list) {
diff --git a/fs/bio.c b/fs/bio.c
index 29a44c1b64c6..5f604f269dfa 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -798,13 +798,9 @@ void bio_unmap_user(struct bio *bio)
798 bio_put(bio); 798 bio_put(bio);
799} 799}
800 800
801static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err) 801static void bio_map_kern_endio(struct bio *bio, int err)
802{ 802{
803 if (bio->bi_size)
804 return 1;
805
806 bio_put(bio); 803 bio_put(bio);
807 return 0;
808} 804}
809 805
810 806
@@ -1002,34 +998,26 @@ void bio_check_pages_dirty(struct bio *bio)
1002/** 998/**
1003 * bio_endio - end I/O on a bio 999 * bio_endio - end I/O on a bio
1004 * @bio: bio 1000 * @bio: bio
1005 * @bytes_done: number of bytes completed
1006 * @error: error, if any 1001 * @error: error, if any
1007 * 1002 *
1008 * Description: 1003 * Description:
1009 * bio_endio() will end I/O on @bytes_done number of bytes. This may be 1004 * bio_endio() will end I/O on the whole bio. bio_endio() is the
1010 * just a partial part of the bio, or it may be the whole bio. bio_endio() 1005 * preferred way to end I/O on a bio, it takes care of clearing
1011 * is the preferred way to end I/O on a bio, it takes care of decrementing 1006 * BIO_UPTODATE on error. @error is 0 on success, and one of the
1012 * bi_size and clearing BIO_UPTODATE on error. @error is 0 on success, 1007 * established -Exxxx (-EIO, for instance) error values in case
1013 * and one of the established -Exxxx (-EIO, for instance) error values in 1008 * something went wrong. No one should call bi_end_io() directly on a
1014 * case something went wrong. No one should call bi_end_io() directly on 1009 * bio unless they own it and thus know that it has an end_io
1015 * a bio unless they own it and thus know that it has an end_io function. 1010 * function.
1016 **/ 1011 **/
1017void bio_endio(struct bio *bio, unsigned int bytes_done, int error) 1012void bio_endio(struct bio *bio, int error)
1018{ 1013{
1019 if (error) 1014 if (error)
1020 clear_bit(BIO_UPTODATE, &bio->bi_flags); 1015 clear_bit(BIO_UPTODATE, &bio->bi_flags);
1021 1016 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1022 if (unlikely(bytes_done > bio->bi_size)) { 1017 error = -EIO;
1023 printk("%s: want %u bytes done, only %u left\n", __FUNCTION__,
1024 bytes_done, bio->bi_size);
1025 bytes_done = bio->bi_size;
1026 }
1027
1028 bio->bi_size -= bytes_done;
1029 bio->bi_sector += (bytes_done >> 9);
1030 1018
1031 if (bio->bi_end_io) 1019 if (bio->bi_end_io)
1032 bio->bi_end_io(bio, bytes_done, error); 1020 bio->bi_end_io(bio, error);
1033} 1021}
1034 1022
1035void bio_pair_release(struct bio_pair *bp) 1023void bio_pair_release(struct bio_pair *bp)
@@ -1037,37 +1025,29 @@ void bio_pair_release(struct bio_pair *bp)
1037 if (atomic_dec_and_test(&bp->cnt)) { 1025 if (atomic_dec_and_test(&bp->cnt)) {
1038 struct bio *master = bp->bio1.bi_private; 1026 struct bio *master = bp->bio1.bi_private;
1039 1027
1040 bio_endio(master, master->bi_size, bp->error); 1028 bio_endio(master, bp->error);
1041 mempool_free(bp, bp->bio2.bi_private); 1029 mempool_free(bp, bp->bio2.bi_private);
1042 } 1030 }
1043} 1031}
1044 1032
1045static int bio_pair_end_1(struct bio * bi, unsigned int done, int err) 1033static void bio_pair_end_1(struct bio *bi, int err)
1046{ 1034{
1047 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1); 1035 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
1048 1036
1049 if (err) 1037 if (err)
1050 bp->error = err; 1038 bp->error = err;
1051 1039
1052 if (bi->bi_size)
1053 return 1;
1054
1055 bio_pair_release(bp); 1040 bio_pair_release(bp);
1056 return 0;
1057} 1041}
1058 1042
1059static int bio_pair_end_2(struct bio * bi, unsigned int done, int err) 1043static void bio_pair_end_2(struct bio *bi, int err)
1060{ 1044{
1061 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2); 1045 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
1062 1046
1063 if (err) 1047 if (err)
1064 bp->error = err; 1048 bp->error = err;
1065 1049
1066 if (bi->bi_size)
1067 return 1;
1068
1069 bio_pair_release(bp); 1050 bio_pair_release(bp);
1070 return 0;
1071} 1051}
1072 1052
1073/* 1053/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2980eabe5779..6339a30879b7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,7 +172,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
172} 172}
173 173
174#if 0 174#if 0
175static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error) 175static void blk_end_aio(struct bio *bio, int error)
176{ 176{
177 struct kiocb *iocb = bio->bi_private; 177 struct kiocb *iocb = bio->bi_private;
178 atomic_t *bio_count = &iocb->ki_bio_count; 178 atomic_t *bio_count = &iocb->ki_bio_count;
diff --git a/fs/buffer.c b/fs/buffer.c
index 0e5ec371ce72..75b51dfa5e03 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2634,13 +2634,10 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2634 return tmp.b_blocknr; 2634 return tmp.b_blocknr;
2635} 2635}
2636 2636
2637static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err) 2637static void end_bio_bh_io_sync(struct bio *bio, int err)
2638{ 2638{
2639 struct buffer_head *bh = bio->bi_private; 2639 struct buffer_head *bh = bio->bi_private;
2640 2640
2641 if (bio->bi_size)
2642 return 1;
2643
2644 if (err == -EOPNOTSUPP) { 2641 if (err == -EOPNOTSUPP) {
2645 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 2642 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2646 set_bit(BH_Eopnotsupp, &bh->b_state); 2643 set_bit(BH_Eopnotsupp, &bh->b_state);
@@ -2648,7 +2645,6 @@ static int end_bio_bh_io_sync(struct bio *bio, unsigned int bytes_done, int err)
2648 2645
2649 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); 2646 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2650 bio_put(bio); 2647 bio_put(bio);
2651 return 0;
2652} 2648}
2653 2649
2654int submit_bh(int rw, struct buffer_head * bh) 2650int submit_bh(int rw, struct buffer_head * bh)
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 37310b0e8107..9c3fd07f35e0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -21,7 +21,6 @@
21#include <linux/if.h> 21#include <linux/if.h>
22#include <linux/if_bridge.h> 22#include <linux/if_bridge.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/hdreg.h>
25#include <linux/raid/md.h> 24#include <linux/raid/md.h>
26#include <linux/kd.h> 25#include <linux/kd.h>
27#include <linux/dirent.h> 26#include <linux/dirent.h>
@@ -33,12 +32,10 @@
33#include <linux/vt.h> 32#include <linux/vt.h>
34#include <linux/fs.h> 33#include <linux/fs.h>
35#include <linux/file.h> 34#include <linux/file.h>
36#include <linux/fd.h>
37#include <linux/ppp_defs.h> 35#include <linux/ppp_defs.h>
38#include <linux/if_ppp.h> 36#include <linux/if_ppp.h>
39#include <linux/if_pppox.h> 37#include <linux/if_pppox.h>
40#include <linux/mtio.h> 38#include <linux/mtio.h>
41#include <linux/cdrom.h>
42#include <linux/auto_fs.h> 39#include <linux/auto_fs.h>
43#include <linux/auto_fs4.h> 40#include <linux/auto_fs4.h>
44#include <linux/tty.h> 41#include <linux/tty.h>
@@ -48,7 +45,6 @@
48#include <linux/netdevice.h> 45#include <linux/netdevice.h>
49#include <linux/raw.h> 46#include <linux/raw.h>
50#include <linux/smb_fs.h> 47#include <linux/smb_fs.h>
51#include <linux/blkpg.h>
52#include <linux/blkdev.h> 48#include <linux/blkdev.h>
53#include <linux/elevator.h> 49#include <linux/elevator.h>
54#include <linux/rtc.h> 50#include <linux/rtc.h>
@@ -62,7 +58,6 @@
62#include <linux/i2c-dev.h> 58#include <linux/i2c-dev.h>
63#include <linux/wireless.h> 59#include <linux/wireless.h>
64#include <linux/atalk.h> 60#include <linux/atalk.h>
65#include <linux/blktrace_api.h>
66#include <linux/loop.h> 61#include <linux/loop.h>
67 62
68#include <net/bluetooth/bluetooth.h> 63#include <net/bluetooth/bluetooth.h>
@@ -324,22 +319,21 @@ struct ifconf32 {
324 319
325static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) 320static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
326{ 321{
327 struct net_device *dev; 322 struct ifreq __user *uifr;
328 struct ifreq32 ifr32;
329 int err; 323 int err;
330 324
331 if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32))) 325 uifr = compat_alloc_user_space(sizeof(struct ifreq));
326 if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
332 return -EFAULT; 327 return -EFAULT;
333 328
334 dev = dev_get_by_index(ifr32.ifr_ifindex); 329 err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
335 if (!dev) 330 if (err)
336 return -ENODEV; 331 return err;
332
333 if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
334 return -EFAULT;
337 335
338 strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name)); 336 return 0;
339 dev_put(dev);
340
341 err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32));
342 return (err ? -EFAULT : 0);
343} 337}
344 338
345static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) 339static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
@@ -668,53 +662,6 @@ out:
668#endif 662#endif
669 663
670#ifdef CONFIG_BLOCK 664#ifdef CONFIG_BLOCK
671struct hd_geometry32 {
672 unsigned char heads;
673 unsigned char sectors;
674 unsigned short cylinders;
675 u32 start;
676};
677
678static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg)
679{
680 mm_segment_t old_fs = get_fs();
681 struct hd_geometry geo;
682 struct hd_geometry32 __user *ugeo;
683 int err;
684
685 set_fs (KERNEL_DS);
686 err = sys_ioctl(fd, HDIO_GETGEO, (unsigned long)&geo);
687 set_fs (old_fs);
688 ugeo = compat_ptr(arg);
689 if (!err) {
690 err = copy_to_user (ugeo, &geo, 4);
691 err |= __put_user (geo.start, &ugeo->start);
692 if (err)
693 err = -EFAULT;
694 }
695 return err;
696}
697
698static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
699{
700 mm_segment_t old_fs = get_fs();
701 unsigned long kval;
702 unsigned int __user *uvp;
703 int error;
704
705 set_fs(KERNEL_DS);
706 error = sys_ioctl(fd, cmd, (long)&kval);
707 set_fs(old_fs);
708
709 if(error == 0) {
710 uvp = compat_ptr(arg);
711 if(put_user(kval, uvp))
712 error = -EFAULT;
713 }
714 return error;
715}
716
717
718typedef struct sg_io_hdr32 { 665typedef struct sg_io_hdr32 {
719 compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ 666 compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
720 compat_int_t dxfer_direction; /* [i] data transfer direction */ 667 compat_int_t dxfer_direction; /* [i] data transfer direction */
@@ -1089,108 +1036,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1089 return err ? -EFAULT: 0; 1036 return err ? -EFAULT: 0;
1090} 1037}
1091 1038
1092struct cdrom_read_audio32 {
1093 union cdrom_addr addr;
1094 u8 addr_format;
1095 compat_int_t nframes;
1096 compat_caddr_t buf;
1097};
1098
1099struct cdrom_generic_command32 {
1100 unsigned char cmd[CDROM_PACKET_SIZE];
1101 compat_caddr_t buffer;
1102 compat_uint_t buflen;
1103 compat_int_t stat;
1104 compat_caddr_t sense;
1105 unsigned char data_direction;
1106 compat_int_t quiet;
1107 compat_int_t timeout;
1108 compat_caddr_t reserved[1];
1109};
1110
1111static int cdrom_do_read_audio(unsigned int fd, unsigned int cmd, unsigned long arg)
1112{
1113 struct cdrom_read_audio __user *cdread_audio;
1114 struct cdrom_read_audio32 __user *cdread_audio32;
1115 __u32 data;
1116 void __user *datap;
1117
1118 cdread_audio = compat_alloc_user_space(sizeof(*cdread_audio));
1119 cdread_audio32 = compat_ptr(arg);
1120
1121 if (copy_in_user(&cdread_audio->addr,
1122 &cdread_audio32->addr,
1123 (sizeof(*cdread_audio32) -
1124 sizeof(compat_caddr_t))))
1125 return -EFAULT;
1126
1127 if (get_user(data, &cdread_audio32->buf))
1128 return -EFAULT;
1129 datap = compat_ptr(data);
1130 if (put_user(datap, &cdread_audio->buf))
1131 return -EFAULT;
1132
1133 return sys_ioctl(fd, cmd, (unsigned long) cdread_audio);
1134}
1135
1136static int cdrom_do_generic_command(unsigned int fd, unsigned int cmd, unsigned long arg)
1137{
1138 struct cdrom_generic_command __user *cgc;
1139 struct cdrom_generic_command32 __user *cgc32;
1140 u32 data;
1141 unsigned char dir;
1142 int itmp;
1143
1144 cgc = compat_alloc_user_space(sizeof(*cgc));
1145 cgc32 = compat_ptr(arg);
1146
1147 if (copy_in_user(&cgc->cmd, &cgc32->cmd, sizeof(cgc->cmd)) ||
1148 get_user(data, &cgc32->buffer) ||
1149 put_user(compat_ptr(data), &cgc->buffer) ||
1150 copy_in_user(&cgc->buflen, &cgc32->buflen,
1151 (sizeof(unsigned int) + sizeof(int))) ||
1152 get_user(data, &cgc32->sense) ||
1153 put_user(compat_ptr(data), &cgc->sense) ||
1154 get_user(dir, &cgc32->data_direction) ||
1155 put_user(dir, &cgc->data_direction) ||
1156 get_user(itmp, &cgc32->quiet) ||
1157 put_user(itmp, &cgc->quiet) ||
1158 get_user(itmp, &cgc32->timeout) ||
1159 put_user(itmp, &cgc->timeout) ||
1160 get_user(data, &cgc32->reserved[0]) ||
1161 put_user(compat_ptr(data), &cgc->reserved[0]))
1162 return -EFAULT;
1163
1164 return sys_ioctl(fd, cmd, (unsigned long) cgc);
1165}
1166
1167static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1168{
1169 int err;
1170
1171 switch(cmd) {
1172 case CDROMREADAUDIO:
1173 err = cdrom_do_read_audio(fd, cmd, arg);
1174 break;
1175
1176 case CDROM_SEND_PACKET:
1177 err = cdrom_do_generic_command(fd, cmd, arg);
1178 break;
1179
1180 default:
1181 do {
1182 static int count;
1183 if (++count <= 20)
1184 printk("cdrom_ioctl: Unknown cmd fd(%d) "
1185 "cmd(%08x) arg(%08x)\n",
1186 (int)fd, (unsigned int)cmd, (unsigned int)arg);
1187 } while(0);
1188 err = -EINVAL;
1189 break;
1190 };
1191
1192 return err;
1193}
1194#endif /* CONFIG_BLOCK */ 1039#endif /* CONFIG_BLOCK */
1195 1040
1196#ifdef CONFIG_VT 1041#ifdef CONFIG_VT
@@ -1536,71 +1381,11 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
1536 return -EINVAL; 1381 return -EINVAL;
1537} 1382}
1538 1383
1539#ifdef CONFIG_BLOCK
1540static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
1541{
1542 /* The mkswap binary hard codes it to Intel value :-((( */
1543 return w_long(fd, BLKGETSIZE, arg);
1544}
1545
1546struct blkpg_ioctl_arg32 {
1547 compat_int_t op;
1548 compat_int_t flags;
1549 compat_int_t datalen;
1550 compat_caddr_t data;
1551};
1552
1553static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1554{
1555 struct blkpg_ioctl_arg32 __user *ua32 = compat_ptr(arg);
1556 struct blkpg_ioctl_arg __user *a = compat_alloc_user_space(sizeof(*a));
1557 compat_caddr_t udata;
1558 compat_int_t n;
1559 int err;
1560
1561 err = get_user(n, &ua32->op);
1562 err |= put_user(n, &a->op);
1563 err |= get_user(n, &ua32->flags);
1564 err |= put_user(n, &a->flags);
1565 err |= get_user(n, &ua32->datalen);
1566 err |= put_user(n, &a->datalen);
1567 err |= get_user(udata, &ua32->data);
1568 err |= put_user(compat_ptr(udata), &a->data);
1569 if (err)
1570 return err;
1571
1572 return sys_ioctl(fd, cmd, (unsigned long)a);
1573}
1574#endif
1575
1576static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) 1384static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
1577{ 1385{
1578 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg); 1386 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
1579} 1387}
1580 1388
1581#ifdef CONFIG_BLOCK
1582/* Fix sizeof(sizeof()) breakage */
1583#define BLKBSZGET_32 _IOR(0x12,112,int)
1584#define BLKBSZSET_32 _IOW(0x12,113,int)
1585#define BLKGETSIZE64_32 _IOR(0x12,114,int)
1586
1587static int do_blkbszget(unsigned int fd, unsigned int cmd, unsigned long arg)
1588{
1589 return sys_ioctl(fd, BLKBSZGET, (unsigned long)compat_ptr(arg));
1590}
1591
1592static int do_blkbszset(unsigned int fd, unsigned int cmd, unsigned long arg)
1593{
1594 return sys_ioctl(fd, BLKBSZSET, (unsigned long)compat_ptr(arg));
1595}
1596
1597static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
1598 unsigned long arg)
1599{
1600 return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
1601}
1602#endif
1603
1604/* Bluetooth ioctls */ 1389/* Bluetooth ioctls */
1605#define HCIUARTSETPROTO _IOW('U', 200, int) 1390#define HCIUARTSETPROTO _IOW('U', 200, int)
1606#define HCIUARTGETPROTO _IOR('U', 201, int) 1391#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -1620,333 +1405,6 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
1620#define HIDPGETCONNLIST _IOR('H', 210, int) 1405#define HIDPGETCONNLIST _IOR('H', 210, int)
1621#define HIDPGETCONNINFO _IOR('H', 211, int) 1406#define HIDPGETCONNINFO _IOR('H', 211, int)
1622 1407
1623#ifdef CONFIG_BLOCK
/*
 * 32-bit layout of the argument carried by FDSETPRM32, FDDEFPRM32 and
 * FDGETPRM32. fd_ioctl_trans() converts it to and from the native
 * struct floppy_struct field by field.
 */
1624struct floppy_struct32 {
1625 compat_uint_t size;
1626 compat_uint_t sect;
1627 compat_uint_t head;
1628 compat_uint_t track;
1629 compat_uint_t stretch;
1630 unsigned char gap;
1631 unsigned char rate;
1632 unsigned char spec1;
1633 unsigned char fmt_gap;
/* held as a 32-bit user address, not a native pointer */
1634 const compat_caddr_t name;
1635};
1636
/*
 * 32-bit layout of the argument carried by FDSETDRVPRM32 and FDGETDRVPRM32.
 * Members that are compat_ulong_t / compat_int_t here are copied one at a
 * time by fd_ioctl_trans() into the native struct floppy_drive_params.
 */
1637struct floppy_drive_params32 {
1638 char cmos;
1639 compat_ulong_t max_dtr;
1640 compat_ulong_t hlt;
1641 compat_ulong_t hut;
1642 compat_ulong_t srt;
1643 compat_ulong_t spinup;
1644 compat_ulong_t spindown;
1645 unsigned char spindown_offset;
1646 unsigned char select_delay;
1647 unsigned char rps;
1648 unsigned char tracks;
1649 compat_ulong_t timeout;
1650 unsigned char interleave_sect;
/* native structure reused as-is; fd_ioctl_trans() copies it in one piece */
1651 struct floppy_max_errors max_errors;
1652 char flags;
1653 char read_track;
/* copied as a whole array by fd_ioctl_trans() */
1654 short autodetect[8];
1655 compat_int_t checkfreq;
1656 compat_int_t native_format;
1657};
1658
/*
 * 32-bit layout of the result returned for FDGETDRVSTAT32 and
 * FDPOLLDRVSTAT32. fd_ioctl_trans() only ever writes this structure
 * (kernel to user); it is never read from user space.
 */
1659struct floppy_drive_struct32 {
1660 signed char flags;
1661 compat_ulong_t spinup_date;
1662 compat_ulong_t select_date;
1663 compat_ulong_t first_read_date;
1664 short probed_format;
1665 short track;
1666 short maxblock;
1667 short maxtrack;
1668 compat_int_t generation;
1669 compat_int_t keep_data;
1670 compat_int_t fd_ref;
1671 compat_int_t fd_device;
1672 compat_int_t last_checked;
/* 32-bit address slot; fd_ioctl_trans() stores the native dmabuf value here */
1673 compat_caddr_t dmabuf;
1674 compat_int_t bufblocks;
1675};
1676
/*
 * 32-bit layout of the result returned for FDGETFDCSTAT32.
 * fd_ioctl_trans() fills the named scalar members with __put_user() and
 * copies sizeof(int) bytes located directly after 'address' separately.
 */
1677struct floppy_fdc_state32 {
1678 compat_int_t spec1;
1679 compat_int_t spec2;
1680 compat_int_t dtr;
1681 unsigned char version;
1682 unsigned char dor;
1683 compat_ulong_t address;
/* bitfields: not individually addressable, so they cannot be passed to __put_user() */
1684 unsigned int rawcmd:2;
1685 unsigned int reset:1;
1686 unsigned int need_configure:1;
1687 unsigned int perp_mode:2;
1688 unsigned int has_fifo:1;
1689 unsigned int driver_version;
1690 unsigned char track[4];
1691};
1692
/*
 * 32-bit layout of the result returned for FDWERRORGET32; written field by
 * field by fd_ioctl_trans() from the native struct floppy_write_errors.
 */
1693struct floppy_write_errors32 {
1694 unsigned int write_errors;
1695 compat_ulong_t first_error_sector;
1696 compat_int_t first_error_generation;
1697 compat_ulong_t last_error_sector;
1698 compat_int_t last_error_generation;
1699 compat_uint_t badness;
1700};
1701
/*
 * ioctl command numbers as a 32-bit caller encodes them: each is built with
 * _IOW/_IOR over the corresponding *32 structure layout. fd_ioctl_trans_table
 * pairs every one of these with its native FD* command.
 */
1702#define FDSETPRM32 _IOW(2, 0x42, struct floppy_struct32)
1703#define FDDEFPRM32 _IOW(2, 0x43, struct floppy_struct32)
1704#define FDGETPRM32 _IOR(2, 0x04, struct floppy_struct32)
1705#define FDSETDRVPRM32 _IOW(2, 0x90, struct floppy_drive_params32)
1706#define FDGETDRVPRM32 _IOR(2, 0x11, struct floppy_drive_params32)
1707#define FDGETDRVSTAT32 _IOR(2, 0x12, struct floppy_drive_struct32)
1708#define FDPOLLDRVSTAT32 _IOR(2, 0x13, struct floppy_drive_struct32)
1709#define FDGETFDCSTAT32 _IOR(2, 0x15, struct floppy_fdc_state32)
1710#define FDWERRORGET32 _IOR(2, 0x17, struct floppy_write_errors32)
1711
/*
 * Lookup table from the 32-bit floppy command number (cmd32) to the native
 * command (cmd). fd_ioctl_trans() scans it linearly and rejects any command
 * that has no entry.
 */
1712static struct {
1713 unsigned int cmd32;
1714 unsigned int cmd;
1715} fd_ioctl_trans_table[] = {
1716 { FDSETPRM32, FDSETPRM },
1717 { FDDEFPRM32, FDDEFPRM },
1718 { FDGETPRM32, FDGETPRM },
1719 { FDSETDRVPRM32, FDSETDRVPRM },
1720 { FDGETDRVPRM32, FDGETDRVPRM },
1721 { FDGETDRVSTAT32, FDGETDRVSTAT },
1722 { FDPOLLDRVSTAT32, FDPOLLDRVSTAT },
1723 { FDGETFDCSTAT32, FDGETFDCSTAT },
1724 { FDWERRORGET32, FDWERRORGET }
1725};
1726
/* number of entries in fd_ioctl_trans_table */
1727#define NR_FD_IOCTL_TRANS ARRAY_SIZE(fd_ioctl_trans_table)
1728
/*
 * Compat translation for the floppy ioctls listed in fd_ioctl_trans_table.
 *
 * @fd:  file descriptor the ioctl was issued on
 * @cmd: 32-bit command number (one of the FD*32 values)
 * @arg: 32-bit user address of the matching *32 structure
 *
 * Steps:
 *   1. map cmd to the native command via the table;
 *   2. kmalloc() a native-layout argument and, for the SET/DEF commands,
 *      fill it field by field from the 32-bit user structure;
 *   3. call sys_ioctl() with the kernel buffer under KERNEL_DS;
 *   4. for the GET-style commands, write the result back field by field
 *      into the 32-bit user structure.
 *
 * Returns -EINVAL for a command that is not in the table, -ENOMEM when the
 * allocation fails, -EFAULT when a user copy fails, and otherwise the value
 * returned by sys_ioctl().
 */
1729static int fd_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1730{
1731 mm_segment_t old_fs = get_fs();
1732 void *karg = NULL;
1733 unsigned int kcmd = 0;
1734 int i, err;
1735
/* kcmd stays 0 when cmd has no table entry */
1736 for (i = 0; i < NR_FD_IOCTL_TRANS; i++)
1737 if (cmd == fd_ioctl_trans_table[i].cmd32) {
1738 kcmd = fd_ioctl_trans_table[i].cmd;
1739 break;
1740 }
1741 if (!kcmd)
1742 return -EINVAL;
1743
/* first pass: allocate the native argument and copy user input in */
1744 switch (cmd) {
1745 case FDSETPRM32:
1746 case FDDEFPRM32:
1747 case FDGETPRM32:
1748 {
1749 compat_uptr_t name;
1750 struct floppy_struct32 __user *uf;
1751 struct floppy_struct *f;
1752
1753 uf = compat_ptr(arg);
1754 f = karg = kmalloc(sizeof(struct floppy_struct), GFP_KERNEL);
1755 if (!karg)
1756 return -ENOMEM;
/* the GET variant only needs the buffer; there is nothing to read from user space */
1757 if (cmd == FDGETPRM32)
1758 break;
1759 err = __get_user(f->size, &uf->size);
1760 err |= __get_user(f->sect, &uf->sect);
1761 err |= __get_user(f->head, &uf->head);
1762 err |= __get_user(f->track, &uf->track);
1763 err |= __get_user(f->stretch, &uf->stretch);
1764 err |= __get_user(f->gap, &uf->gap);
1765 err |= __get_user(f->rate, &uf->rate);
1766 err |= __get_user(f->spec1, &uf->spec1);
1767 err |= __get_user(f->fmt_gap, &uf->fmt_gap);
1768 err |= __get_user(name, &uf->name);
1769 f->name = compat_ptr(name);
1770 if (err) {
1771 err = -EFAULT;
1772 goto out;
1773 }
1774 break;
1775 }
1776 case FDSETDRVPRM32:
1777 case FDGETDRVPRM32:
1778 {
1779 struct floppy_drive_params32 __user *uf;
1780 struct floppy_drive_params *f;
1781
1782 uf = compat_ptr(arg);
1783 f = karg = kmalloc(sizeof(struct floppy_drive_params), GFP_KERNEL);
1784 if (!karg)
1785 return -ENOMEM;
/* the GET variant only needs the buffer */
1786 if (cmd == FDGETDRVPRM32)
1787 break;
1788 err = __get_user(f->cmos, &uf->cmos);
1789 err |= __get_user(f->max_dtr, &uf->max_dtr);
1790 err |= __get_user(f->hlt, &uf->hlt);
1791 err |= __get_user(f->hut, &uf->hut);
1792 err |= __get_user(f->srt, &uf->srt);
1793 err |= __get_user(f->spinup, &uf->spinup);
1794 err |= __get_user(f->spindown, &uf->spindown);
1795 err |= __get_user(f->spindown_offset, &uf->spindown_offset);
1796 err |= __get_user(f->select_delay, &uf->select_delay);
1797 err |= __get_user(f->rps, &uf->rps);
1798 err |= __get_user(f->tracks, &uf->tracks);
1799 err |= __get_user(f->timeout, &uf->timeout);
1800 err |= __get_user(f->interleave_sect, &uf->interleave_sect);
1801 err |= __copy_from_user(&f->max_errors, &uf->max_errors, sizeof(f->max_errors));
1802 err |= __get_user(f->flags, &uf->flags);
1803 err |= __get_user(f->read_track, &uf->read_track);
1804 err |= __copy_from_user(f->autodetect, uf->autodetect, sizeof(f->autodetect));
1805 err |= __get_user(f->checkfreq, &uf->checkfreq);
1806 err |= __get_user(f->native_format, &uf->native_format);
1807 if (err) {
1808 err = -EFAULT;
1809 goto out;
1810 }
1811 break;
1812 }
/* the remaining commands are read-only: allocate the result buffer, no copy-in */
1813 case FDGETDRVSTAT32:
1814 case FDPOLLDRVSTAT32:
1815 karg = kmalloc(sizeof(struct floppy_drive_struct), GFP_KERNEL);
1816 if (!karg)
1817 return -ENOMEM;
1818 break;
1819 case FDGETFDCSTAT32:
1820 karg = kmalloc(sizeof(struct floppy_fdc_state), GFP_KERNEL);
1821 if (!karg)
1822 return -ENOMEM;
1823 break;
1824 case FDWERRORGET32:
1825 karg = kmalloc(sizeof(struct floppy_write_errors), GFP_KERNEL);
1826 if (!karg)
1827 return -ENOMEM;
1828 break;
1829 default:
1830 return -EINVAL;
1831 }
/* karg is kmalloc()ed kernel memory, hence the KERNEL_DS bracket around the call */
1832 set_fs (KERNEL_DS);
1833 err = sys_ioctl (fd, kcmd, (unsigned long)karg);
1834 set_fs (old_fs);
1835 if (err)
1836 goto out;
/* second pass: copy results back for the commands that return data */
1837 switch (cmd) {
1838 case FDGETPRM32:
1839 {
1840 struct floppy_struct *f = karg;
1841 struct floppy_struct32 __user *uf = compat_ptr(arg);
1842
1843 err = __put_user(f->size, &uf->size);
1844 err |= __put_user(f->sect, &uf->sect);
1845 err |= __put_user(f->head, &uf->head);
1846 err |= __put_user(f->track, &uf->track);
1847 err |= __put_user(f->stretch, &uf->stretch);
1848 err |= __put_user(f->gap, &uf->gap);
1849 err |= __put_user(f->rate, &uf->rate);
1850 err |= __put_user(f->spec1, &uf->spec1);
1851 err |= __put_user(f->fmt_gap, &uf->fmt_gap);
/* the destination is cast because the member is declared const in the 32-bit struct */
1852 err |= __put_user((u64)f->name, (compat_caddr_t __user *)&uf->name);
1853 break;
1854 }
1855 case FDGETDRVPRM32:
1856 {
1857 struct floppy_drive_params32 __user *uf;
1858 struct floppy_drive_params *f = karg;
1859
1860 uf = compat_ptr(arg);
1861 err = __put_user(f->cmos, &uf->cmos);
1862 err |= __put_user(f->max_dtr, &uf->max_dtr);
1863 err |= __put_user(f->hlt, &uf->hlt);
1864 err |= __put_user(f->hut, &uf->hut);
1865 err |= __put_user(f->srt, &uf->srt);
1866 err |= __put_user(f->spinup, &uf->spinup);
1867 err |= __put_user(f->spindown, &uf->spindown);
1868 err |= __put_user(f->spindown_offset, &uf->spindown_offset);
1869 err |= __put_user(f->select_delay, &uf->select_delay);
1870 err |= __put_user(f->rps, &uf->rps);
1871 err |= __put_user(f->tracks, &uf->tracks);
1872 err |= __put_user(f->timeout, &uf->timeout);
1873 err |= __put_user(f->interleave_sect, &uf->interleave_sect);
1874 err |= __copy_to_user(&uf->max_errors, &f->max_errors, sizeof(f->max_errors));
1875 err |= __put_user(f->flags, &uf->flags);
1876 err |= __put_user(f->read_track, &uf->read_track);
1877 err |= __copy_to_user(uf->autodetect, f->autodetect, sizeof(f->autodetect));
1878 err |= __put_user(f->checkfreq, &uf->checkfreq);
1879 err |= __put_user(f->native_format, &uf->native_format);
1880 break;
1881 }
1882 case FDGETDRVSTAT32:
1883 case FDPOLLDRVSTAT32:
1884 {
1885 struct floppy_drive_struct32 __user *uf;
1886 struct floppy_drive_struct *f = karg;
1887
1888 uf = compat_ptr(arg);
1889 err = __put_user(f->flags, &uf->flags);
1890 err |= __put_user(f->spinup_date, &uf->spinup_date);
1891 err |= __put_user(f->select_date, &uf->select_date);
1892 err |= __put_user(f->first_read_date, &uf->first_read_date);
1893 err |= __put_user(f->probed_format, &uf->probed_format);
1894 err |= __put_user(f->track, &uf->track);
1895 err |= __put_user(f->maxblock, &uf->maxblock);
1896 err |= __put_user(f->maxtrack, &uf->maxtrack);
1897 err |= __put_user(f->generation, &uf->generation);
1898 err |= __put_user(f->keep_data, &uf->keep_data);
1899 err |= __put_user(f->fd_ref, &uf->fd_ref);
1900 err |= __put_user(f->fd_device, &uf->fd_device);
1901 err |= __put_user(f->last_checked, &uf->last_checked);
1902 err |= __put_user((u64)f->dmabuf, &uf->dmabuf);
1903 err |= __put_user((u64)f->bufblocks, &uf->bufblocks);
1904 break;
1905 }
1906 case FDGETFDCSTAT32:
1907 {
1908 struct floppy_fdc_state32 __user *uf;
1909 struct floppy_fdc_state *f = karg;
1910
1911 uf = compat_ptr(arg);
1912 err = __put_user(f->spec1, &uf->spec1);
1913 err |= __put_user(f->spec2, &uf->spec2);
1914 err |= __put_user(f->dtr, &uf->dtr);
1915 err |= __put_user(f->version, &uf->version);
1916 err |= __put_user(f->dor, &uf->dor);
1917 err |= __put_user(f->address, &uf->address);
/*
 * Copies the sizeof(int) bytes that sit right after 'address' in both
 * layouts. NOTE(review): presumably this is the word holding the bitfield
 * members (rawcmd .. has_fifo) - confirm against the native structure.
 */
1918 err |= __copy_to_user((char __user *)&uf->address + sizeof(uf->address),
1919 (char *)&f->address + sizeof(f->address), sizeof(int));
1920 err |= __put_user(f->driver_version, &uf->driver_version);
1921 err |= __copy_to_user(uf->track, f->track, sizeof(f->track));
1922 break;
1923 }
1924 case FDWERRORGET32:
1925 {
1926 struct floppy_write_errors32 __user *uf;
1927 struct floppy_write_errors *f = karg;
1928
1929 uf = compat_ptr(arg);
1930 err = __put_user(f->write_errors, &uf->write_errors);
1931 err |= __put_user(f->first_error_sector, &uf->first_error_sector);
1932 err |= __put_user(f->first_error_generation, &uf->first_error_generation);
1933 err |= __put_user(f->last_error_sector, &uf->last_error_sector);
1934 err |= __put_user(f->last_error_generation, &uf->last_error_generation);
1935 err |= __put_user(f->badness, &uf->badness);
1936 break;
1937 }
/* SET/DEF commands return no data; err is still 0 from sys_ioctl() here */
1938 default:
1939 break;
1940 }
/* collapse any accumulated copy-out failure into a single -EFAULT */
1941 if (err)
1942 err = -EFAULT;
1943
1944out:
1945 kfree(karg);
1946 return err;
1947}
1948#endif
1949
1950struct mtd_oob_buf32 { 1408struct mtd_oob_buf32 {
1951 u_int32_t start; 1409 u_int32_t start;
1952 u_int32_t length; 1410 u_int32_t length;
@@ -2506,60 +1964,6 @@ COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
2506/* 0x00 */ 1964/* 0x00 */
2507COMPATIBLE_IOCTL(FIBMAP) 1965COMPATIBLE_IOCTL(FIBMAP)
2508COMPATIBLE_IOCTL(FIGETBSZ) 1966COMPATIBLE_IOCTL(FIGETBSZ)
2509/* 0x03 -- HD/IDE ioctl's used by hdparm and friends.
2510 * Some need translations, these do not.
2511 */
2512COMPATIBLE_IOCTL(HDIO_GET_IDENTITY)
2513COMPATIBLE_IOCTL(HDIO_DRIVE_TASK)
2514COMPATIBLE_IOCTL(HDIO_DRIVE_CMD)
2515ULONG_IOCTL(HDIO_SET_MULTCOUNT)
2516ULONG_IOCTL(HDIO_SET_UNMASKINTR)
2517ULONG_IOCTL(HDIO_SET_KEEPSETTINGS)
2518ULONG_IOCTL(HDIO_SET_32BIT)
2519ULONG_IOCTL(HDIO_SET_NOWERR)
2520ULONG_IOCTL(HDIO_SET_DMA)
2521ULONG_IOCTL(HDIO_SET_PIO_MODE)
2522ULONG_IOCTL(HDIO_SET_NICE)
2523ULONG_IOCTL(HDIO_SET_WCACHE)
2524ULONG_IOCTL(HDIO_SET_ACOUSTIC)
2525ULONG_IOCTL(HDIO_SET_BUSSTATE)
2526ULONG_IOCTL(HDIO_SET_ADDRESS)
2527COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
2528/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */
2529COMPATIBLE_IOCTL(0x330)
2530/* 0x02 -- Floppy ioctls */
2531COMPATIBLE_IOCTL(FDMSGON)
2532COMPATIBLE_IOCTL(FDMSGOFF)
2533COMPATIBLE_IOCTL(FDSETEMSGTRESH)
2534COMPATIBLE_IOCTL(FDFLUSH)
2535COMPATIBLE_IOCTL(FDWERRORCLR)
2536COMPATIBLE_IOCTL(FDSETMAXERRS)
2537COMPATIBLE_IOCTL(FDGETMAXERRS)
2538COMPATIBLE_IOCTL(FDGETDRVTYP)
2539COMPATIBLE_IOCTL(FDEJECT)
2540COMPATIBLE_IOCTL(FDCLRPRM)
2541COMPATIBLE_IOCTL(FDFMTBEG)
2542COMPATIBLE_IOCTL(FDFMTEND)
2543COMPATIBLE_IOCTL(FDRESET)
2544COMPATIBLE_IOCTL(FDTWADDLE)
2545COMPATIBLE_IOCTL(FDFMTTRK)
2546COMPATIBLE_IOCTL(FDRAWCMD)
2547/* 0x12 */
2548#ifdef CONFIG_BLOCK
2549COMPATIBLE_IOCTL(BLKRASET)
2550COMPATIBLE_IOCTL(BLKROSET)
2551COMPATIBLE_IOCTL(BLKROGET)
2552COMPATIBLE_IOCTL(BLKRRPART)
2553COMPATIBLE_IOCTL(BLKFLSBUF)
2554COMPATIBLE_IOCTL(BLKSECTSET)
2555COMPATIBLE_IOCTL(BLKSSZGET)
2556COMPATIBLE_IOCTL(BLKTRACESTART)
2557COMPATIBLE_IOCTL(BLKTRACESTOP)
2558COMPATIBLE_IOCTL(BLKTRACESETUP)
2559COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
2560ULONG_IOCTL(BLKRASET)
2561ULONG_IOCTL(BLKFRASET)
2562#endif
2563/* RAID */ 1967/* RAID */
2564COMPATIBLE_IOCTL(RAID_VERSION) 1968COMPATIBLE_IOCTL(RAID_VERSION)
2565COMPATIBLE_IOCTL(GET_ARRAY_INFO) 1969COMPATIBLE_IOCTL(GET_ARRAY_INFO)
@@ -2807,50 +2211,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
2807COMPATIBLE_IOCTL(PPGETPHASE) 2211COMPATIBLE_IOCTL(PPGETPHASE)
2808COMPATIBLE_IOCTL(PPGETFLAGS) 2212COMPATIBLE_IOCTL(PPGETFLAGS)
2809COMPATIBLE_IOCTL(PPSETFLAGS) 2213COMPATIBLE_IOCTL(PPSETFLAGS)
2810/* CDROM stuff */
2811COMPATIBLE_IOCTL(CDROMPAUSE)
2812COMPATIBLE_IOCTL(CDROMRESUME)
2813COMPATIBLE_IOCTL(CDROMPLAYMSF)
2814COMPATIBLE_IOCTL(CDROMPLAYTRKIND)
2815COMPATIBLE_IOCTL(CDROMREADTOCHDR)
2816COMPATIBLE_IOCTL(CDROMREADTOCENTRY)
2817COMPATIBLE_IOCTL(CDROMSTOP)
2818COMPATIBLE_IOCTL(CDROMSTART)
2819COMPATIBLE_IOCTL(CDROMEJECT)
2820COMPATIBLE_IOCTL(CDROMVOLCTRL)
2821COMPATIBLE_IOCTL(CDROMSUBCHNL)
2822ULONG_IOCTL(CDROMEJECT_SW)
2823COMPATIBLE_IOCTL(CDROMMULTISESSION)
2824COMPATIBLE_IOCTL(CDROM_GET_MCN)
2825COMPATIBLE_IOCTL(CDROMRESET)
2826COMPATIBLE_IOCTL(CDROMVOLREAD)
2827COMPATIBLE_IOCTL(CDROMSEEK)
2828COMPATIBLE_IOCTL(CDROMPLAYBLK)
2829COMPATIBLE_IOCTL(CDROMCLOSETRAY)
2830ULONG_IOCTL(CDROM_SET_OPTIONS)
2831ULONG_IOCTL(CDROM_CLEAR_OPTIONS)
2832ULONG_IOCTL(CDROM_SELECT_SPEED)
2833ULONG_IOCTL(CDROM_SELECT_DISC)
2834ULONG_IOCTL(CDROM_MEDIA_CHANGED)
2835ULONG_IOCTL(CDROM_DRIVE_STATUS)
2836COMPATIBLE_IOCTL(CDROM_DISC_STATUS)
2837COMPATIBLE_IOCTL(CDROM_CHANGER_NSLOTS)
2838ULONG_IOCTL(CDROM_LOCKDOOR)
2839ULONG_IOCTL(CDROM_DEBUG)
2840COMPATIBLE_IOCTL(CDROM_GET_CAPABILITY)
2841/* Ignore cdrom.h about these next 5 ioctls, they absolutely do
2842 * not take a struct cdrom_read, instead they take a struct cdrom_msf
2843 * which is compatible.
2844 */
2845COMPATIBLE_IOCTL(CDROMREADMODE2)
2846COMPATIBLE_IOCTL(CDROMREADMODE1)
2847COMPATIBLE_IOCTL(CDROMREADRAW)
2848COMPATIBLE_IOCTL(CDROMREADCOOKED)
2849COMPATIBLE_IOCTL(CDROMREADALL)
2850/* DVD ioctls */
2851COMPATIBLE_IOCTL(DVD_READ_STRUCT)
2852COMPATIBLE_IOCTL(DVD_WRITE_STRUCT)
2853COMPATIBLE_IOCTL(DVD_AUTH)
2854/* pktcdvd */ 2214/* pktcdvd */
2855COMPATIBLE_IOCTL(PACKET_CTRL_CMD) 2215COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
2856/* Big A */ 2216/* Big A */
@@ -3336,33 +2696,6 @@ HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
3336HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns) 2696HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
3337#endif 2697#endif
3338#ifdef CONFIG_BLOCK 2698#ifdef CONFIG_BLOCK
3339HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
3340HANDLE_IOCTL(BLKRAGET, w_long)
3341HANDLE_IOCTL(BLKGETSIZE, w_long)
3342HANDLE_IOCTL(0x1260, broken_blkgetsize)
3343HANDLE_IOCTL(BLKFRAGET, w_long)
3344HANDLE_IOCTL(BLKSECTGET, w_long)
3345HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
3346HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans)
3347HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans)
3348HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans)
3349HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans)
3350HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans)
3351HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans)
3352HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans)
3353HANDLE_IOCTL(HDIO_GET_WCACHE, hdio_ioctl_trans)
3354HANDLE_IOCTL(HDIO_GET_ACOUSTIC, hdio_ioctl_trans)
3355HANDLE_IOCTL(HDIO_GET_ADDRESS, hdio_ioctl_trans)
3356HANDLE_IOCTL(HDIO_GET_BUSSTATE, hdio_ioctl_trans)
3357HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans)
3358HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans)
3359HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans)
3360HANDLE_IOCTL(FDSETDRVPRM32, fd_ioctl_trans)
3361HANDLE_IOCTL(FDGETDRVPRM32, fd_ioctl_trans)
3362HANDLE_IOCTL(FDGETDRVSTAT32, fd_ioctl_trans)
3363HANDLE_IOCTL(FDPOLLDRVSTAT32, fd_ioctl_trans)
3364HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
3365HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
3366HANDLE_IOCTL(SG_IO,sg_ioctl_trans) 2699HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
3367HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans) 2700HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
3368#endif 2701#endif
@@ -3373,8 +2706,6 @@ HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
3373#ifdef CONFIG_BLOCK 2706#ifdef CONFIG_BLOCK
3374HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans) 2707HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
3375HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans) 2708HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
3376HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
3377HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
3378#endif 2709#endif
3379#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) 2710#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
3380HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout) 2711HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
@@ -3415,9 +2746,6 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
3415HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) 2746HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
3416/* block stuff */ 2747/* block stuff */
3417#ifdef CONFIG_BLOCK 2748#ifdef CONFIG_BLOCK
3418HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
3419HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
3420HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
3421/* Raw devices */ 2749/* Raw devices */
3422HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) 2750HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
3423HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) 2751HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 2e124e0075c5..a9b99c0dc2e7 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -221,6 +221,42 @@ struct dentry *debugfs_create_u64(const char *name, mode_t mode,
221} 221}
222EXPORT_SYMBOL_GPL(debugfs_create_u64); 222EXPORT_SYMBOL_GPL(debugfs_create_u64);
223 223
224DEFINE_SIMPLE_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
225
226DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n");
227
228DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
229
230/**
231 * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value
232 * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
233 * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
234 *
235 * These functions are exactly the same as the above functions (but use hex
236 * output for the decimal challenged). For details, look at the above unsigned
237 * decimal functions.
238 */
/*
 * Creates a debugfs file for the u8 at @value by calling
 * debugfs_create_file() with fops_x8, whose format string is "0x%02llx\n".
 * @name, @mode and @parent are passed through unchanged; the return value
 * is whatever debugfs_create_file() returns.
 */
239struct dentry *debugfs_create_x8(const char *name, mode_t mode,
240 struct dentry *parent, u8 *value)
241{
242 return debugfs_create_file(name, mode, parent, value, &fops_x8);
243}
244EXPORT_SYMBOL_GPL(debugfs_create_x8);
245
/*
 * Creates a debugfs file for the u16 at @value by calling
 * debugfs_create_file() with fops_x16, whose format string is "0x%04llx\n".
 * @name, @mode and @parent are passed through unchanged; the return value
 * is whatever debugfs_create_file() returns.
 */
246struct dentry *debugfs_create_x16(const char *name, mode_t mode,
247 struct dentry *parent, u16 *value)
248{
249 return debugfs_create_file(name, mode, parent, value, &fops_x16);
250}
251EXPORT_SYMBOL_GPL(debugfs_create_x16);
252
/*
 * Creates a debugfs file for the u32 at @value by calling
 * debugfs_create_file() with fops_x32, whose format string is "0x%08llx\n".
 * @name, @mode and @parent are passed through unchanged; the return value
 * is whatever debugfs_create_file() returns.
 */
253struct dentry *debugfs_create_x32(const char *name, mode_t mode,
254 struct dentry *parent, u32 *value)
255{
256 return debugfs_create_file(name, mode, parent, value, &fops_x32);
257}
258EXPORT_SYMBOL_GPL(debugfs_create_x32);
259
224static ssize_t read_file_bool(struct file *file, char __user *user_buf, 260static ssize_t read_file_bool(struct file *file, char __user *user_buf,
225 size_t count, loff_t *ppos) 261 size_t count, loff_t *ppos)
226{ 262{
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 901dc55e9f54..b5928a7b6a5a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -264,15 +264,12 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
264/* 264/*
265 * Asynchronous IO callback. 265 * Asynchronous IO callback.
266 */ 266 */
267static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error) 267static void dio_bio_end_aio(struct bio *bio, int error)
268{ 268{
269 struct dio *dio = bio->bi_private; 269 struct dio *dio = bio->bi_private;
270 unsigned long remaining; 270 unsigned long remaining;
271 unsigned long flags; 271 unsigned long flags;
272 272
273 if (bio->bi_size)
274 return 1;
275
276 /* cleanup the bio */ 273 /* cleanup the bio */
277 dio_bio_complete(dio, bio); 274 dio_bio_complete(dio, bio);
278 275
@@ -287,8 +284,6 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
287 aio_complete(dio->iocb, ret, 0); 284 aio_complete(dio->iocb, ret, 0);
288 kfree(dio); 285 kfree(dio);
289 } 286 }
290
291 return 0;
292} 287}
293 288
294/* 289/*
@@ -298,21 +293,17 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
298 * During I/O bi_private points at the dio. After I/O, bi_private is used to 293 * During I/O bi_private points at the dio. After I/O, bi_private is used to
299 * implement a singly-linked list of completed BIOs, at dio->bio_list. 294 * implement a singly-linked list of completed BIOs, at dio->bio_list.
300 */ 295 */
301static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error) 296static void dio_bio_end_io(struct bio *bio, int error)
302{ 297{
303 struct dio *dio = bio->bi_private; 298 struct dio *dio = bio->bi_private;
304 unsigned long flags; 299 unsigned long flags;
305 300
306 if (bio->bi_size)
307 return 1;
308
/*
 * Under dio->bio_lock: push this bio onto the front of dio->bio_list,
 * reusing bi_private as the link, then drop one reference and wake
 * dio->waiter (if set) when the count reaches 1.
 */
309 spin_lock_irqsave(&dio->bio_lock, flags); 301 spin_lock_irqsave(&dio->bio_lock, flags);
310 bio->bi_private = dio->bio_list; 302 bio->bi_private = dio->bio_list;
311 dio->bio_list = bio; 303 dio->bio_list = bio;
312 if (--dio->refcount == 1 && dio->waiter) 304 if (--dio->refcount == 1 && dio->waiter)
313 wake_up_process(dio->waiter); 305 wake_up_process(dio->waiter);
314 spin_unlock_irqrestore(&dio->bio_lock, flags); 306 spin_unlock_irqrestore(&dio->bio_lock, flags);
315 return 0;
316} 307}
317 308
318static int 309static int
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 74901e981e10..d2fc2384c3be 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -491,6 +491,7 @@ struct dlm_ls {
491 uint64_t ls_recover_seq; 491 uint64_t ls_recover_seq;
492 struct dlm_recover *ls_recover_args; 492 struct dlm_recover *ls_recover_args;
493 struct rw_semaphore ls_in_recovery; /* block local requests */ 493 struct rw_semaphore ls_in_recovery; /* block local requests */
494 struct rw_semaphore ls_recv_active; /* block dlm_recv */
494 struct list_head ls_requestqueue;/* queue remote requests */ 495 struct list_head ls_requestqueue;/* queue remote requests */
495 struct mutex ls_requestqueue_mutex; 496 struct mutex ls_requestqueue_mutex;
496 char *ls_recover_buf; 497 char *ls_recover_buf;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 2082daf083d8..3915b8e14146 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3638,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
3638 dlm_put_lkb(lkb); 3638 dlm_put_lkb(lkb);
3639} 3639}
3640 3640
3641int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) 3641static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
3642{ 3642{
3643 struct dlm_message *ms = (struct dlm_message *) hd;
3644 struct dlm_ls *ls;
3645 int error = 0;
3646
3647 if (!recovery)
3648 dlm_message_in(ms);
3649
3650 ls = dlm_find_lockspace_global(hd->h_lockspace);
3651 if (!ls) {
3652 log_print("drop message %d from %d for unknown lockspace %d",
3653 ms->m_type, nodeid, hd->h_lockspace);
3654 return -EINVAL;
3655 }
3656
3657 /* recovery may have just ended leaving a bunch of backed-up requests
3658 in the requestqueue; wait while dlm_recoverd clears them */
3659
3660 if (!recovery)
3661 dlm_wait_requestqueue(ls);
3662
3663 /* recovery may have just started while there were a bunch of
3664 in-flight requests -- save them in requestqueue to be processed
3665 after recovery. we can't let dlm_recvd block on the recovery
3666 lock. if dlm_recoverd is calling this function to clear the
3667 requestqueue, it needs to be interrupted (-EINTR) if another
3668 recovery operation is starting. */
3669
3670 while (1) {
3671 if (dlm_locking_stopped(ls)) {
3672 if (recovery) {
3673 error = -EINTR;
3674 goto out;
3675 }
3676 error = dlm_add_requestqueue(ls, nodeid, hd);
3677 if (error == -EAGAIN)
3678 continue;
3679 else {
3680 error = -EINTR;
3681 goto out;
3682 }
3683 }
3684
3685 if (dlm_lock_recovery_try(ls))
3686 break;
3687 schedule();
3688 }
3689
3690 switch (ms->m_type) { 3643 switch (ms->m_type) {
3691 3644
3692 /* messages sent to a master node */ 3645 /* messages sent to a master node */
@@ -3761,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
3761 log_error(ls, "unknown message type %d", ms->m_type); 3714 log_error(ls, "unknown message type %d", ms->m_type);
3762 } 3715 }
3763 3716
3764 dlm_unlock_recovery(ls);
3765 out:
3766 dlm_put_lockspace(ls);
3767 dlm_astd_wake(); 3717 dlm_astd_wake();
3768 return error;
3769} 3718}
3770 3719
3720/* If the lockspace is in recovery mode (locking stopped), then normal
3721 messages are saved on the requestqueue for processing after recovery is
3722 done. When not in recovery mode, we wait for dlm_recoverd to drain saved
3723 messages off the requestqueue before we process new ones. This occurs right
3724 after recovery completes when we transition from saving all messages on
3725 requestqueue, to processing all the saved messages, to processing new
3726 messages as they arrive. */
3771 3727
3772/* 3728static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
3773 * Recovery related 3729 int nodeid)
3774 */ 3730{
/* locking stopped: park the message on the requestqueue instead of handling it */
3731 if (dlm_locking_stopped(ls)) {
3732 dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms);
3733 } else {
/* wait on the requestqueue first, then dispatch the new message */
3734 dlm_wait_requestqueue(ls);
3735 _receive_message(ls, ms);
3736 }
3737}
3738
3739/* This is called by dlm_recoverd to process messages that were saved on
3740 the requestqueue. */
3741
3742void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms)
3743{
/* dispatches directly: no dlm_locking_stopped() check and no requestqueue wait here */
3744 _receive_message(ls, ms);
3745}
3746
3747/* This is called by the midcomms layer when something is received for
3748 the lockspace. It could be either a MSG (normal message sent as part of
3749 standard locking activity) or an RCOM (recovery message sent as part of
3750 lockspace recovery). */
3751
3752void dlm_receive_buffer(struct dlm_header *hd, int nodeid)
3753{
/* the same buffer is viewed as either message type; h_cmd selects which view is valid */
3754 struct dlm_message *ms = (struct dlm_message *) hd;
3755 struct dlm_rcom *rc = (struct dlm_rcom *) hd;
3756 struct dlm_ls *ls;
3757 int type = 0;
3758
/*
 * dlm_message_in()/dlm_rcom_in() run before any header field other than
 * h_cmd is read. NOTE(review): presumably these convert the buffer from
 * wire to host byte order - confirm in their definitions.
 */
3759 switch (hd->h_cmd) {
3760 case DLM_MSG:
3761 dlm_message_in(ms);
3762 type = ms->m_type;
3763 break;
3764 case DLM_RCOM:
3765 dlm_rcom_in(rc);
3766 type = rc->rc_type;
3767 break;
3768 default:
/* NOTE(review): nodeid is an int but is printed with %u here (the messages below use %d) */
3769 log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
3770 return;
3771 }
3772
/* drop anything whose header nodeid disagrees with the nodeid argument */
3773 if (hd->h_nodeid != nodeid) {
3774 log_print("invalid h_nodeid %d from %d lockspace %x",
3775 hd->h_nodeid, nodeid, hd->h_lockspace);
3776 return;
3777 }
3778
3779 ls = dlm_find_lockspace_global(hd->h_lockspace);
3780 if (!ls) {
3781 log_print("invalid h_lockspace %x from %d cmd %d type %d",
3782 hd->h_lockspace, nodeid, hd->h_cmd, type);
3783
/* unknown lockspace: only an RCOM status message gets a reply */
3784 if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
3785 dlm_send_ls_not_ready(nodeid, rc);
3786 return;
3787 }
3788
3789 /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
3790 be inactive (in this ls) before transitioning to recovery mode */
3791
3792 down_read(&ls->ls_recv_active);
3793 if (hd->h_cmd == DLM_MSG)
3794 dlm_receive_message(ls, ms, nodeid);
3795 else
3796 dlm_receive_rcom(ls, rc, nodeid);
3797 up_read(&ls->ls_recv_active);
3798
/* NOTE(review): presumably releases the reference taken by dlm_find_lockspace_global() - confirm */
3799 dlm_put_lockspace(ls);
3800}
3775 3801
3776static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 3802static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
3777{ 3803{
@@ -4429,7 +4455,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4429 4455
4430 if (lvb_in && ua->lksb.sb_lvbptr) 4456 if (lvb_in && ua->lksb.sb_lvbptr)
4431 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); 4457 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
4432 ua->castparam = ua_tmp->castparam; 4458 if (ua_tmp->castparam)
4459 ua->castparam = ua_tmp->castparam;
4433 ua->user_lksb = ua_tmp->user_lksb; 4460 ua->user_lksb = ua_tmp->user_lksb;
4434 4461
4435 error = set_unlock_args(flags, ua, &args); 4462 error = set_unlock_args(flags, ua, &args);
@@ -4474,7 +4501,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4474 goto out; 4501 goto out;
4475 4502
4476 ua = (struct dlm_user_args *)lkb->lkb_astparam; 4503 ua = (struct dlm_user_args *)lkb->lkb_astparam;
4477 ua->castparam = ua_tmp->castparam; 4504 if (ua_tmp->castparam)
4505 ua->castparam = ua_tmp->castparam;
4478 ua->user_lksb = ua_tmp->user_lksb; 4506 ua->user_lksb = ua_tmp->user_lksb;
4479 4507
4480 error = set_unlock_args(flags, ua, &args); 4508 error = set_unlock_args(flags, ua, &args);
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 1720313c22df..ada04680a1e5 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -16,7 +16,8 @@
16void dlm_print_rsb(struct dlm_rsb *r); 16void dlm_print_rsb(struct dlm_rsb *r);
17void dlm_dump_rsb(struct dlm_rsb *r); 17void dlm_dump_rsb(struct dlm_rsb *r);
18void dlm_print_lkb(struct dlm_lkb *lkb); 18void dlm_print_lkb(struct dlm_lkb *lkb);
19int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); 19void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms);
20void dlm_receive_buffer(struct dlm_header *hd, int nodeid);
20int dlm_modes_compat(int mode1, int mode2); 21int dlm_modes_compat(int mode1, int mode2);
21int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, 22int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
22 unsigned int flags, struct dlm_rsb **r_ret); 23 unsigned int flags, struct dlm_rsb **r_ret);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 1dc72105ab12..6353a8384520 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -167,7 +167,6 @@ static struct kobj_type dlm_ktype = {
167}; 167};
168 168
169static struct kset dlm_kset = { 169static struct kset dlm_kset = {
170 .kobj = {.name = "dlm",},
171 .ktype = &dlm_ktype, 170 .ktype = &dlm_ktype,
172}; 171};
173 172
@@ -228,6 +227,7 @@ int dlm_lockspace_init(void)
228 INIT_LIST_HEAD(&lslist); 227 INIT_LIST_HEAD(&lslist);
229 spin_lock_init(&lslist_lock); 228 spin_lock_init(&lslist_lock);
230 229
230 kobject_set_name(&dlm_kset.kobj, "dlm");
231 kobj_set_kset_s(&dlm_kset, kernel_subsys); 231 kobj_set_kset_s(&dlm_kset, kernel_subsys);
232 error = kset_register(&dlm_kset); 232 error = kset_register(&dlm_kset);
233 if (error) 233 if (error)
@@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
519 ls->ls_recover_seq = 0; 519 ls->ls_recover_seq = 0;
520 ls->ls_recover_args = NULL; 520 ls->ls_recover_args = NULL;
521 init_rwsem(&ls->ls_in_recovery); 521 init_rwsem(&ls->ls_in_recovery);
522 init_rwsem(&ls->ls_recv_active);
522 INIT_LIST_HEAD(&ls->ls_requestqueue); 523 INIT_LIST_HEAD(&ls->ls_requestqueue);
523 mutex_init(&ls->ls_requestqueue_mutex); 524 mutex_init(&ls->ls_requestqueue_mutex);
524 mutex_init(&ls->ls_clear_proc_locks); 525 mutex_init(&ls->ls_clear_proc_locks);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9e9d2e82f40f..58bf3f5cdbe2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -334,18 +334,8 @@ static void close_connection(struct connection *con, bool and_other)
334 con->rx_page = NULL; 334 con->rx_page = NULL;
335 } 335 }
336 336
337 /* If we are an 'othercon' then NULL the pointer to us 337 con->retries = 0;
338 from the parent and tidy ourself up */ 338 mutex_unlock(&con->sock_mutex);
339 if (test_bit(CF_IS_OTHERCON, &con->flags)) {
340 struct connection *parent = __nodeid2con(con->nodeid, 0);
341 parent->othercon = NULL;
342 kmem_cache_free(con_cache, con);
343 }
344 else {
345 /* Parent connections get reused */
346 con->retries = 0;
347 mutex_unlock(&con->sock_mutex);
348 }
349} 339}
350 340
351/* We only send shutdown messages to nodes that are not part of the cluster */ 341/* We only send shutdown messages to nodes that are not part of the cluster */
@@ -731,6 +721,8 @@ static int tcp_accept_from_sock(struct connection *con)
731 INIT_WORK(&othercon->swork, process_send_sockets); 721 INIT_WORK(&othercon->swork, process_send_sockets);
732 INIT_WORK(&othercon->rwork, process_recv_sockets); 722 INIT_WORK(&othercon->rwork, process_recv_sockets);
733 set_bit(CF_IS_OTHERCON, &othercon->flags); 723 set_bit(CF_IS_OTHERCON, &othercon->flags);
724 }
725 if (!othercon->sock) {
734 newcon->othercon = othercon; 726 newcon->othercon = othercon;
735 othercon->sock = newsock; 727 othercon->sock = newsock;
736 newsock->sk->sk_user_data = othercon; 728 newsock->sk->sk_user_data = othercon;
@@ -1272,14 +1264,15 @@ static void send_to_sock(struct connection *con)
1272 if (len) { 1264 if (len) {
1273 ret = sendpage(con->sock, e->page, offset, len, 1265 ret = sendpage(con->sock, e->page, offset, len,
1274 msg_flags); 1266 msg_flags);
1275 if (ret == -EAGAIN || ret == 0) 1267 if (ret == -EAGAIN || ret == 0) {
1268 cond_resched();
1276 goto out; 1269 goto out;
1270 }
1277 if (ret <= 0) 1271 if (ret <= 0)
1278 goto send_error; 1272 goto send_error;
1279 } else { 1273 }
1280 /* Don't starve people filling buffers */ 1274 /* Don't starve people filling buffers */
1281 cond_resched(); 1275 cond_resched();
1282 }
1283 1276
1284 spin_lock(&con->writequeue_lock); 1277 spin_lock(&con->writequeue_lock);
1285 e->offset += ret; 1278 e->offset += ret;
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index d09977528f69..e9cdcab306e2 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -18,10 +18,6 @@
18#include "rcom.h" 18#include "rcom.h"
19#include "config.h" 19#include "config.h"
20 20
21/*
22 * Following called by dlm_recoverd thread
23 */
24
25static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) 21static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
26{ 22{
27 struct dlm_member *memb = NULL; 23 struct dlm_member *memb = NULL;
@@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
250 return error; 246 return error;
251} 247}
252 248
253/* 249/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
254 * Following called from lockspace.c 250 dlm_ls_start() is called on any of them to start the new recovery. */
255 */
256 251
257int dlm_ls_stop(struct dlm_ls *ls) 252int dlm_ls_stop(struct dlm_ls *ls)
258{ 253{
259 int new; 254 int new;
260 255
261 /* 256 /*
262 * A stop cancels any recovery that's in progress (see RECOVERY_STOP, 257 * Prevent dlm_recv from being in the middle of something when we do
263 * dlm_recovery_stopped()) and prevents any new locks from being 258 * the stop. This includes ensuring dlm_recv isn't processing a
264 * processed (see RUNNING, dlm_locking_stopped()). 259 * recovery message (rcom), while dlm_recoverd is aborting and
260 * resetting things from an in-progress recovery. i.e. we want
261 * dlm_recoverd to abort its recovery without worrying about dlm_recv
262 * processing an rcom at the same time. Stopping dlm_recv also makes
263 * it easy for dlm_receive_message() to check locking stopped and add a
264 * message to the requestqueue without races.
265 */
266
267 down_write(&ls->ls_recv_active);
268
269 /*
270 * Abort any recovery that's in progress (see RECOVERY_STOP,
271 * dlm_recovery_stopped()) and tell any other threads running in the
272 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
265 */ 273 */
266 274
267 spin_lock(&ls->ls_recover_lock); 275 spin_lock(&ls->ls_recover_lock);
@@ -271,8 +279,14 @@ int dlm_ls_stop(struct dlm_ls *ls)
271 spin_unlock(&ls->ls_recover_lock); 279 spin_unlock(&ls->ls_recover_lock);
272 280
273 /* 281 /*
282 * Let dlm_recv run again, now any normal messages will be saved on the
283 * requestqueue for later.
284 */
285
286 up_write(&ls->ls_recv_active);
287
288 /*
274 * This in_recovery lock does two things: 289 * This in_recovery lock does two things:
275 *
276 * 1) Keeps this function from returning until all threads are out 290 * 1) Keeps this function from returning until all threads are out
277 * of locking routines and locking is truly stopped. 291 * of locking routines and locking is truly stopped.
278 * 2) Keeps any new requests from being processed until it's unlocked 292 * 2) Keeps any new requests from being processed until it's unlocked
@@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls)
284 298
285 /* 299 /*
286 * The recoverd suspend/resume makes sure that dlm_recoverd (if 300 * The recoverd suspend/resume makes sure that dlm_recoverd (if
287 * running) has noticed the clearing of RUNNING above and quit 301 * running) has noticed RECOVERY_STOP above and quit processing the
288 * processing the previous recovery. This will be true for all nodes 302 * previous recovery.
289 * before any nodes start the new recovery.
290 */ 303 */
291 304
292 dlm_recoverd_suspend(ls); 305 dlm_recoverd_suspend(ls);
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index a5126e0c68a6..f8c69dda16a0 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,6 @@
27#include "dlm_internal.h" 27#include "dlm_internal.h"
28#include "lowcomms.h" 28#include "lowcomms.h"
29#include "config.h" 29#include "config.h"
30#include "rcom.h"
31#include "lock.h" 30#include "lock.h"
32#include "midcomms.h" 31#include "midcomms.h"
33 32
@@ -117,19 +116,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
117 offset &= (limit - 1); 116 offset &= (limit - 1);
118 len -= msglen; 117 len -= msglen;
119 118
120 switch (msg->h_cmd) { 119 dlm_receive_buffer(msg, nodeid);
121 case DLM_MSG:
122 dlm_receive_message(msg, nodeid, 0);
123 break;
124
125 case DLM_RCOM:
126 dlm_receive_rcom(msg, nodeid);
127 break;
128
129 default:
130 log_print("unknown msg type %x from %u: %u %u %u %u",
131 msg->h_cmd, nodeid, msglen, len, offset, ret);
132 }
133 } 120 }
134 121
135 if (msg != (struct dlm_header *) __tmp) 122 if (msg != (struct dlm_header *) __tmp)
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 188b91c027e4..ae2fd97fa4ad 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
386 dlm_recover_process_copy(ls, rc_in); 386 dlm_recover_process_copy(ls, rc_in);
387} 387}
388 388
389static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) 389/* If the lockspace doesn't exist then still send a status message
390 back; it's possible that it just doesn't have its global_id yet. */
391
392int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
390{ 393{
391 struct dlm_rcom *rc; 394 struct dlm_rcom *rc;
392 struct rcom_config *rf; 395 struct rcom_config *rf;
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
446 return rv; 449 return rv;
447} 450}
448 451
449/* Called by dlm_recvd; corresponds to dlm_receive_message() but special 452/* Called by dlm_recv; corresponds to dlm_receive_message() but special
450 recovery-only comms are sent through here. */ 453 recovery-only comms are sent through here. */
451 454
452void dlm_receive_rcom(struct dlm_header *hd, int nodeid) 455void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
453{ 456{
454 struct dlm_rcom *rc = (struct dlm_rcom *) hd;
455 struct dlm_ls *ls;
456
457 dlm_rcom_in(rc);
458
459 /* If the lockspace doesn't exist then still send a status message
460 back; it's possible that it just doesn't have its global_id yet. */
461
462 ls = dlm_find_lockspace_global(hd->h_lockspace);
463 if (!ls) {
464 log_print("lockspace %x from %d type %x not found",
465 hd->h_lockspace, nodeid, rc->rc_type);
466 if (rc->rc_type == DLM_RCOM_STATUS)
467 send_ls_not_ready(nodeid, rc);
468 return;
469 }
470
471 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { 457 if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
472 log_debug(ls, "ignoring recovery message %x from %d", 458 log_debug(ls, "ignoring recovery message %x from %d",
473 rc->rc_type, nodeid); 459 rc->rc_type, nodeid);
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
477 if (is_old_reply(ls, rc)) 463 if (is_old_reply(ls, rc))
478 goto out; 464 goto out;
479 465
480 if (nodeid != rc->rc_header.h_nodeid) {
481 log_error(ls, "bad rcom nodeid %d from %d",
482 rc->rc_header.h_nodeid, nodeid);
483 goto out;
484 }
485
486 switch (rc->rc_type) { 466 switch (rc->rc_type) {
487 case DLM_RCOM_STATUS: 467 case DLM_RCOM_STATUS:
488 receive_rcom_status(ls, rc); 468 receive_rcom_status(ls, rc);
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
520 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); 500 DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
521 } 501 }
522 out: 502 out:
523 dlm_put_lockspace(ls); 503 return;
524} 504}
525 505
diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h
index d7984321ff41..b09abd29ba38 100644
--- a/fs/dlm/rcom.h
+++ b/fs/dlm/rcom.h
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -18,7 +18,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
18int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); 18int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
19int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); 19int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
20int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); 20int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
21void dlm_receive_rcom(struct dlm_header *hd, int nodeid); 21void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid);
22int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in);
22 23
23#endif 24#endif
24 25
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 66575997861c..4b89e20eebe7 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -24,19 +24,28 @@
24 24
25 25
26/* If the start for which we're re-enabling locking (seq) has been superseded 26/* If the start for which we're re-enabling locking (seq) has been superseded
27 by a newer stop (ls_recover_seq), we need to leave locking disabled. */ 27 by a newer stop (ls_recover_seq), we need to leave locking disabled.
28
29 We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
30 locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
31 enables locking and clears the requestqueue between a and b. */
28 32
29static int enable_locking(struct dlm_ls *ls, uint64_t seq) 33static int enable_locking(struct dlm_ls *ls, uint64_t seq)
30{ 34{
31 int error = -EINTR; 35 int error = -EINTR;
32 36
37 down_write(&ls->ls_recv_active);
38
33 spin_lock(&ls->ls_recover_lock); 39 spin_lock(&ls->ls_recover_lock);
34 if (ls->ls_recover_seq == seq) { 40 if (ls->ls_recover_seq == seq) {
35 set_bit(LSFL_RUNNING, &ls->ls_flags); 41 set_bit(LSFL_RUNNING, &ls->ls_flags);
42 /* unblocks processes waiting to enter the dlm */
36 up_write(&ls->ls_in_recovery); 43 up_write(&ls->ls_in_recovery);
37 error = 0; 44 error = 0;
38 } 45 }
39 spin_unlock(&ls->ls_recover_lock); 46 spin_unlock(&ls->ls_recover_lock);
47
48 up_write(&ls->ls_recv_active);
40 return error; 49 return error;
41} 50}
42 51
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 65008d79c96d..0de04f17ccea 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -20,7 +20,7 @@
20struct rq_entry { 20struct rq_entry {
21 struct list_head list; 21 struct list_head list;
22 int nodeid; 22 int nodeid;
23 char request[1]; 23 char request[0];
24}; 24};
25 25
26/* 26/*
@@ -30,42 +30,39 @@ struct rq_entry {
30 * lockspace is enabled on some while still suspended on others. 30 * lockspace is enabled on some while still suspended on others.
31 */ 31 */
32 32
33int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) 33void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
34{ 34{
35 struct rq_entry *e; 35 struct rq_entry *e;
36 int length = hd->h_length; 36 int length = hd->h_length;
37 int rv = 0;
38 37
39 e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); 38 e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
40 if (!e) { 39 if (!e) {
41 log_print("dlm_add_requestqueue: out of memory\n"); 40 log_print("dlm_add_requestqueue: out of memory len %d", length);
42 return 0; 41 return;
43 } 42 }
44 43
45 e->nodeid = nodeid; 44 e->nodeid = nodeid;
46 memcpy(e->request, hd, length); 45 memcpy(e->request, hd, length);
47 46
48 /* We need to check dlm_locking_stopped() after taking the mutex to
49 avoid a race where dlm_recoverd enables locking and runs
50 process_requestqueue between our earlier dlm_locking_stopped check
51 and this addition to the requestqueue. */
52
53 mutex_lock(&ls->ls_requestqueue_mutex); 47 mutex_lock(&ls->ls_requestqueue_mutex);
54 if (dlm_locking_stopped(ls)) 48 list_add_tail(&e->list, &ls->ls_requestqueue);
55 list_add_tail(&e->list, &ls->ls_requestqueue);
56 else {
57 log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
58 kfree(e);
59 rv = -EAGAIN;
60 }
61 mutex_unlock(&ls->ls_requestqueue_mutex); 49 mutex_unlock(&ls->ls_requestqueue_mutex);
62 return rv;
63} 50}
64 51
52/*
53 * Called by dlm_recoverd to process normal messages saved while recovery was
54 * happening. Normal locking has been enabled before this is called. dlm_recv
55 * upon receiving a message, will wait for all saved messages to be drained
56 * here before processing the message it got. If a new dlm_ls_stop() arrives
57 * while we're processing these saved messages, it may block trying to suspend
58 * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that
59 * case, we don't abort since locking_stopped is still 0. If dlm_recv is not
60 * waiting for us, then this processing may be aborted due to locking_stopped.
61 */
62
65int dlm_process_requestqueue(struct dlm_ls *ls) 63int dlm_process_requestqueue(struct dlm_ls *ls)
66{ 64{
67 struct rq_entry *e; 65 struct rq_entry *e;
68 struct dlm_header *hd;
69 int error = 0; 66 int error = 0;
70 67
71 mutex_lock(&ls->ls_requestqueue_mutex); 68 mutex_lock(&ls->ls_requestqueue_mutex);
@@ -79,14 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
79 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); 76 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
80 mutex_unlock(&ls->ls_requestqueue_mutex); 77 mutex_unlock(&ls->ls_requestqueue_mutex);
81 78
82 hd = (struct dlm_header *) e->request; 79 dlm_receive_message_saved(ls, (struct dlm_message *)e->request);
83 error = dlm_receive_message(hd, e->nodeid, 1);
84
85 if (error == -EINTR) {
86 /* entry is left on requestqueue */
87 log_debug(ls, "process_requestqueue abort eintr");
88 break;
89 }
90 80
91 mutex_lock(&ls->ls_requestqueue_mutex); 81 mutex_lock(&ls->ls_requestqueue_mutex);
92 list_del(&e->list); 82 list_del(&e->list);
@@ -106,10 +96,12 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
106 96
107/* 97/*
108 * After recovery is done, locking is resumed and dlm_recoverd takes all the 98 * After recovery is done, locking is resumed and dlm_recoverd takes all the
109 * saved requests and processes them as they would have been by dlm_recvd. At 99 * saved requests and processes them as they would have been by dlm_recv. At
110 * the same time, dlm_recvd will start receiving new requests from remote 100 * the same time, dlm_recv will start receiving new requests from remote nodes.
111 * nodes. We want to delay dlm_recvd processing new requests until 101 * We want to delay dlm_recv processing new requests until dlm_recoverd has
112 * dlm_recoverd has finished processing the old saved requests. 102 * finished processing the old saved requests. We don't check for locking
103 * stopped here because dlm_ls_stop won't stop locking until it's suspended us
104 * (dlm_recv).
113 */ 105 */
114 106
115void dlm_wait_requestqueue(struct dlm_ls *ls) 107void dlm_wait_requestqueue(struct dlm_ls *ls)
@@ -118,8 +110,6 @@ void dlm_wait_requestqueue(struct dlm_ls *ls)
118 mutex_lock(&ls->ls_requestqueue_mutex); 110 mutex_lock(&ls->ls_requestqueue_mutex);
119 if (list_empty(&ls->ls_requestqueue)) 111 if (list_empty(&ls->ls_requestqueue))
120 break; 112 break;
121 if (dlm_locking_stopped(ls))
122 break;
123 mutex_unlock(&ls->ls_requestqueue_mutex); 113 mutex_unlock(&ls->ls_requestqueue_mutex);
124 schedule(); 114 schedule();
125 } 115 }
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h
index 6a53ea03335d..aba34fc05ee4 100644
--- a/fs/dlm/requestqueue.h
+++ b/fs/dlm/requestqueue.h
@@ -1,7 +1,7 @@
1/****************************************************************************** 1/******************************************************************************
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) 2005 Red Hat, Inc. All rights reserved. 4** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
5** 5**
6** This copyrighted material is made available to anyone wishing to use, 6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions 7** modify, copy, or redistribute it subject to the terms and conditions
@@ -13,7 +13,7 @@
13#ifndef __REQUESTQUEUE_DOT_H__ 13#ifndef __REQUESTQUEUE_DOT_H__
14#define __REQUESTQUEUE_DOT_H__ 14#define __REQUESTQUEUE_DOT_H__
15 15
16int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); 16void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
17int dlm_process_requestqueue(struct dlm_ls *ls); 17int dlm_process_requestqueue(struct dlm_ls *ls);
18void dlm_wait_requestqueue(struct dlm_ls *ls); 18void dlm_wait_requestqueue(struct dlm_ls *ls);
19void dlm_purge_requestqueue(struct dlm_ls *ls); 19void dlm_purge_requestqueue(struct dlm_ls *ls);
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index fe9186312d7c..9aa345121e09 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -165,22 +165,10 @@ static int ecryptfs_process_nl_quit(struct sk_buff *skb)
165 * it to its desired netlink context element and wake up the process 165 * it to its desired netlink context element and wake up the process
166 * that is waiting for a response. 166 * that is waiting for a response.
167 */ 167 */
168static void ecryptfs_receive_nl_message(struct sock *sk, int len) 168static void ecryptfs_receive_nl_message(struct sk_buff *skb)
169{ 169{
170 struct sk_buff *skb;
171 struct nlmsghdr *nlh; 170 struct nlmsghdr *nlh;
172 int rc = 0; /* skb_recv_datagram requires this */
173 171
174receive:
175 skb = skb_recv_datagram(sk, 0, 0, &rc);
176 if (rc == -EINTR)
177 goto receive;
178 else if (rc < 0) {
179 ecryptfs_printk(KERN_ERR, "Error occurred while "
180 "receiving eCryptfs netlink message; "
181 "rc = [%d]\n", rc);
182 return;
183 }
184 nlh = nlmsg_hdr(skb); 172 nlh = nlmsg_hdr(skb);
185 if (!NLMSG_OK(nlh, skb->len)) { 173 if (!NLMSG_OK(nlh, skb->len)) {
186 ecryptfs_printk(KERN_ERR, "Received corrupt netlink " 174 ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
@@ -227,7 +215,7 @@ int ecryptfs_init_netlink(void)
227{ 215{
228 int rc; 216 int rc;
229 217
230 ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0, 218 ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0,
231 ecryptfs_receive_nl_message, 219 ecryptfs_receive_nl_message,
232 NULL, THIS_MODULE); 220 NULL, THIS_MODULE);
233 if (!ecryptfs_nl_sock) { 221 if (!ecryptfs_nl_sock) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index a4b142a6a2c7..8d23b0b38717 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -14,6 +14,7 @@
14 */ 14 */
15 15
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/module.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
18#include <linux/sched.h> 19#include <linux/sched.h>
19#include <linux/fs.h> 20#include <linux/fs.h>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index cd805a66880d..93fa427bb5f5 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -93,9 +93,10 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 map_bh(bh, inode->i_sb, block); 93 map_bh(bh, inode->i_sb, block);
94 94
95 set_buffer_uptodate(bh); 95 set_buffer_uptodate(bh);
96 if (!gfs2_is_jdata(ip))
97 mark_buffer_dirty(bh);
96 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) 98 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
97 gfs2_trans_add_bh(ip->i_gl, bh, 0); 99 gfs2_trans_add_bh(ip->i_gl, bh, 0);
98 mark_buffer_dirty(bh);
99 100
100 if (release) { 101 if (release) {
101 unlock_page(page); 102 unlock_page(page);
@@ -1085,6 +1086,33 @@ static int do_shrink(struct gfs2_inode *ip, u64 size)
1085 return error; 1086 return error;
1086} 1087}
1087 1088
1089static int do_touch(struct gfs2_inode *ip, u64 size)
1090{
1091 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1092 struct buffer_head *dibh;
1093 int error;
1094
1095 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1096 if (error)
1097 return error;
1098
1099 down_write(&ip->i_rw_mutex);
1100
1101 error = gfs2_meta_inode_buffer(ip, &dibh);
1102 if (error)
1103 goto do_touch_out;
1104
1105 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1106 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1107 gfs2_dinode_out(ip, dibh->b_data);
1108 brelse(dibh);
1109
1110do_touch_out:
1111 up_write(&ip->i_rw_mutex);
1112 gfs2_trans_end(sdp);
1113 return error;
1114}
1115
1088/** 1116/**
1089 * gfs2_truncatei - make a file a given size 1117 * gfs2_truncatei - make a file a given size
1090 * @ip: the inode 1118 * @ip: the inode
@@ -1105,8 +1133,11 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
1105 1133
1106 if (size > ip->i_di.di_size) 1134 if (size > ip->i_di.di_size)
1107 error = do_grow(ip, size); 1135 error = do_grow(ip, size);
1108 else 1136 else if (size < ip->i_di.di_size)
1109 error = do_shrink(ip, size); 1137 error = do_shrink(ip, size);
1138 else
1139 /* update time stamps */
1140 error = do_touch(ip, size);
1110 1141
1111 return error; 1142 return error;
1112} 1143}
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 3548d9f31e0d..3731ab0771d5 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -35,30 +35,6 @@
35 The kthread functions used to start these daemons block and flush signals. */ 35 The kthread functions used to start these daemons block and flush signals. */
36 36
37/** 37/**
38 * gfs2_scand - Look for cached glocks and inodes to toss from memory
39 * @sdp: Pointer to GFS2 superblock
40 *
41 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
42 * See gfs2_glockd()
43 */
44
45int gfs2_scand(void *data)
46{
47 struct gfs2_sbd *sdp = data;
48 unsigned long t;
49
50 while (!kthread_should_stop()) {
51 gfs2_scand_internal(sdp);
52 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
53 if (freezing(current))
54 refrigerator();
55 schedule_timeout_interruptible(t);
56 }
57
58 return 0;
59}
60
61/**
62 * gfs2_glockd - Reclaim unused glock structures 38 * gfs2_glockd - Reclaim unused glock structures
63 * @sdp: Pointer to GFS2 superblock 39 * @sdp: Pointer to GFS2 superblock
64 * 40 *
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
index 801007120fb2..0de9b3557955 100644
--- a/fs/gfs2/daemon.h
+++ b/fs/gfs2/daemon.h
@@ -10,7 +10,6 @@
10#ifndef __DAEMON_DOT_H__ 10#ifndef __DAEMON_DOT_H__
11#define __DAEMON_DOT_H__ 11#define __DAEMON_DOT_H__
12 12
13int gfs2_scand(void *data);
14int gfs2_glockd(void *data); 13int gfs2_glockd(void *data);
15int gfs2_recoverd(void *data); 14int gfs2_recoverd(void *data);
16int gfs2_logd(void *data); 15int gfs2_logd(void *data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 2beb2f401aa2..9949bb746a52 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1043,6 +1043,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1043 1043
1044 error = gfs2_meta_inode_buffer(dip, &dibh); 1044 error = gfs2_meta_inode_buffer(dip, &dibh);
1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1045 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1046 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1046 dip->i_di.di_blocks++; 1047 dip->i_di.di_blocks++;
1047 gfs2_set_inode_blocks(&dip->i_inode); 1048 gfs2_set_inode_blocks(&dip->i_inode);
1048 gfs2_dinode_out(dip, dibh->b_data); 1049 gfs2_dinode_out(dip, dibh->b_data);
@@ -1501,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1501 inode = gfs2_inode_lookup(dir->i_sb, 1502 inode = gfs2_inode_lookup(dir->i_sb,
1502 be16_to_cpu(dent->de_type), 1503 be16_to_cpu(dent->de_type),
1503 be64_to_cpu(dent->de_inum.no_addr), 1504 be64_to_cpu(dent->de_inum.no_addr),
1504 be64_to_cpu(dent->de_inum.no_formal_ino)); 1505 be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1505 brelse(bh); 1506 brelse(bh);
1506 return inode; 1507 return inode;
1507 } 1508 }
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index 1ab3e9d73886..aa8dbf303f6d 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -200,28 +200,28 @@ static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
200 return gfs2_ea_remove_i(ip, er); 200 return gfs2_ea_remove_i(ip, er);
201} 201}
202 202
203static struct gfs2_eattr_operations gfs2_user_eaops = { 203static const struct gfs2_eattr_operations gfs2_user_eaops = {
204 .eo_get = user_eo_get, 204 .eo_get = user_eo_get,
205 .eo_set = user_eo_set, 205 .eo_set = user_eo_set,
206 .eo_remove = user_eo_remove, 206 .eo_remove = user_eo_remove,
207 .eo_name = "user", 207 .eo_name = "user",
208}; 208};
209 209
210struct gfs2_eattr_operations gfs2_system_eaops = { 210const struct gfs2_eattr_operations gfs2_system_eaops = {
211 .eo_get = system_eo_get, 211 .eo_get = system_eo_get,
212 .eo_set = system_eo_set, 212 .eo_set = system_eo_set,
213 .eo_remove = system_eo_remove, 213 .eo_remove = system_eo_remove,
214 .eo_name = "system", 214 .eo_name = "system",
215}; 215};
216 216
217static struct gfs2_eattr_operations gfs2_security_eaops = { 217static const struct gfs2_eattr_operations gfs2_security_eaops = {
218 .eo_get = security_eo_get, 218 .eo_get = security_eo_get,
219 .eo_set = security_eo_set, 219 .eo_set = security_eo_set,
220 .eo_remove = security_eo_remove, 220 .eo_remove = security_eo_remove,
221 .eo_name = "security", 221 .eo_name = "security",
222}; 222};
223 223
224struct gfs2_eattr_operations *gfs2_ea_ops[] = { 224const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
225 NULL, 225 NULL,
226 &gfs2_user_eaops, 226 &gfs2_user_eaops,
227 &gfs2_system_eaops, 227 &gfs2_system_eaops,
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
index 508b4f7a2449..da2f7fbbb40d 100644
--- a/fs/gfs2/eaops.h
+++ b/fs/gfs2/eaops.h
@@ -22,9 +22,9 @@ struct gfs2_eattr_operations {
22 22
23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name); 23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
24 24
25extern struct gfs2_eattr_operations gfs2_system_eaops; 25extern const struct gfs2_eattr_operations gfs2_system_eaops;
26 26
27extern struct gfs2_eattr_operations *gfs2_ea_ops[]; 27extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
28 28
29#endif /* __EAOPS_DOT_H__ */ 29#endif /* __EAOPS_DOT_H__ */
30 30
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3f0974e1afef..a37efe4aae6f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -25,8 +25,10 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/seq_file.h> 26#include <linux/seq_file.h>
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/module.h> 28#include <linux/kthread.h>
29#include <linux/kallsyms.h> 29#include <linux/freezer.h>
30#include <linux/workqueue.h>
31#include <linux/jiffies.h>
30 32
31#include "gfs2.h" 33#include "gfs2.h"
32#include "incore.h" 34#include "incore.h"
@@ -48,7 +50,6 @@ struct glock_iter {
48 int hash; /* hash bucket index */ 50 int hash; /* hash bucket index */
49 struct gfs2_sbd *sdp; /* incore superblock */ 51 struct gfs2_sbd *sdp; /* incore superblock */
50 struct gfs2_glock *gl; /* current glock struct */ 52 struct gfs2_glock *gl; /* current glock struct */
51 struct hlist_head *hb_list; /* current hash bucket ptr */
52 struct seq_file *seq; /* sequence file for debugfs */ 53 struct seq_file *seq; /* sequence file for debugfs */
53 char string[512]; /* scratch space */ 54 char string[512]; /* scratch space */
54}; 55};
@@ -59,8 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
59static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); 60static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
60static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); 61static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
61static void gfs2_glock_drop_th(struct gfs2_glock *gl); 62static void gfs2_glock_drop_th(struct gfs2_glock *gl);
63static void run_queue(struct gfs2_glock *gl);
64
62static DECLARE_RWSEM(gfs2_umount_flush_sem); 65static DECLARE_RWSEM(gfs2_umount_flush_sem);
63static struct dentry *gfs2_root; 66static struct dentry *gfs2_root;
67static struct task_struct *scand_process;
68static unsigned int scand_secs = 5;
69static struct workqueue_struct *glock_workqueue;
64 70
65#define GFS2_GL_HASH_SHIFT 15 71#define GFS2_GL_HASH_SHIFT 15
66#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 72#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -276,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
276 return gl; 282 return gl;
277} 283}
278 284
285static void glock_work_func(struct work_struct *work)
286{
287 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
288
289 spin_lock(&gl->gl_spin);
290 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags))
291 set_bit(GLF_DEMOTE, &gl->gl_flags);
292 run_queue(gl);
293 spin_unlock(&gl->gl_spin);
294 gfs2_glock_put(gl);
295}
296
279/** 297/**
280 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 298 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
281 * @sdp: The GFS2 superblock 299 * @sdp: The GFS2 superblock
@@ -315,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
315 gl->gl_name = name; 333 gl->gl_name = name;
316 atomic_set(&gl->gl_ref, 1); 334 atomic_set(&gl->gl_ref, 1);
317 gl->gl_state = LM_ST_UNLOCKED; 335 gl->gl_state = LM_ST_UNLOCKED;
336 gl->gl_demote_state = LM_ST_EXCLUSIVE;
318 gl->gl_hash = hash; 337 gl->gl_hash = hash;
319 gl->gl_owner_pid = 0; 338 gl->gl_owner_pid = 0;
320 gl->gl_ip = 0; 339 gl->gl_ip = 0;
@@ -323,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
323 gl->gl_req_bh = NULL; 342 gl->gl_req_bh = NULL;
324 gl->gl_vn = 0; 343 gl->gl_vn = 0;
325 gl->gl_stamp = jiffies; 344 gl->gl_stamp = jiffies;
345 gl->gl_tchange = jiffies;
326 gl->gl_object = NULL; 346 gl->gl_object = NULL;
327 gl->gl_sbd = sdp; 347 gl->gl_sbd = sdp;
328 gl->gl_aspace = NULL; 348 gl->gl_aspace = NULL;
329 lops_init_le(&gl->gl_le, &gfs2_glock_lops); 349 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
350 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
330 351
331 /* If this glock protects actual on-disk data or metadata blocks, 352 /* If this glock protects actual on-disk data or metadata blocks,
332 create a VFS inode to manage the pages/buffers holding them. */ 353 create a VFS inode to manage the pages/buffers holding them. */
@@ -440,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh)
440 461
441static void gfs2_demote_wake(struct gfs2_glock *gl) 462static void gfs2_demote_wake(struct gfs2_glock *gl)
442{ 463{
464 BUG_ON(!spin_is_locked(&gl->gl_spin));
465 gl->gl_demote_state = LM_ST_EXCLUSIVE;
443 clear_bit(GLF_DEMOTE, &gl->gl_flags); 466 clear_bit(GLF_DEMOTE, &gl->gl_flags);
444 smp_mb__after_clear_bit(); 467 smp_mb__after_clear_bit();
445 wake_up_bit(&gl->gl_flags, GLF_DEMOTE); 468 wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
@@ -545,12 +568,14 @@ static int rq_demote(struct gfs2_glock *gl)
545 return 0; 568 return 0;
546 } 569 }
547 set_bit(GLF_LOCK, &gl->gl_flags); 570 set_bit(GLF_LOCK, &gl->gl_flags);
548 spin_unlock(&gl->gl_spin);
549 if (gl->gl_demote_state == LM_ST_UNLOCKED || 571 if (gl->gl_demote_state == LM_ST_UNLOCKED ||
550 gl->gl_state != LM_ST_EXCLUSIVE) 572 gl->gl_state != LM_ST_EXCLUSIVE) {
573 spin_unlock(&gl->gl_spin);
551 gfs2_glock_drop_th(gl); 574 gfs2_glock_drop_th(gl);
552 else 575 } else {
576 spin_unlock(&gl->gl_spin);
553 gfs2_glock_xmote_th(gl, NULL); 577 gfs2_glock_xmote_th(gl, NULL);
578 }
554 spin_lock(&gl->gl_spin); 579 spin_lock(&gl->gl_spin);
555 580
556 return 0; 581 return 0;
@@ -679,24 +704,25 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
679 * practise: LM_ST_SHARED and LM_ST_UNLOCKED 704 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
680 */ 705 */
681 706
682static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) 707static void handle_callback(struct gfs2_glock *gl, unsigned int state,
708 int remote, unsigned long delay)
683{ 709{
710 int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
711
684 spin_lock(&gl->gl_spin); 712 spin_lock(&gl->gl_spin);
685 if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { 713 set_bit(bit, &gl->gl_flags);
714 if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
686 gl->gl_demote_state = state; 715 gl->gl_demote_state = state;
687 gl->gl_demote_time = jiffies; 716 gl->gl_demote_time = jiffies;
688 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && 717 if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
689 gl->gl_object) { 718 gl->gl_object) {
690 struct inode *inode = igrab(gl->gl_object); 719 gfs2_glock_schedule_for_reclaim(gl);
691 spin_unlock(&gl->gl_spin); 720 spin_unlock(&gl->gl_spin);
692 if (inode) {
693 d_prune_aliases(inode);
694 iput(inode);
695 }
696 return; 721 return;
697 } 722 }
698 } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { 723 } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
699 gl->gl_demote_state = state; 724 gl->gl_demote_state != state) {
725 gl->gl_demote_state = LM_ST_UNLOCKED;
700 } 726 }
701 spin_unlock(&gl->gl_spin); 727 spin_unlock(&gl->gl_spin);
702} 728}
@@ -723,6 +749,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
723 } 749 }
724 750
725 gl->gl_state = new_state; 751 gl->gl_state = new_state;
752 gl->gl_tchange = jiffies;
726} 753}
727 754
728/** 755/**
@@ -760,10 +787,20 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
760 787
761 if (!gh) { 788 if (!gh) {
762 gl->gl_stamp = jiffies; 789 gl->gl_stamp = jiffies;
763 if (ret & LM_OUT_CANCELED) 790 if (ret & LM_OUT_CANCELED) {
764 op_done = 0; 791 op_done = 0;
765 else 792 } else {
793 spin_lock(&gl->gl_spin);
794 if (gl->gl_state != gl->gl_demote_state) {
795 gl->gl_req_bh = NULL;
796 spin_unlock(&gl->gl_spin);
797 gfs2_glock_drop_th(gl);
798 gfs2_glock_put(gl);
799 return;
800 }
766 gfs2_demote_wake(gl); 801 gfs2_demote_wake(gl);
802 spin_unlock(&gl->gl_spin);
803 }
767 } else { 804 } else {
768 spin_lock(&gl->gl_spin); 805 spin_lock(&gl->gl_spin);
769 list_del_init(&gh->gh_list); 806 list_del_init(&gh->gh_list);
@@ -799,7 +836,6 @@ out:
799 gl->gl_req_gh = NULL; 836 gl->gl_req_gh = NULL;
800 gl->gl_req_bh = NULL; 837 gl->gl_req_bh = NULL;
801 clear_bit(GLF_LOCK, &gl->gl_flags); 838 clear_bit(GLF_LOCK, &gl->gl_flags);
802 run_queue(gl);
803 spin_unlock(&gl->gl_spin); 839 spin_unlock(&gl->gl_spin);
804 } 840 }
805 841
@@ -817,7 +853,7 @@ out:
817 * 853 *
818 */ 854 */
819 855
820void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) 856static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
821{ 857{
822 struct gfs2_sbd *sdp = gl->gl_sbd; 858 struct gfs2_sbd *sdp = gl->gl_sbd;
823 int flags = gh ? gh->gh_flags : 0; 859 int flags = gh ? gh->gh_flags : 0;
@@ -871,7 +907,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
871 gfs2_assert_warn(sdp, !ret); 907 gfs2_assert_warn(sdp, !ret);
872 908
873 state_change(gl, LM_ST_UNLOCKED); 909 state_change(gl, LM_ST_UNLOCKED);
874 gfs2_demote_wake(gl);
875 910
876 if (glops->go_inval) 911 if (glops->go_inval)
877 glops->go_inval(gl, DIO_METADATA); 912 glops->go_inval(gl, DIO_METADATA);
@@ -884,10 +919,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
884 } 919 }
885 920
886 spin_lock(&gl->gl_spin); 921 spin_lock(&gl->gl_spin);
922 gfs2_demote_wake(gl);
887 gl->gl_req_gh = NULL; 923 gl->gl_req_gh = NULL;
888 gl->gl_req_bh = NULL; 924 gl->gl_req_bh = NULL;
889 clear_bit(GLF_LOCK, &gl->gl_flags); 925 clear_bit(GLF_LOCK, &gl->gl_flags);
890 run_queue(gl);
891 spin_unlock(&gl->gl_spin); 926 spin_unlock(&gl->gl_spin);
892 927
893 gfs2_glock_put(gl); 928 gfs2_glock_put(gl);
@@ -1067,24 +1102,31 @@ static void add_to_queue(struct gfs2_holder *gh)
1067 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 1102 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1068 BUG(); 1103 BUG();
1069 1104
1070 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); 1105 if (!(gh->gh_flags & GL_FLOCK)) {
1071 if (existing) { 1106 existing = find_holder_by_owner(&gl->gl_holders,
1072 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1107 gh->gh_owner_pid);
1073 printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); 1108 if (existing) {
1074 printk(KERN_INFO "lock type : %d lock state : %d\n", 1109 print_symbol(KERN_WARNING "original: %s\n",
1075 existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); 1110 existing->gh_ip);
1076 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1111 printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
1077 printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); 1112 printk(KERN_INFO "lock type : %d lock state : %d\n",
1078 printk(KERN_INFO "lock type : %d lock state : %d\n", 1113 existing->gh_gl->gl_name.ln_type,
1079 gl->gl_name.ln_type, gl->gl_state); 1114 existing->gh_gl->gl_state);
1080 BUG(); 1115 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1081 } 1116 printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
1082 1117 printk(KERN_INFO "lock type : %d lock state : %d\n",
1083 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); 1118 gl->gl_name.ln_type, gl->gl_state);
1084 if (existing) { 1119 BUG();
1085 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); 1120 }
1086 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); 1121
1087 BUG(); 1122 existing = find_holder_by_owner(&gl->gl_waiters3,
1123 gh->gh_owner_pid);
1124 if (existing) {
1125 print_symbol(KERN_WARNING "original: %s\n",
1126 existing->gh_ip);
1127 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1128 BUG();
1129 }
1088 } 1130 }
1089 1131
1090 if (gh->gh_flags & LM_FLAG_PRIORITY) 1132 if (gh->gh_flags & LM_FLAG_PRIORITY)
@@ -1195,9 +1237,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1195{ 1237{
1196 struct gfs2_glock *gl = gh->gh_gl; 1238 struct gfs2_glock *gl = gh->gh_gl;
1197 const struct gfs2_glock_operations *glops = gl->gl_ops; 1239 const struct gfs2_glock_operations *glops = gl->gl_ops;
1240 unsigned delay = 0;
1198 1241
1199 if (gh->gh_flags & GL_NOCACHE) 1242 if (gh->gh_flags & GL_NOCACHE)
1200 handle_callback(gl, LM_ST_UNLOCKED, 0); 1243 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1201 1244
1202 gfs2_glmutex_lock(gl); 1245 gfs2_glmutex_lock(gl);
1203 1246
@@ -1215,8 +1258,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1215 } 1258 }
1216 1259
1217 clear_bit(GLF_LOCK, &gl->gl_flags); 1260 clear_bit(GLF_LOCK, &gl->gl_flags);
1218 run_queue(gl);
1219 spin_unlock(&gl->gl_spin); 1261 spin_unlock(&gl->gl_spin);
1262
1263 gfs2_glock_hold(gl);
1264 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1265 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1266 delay = gl->gl_ops->go_min_hold_time;
1267 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1268 gfs2_glock_put(gl);
1220} 1269}
1221 1270
1222void gfs2_glock_dq_wait(struct gfs2_holder *gh) 1271void gfs2_glock_dq_wait(struct gfs2_holder *gh)
@@ -1443,18 +1492,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1443 unsigned int state) 1492 unsigned int state)
1444{ 1493{
1445 struct gfs2_glock *gl; 1494 struct gfs2_glock *gl;
1495 unsigned long delay = 0;
1496 unsigned long holdtime;
1497 unsigned long now = jiffies;
1446 1498
1447 gl = gfs2_glock_find(sdp, name); 1499 gl = gfs2_glock_find(sdp, name);
1448 if (!gl) 1500 if (!gl)
1449 return; 1501 return;
1450 1502
1451 handle_callback(gl, state, 1); 1503 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
1452 1504 if (time_before(now, holdtime))
1453 spin_lock(&gl->gl_spin); 1505 delay = holdtime - now;
1454 run_queue(gl);
1455 spin_unlock(&gl->gl_spin);
1456 1506
1457 gfs2_glock_put(gl); 1507 handle_callback(gl, state, 1, delay);
1508 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1509 gfs2_glock_put(gl);
1458} 1510}
1459 1511
1460/** 1512/**
@@ -1495,7 +1547,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
1495 return; 1547 return;
1496 if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) 1548 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1497 gl->gl_req_bh(gl, async->lc_ret); 1549 gl->gl_req_bh(gl, async->lc_ret);
1498 gfs2_glock_put(gl); 1550 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1551 gfs2_glock_put(gl);
1499 up_read(&gfs2_umount_flush_sem); 1552 up_read(&gfs2_umount_flush_sem);
1500 return; 1553 return;
1501 } 1554 }
@@ -1588,7 +1641,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1588 if (gfs2_glmutex_trylock(gl)) { 1641 if (gfs2_glmutex_trylock(gl)) {
1589 if (list_empty(&gl->gl_holders) && 1642 if (list_empty(&gl->gl_holders) &&
1590 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) 1643 gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
1591 handle_callback(gl, LM_ST_UNLOCKED, 0); 1644 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1592 gfs2_glmutex_unlock(gl); 1645 gfs2_glmutex_unlock(gl);
1593 } 1646 }
1594 1647
@@ -1617,7 +1670,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
1617 goto out; 1670 goto out;
1618 gl = list_entry(head->first, struct gfs2_glock, gl_list); 1671 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1619 while(1) { 1672 while(1) {
1620 if (gl->gl_sbd == sdp) { 1673 if (!sdp || gl->gl_sbd == sdp) {
1621 gfs2_glock_hold(gl); 1674 gfs2_glock_hold(gl);
1622 read_unlock(gl_lock_addr(hash)); 1675 read_unlock(gl_lock_addr(hash));
1623 if (prev) 1676 if (prev)
@@ -1635,6 +1688,7 @@ out:
1635 read_unlock(gl_lock_addr(hash)); 1688 read_unlock(gl_lock_addr(hash));
1636 if (prev) 1689 if (prev)
1637 gfs2_glock_put(prev); 1690 gfs2_glock_put(prev);
1691 cond_resched();
1638 return has_entries; 1692 return has_entries;
1639} 1693}
1640 1694
@@ -1663,20 +1717,6 @@ out_schedule:
1663} 1717}
1664 1718
1665/** 1719/**
1666 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
1667 * @sdp: the filesystem
1668 *
1669 */
1670
1671void gfs2_scand_internal(struct gfs2_sbd *sdp)
1672{
1673 unsigned int x;
1674
1675 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1676 examine_bucket(scan_glock, sdp, x);
1677}
1678
1679/**
1680 * clear_glock - look at a glock and see if we can free it from glock cache 1720 * clear_glock - look at a glock and see if we can free it from glock cache
1681 * @gl: the glock to look at 1721 * @gl: the glock to look at
1682 * 1722 *
@@ -1701,7 +1741,7 @@ static void clear_glock(struct gfs2_glock *gl)
1701 if (gfs2_glmutex_trylock(gl)) { 1741 if (gfs2_glmutex_trylock(gl)) {
1702 if (list_empty(&gl->gl_holders) && 1742 if (list_empty(&gl->gl_holders) &&
1703 gl->gl_state != LM_ST_UNLOCKED) 1743 gl->gl_state != LM_ST_UNLOCKED)
1704 handle_callback(gl, LM_ST_UNLOCKED, 0); 1744 handle_callback(gl, LM_ST_UNLOCKED, 0, 0);
1705 gfs2_glmutex_unlock(gl); 1745 gfs2_glmutex_unlock(gl);
1706 } 1746 }
1707} 1747}
@@ -1843,7 +1883,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
1843 1883
1844 spin_lock(&gl->gl_spin); 1884 spin_lock(&gl->gl_spin);
1845 1885
1846 print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, 1886 print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type,
1847 (unsigned long long)gl->gl_name.ln_number); 1887 (unsigned long long)gl->gl_name.ln_number);
1848 print_dbg(gi, " gl_flags ="); 1888 print_dbg(gi, " gl_flags =");
1849 for (x = 0; x < 32; x++) { 1889 for (x = 0; x < 32; x++) {
@@ -1963,6 +2003,35 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
1963 return error; 2003 return error;
1964} 2004}
1965 2005
2006/**
2007 * gfs2_scand - Look for cached glocks and inodes to toss from memory
2008 * @sdp: Pointer to GFS2 superblock
2009 *
2010 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
2011 * See gfs2_glockd()
2012 */
2013
2014static int gfs2_scand(void *data)
2015{
2016 unsigned x;
2017 unsigned delay;
2018
2019 while (!kthread_should_stop()) {
2020 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
2021 examine_bucket(scan_glock, NULL, x);
2022 if (freezing(current))
2023 refrigerator();
2024 delay = scand_secs;
2025 if (delay < 1)
2026 delay = 1;
2027 schedule_timeout_interruptible(delay * HZ);
2028 }
2029
2030 return 0;
2031}
2032
2033
2034
1966int __init gfs2_glock_init(void) 2035int __init gfs2_glock_init(void)
1967{ 2036{
1968 unsigned i; 2037 unsigned i;
@@ -1974,52 +2043,69 @@ int __init gfs2_glock_init(void)
1974 rwlock_init(&gl_hash_locks[i]); 2043 rwlock_init(&gl_hash_locks[i]);
1975 } 2044 }
1976#endif 2045#endif
2046
2047 scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand");
2048 if (IS_ERR(scand_process))
2049 return PTR_ERR(scand_process);
2050
2051 glock_workqueue = create_workqueue("glock_workqueue");
2052 if (IS_ERR(glock_workqueue)) {
2053 kthread_stop(scand_process);
2054 return PTR_ERR(glock_workqueue);
2055 }
2056
1977 return 0; 2057 return 0;
1978} 2058}
1979 2059
2060void gfs2_glock_exit(void)
2061{
2062 destroy_workqueue(glock_workqueue);
2063 kthread_stop(scand_process);
2064}
2065
2066module_param(scand_secs, uint, S_IRUGO|S_IWUSR);
2067MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs");
2068
1980static int gfs2_glock_iter_next(struct glock_iter *gi) 2069static int gfs2_glock_iter_next(struct glock_iter *gi)
1981{ 2070{
2071 struct gfs2_glock *gl;
2072
2073restart:
1982 read_lock(gl_lock_addr(gi->hash)); 2074 read_lock(gl_lock_addr(gi->hash));
1983 while (1) { 2075 gl = gi->gl;
1984 if (!gi->hb_list) { /* If we don't have a hash bucket yet */ 2076 if (gl) {
1985 gi->hb_list = &gl_hash_table[gi->hash].hb_list; 2077 gi->gl = hlist_entry(gl->gl_list.next,
1986 if (hlist_empty(gi->hb_list)) { 2078 struct gfs2_glock, gl_list);
1987 read_unlock(gl_lock_addr(gi->hash));
1988 gi->hash++;
1989 read_lock(gl_lock_addr(gi->hash));
1990 gi->hb_list = NULL;
1991 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1992 read_unlock(gl_lock_addr(gi->hash));
1993 return 1;
1994 }
1995 else
1996 continue;
1997 }
1998 if (!hlist_empty(gi->hb_list)) {
1999 gi->gl = list_entry(gi->hb_list->first,
2000 struct gfs2_glock,
2001 gl_list);
2002 }
2003 } else {
2004 if (gi->gl->gl_list.next == NULL) {
2005 read_unlock(gl_lock_addr(gi->hash));
2006 gi->hash++;
2007 read_lock(gl_lock_addr(gi->hash));
2008 gi->hb_list = NULL;
2009 continue;
2010 }
2011 gi->gl = list_entry(gi->gl->gl_list.next,
2012 struct gfs2_glock, gl_list);
2013 }
2014 if (gi->gl) 2079 if (gi->gl)
2015 break; 2080 gfs2_glock_hold(gi->gl);
2016 } 2081 }
2017 read_unlock(gl_lock_addr(gi->hash)); 2082 read_unlock(gl_lock_addr(gi->hash));
2083 if (gl)
2084 gfs2_glock_put(gl);
2085 if (gl && gi->gl == NULL)
2086 gi->hash++;
2087 while(gi->gl == NULL) {
2088 if (gi->hash >= GFS2_GL_HASH_SIZE)
2089 return 1;
2090 read_lock(gl_lock_addr(gi->hash));
2091 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
2092 struct gfs2_glock, gl_list);
2093 if (gi->gl)
2094 gfs2_glock_hold(gi->gl);
2095 read_unlock(gl_lock_addr(gi->hash));
2096 gi->hash++;
2097 }
2098
2099 if (gi->sdp != gi->gl->gl_sbd)
2100 goto restart;
2101
2018 return 0; 2102 return 0;
2019} 2103}
2020 2104
2021static void gfs2_glock_iter_free(struct glock_iter *gi) 2105static void gfs2_glock_iter_free(struct glock_iter *gi)
2022{ 2106{
2107 if (gi->gl)
2108 gfs2_glock_put(gi->gl);
2023 kfree(gi); 2109 kfree(gi);
2024} 2110}
2025 2111
@@ -2033,9 +2119,8 @@ static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
2033 2119
2034 gi->sdp = sdp; 2120 gi->sdp = sdp;
2035 gi->hash = 0; 2121 gi->hash = 0;
2036 gi->gl = NULL;
2037 gi->hb_list = NULL;
2038 gi->seq = NULL; 2122 gi->seq = NULL;
2123 gi->gl = NULL;
2039 memset(gi->string, 0, sizeof(gi->string)); 2124 memset(gi->string, 0, sizeof(gi->string));
2040 2125
2041 if (gfs2_glock_iter_next(gi)) { 2126 if (gfs2_glock_iter_next(gi)) {
@@ -2055,7 +2140,7 @@ static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
2055 if (!gi) 2140 if (!gi)
2056 return NULL; 2141 return NULL;
2057 2142
2058 while (n--) { 2143 while(n--) {
2059 if (gfs2_glock_iter_next(gi)) { 2144 if (gfs2_glock_iter_next(gi)) {
2060 gfs2_glock_iter_free(gi); 2145 gfs2_glock_iter_free(gi);
2061 return NULL; 2146 return NULL;
@@ -2082,7 +2167,9 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
2082 2167
2083static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) 2168static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
2084{ 2169{
2085 /* nothing for now */ 2170 struct glock_iter *gi = iter_ptr;
2171 if (gi)
2172 gfs2_glock_iter_free(gi);
2086} 2173}
2087 2174
2088static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) 2175static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
@@ -2095,7 +2182,7 @@ static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
2095 return 0; 2182 return 0;
2096} 2183}
2097 2184
2098static struct seq_operations gfs2_glock_seq_ops = { 2185static const struct seq_operations gfs2_glock_seq_ops = {
2099 .start = gfs2_glock_seq_start, 2186 .start = gfs2_glock_seq_start,
2100 .next = gfs2_glock_seq_next, 2187 .next = gfs2_glock_seq_next,
2101 .stop = gfs2_glock_seq_stop, 2188 .stop = gfs2_glock_seq_stop,
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 7721ca3fff9e..b16f604eea9f 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -26,6 +26,7 @@
26#define GL_SKIP 0x00000100 26#define GL_SKIP 0x00000100
27#define GL_ATIME 0x00000200 27#define GL_ATIME 0x00000200
28#define GL_NOCACHE 0x00000400 28#define GL_NOCACHE 0x00000400
29#define GL_FLOCK 0x00000800
29#define GL_NOCANCEL 0x00001000 30#define GL_NOCANCEL 0x00001000
30 31
31#define GLR_TRYFAILED 13 32#define GLR_TRYFAILED 13
@@ -132,11 +133,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
132 133
133void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 134void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
134void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 135void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
135
136void gfs2_scand_internal(struct gfs2_sbd *sdp);
137void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); 136void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
138 137
139int __init gfs2_glock_init(void); 138int __init gfs2_glock_init(void);
139void gfs2_glock_exit(void);
140
140int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 141int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
141void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); 142void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
142int gfs2_register_debugfs(void); 143int gfs2_register_debugfs(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 777ca46010e8..4670dcb2a877 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -41,7 +41,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
41 struct list_head *head = &gl->gl_ail_list; 41 struct list_head *head = &gl->gl_ail_list;
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 struct buffer_head *bh; 43 struct buffer_head *bh;
44 u64 blkno;
45 int error; 44 int error;
46 45
47 blocks = atomic_read(&gl->gl_ail_count); 46 blocks = atomic_read(&gl->gl_ail_count);
@@ -57,19 +56,12 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
57 bd = list_entry(head->next, struct gfs2_bufdata, 56 bd = list_entry(head->next, struct gfs2_bufdata,
58 bd_ail_gl_list); 57 bd_ail_gl_list);
59 bh = bd->bd_bh; 58 bh = bd->bd_bh;
60 blkno = bh->b_blocknr; 59 gfs2_remove_from_ail(NULL, bd);
60 bd->bd_bh = NULL;
61 bh->b_private = NULL;
62 bd->bd_blkno = bh->b_blocknr;
61 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 63 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
62 64 gfs2_trans_add_revoke(sdp, bd);
63 bd->bd_ail = NULL;
64 list_del(&bd->bd_ail_st_list);
65 list_del(&bd->bd_ail_gl_list);
66 atomic_dec(&gl->gl_ail_count);
67 brelse(bh);
68 gfs2_log_unlock(sdp);
69
70 gfs2_trans_add_revoke(sdp, blkno);
71
72 gfs2_log_lock(sdp);
73 } 65 }
74 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 66 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
75 gfs2_log_unlock(sdp); 67 gfs2_log_unlock(sdp);
@@ -156,9 +148,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 ip = NULL; 148 ip = NULL;
157 149
158 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 150 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
159 if (ip) 151 if (ip && !gfs2_is_jdata(ip))
160 filemap_fdatawrite(ip->i_inode.i_mapping); 152 filemap_fdatawrite(ip->i_inode.i_mapping);
161 gfs2_log_flush(gl->gl_sbd, gl); 153 gfs2_log_flush(gl->gl_sbd, gl);
154 if (ip && gfs2_is_jdata(ip))
155 filemap_fdatawrite(ip->i_inode.i_mapping);
162 gfs2_meta_sync(gl); 156 gfs2_meta_sync(gl);
163 if (ip) { 157 if (ip) {
164 struct address_space *mapping = ip->i_inode.i_mapping; 158 struct address_space *mapping = ip->i_inode.i_mapping;
@@ -452,6 +446,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
452 .go_lock = inode_go_lock, 446 .go_lock = inode_go_lock,
453 .go_unlock = inode_go_unlock, 447 .go_unlock = inode_go_unlock,
454 .go_type = LM_TYPE_INODE, 448 .go_type = LM_TYPE_INODE,
449 .go_min_hold_time = HZ / 10,
455}; 450};
456 451
457const struct gfs2_glock_operations gfs2_rgrp_glops = { 452const struct gfs2_glock_operations gfs2_rgrp_glops = {
@@ -462,6 +457,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
462 .go_lock = rgrp_go_lock, 457 .go_lock = rgrp_go_lock,
463 .go_unlock = rgrp_go_unlock, 458 .go_unlock = rgrp_go_unlock,
464 .go_type = LM_TYPE_RGRP, 459 .go_type = LM_TYPE_RGRP,
460 .go_min_hold_time = HZ / 10,
465}; 461};
466 462
467const struct gfs2_glock_operations gfs2_trans_glops = { 463const struct gfs2_glock_operations gfs2_trans_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 170ba93829c0..eaddfb5a8e6f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -11,6 +11,7 @@
11#define __INCORE_DOT_H__ 11#define __INCORE_DOT_H__
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/workqueue.h>
14 15
15#define DIO_WAIT 0x00000010 16#define DIO_WAIT 0x00000010
16#define DIO_METADATA 0x00000020 17#define DIO_METADATA 0x00000020
@@ -113,7 +114,13 @@ struct gfs2_bufdata {
113 struct buffer_head *bd_bh; 114 struct buffer_head *bd_bh;
114 struct gfs2_glock *bd_gl; 115 struct gfs2_glock *bd_gl;
115 116
116 struct list_head bd_list_tr; 117 union {
118 struct list_head list_tr;
119 u64 blkno;
120 } u;
121#define bd_list_tr u.list_tr
122#define bd_blkno u.blkno
123
117 struct gfs2_log_element bd_le; 124 struct gfs2_log_element bd_le;
118 125
119 struct gfs2_ail *bd_ail; 126 struct gfs2_ail *bd_ail;
@@ -130,6 +137,7 @@ struct gfs2_glock_operations {
130 int (*go_lock) (struct gfs2_holder *gh); 137 int (*go_lock) (struct gfs2_holder *gh);
131 void (*go_unlock) (struct gfs2_holder *gh); 138 void (*go_unlock) (struct gfs2_holder *gh);
132 const int go_type; 139 const int go_type;
140 const unsigned long go_min_hold_time;
133}; 141};
134 142
135enum { 143enum {
@@ -161,6 +169,7 @@ enum {
161 GLF_LOCK = 1, 169 GLF_LOCK = 1,
162 GLF_STICKY = 2, 170 GLF_STICKY = 2,
163 GLF_DEMOTE = 3, 171 GLF_DEMOTE = 3,
172 GLF_PENDING_DEMOTE = 4,
164 GLF_DIRTY = 5, 173 GLF_DIRTY = 5,
165}; 174};
166 175
@@ -193,6 +202,7 @@ struct gfs2_glock {
193 202
194 u64 gl_vn; 203 u64 gl_vn;
195 unsigned long gl_stamp; 204 unsigned long gl_stamp;
205 unsigned long gl_tchange;
196 void *gl_object; 206 void *gl_object;
197 207
198 struct list_head gl_reclaim; 208 struct list_head gl_reclaim;
@@ -203,6 +213,7 @@ struct gfs2_glock {
203 struct gfs2_log_element gl_le; 213 struct gfs2_log_element gl_le;
204 struct list_head gl_ail_list; 214 struct list_head gl_ail_list;
205 atomic_t gl_ail_count; 215 atomic_t gl_ail_count;
216 struct delayed_work gl_work;
206}; 217};
207 218
208struct gfs2_alloc { 219struct gfs2_alloc {
@@ -293,11 +304,6 @@ struct gfs2_file {
293 struct gfs2_holder f_fl_gh; 304 struct gfs2_holder f_fl_gh;
294}; 305};
295 306
296struct gfs2_revoke {
297 struct gfs2_log_element rv_le;
298 u64 rv_blkno;
299};
300
301struct gfs2_revoke_replay { 307struct gfs2_revoke_replay {
302 struct list_head rr_list; 308 struct list_head rr_list;
303 u64 rr_blkno; 309 u64 rr_blkno;
@@ -335,12 +341,6 @@ struct gfs2_quota_data {
335 unsigned long qd_last_touched; 341 unsigned long qd_last_touched;
336}; 342};
337 343
338struct gfs2_log_buf {
339 struct list_head lb_list;
340 struct buffer_head *lb_bh;
341 struct buffer_head *lb_real;
342};
343
344struct gfs2_trans { 344struct gfs2_trans {
345 unsigned long tr_ip; 345 unsigned long tr_ip;
346 346
@@ -429,7 +429,6 @@ struct gfs2_tune {
429 unsigned int gt_log_flush_secs; 429 unsigned int gt_log_flush_secs;
430 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ 430 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
431 431
432 unsigned int gt_scand_secs;
433 unsigned int gt_recoverd_secs; 432 unsigned int gt_recoverd_secs;
434 unsigned int gt_logd_secs; 433 unsigned int gt_logd_secs;
435 unsigned int gt_quotad_secs; 434 unsigned int gt_quotad_secs;
@@ -574,7 +573,6 @@ struct gfs2_sbd {
574 573
575 /* Daemon stuff */ 574 /* Daemon stuff */
576 575
577 struct task_struct *sd_scand_process;
578 struct task_struct *sd_recoverd_process; 576 struct task_struct *sd_recoverd_process;
579 struct task_struct *sd_logd_process; 577 struct task_struct *sd_logd_process;
580 struct task_struct *sd_quotad_process; 578 struct task_struct *sd_quotad_process;
@@ -609,13 +607,13 @@ struct gfs2_sbd {
609 unsigned int sd_log_num_revoke; 607 unsigned int sd_log_num_revoke;
610 unsigned int sd_log_num_rg; 608 unsigned int sd_log_num_rg;
611 unsigned int sd_log_num_databuf; 609 unsigned int sd_log_num_databuf;
612 unsigned int sd_log_num_jdata;
613 610
614 struct list_head sd_log_le_gl; 611 struct list_head sd_log_le_gl;
615 struct list_head sd_log_le_buf; 612 struct list_head sd_log_le_buf;
616 struct list_head sd_log_le_revoke; 613 struct list_head sd_log_le_revoke;
617 struct list_head sd_log_le_rg; 614 struct list_head sd_log_le_rg;
618 struct list_head sd_log_le_databuf; 615 struct list_head sd_log_le_databuf;
616 struct list_head sd_log_le_ordered;
619 617
620 unsigned int sd_log_blks_free; 618 unsigned int sd_log_blks_free;
621 struct mutex sd_log_reserve_mutex; 619 struct mutex sd_log_reserve_mutex;
@@ -627,7 +625,8 @@ struct gfs2_sbd {
627 625
628 unsigned long sd_log_flush_time; 626 unsigned long sd_log_flush_time;
629 struct rw_semaphore sd_log_flush_lock; 627 struct rw_semaphore sd_log_flush_lock;
630 struct list_head sd_log_flush_list; 628 atomic_t sd_log_in_flight;
629 wait_queue_head_t sd_log_flush_wait;
631 630
632 unsigned int sd_log_flush_head; 631 unsigned int sd_log_flush_head;
633 u64 sd_log_flush_wrapped; 632 u64 sd_log_flush_wrapped;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 34f7bcdea1e9..5f6dc32946cd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 77 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
78} 78}
79 79
80struct gfs2_skip_data {
81 u64 no_addr;
82 int skipped;
83};
84
85static int iget_skip_test(struct inode *inode, void *opaque)
86{
87 struct gfs2_inode *ip = GFS2_I(inode);
88 struct gfs2_skip_data *data = opaque;
89
90 if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){
91 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
92 data->skipped = 1;
93 return 0;
94 }
95 return 1;
96 }
97 return 0;
98}
99
100static int iget_skip_set(struct inode *inode, void *opaque)
101{
102 struct gfs2_inode *ip = GFS2_I(inode);
103 struct gfs2_skip_data *data = opaque;
104
105 if (data->skipped)
106 return 1;
107 inode->i_ino = (unsigned long)(data->no_addr);
108 ip->i_no_addr = data->no_addr;
109 return 0;
110}
111
112static struct inode *gfs2_iget_skip(struct super_block *sb,
113 u64 no_addr)
114{
115 struct gfs2_skip_data data;
116 unsigned long hash = (unsigned long)no_addr;
117
118 data.no_addr = no_addr;
119 data.skipped = 0;
120 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
121}
122
80/** 123/**
81 * GFS2 lookup code fills in vfs inode contents based on info obtained 124 * GFS2 lookup code fills in vfs inode contents based on info obtained
82 * from directory entry inside gfs2_inode_lookup(). This has caused issues 125 * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode)
112 * @sb: The super block 155 * @sb: The super block
113 * @no_addr: The inode number 156 * @no_addr: The inode number
114 * @type: The type of the inode 157 * @type: The type of the inode
158 * @skip_freeing: set this not return an inode if it is currently being freed.
115 * 159 *
116 * Returns: A VFS inode, or an error 160 * Returns: A VFS inode, or an error
117 */ 161 */
@@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode)
119struct inode *gfs2_inode_lookup(struct super_block *sb, 163struct inode *gfs2_inode_lookup(struct super_block *sb,
120 unsigned int type, 164 unsigned int type,
121 u64 no_addr, 165 u64 no_addr,
122 u64 no_formal_ino) 166 u64 no_formal_ino, int skip_freeing)
123{ 167{
124 struct inode *inode = gfs2_iget(sb, no_addr); 168 struct inode *inode;
125 struct gfs2_inode *ip = GFS2_I(inode); 169 struct gfs2_inode *ip;
126 struct gfs2_glock *io_gl; 170 struct gfs2_glock *io_gl;
127 int error; 171 int error;
128 172
173 if (skip_freeing)
174 inode = gfs2_iget_skip(sb, no_addr);
175 else
176 inode = gfs2_iget(sb, no_addr);
177 ip = GFS2_I(inode);
178
129 if (!inode) 179 if (!inode)
130 return ERR_PTR(-ENOBUFS); 180 return ERR_PTR(-ENOBUFS);
131 181
@@ -244,6 +294,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
244 return 0; 294 return 0;
245} 295}
246 296
297static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh)
298{
299 ip->i_cache[0] = bh;
300}
301
247/** 302/**
248 * gfs2_inode_refresh - Refresh the incore copy of the dinode 303 * gfs2_inode_refresh - Refresh the incore copy of the dinode
249 * @ip: The GFS2 inode 304 * @ip: The GFS2 inode
@@ -688,7 +743,7 @@ out:
688static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 743static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
689 const struct gfs2_inum_host *inum, unsigned int mode, 744 const struct gfs2_inum_host *inum, unsigned int mode,
690 unsigned int uid, unsigned int gid, 745 unsigned int uid, unsigned int gid,
691 const u64 *generation, dev_t dev) 746 const u64 *generation, dev_t dev, struct buffer_head **bhp)
692{ 747{
693 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 748 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
694 struct gfs2_dinode *di; 749 struct gfs2_dinode *di;
@@ -743,13 +798,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
743 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 798 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
744 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 799 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
745 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 800 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
801
802 set_buffer_uptodate(dibh);
746 803
747 brelse(dibh); 804 *bhp = dibh;
748} 805}
749 806
750static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 807static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
751 unsigned int mode, const struct gfs2_inum_host *inum, 808 unsigned int mode, const struct gfs2_inum_host *inum,
752 const u64 *generation, dev_t dev) 809 const u64 *generation, dev_t dev, struct buffer_head **bhp)
753{ 810{
754 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 811 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
755 unsigned int uid, gid; 812 unsigned int uid, gid;
@@ -770,7 +827,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
770 if (error) 827 if (error)
771 goto out_quota; 828 goto out_quota;
772 829
773 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); 830 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp);
774 gfs2_quota_change(dip, +1, uid, gid); 831 gfs2_quota_change(dip, +1, uid, gid);
775 gfs2_trans_end(sdp); 832 gfs2_trans_end(sdp);
776 833
@@ -909,6 +966,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
909 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 966 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
910 int error; 967 int error;
911 u64 generation; 968 u64 generation;
969 struct buffer_head *bh=NULL;
912 970
913 if (!name->len || name->len > GFS2_FNAMESIZE) 971 if (!name->len || name->len > GFS2_FNAMESIZE)
914 return ERR_PTR(-ENAMETOOLONG); 972 return ERR_PTR(-ENAMETOOLONG);
@@ -935,16 +993,18 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
935 if (error) 993 if (error)
936 goto fail_gunlock; 994 goto fail_gunlock;
937 995
938 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); 996 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh);
939 if (error) 997 if (error)
940 goto fail_gunlock2; 998 goto fail_gunlock2;
941 999
942 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1000 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
943 inum.no_addr, 1001 inum.no_addr,
944 inum.no_formal_ino); 1002 inum.no_formal_ino, 0);
945 if (IS_ERR(inode)) 1003 if (IS_ERR(inode))
946 goto fail_gunlock2; 1004 goto fail_gunlock2;
947 1005
1006 gfs2_inode_bh(GFS2_I(inode), bh);
1007
948 error = gfs2_inode_refresh(GFS2_I(inode)); 1008 error = gfs2_inode_refresh(GFS2_I(inode));
949 if (error) 1009 if (error)
950 goto fail_gunlock2; 1010 goto fail_gunlock2;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 4517ac82c01c..351ac87ab384 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
49void gfs2_inode_attr_in(struct gfs2_inode *ip); 49void gfs2_inode_attr_in(struct gfs2_inode *ip);
50void gfs2_set_iop(struct inode *inode); 50void gfs2_set_iop(struct inode *inode);
51struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 51struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
52 u64 no_addr, u64 no_formal_ino); 52 u64 no_addr, u64 no_formal_ino,
53 int skip_freeing);
53struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 54struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
54 55
55int gfs2_inode_refresh(struct gfs2_inode *ip); 56int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 24d70f73b651..9e8265d28377 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -13,7 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h> 16#include <linux/types.h>
18#include <linux/string.h> 17#include <linux/string.h>
19#include <linux/list.h> 18#include <linux/list.h>
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index fba1f1d87e4f..1f7b038530b4 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -346,15 +346,16 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
346 346
347static unsigned int dev_poll(struct file *file, poll_table *wait) 347static unsigned int dev_poll(struct file *file, poll_table *wait)
348{ 348{
349 unsigned int mask = 0;
350
349 poll_wait(file, &send_wq, wait); 351 poll_wait(file, &send_wq, wait);
350 352
351 spin_lock(&ops_lock); 353 spin_lock(&ops_lock);
352 if (!list_empty(&send_list)) { 354 if (!list_empty(&send_list))
353 spin_unlock(&ops_lock); 355 mask = POLLIN | POLLRDNORM;
354 return POLLIN | POLLRDNORM;
355 }
356 spin_unlock(&ops_lock); 356 spin_unlock(&ops_lock);
357 return 0; 357
358 return mask;
358} 359}
359 360
360static const struct file_operations dev_fops = { 361static const struct file_operations dev_fops = {
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index d9fe3ca40e18..ae9e6a25fe2b 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -190,7 +190,6 @@ static struct kobj_type gdlm_ktype = {
190}; 190};
191 191
192static struct kset gdlm_kset = { 192static struct kset gdlm_kset = {
193 .kobj = {.name = "lock_dlm",},
194 .ktype = &gdlm_ktype, 193 .ktype = &gdlm_ktype,
195}; 194};
196 195
@@ -224,6 +223,7 @@ int gdlm_sysfs_init(void)
224{ 223{
225 int error; 224 int error;
226 225
226 kobject_set_name(&gdlm_kset.kobj, "lock_dlm");
227 kobj_set_kset_s(&gdlm_kset, kernel_subsys); 227 kobj_set_kset_s(&gdlm_kset, kernel_subsys);
228 error = kset_register(&gdlm_kset); 228 error = kset_register(&gdlm_kset);
229 if (error) 229 if (error)
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 1aca51e45092..bd938f06481d 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls)
268 return 0; 268 return 0;
269} 269}
270 270
271static int gdlm_thread(void *data) 271static int gdlm_thread(void *data, int blist)
272{ 272{
273 struct gdlm_ls *ls = (struct gdlm_ls *) data; 273 struct gdlm_ls *ls = (struct gdlm_ls *) data;
274 struct gdlm_lock *lp = NULL; 274 struct gdlm_lock *lp = NULL;
275 int blist = 0;
276 uint8_t complete, blocking, submit, drop; 275 uint8_t complete, blocking, submit, drop;
277 DECLARE_WAITQUEUE(wait, current); 276 DECLARE_WAITQUEUE(wait, current);
278 277
279 /* Only thread1 is allowed to do blocking callbacks since gfs 278 /* Only thread1 is allowed to do blocking callbacks since gfs
280 may wait for a completion callback within a blocking cb. */ 279 may wait for a completion callback within a blocking cb. */
281 280
282 if (current == ls->thread1)
283 blist = 1;
284
285 while (!kthread_should_stop()) { 281 while (!kthread_should_stop()) {
286 set_current_state(TASK_INTERRUPTIBLE); 282 set_current_state(TASK_INTERRUPTIBLE);
287 add_wait_queue(&ls->thread_wait, &wait); 283 add_wait_queue(&ls->thread_wait, &wait);
@@ -333,12 +329,22 @@ static int gdlm_thread(void *data)
333 return 0; 329 return 0;
334} 330}
335 331
332static int gdlm_thread1(void *data)
333{
334 return gdlm_thread(data, 1);
335}
336
337static int gdlm_thread2(void *data)
338{
339 return gdlm_thread(data, 0);
340}
341
336int gdlm_init_threads(struct gdlm_ls *ls) 342int gdlm_init_threads(struct gdlm_ls *ls)
337{ 343{
338 struct task_struct *p; 344 struct task_struct *p;
339 int error; 345 int error;
340 346
341 p = kthread_run(gdlm_thread, ls, "lock_dlm1"); 347 p = kthread_run(gdlm_thread1, ls, "lock_dlm1");
342 error = IS_ERR(p); 348 error = IS_ERR(p);
343 if (error) { 349 if (error) {
344 log_error("can't start lock_dlm1 thread %d", error); 350 log_error("can't start lock_dlm1 thread %d", error);
@@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls)
346 } 352 }
347 ls->thread1 = p; 353 ls->thread1 = p;
348 354
349 p = kthread_run(gdlm_thread, ls, "lock_dlm2"); 355 p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
350 error = IS_ERR(p); 356 error = IS_ERR(p);
351 if (error) { 357 if (error) {
352 log_error("can't start lock_dlm2 thread %d", error); 358 log_error("can't start lock_dlm2 thread %d", error);
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
index 0d149c8c493a..d3b8ce6fbbe3 100644
--- a/fs/gfs2/locking/nolock/main.c
+++ b/fs/gfs2/locking/nolock/main.c
@@ -9,7 +9,6 @@
9 9
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h> 12#include <linux/init.h>
14#include <linux/types.h> 13#include <linux/types.h>
15#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f49a12e24086..7df702473252 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -60,6 +60,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
60} 60}
61 61
62/** 62/**
63 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
64 * @mapping: The associated mapping (maybe NULL)
65 * @bd: The gfs2_bufdata to remove
66 *
67 * The log lock _must_ be held when calling this function
68 *
69 */
70
71void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd)
72{
73 bd->bd_ail = NULL;
74 list_del_init(&bd->bd_ail_st_list);
75 list_del_init(&bd->bd_ail_gl_list);
76 atomic_dec(&bd->bd_gl->gl_ail_count);
77 if (mapping)
78 gfs2_meta_cache_flush(GFS2_I(mapping->host));
79 brelse(bd->bd_bh);
80}
81
82/**
63 * gfs2_ail1_start_one - Start I/O on a part of the AIL 83 * gfs2_ail1_start_one - Start I/O on a part of the AIL
64 * @sdp: the filesystem 84 * @sdp: the filesystem
65 * @tr: the part of the AIL 85 * @tr: the part of the AIL
@@ -83,17 +103,9 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
83 103
84 gfs2_assert(sdp, bd->bd_ail == ai); 104 gfs2_assert(sdp, bd->bd_ail == ai);
85 105
86 if (!bh){
87 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
88 continue;
89 }
90
91 if (!buffer_busy(bh)) { 106 if (!buffer_busy(bh)) {
92 if (!buffer_uptodate(bh)) { 107 if (!buffer_uptodate(bh))
93 gfs2_log_unlock(sdp);
94 gfs2_io_error_bh(sdp, bh); 108 gfs2_io_error_bh(sdp, bh);
95 gfs2_log_lock(sdp);
96 }
97 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 109 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
98 continue; 110 continue;
99 } 111 }
@@ -103,9 +115,16 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
103 115
104 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 116 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
105 117
118 get_bh(bh);
106 gfs2_log_unlock(sdp); 119 gfs2_log_unlock(sdp);
107 wait_on_buffer(bh); 120 lock_buffer(bh);
108 ll_rw_block(WRITE, 1, &bh); 121 if (test_clear_buffer_dirty(bh)) {
122 bh->b_end_io = end_buffer_write_sync;
123 submit_bh(WRITE, bh);
124 } else {
125 unlock_buffer(bh);
126 brelse(bh);
127 }
109 gfs2_log_lock(sdp); 128 gfs2_log_lock(sdp);
110 129
111 retry = 1; 130 retry = 1;
@@ -130,11 +149,6 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
130 bd_ail_st_list) { 149 bd_ail_st_list) {
131 bh = bd->bd_bh; 150 bh = bd->bd_bh;
132 151
133 if (!bh){
134 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
135 continue;
136 }
137
138 gfs2_assert(sdp, bd->bd_ail == ai); 152 gfs2_assert(sdp, bd->bd_ail == ai);
139 153
140 if (buffer_busy(bh)) { 154 if (buffer_busy(bh)) {
@@ -155,13 +169,14 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
155 169
156static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) 170static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
157{ 171{
158 struct list_head *head = &sdp->sd_ail1_list; 172 struct list_head *head;
159 u64 sync_gen; 173 u64 sync_gen;
160 struct list_head *first; 174 struct list_head *first;
161 struct gfs2_ail *first_ai, *ai, *tmp; 175 struct gfs2_ail *first_ai, *ai, *tmp;
162 int done = 0; 176 int done = 0;
163 177
164 gfs2_log_lock(sdp); 178 gfs2_log_lock(sdp);
179 head = &sdp->sd_ail1_list;
165 if (list_empty(head)) { 180 if (list_empty(head)) {
166 gfs2_log_unlock(sdp); 181 gfs2_log_unlock(sdp);
167 return; 182 return;
@@ -233,11 +248,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
233 bd = list_entry(head->prev, struct gfs2_bufdata, 248 bd = list_entry(head->prev, struct gfs2_bufdata,
234 bd_ail_st_list); 249 bd_ail_st_list);
235 gfs2_assert(sdp, bd->bd_ail == ai); 250 gfs2_assert(sdp, bd->bd_ail == ai);
236 bd->bd_ail = NULL; 251 gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd);
237 list_del(&bd->bd_ail_st_list);
238 list_del(&bd->bd_ail_gl_list);
239 atomic_dec(&bd->bd_gl->gl_ail_count);
240 brelse(bd->bd_bh);
241 } 252 }
242} 253}
243 254
@@ -439,10 +450,10 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
439 return tail; 450 return tail;
440} 451}
441 452
442static inline void log_incr_head(struct gfs2_sbd *sdp) 453void gfs2_log_incr_head(struct gfs2_sbd *sdp)
443{ 454{
444 if (sdp->sd_log_flush_head == sdp->sd_log_tail) 455 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
445 gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head); 456 BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head);
446 457
447 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { 458 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
448 sdp->sd_log_flush_head = 0; 459 sdp->sd_log_flush_head = 0;
@@ -451,6 +462,23 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
451} 462}
452 463
453/** 464/**
465 * gfs2_log_write_endio - End of I/O for a log buffer
466 * @bh: The buffer head
467 * @uptodate: I/O Status
468 *
469 */
470
471static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
472{
473 struct gfs2_sbd *sdp = bh->b_private;
474 bh->b_private = NULL;
475
476 end_buffer_write_sync(bh, uptodate);
477 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
478 wake_up(&sdp->sd_log_flush_wait);
479}
480
481/**
454 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data 482 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
455 * @sdp: The GFS2 superblock 483 * @sdp: The GFS2 superblock
456 * 484 *
@@ -460,25 +488,43 @@ static inline void log_incr_head(struct gfs2_sbd *sdp)
460struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) 488struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
461{ 489{
462 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); 490 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
463 struct gfs2_log_buf *lb;
464 struct buffer_head *bh; 491 struct buffer_head *bh;
465 492
466 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); 493 bh = sb_getblk(sdp->sd_vfs, blkno);
467 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
468
469 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
470 lock_buffer(bh); 494 lock_buffer(bh);
471 memset(bh->b_data, 0, bh->b_size); 495 memset(bh->b_data, 0, bh->b_size);
472 set_buffer_uptodate(bh); 496 set_buffer_uptodate(bh);
473 clear_buffer_dirty(bh); 497 clear_buffer_dirty(bh);
474 unlock_buffer(bh); 498 gfs2_log_incr_head(sdp);
475 499 atomic_inc(&sdp->sd_log_in_flight);
476 log_incr_head(sdp); 500 bh->b_private = sdp;
501 bh->b_end_io = gfs2_log_write_endio;
477 502
478 return bh; 503 return bh;
479} 504}
480 505
481/** 506/**
507 * gfs2_fake_write_endio -
508 * @bh: The buffer head
509 * @uptodate: The I/O Status
510 *
511 */
512
513static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
514{
515 struct buffer_head *real_bh = bh->b_private;
516 struct gfs2_bufdata *bd = real_bh->b_private;
517 struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
518
519 end_buffer_write_sync(bh, uptodate);
520 free_buffer_head(bh);
521 unlock_buffer(real_bh);
522 brelse(real_bh);
523 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
524 wake_up(&sdp->sd_log_flush_wait);
525}
526
527/**
482 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log 528 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
483 * @sdp: the filesystem 529 * @sdp: the filesystem
484 * @data: the data the buffer_head should point to 530 * @data: the data the buffer_head should point to
@@ -490,22 +536,20 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
490 struct buffer_head *real) 536 struct buffer_head *real)
491{ 537{
492 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); 538 u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
493 struct gfs2_log_buf *lb;
494 struct buffer_head *bh; 539 struct buffer_head *bh;
495 540
496 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); 541 bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
497 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
498 lb->lb_real = real;
499
500 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
501 atomic_set(&bh->b_count, 1); 542 atomic_set(&bh->b_count, 1);
502 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); 543 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
503 set_bh_page(bh, real->b_page, bh_offset(real)); 544 set_bh_page(bh, real->b_page, bh_offset(real));
504 bh->b_blocknr = blkno; 545 bh->b_blocknr = blkno;
505 bh->b_size = sdp->sd_sb.sb_bsize; 546 bh->b_size = sdp->sd_sb.sb_bsize;
506 bh->b_bdev = sdp->sd_vfs->s_bdev; 547 bh->b_bdev = sdp->sd_vfs->s_bdev;
548 bh->b_private = real;
549 bh->b_end_io = gfs2_fake_write_endio;
507 550
508 log_incr_head(sdp); 551 gfs2_log_incr_head(sdp);
552 atomic_inc(&sdp->sd_log_in_flight);
509 553
510 return bh; 554 return bh;
511} 555}
@@ -572,45 +616,75 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
572 gfs2_assert_withdraw(sdp, !pull); 616 gfs2_assert_withdraw(sdp, !pull);
573 617
574 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); 618 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
575 log_incr_head(sdp); 619 gfs2_log_incr_head(sdp);
576} 620}
577 621
578static void log_flush_commit(struct gfs2_sbd *sdp) 622static void log_flush_commit(struct gfs2_sbd *sdp)
579{ 623{
580 struct list_head *head = &sdp->sd_log_flush_list; 624 DEFINE_WAIT(wait);
581 struct gfs2_log_buf *lb; 625
582 struct buffer_head *bh; 626 if (atomic_read(&sdp->sd_log_in_flight)) {
583 int flushcount = 0; 627 do {
628 prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
629 TASK_UNINTERRUPTIBLE);
630 if (atomic_read(&sdp->sd_log_in_flight))
631 io_schedule();
632 } while(atomic_read(&sdp->sd_log_in_flight));
633 finish_wait(&sdp->sd_log_flush_wait, &wait);
634 }
584 635
585 while (!list_empty(head)) { 636 log_write_header(sdp, 0, 0);
586 lb = list_entry(head->next, struct gfs2_log_buf, lb_list); 637}
587 list_del(&lb->lb_list);
588 bh = lb->lb_bh;
589 638
590 wait_on_buffer(bh); 639static void gfs2_ordered_write(struct gfs2_sbd *sdp)
591 if (!buffer_uptodate(bh)) 640{
592 gfs2_io_error_bh(sdp, bh); 641 struct gfs2_bufdata *bd;
593 if (lb->lb_real) { 642 struct buffer_head *bh;
594 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */ 643 LIST_HEAD(written);
595 schedule(); 644
596 free_buffer_head(bh); 645 gfs2_log_lock(sdp);
597 } else 646 while (!list_empty(&sdp->sd_log_le_ordered)) {
647 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
648 list_move(&bd->bd_le.le_list, &written);
649 bh = bd->bd_bh;
650 if (!buffer_dirty(bh))
651 continue;
652 get_bh(bh);
653 gfs2_log_unlock(sdp);
654 lock_buffer(bh);
655 if (test_clear_buffer_dirty(bh)) {
656 bh->b_end_io = end_buffer_write_sync;
657 submit_bh(WRITE, bh);
658 } else {
659 unlock_buffer(bh);
598 brelse(bh); 660 brelse(bh);
599 kfree(lb); 661 }
600 flushcount++; 662 gfs2_log_lock(sdp);
601 } 663 }
664 list_splice(&written, &sdp->sd_log_le_ordered);
665 gfs2_log_unlock(sdp);
666}
602 667
603 /* If nothing was journaled, the header is unplanned and unwanted. */ 668static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
604 if (flushcount) { 669{
605 log_write_header(sdp, 0, 0); 670 struct gfs2_bufdata *bd;
606 } else { 671 struct buffer_head *bh;
607 unsigned int tail;
608 tail = current_tail(sdp);
609 672
610 gfs2_ail1_empty(sdp, 0); 673 gfs2_log_lock(sdp);
611 if (sdp->sd_log_tail != tail) 674 while (!list_empty(&sdp->sd_log_le_ordered)) {
612 log_pull_tail(sdp, tail); 675 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
676 bh = bd->bd_bh;
677 if (buffer_locked(bh)) {
678 get_bh(bh);
679 gfs2_log_unlock(sdp);
680 wait_on_buffer(bh);
681 brelse(bh);
682 gfs2_log_lock(sdp);
683 continue;
684 }
685 list_del_init(&bd->bd_le.le_list);
613 } 686 }
687 gfs2_log_unlock(sdp);
614} 688}
615 689
616/** 690/**
@@ -640,10 +714,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
640 INIT_LIST_HEAD(&ai->ai_ail1_list); 714 INIT_LIST_HEAD(&ai->ai_ail1_list);
641 INIT_LIST_HEAD(&ai->ai_ail2_list); 715 INIT_LIST_HEAD(&ai->ai_ail2_list);
642 716
643 gfs2_assert_withdraw(sdp, 717 if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
644 sdp->sd_log_num_buf + sdp->sd_log_num_jdata == 718 printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
645 sdp->sd_log_commited_buf + 719 sdp->sd_log_commited_buf);
646 sdp->sd_log_commited_databuf); 720 gfs2_assert_withdraw(sdp, 0);
721 }
722 if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) {
723 printk(KERN_INFO "GFS2: log databuf %u %u\n",
724 sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf);
725 gfs2_assert_withdraw(sdp, 0);
726 }
647 gfs2_assert_withdraw(sdp, 727 gfs2_assert_withdraw(sdp,
648 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); 728 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
649 729
@@ -651,8 +731,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
651 sdp->sd_log_flush_wrapped = 0; 731 sdp->sd_log_flush_wrapped = 0;
652 ai->ai_first = sdp->sd_log_flush_head; 732 ai->ai_first = sdp->sd_log_flush_head;
653 733
734 gfs2_ordered_write(sdp);
654 lops_before_commit(sdp); 735 lops_before_commit(sdp);
655 if (!list_empty(&sdp->sd_log_flush_list)) 736 gfs2_ordered_wait(sdp);
737
738 if (sdp->sd_log_head != sdp->sd_log_flush_head)
656 log_flush_commit(sdp); 739 log_flush_commit(sdp);
657 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ 740 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
658 gfs2_log_lock(sdp); 741 gfs2_log_lock(sdp);
@@ -744,7 +827,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
744 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); 827 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
745 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); 828 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
746 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); 829 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
747 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
748 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 830 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
749 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); 831 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
750 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); 832 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 8e7aa0f29109..dae282400627 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -52,12 +52,14 @@ int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
52 52
53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); 54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_incr_head(struct gfs2_sbd *sdp);
55 56
56struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); 57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
57struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, 58struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
58 struct buffer_head *real); 59 struct buffer_head *real);
59void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd);
61 63
62void gfs2_log_shutdown(struct gfs2_sbd *sdp); 64void gfs2_log_shutdown(struct gfs2_sbd *sdp);
63void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 65void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 3b395c41b2f3..6c27cea761c6 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,7 +27,104 @@
27#include "trans.h" 27#include "trans.h"
28#include "util.h" 28#include "util.h"
29 29
30static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 30/**
31 * gfs2_pin - Pin a buffer in memory
32 * @sdp: The superblock
33 * @bh: The buffer to be pinned
34 *
35 * The log lock must be held when calling this function
36 */
37static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
38{
39 struct gfs2_bufdata *bd;
40
41 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
42
43 clear_buffer_dirty(bh);
44 if (test_set_buffer_pinned(bh))
45 gfs2_assert_withdraw(sdp, 0);
46 if (!buffer_uptodate(bh))
47 gfs2_io_error_bh(sdp, bh);
48 bd = bh->b_private;
49 /* If this buffer is in the AIL and it has already been written
50 * to in-place disk block, remove it from the AIL.
51 */
52 if (bd->bd_ail)
53 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
54 get_bh(bh);
55}
56
57/**
58 * gfs2_unpin - Unpin a buffer
59 * @sdp: the filesystem the buffer belongs to
60 * @bh: The buffer to unpin
61 * @ai:
62 *
63 */
64
65static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
66 struct gfs2_ail *ai)
67{
68 struct gfs2_bufdata *bd = bh->b_private;
69
70 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
71
72 if (!buffer_pinned(bh))
73 gfs2_assert_withdraw(sdp, 0);
74
75 lock_buffer(bh);
76 mark_buffer_dirty(bh);
77 clear_buffer_pinned(bh);
78
79 gfs2_log_lock(sdp);
80 if (bd->bd_ail) {
81 list_del(&bd->bd_ail_st_list);
82 brelse(bh);
83 } else {
84 struct gfs2_glock *gl = bd->bd_gl;
85 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
86 atomic_inc(&gl->gl_ail_count);
87 }
88 bd->bd_ail = ai;
89 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
90 gfs2_log_unlock(sdp);
91 unlock_buffer(bh);
92}
93
94
95static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
96{
97 return (struct gfs2_log_descriptor *)bh->b_data;
98}
99
100static inline __be64 *bh_log_ptr(struct buffer_head *bh)
101{
102 struct gfs2_log_descriptor *ld = bh_log_desc(bh);
103 return (__force __be64 *)(ld + 1);
104}
105
106static inline __be64 *bh_ptr_end(struct buffer_head *bh)
107{
108 return (__force __be64 *)(bh->b_data + bh->b_size);
109}
110
111
112static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
113{
114 struct buffer_head *bh = gfs2_log_get_buf(sdp);
115 struct gfs2_log_descriptor *ld = bh_log_desc(bh);
116 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
117 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
118 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
119 ld->ld_type = cpu_to_be32(ld_type);
120 ld->ld_length = 0;
121 ld->ld_data1 = 0;
122 ld->ld_data2 = 0;
123 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
124 return bh;
125}
126
127static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
31{ 128{
32 struct gfs2_glock *gl; 129 struct gfs2_glock *gl;
33 struct gfs2_trans *tr = current->journal_info; 130 struct gfs2_trans *tr = current->journal_info;
@@ -38,15 +135,19 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
38 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) 135 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
39 return; 136 return;
40 137
41 gfs2_log_lock(sdp); 138 if (!list_empty(&le->le_list))
42 if (!list_empty(&le->le_list)){
43 gfs2_log_unlock(sdp);
44 return; 139 return;
45 } 140
46 gfs2_glock_hold(gl); 141 gfs2_glock_hold(gl);
47 set_bit(GLF_DIRTY, &gl->gl_flags); 142 set_bit(GLF_DIRTY, &gl->gl_flags);
48 sdp->sd_log_num_gl++; 143 sdp->sd_log_num_gl++;
49 list_add(&le->le_list, &sdp->sd_log_le_gl); 144 list_add(&le->le_list, &sdp->sd_log_le_gl);
145}
146
/*
 * glock_lo_add - add a glock log element with the log lock taken here
 * @sdp: the filesystem
 * @le: the log element to add
 *
 * Wrapper around __glock_lo_add() that takes the log lock before the call
 * and releases it afterwards.
 */
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	gfs2_log_lock(sdp);

	__glock_lo_add(sdp, le);

	gfs2_log_unlock(sdp);
}
52 153
@@ -71,30 +172,25 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
71 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 172 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
72 struct gfs2_trans *tr; 173 struct gfs2_trans *tr;
73 174
175 lock_buffer(bd->bd_bh);
74 gfs2_log_lock(sdp); 176 gfs2_log_lock(sdp);
75 if (!list_empty(&bd->bd_list_tr)) { 177 if (!list_empty(&bd->bd_list_tr))
76 gfs2_log_unlock(sdp); 178 goto out;
77 return;
78 }
79 tr = current->journal_info; 179 tr = current->journal_info;
80 tr->tr_touched = 1; 180 tr->tr_touched = 1;
81 tr->tr_num_buf++; 181 tr->tr_num_buf++;
82 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 182 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
83 gfs2_log_unlock(sdp);
84
85 if (!list_empty(&le->le_list)) 183 if (!list_empty(&le->le_list))
86 return; 184 goto out;
87 185 __glock_lo_add(sdp, &bd->bd_gl->gl_le);
88 gfs2_trans_add_gl(bd->bd_gl);
89
90 gfs2_meta_check(sdp, bd->bd_bh); 186 gfs2_meta_check(sdp, bd->bd_bh);
91 gfs2_pin(sdp, bd->bd_bh); 187 gfs2_pin(sdp, bd->bd_bh);
92 gfs2_log_lock(sdp);
93 sdp->sd_log_num_buf++; 188 sdp->sd_log_num_buf++;
94 list_add(&le->le_list, &sdp->sd_log_le_buf); 189 list_add(&le->le_list, &sdp->sd_log_le_buf);
95 gfs2_log_unlock(sdp);
96
97 tr->tr_num_buf_new++; 190 tr->tr_num_buf_new++;
191out:
192 gfs2_log_unlock(sdp);
193 unlock_buffer(bd->bd_bh);
98} 194}
99 195
100static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 196static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -117,8 +213,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
117 struct buffer_head *bh; 213 struct buffer_head *bh;
118 struct gfs2_log_descriptor *ld; 214 struct gfs2_log_descriptor *ld;
119 struct gfs2_bufdata *bd1 = NULL, *bd2; 215 struct gfs2_bufdata *bd1 = NULL, *bd2;
120 unsigned int total = sdp->sd_log_num_buf; 216 unsigned int total;
121 unsigned int offset = BUF_OFFSET;
122 unsigned int limit; 217 unsigned int limit;
123 unsigned int num; 218 unsigned int num;
124 unsigned n; 219 unsigned n;
@@ -127,22 +222,20 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
127 limit = buf_limit(sdp); 222 limit = buf_limit(sdp);
128 /* for 4k blocks, limit = 503 */ 223 /* for 4k blocks, limit = 503 */
129 224
225 gfs2_log_lock(sdp);
226 total = sdp->sd_log_num_buf;
130 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); 227 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
131 while(total) { 228 while(total) {
132 num = total; 229 num = total;
133 if (total > limit) 230 if (total > limit)
134 num = limit; 231 num = limit;
135 bh = gfs2_log_get_buf(sdp); 232 gfs2_log_unlock(sdp);
136 ld = (struct gfs2_log_descriptor *)bh->b_data; 233 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
137 ptr = (__be64 *)(bh->b_data + offset); 234 gfs2_log_lock(sdp);
138 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 235 ld = bh_log_desc(bh);
139 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 236 ptr = bh_log_ptr(bh);
140 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
141 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
142 ld->ld_length = cpu_to_be32(num + 1); 237 ld->ld_length = cpu_to_be32(num + 1);
143 ld->ld_data1 = cpu_to_be32(num); 238 ld->ld_data1 = cpu_to_be32(num);
144 ld->ld_data2 = cpu_to_be32(0);
145 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
146 239
147 n = 0; 240 n = 0;
148 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, 241 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
@@ -152,21 +245,27 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
152 break; 245 break;
153 } 246 }
154 247
155 set_buffer_dirty(bh); 248 gfs2_log_unlock(sdp);
156 ll_rw_block(WRITE, 1, &bh); 249 submit_bh(WRITE, bh);
250 gfs2_log_lock(sdp);
157 251
158 n = 0; 252 n = 0;
159 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, 253 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
160 bd_le.le_list) { 254 bd_le.le_list) {
255 get_bh(bd2->bd_bh);
256 gfs2_log_unlock(sdp);
257 lock_buffer(bd2->bd_bh);
161 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 258 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
162 set_buffer_dirty(bh); 259 submit_bh(WRITE, bh);
163 ll_rw_block(WRITE, 1, &bh); 260 gfs2_log_lock(sdp);
164 if (++n >= num) 261 if (++n >= num)
165 break; 262 break;
166 } 263 }
167 264
265 BUG_ON(total < num);
168 total -= num; 266 total -= num;
169 } 267 }
268 gfs2_log_unlock(sdp);
170} 269}
171 270
172static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 271static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -270,11 +369,8 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
270 tr = current->journal_info; 369 tr = current->journal_info;
271 tr->tr_touched = 1; 370 tr->tr_touched = 1;
272 tr->tr_num_revoke++; 371 tr->tr_num_revoke++;
273
274 gfs2_log_lock(sdp);
275 sdp->sd_log_num_revoke++; 372 sdp->sd_log_num_revoke++;
276 list_add(&le->le_list, &sdp->sd_log_le_revoke); 373 list_add(&le->le_list, &sdp->sd_log_le_revoke);
277 gfs2_log_unlock(sdp);
278} 374}
279 375
280static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 376static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
@@ -284,32 +380,25 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
284 struct buffer_head *bh; 380 struct buffer_head *bh;
285 unsigned int offset; 381 unsigned int offset;
286 struct list_head *head = &sdp->sd_log_le_revoke; 382 struct list_head *head = &sdp->sd_log_le_revoke;
287 struct gfs2_revoke *rv; 383 struct gfs2_bufdata *bd;
288 384
289 if (!sdp->sd_log_num_revoke) 385 if (!sdp->sd_log_num_revoke)
290 return; 386 return;
291 387
292 bh = gfs2_log_get_buf(sdp); 388 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
293 ld = (struct gfs2_log_descriptor *)bh->b_data; 389 ld = bh_log_desc(bh);
294 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
295 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
296 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
297 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
298 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, 390 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
299 sizeof(u64))); 391 sizeof(u64)));
300 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 392 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
301 ld->ld_data2 = cpu_to_be32(0);
302 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
303 offset = sizeof(struct gfs2_log_descriptor); 393 offset = sizeof(struct gfs2_log_descriptor);
304 394
305 while (!list_empty(head)) { 395 while (!list_empty(head)) {
306 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); 396 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
307 list_del_init(&rv->rv_le.le_list); 397 list_del_init(&bd->bd_le.le_list);
308 sdp->sd_log_num_revoke--; 398 sdp->sd_log_num_revoke--;
309 399
310 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 400 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
311 set_buffer_dirty(bh); 401 submit_bh(WRITE, bh);
312 ll_rw_block(WRITE, 1, &bh);
313 402
314 bh = gfs2_log_get_buf(sdp); 403 bh = gfs2_log_get_buf(sdp);
315 mh = (struct gfs2_meta_header *)bh->b_data; 404 mh = (struct gfs2_meta_header *)bh->b_data;
@@ -319,15 +408,14 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
319 offset = sizeof(struct gfs2_meta_header); 408 offset = sizeof(struct gfs2_meta_header);
320 } 409 }
321 410
322 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno); 411 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
323 kfree(rv); 412 kmem_cache_free(gfs2_bufdata_cachep, bd);
324 413
325 offset += sizeof(u64); 414 offset += sizeof(u64);
326 } 415 }
327 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 416 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
328 417
329 set_buffer_dirty(bh); 418 submit_bh(WRITE, bh);
330 ll_rw_block(WRITE, 1, &bh);
331} 419}
332 420
333static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 421static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
@@ -466,222 +554,136 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
466 struct address_space *mapping = bd->bd_bh->b_page->mapping; 554 struct address_space *mapping = bd->bd_bh->b_page->mapping;
467 struct gfs2_inode *ip = GFS2_I(mapping->host); 555 struct gfs2_inode *ip = GFS2_I(mapping->host);
468 556
557 lock_buffer(bd->bd_bh);
469 gfs2_log_lock(sdp); 558 gfs2_log_lock(sdp);
470 if (!list_empty(&bd->bd_list_tr)) { 559 if (!list_empty(&bd->bd_list_tr))
471 gfs2_log_unlock(sdp); 560 goto out;
472 return;
473 }
474 tr->tr_touched = 1; 561 tr->tr_touched = 1;
475 if (gfs2_is_jdata(ip)) { 562 if (gfs2_is_jdata(ip)) {
476 tr->tr_num_buf++; 563 tr->tr_num_buf++;
477 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 564 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
478 } 565 }
479 gfs2_log_unlock(sdp);
480 if (!list_empty(&le->le_list)) 566 if (!list_empty(&le->le_list))
481 return; 567 goto out;
482 568
483 gfs2_trans_add_gl(bd->bd_gl); 569 __glock_lo_add(sdp, &bd->bd_gl->gl_le);
484 if (gfs2_is_jdata(ip)) { 570 if (gfs2_is_jdata(ip)) {
485 sdp->sd_log_num_jdata++;
486 gfs2_pin(sdp, bd->bd_bh); 571 gfs2_pin(sdp, bd->bd_bh);
487 tr->tr_num_databuf_new++; 572 tr->tr_num_databuf_new++;
573 sdp->sd_log_num_databuf++;
574 list_add(&le->le_list, &sdp->sd_log_le_databuf);
575 } else {
576 list_add(&le->le_list, &sdp->sd_log_le_ordered);
488 } 577 }
489 gfs2_log_lock(sdp); 578out:
490 sdp->sd_log_num_databuf++;
491 list_add(&le->le_list, &sdp->sd_log_le_databuf);
492 gfs2_log_unlock(sdp); 579 gfs2_log_unlock(sdp);
580 unlock_buffer(bd->bd_bh);
493} 581}
494 582
495static int gfs2_check_magic(struct buffer_head *bh) 583static void gfs2_check_magic(struct buffer_head *bh)
496{ 584{
497 struct page *page = bh->b_page;
498 void *kaddr; 585 void *kaddr;
499 __be32 *ptr; 586 __be32 *ptr;
500 int rv = 0;
501 587
502 kaddr = kmap_atomic(page, KM_USER0); 588 clear_buffer_escaped(bh);
589 kaddr = kmap_atomic(bh->b_page, KM_USER0);
503 ptr = kaddr + bh_offset(bh); 590 ptr = kaddr + bh_offset(bh);
504 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 591 if (*ptr == cpu_to_be32(GFS2_MAGIC))
505 rv = 1; 592 set_buffer_escaped(bh);
506 kunmap_atomic(kaddr, KM_USER0); 593 kunmap_atomic(kaddr, KM_USER0);
507
508 return rv;
509} 594}
510 595
511/** 596static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
512 * databuf_lo_before_commit - Scan the data buffers, writing as we go 597 struct list_head *list, struct list_head *done,
513 * 598 unsigned int n)
514 * Here we scan through the lists of buffers and make the assumption
515 * that any buffer thats been pinned is being journaled, and that
516 * any unpinned buffer is an ordered write data buffer and therefore
517 * will be written back rather than journaled.
518 */
519static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
520{ 599{
521 LIST_HEAD(started); 600 struct buffer_head *bh1;
522 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
523 struct buffer_head *bh = NULL,*bh1 = NULL;
524 struct gfs2_log_descriptor *ld; 601 struct gfs2_log_descriptor *ld;
525 unsigned int limit; 602 struct gfs2_bufdata *bd;
526 unsigned int total_dbuf; 603 __be64 *ptr;
527 unsigned int total_jdata = sdp->sd_log_num_jdata; 604
528 unsigned int num, n; 605 if (!bh)
529 __be64 *ptr = NULL; 606 return;
530 607
531 limit = databuf_limit(sdp); 608 ld = bh_log_desc(bh);
609 ld->ld_length = cpu_to_be32(n + 1);
610 ld->ld_data1 = cpu_to_be32(n);
532 611
533 /* 612 ptr = bh_log_ptr(bh);
534 * Start writing ordered buffers, write journaled buffers 613
535 * into the log along with a header 614 get_bh(bh);
536 */ 615 submit_bh(WRITE, bh);
537 gfs2_log_lock(sdp); 616 gfs2_log_lock(sdp);
538 total_dbuf = sdp->sd_log_num_databuf; 617 while(!list_empty(list)) {
539 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, 618 bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
540 bd_le.le_list); 619 list_move_tail(&bd->bd_le.le_list, done);
541 while(total_dbuf) { 620 get_bh(bd->bd_bh);
542 num = total_jdata; 621 while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
543 if (num > limit) 622 gfs2_log_incr_head(sdp);
544 num = limit; 623 ptr += 2;
545 n = 0;
546 list_for_each_entry_safe_continue(bd1, bdt,
547 &sdp->sd_log_le_databuf,
548 bd_le.le_list) {
549 /* store off the buffer head in a local ptr since
550 * gfs2_bufdata might change when we drop the log lock
551 */
552 bh1 = bd1->bd_bh;
553
554 /* An ordered write buffer */
555 if (bh1 && !buffer_pinned(bh1)) {
556 list_move(&bd1->bd_le.le_list, &started);
557 if (bd1 == bd2) {
558 bd2 = NULL;
559 bd2 = list_prepare_entry(bd2,
560 &sdp->sd_log_le_databuf,
561 bd_le.le_list);
562 }
563 total_dbuf--;
564 if (bh1) {
565 if (buffer_dirty(bh1)) {
566 get_bh(bh1);
567
568 gfs2_log_unlock(sdp);
569
570 ll_rw_block(SWRITE, 1, &bh1);
571 brelse(bh1);
572
573 gfs2_log_lock(sdp);
574 }
575 continue;
576 }
577 continue;
578 } else if (bh1) { /* A journaled buffer */
579 int magic;
580 gfs2_log_unlock(sdp);
581 if (!bh) {
582 bh = gfs2_log_get_buf(sdp);
583 ld = (struct gfs2_log_descriptor *)
584 bh->b_data;
585 ptr = (__be64 *)(bh->b_data +
586 DATABUF_OFFSET);
587 ld->ld_header.mh_magic =
588 cpu_to_be32(GFS2_MAGIC);
589 ld->ld_header.mh_type =
590 cpu_to_be32(GFS2_METATYPE_LD);
591 ld->ld_header.mh_format =
592 cpu_to_be32(GFS2_FORMAT_LD);
593 ld->ld_type =
594 cpu_to_be32(GFS2_LOG_DESC_JDATA);
595 ld->ld_length = cpu_to_be32(num + 1);
596 ld->ld_data1 = cpu_to_be32(num);
597 ld->ld_data2 = cpu_to_be32(0);
598 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
599 }
600 magic = gfs2_check_magic(bh1);
601 *ptr++ = cpu_to_be64(bh1->b_blocknr);
602 *ptr++ = cpu_to_be64((__u64)magic);
603 clear_buffer_escaped(bh1);
604 if (unlikely(magic != 0))
605 set_buffer_escaped(bh1);
606 gfs2_log_lock(sdp);
607 if (++n >= num)
608 break;
609 } else if (!bh1) {
610 total_dbuf--;
611 sdp->sd_log_num_databuf--;
612 list_del_init(&bd1->bd_le.le_list);
613 if (bd1 == bd2) {
614 bd2 = NULL;
615 bd2 = list_prepare_entry(bd2,
616 &sdp->sd_log_le_databuf,
617 bd_le.le_list);
618 }
619 kmem_cache_free(gfs2_bufdata_cachep, bd1);
620 }
621 } 624 }
622 gfs2_log_unlock(sdp); 625 gfs2_log_unlock(sdp);
623 if (bh) { 626 lock_buffer(bd->bd_bh);
624 set_buffer_mapped(bh); 627 if (buffer_escaped(bd->bd_bh)) {
625 set_buffer_dirty(bh); 628 void *kaddr;
626 ll_rw_block(WRITE, 1, &bh); 629 bh1 = gfs2_log_get_buf(sdp);
627 bh = NULL; 630 kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
631 memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
632 bh1->b_size);
633 kunmap_atomic(kaddr, KM_USER0);
634 *(__be32 *)bh1->b_data = 0;
635 clear_buffer_escaped(bd->bd_bh);
636 unlock_buffer(bd->bd_bh);
637 brelse(bd->bd_bh);
638 } else {
639 bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
628 } 640 }
629 n = 0; 641 submit_bh(WRITE, bh1);
630 gfs2_log_lock(sdp); 642 gfs2_log_lock(sdp);
631 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, 643 ptr += 2;
632 bd_le.le_list) {
633 if (!bd2->bd_bh)
634 continue;
635 /* copy buffer if it needs escaping */
636 gfs2_log_unlock(sdp);
637 if (unlikely(buffer_escaped(bd2->bd_bh))) {
638 void *kaddr;
639 struct page *page = bd2->bd_bh->b_page;
640 bh = gfs2_log_get_buf(sdp);
641 kaddr = kmap_atomic(page, KM_USER0);
642 memcpy(bh->b_data,
643 kaddr + bh_offset(bd2->bd_bh),
644 sdp->sd_sb.sb_bsize);
645 kunmap_atomic(kaddr, KM_USER0);
646 *(__be32 *)bh->b_data = 0;
647 } else {
648 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
649 }
650 set_buffer_dirty(bh);
651 ll_rw_block(WRITE, 1, &bh);
652 gfs2_log_lock(sdp);
653 if (++n >= num)
654 break;
655 }
656 bh = NULL;
657 BUG_ON(total_dbuf < num);
658 total_dbuf -= num;
659 total_jdata -= num;
660 } 644 }
661 gfs2_log_unlock(sdp); 645 gfs2_log_unlock(sdp);
646 brelse(bh);
647}
662 648
663 /* Wait on all ordered buffers */ 649/**
664 while (!list_empty(&started)) { 650 * databuf_lo_before_commit - Scan the data buffers, writing as we go
665 gfs2_log_lock(sdp); 651 *
666 bd1 = list_entry(started.next, struct gfs2_bufdata, 652 */
667 bd_le.le_list);
668 list_del_init(&bd1->bd_le.le_list);
669 sdp->sd_log_num_databuf--;
670 bh = bd1->bd_bh;
671 if (bh) {
672 bh->b_private = NULL;
673 get_bh(bh);
674 gfs2_log_unlock(sdp);
675 wait_on_buffer(bh);
676 brelse(bh);
677 } else
678 gfs2_log_unlock(sdp);
679 653
680 kmem_cache_free(gfs2_bufdata_cachep, bd1); 654static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
681 } 655{
656 struct gfs2_bufdata *bd = NULL;
657 struct buffer_head *bh = NULL;
658 unsigned int n = 0;
659 __be64 *ptr = NULL, *end = NULL;
660 LIST_HEAD(processed);
661 LIST_HEAD(in_progress);
682 662
683 /* We've removed all the ordered write bufs here, so only jdata left */ 663 gfs2_log_lock(sdp);
684 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); 664 while (!list_empty(&sdp->sd_log_le_databuf)) {
665 if (ptr == end) {
666 gfs2_log_unlock(sdp);
667 gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
668 n = 0;
669 bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
670 ptr = bh_log_ptr(bh);
671 end = bh_ptr_end(bh) - 1;
672 gfs2_log_lock(sdp);
673 continue;
674 }
675 bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
676 list_move_tail(&bd->bd_le.le_list, &in_progress);
677 gfs2_check_magic(bd->bd_bh);
678 *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
679 *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0);
680 n++;
681 }
682 gfs2_log_unlock(sdp);
683 gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
684 gfs2_log_lock(sdp);
685 list_splice(&processed, &sdp->sd_log_le_databuf);
686 gfs2_log_unlock(sdp);
685} 687}
686 688
687static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, 689static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
@@ -765,11 +767,9 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
765 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 767 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
766 list_del_init(&bd->bd_le.le_list); 768 list_del_init(&bd->bd_le.le_list);
767 sdp->sd_log_num_databuf--; 769 sdp->sd_log_num_databuf--;
768 sdp->sd_log_num_jdata--;
769 gfs2_unpin(sdp, bd->bd_bh, ai); 770 gfs2_unpin(sdp, bd->bd_bh, ai);
770 } 771 }
771 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); 772 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
772 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
773} 773}
774 774
775 775
@@ -817,10 +817,10 @@ const struct gfs2_log_operations gfs2_databuf_lops = {
817 817
818const struct gfs2_log_operations *gfs2_log_ops[] = { 818const struct gfs2_log_operations *gfs2_log_ops[] = {
819 &gfs2_glock_lops, 819 &gfs2_glock_lops,
820 &gfs2_databuf_lops,
820 &gfs2_buf_lops, 821 &gfs2_buf_lops,
821 &gfs2_revoke_lops,
822 &gfs2_rg_lops, 822 &gfs2_rg_lops,
823 &gfs2_databuf_lops, 823 &gfs2_revoke_lops,
824 NULL, 824 NULL,
825}; 825};
826 826
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index d5d4e68b8807..79c91fd8381b 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -107,6 +107,8 @@ static int __init init_gfs2_fs(void)
107fail_unregister: 107fail_unregister:
108 unregister_filesystem(&gfs2_fs_type); 108 unregister_filesystem(&gfs2_fs_type);
109fail: 109fail:
110 gfs2_glock_exit();
111
110 if (gfs2_bufdata_cachep) 112 if (gfs2_bufdata_cachep)
111 kmem_cache_destroy(gfs2_bufdata_cachep); 113 kmem_cache_destroy(gfs2_bufdata_cachep);
112 114
@@ -127,6 +129,7 @@ fail:
127 129
128static void __exit exit_gfs2_fs(void) 130static void __exit exit_gfs2_fs(void)
129{ 131{
132 gfs2_glock_exit();
130 gfs2_unregister_debugfs(); 133 gfs2_unregister_debugfs();
131 unregister_filesystem(&gfs2_fs_type); 134 unregister_filesystem(&gfs2_fs_type);
132 unregister_filesystem(&gfs2meta_fs_type); 135 unregister_filesystem(&gfs2meta_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 8da343b34ae7..4da423985e4f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -297,74 +297,35 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
297 unlock_page(bh->b_page); 297 unlock_page(bh->b_page);
298} 298}
299 299
300/** 300void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
301 * gfs2_pin - Pin a buffer in memory
302 * @sdp: the filesystem the buffer belongs to
303 * @bh: The buffer to be pinned
304 *
305 */
306
307void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
308{ 301{
302 struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host);
309 struct gfs2_bufdata *bd = bh->b_private; 303 struct gfs2_bufdata *bd = bh->b_private;
310 304 if (test_clear_buffer_pinned(bh)) {
311 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 305 list_del_init(&bd->bd_le.le_list);
312 306 if (meta) {
313 if (test_set_buffer_pinned(bh)) 307 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
314 gfs2_assert_withdraw(sdp, 0); 308 sdp->sd_log_num_buf--;
315 309 tr->tr_num_buf_rm++;
316 wait_on_buffer(bh); 310 } else {
317 311 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf);
318 /* If this buffer is in the AIL and it has already been written 312 sdp->sd_log_num_databuf--;
319 to in-place disk block, remove it from the AIL. */ 313 tr->tr_num_databuf_rm++;
320 314 }
321 gfs2_log_lock(sdp); 315 tr->tr_touched = 1;
322 if (bd->bd_ail && !buffer_in_io(bh))
323 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
324 gfs2_log_unlock(sdp);
325
326 clear_buffer_dirty(bh);
327 wait_on_buffer(bh);
328
329 if (!buffer_uptodate(bh))
330 gfs2_io_error_bh(sdp, bh);
331
332 get_bh(bh);
333}
334
335/**
336 * gfs2_unpin - Unpin a buffer
337 * @sdp: the filesystem the buffer belongs to
338 * @bh: The buffer to unpin
339 * @ai:
340 *
341 */
342
343void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
344 struct gfs2_ail *ai)
345{
346 struct gfs2_bufdata *bd = bh->b_private;
347
348 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
349
350 if (!buffer_pinned(bh))
351 gfs2_assert_withdraw(sdp, 0);
352
353 mark_buffer_dirty(bh);
354 clear_buffer_pinned(bh);
355
356 gfs2_log_lock(sdp);
357 if (bd->bd_ail) {
358 list_del(&bd->bd_ail_st_list);
359 brelse(bh); 316 brelse(bh);
360 } else {
361 struct gfs2_glock *gl = bd->bd_gl;
362 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
363 atomic_inc(&gl->gl_ail_count);
364 } 317 }
365 bd->bd_ail = ai; 318 if (bd) {
366 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 319 if (bd->bd_ail) {
367 gfs2_log_unlock(sdp); 320 gfs2_remove_from_ail(NULL, bd);
321 bh->b_private = NULL;
322 bd->bd_bh = NULL;
323 bd->bd_blkno = bh->b_blocknr;
324 gfs2_trans_add_revoke(sdp, bd);
325 }
326 }
327 clear_buffer_dirty(bh);
328 clear_buffer_uptodate(bh);
368} 329}
369 330
370/** 331/**
@@ -383,44 +344,11 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
383 while (blen) { 344 while (blen) {
384 bh = getbuf(ip->i_gl, bstart, NO_CREATE); 345 bh = getbuf(ip->i_gl, bstart, NO_CREATE);
385 if (bh) { 346 if (bh) {
386 struct gfs2_bufdata *bd = bh->b_private;
387
388 if (test_clear_buffer_pinned(bh)) {
389 struct gfs2_trans *tr = current->journal_info;
390 struct gfs2_inode *bh_ip =
391 GFS2_I(bh->b_page->mapping->host);
392
393 gfs2_log_lock(sdp);
394 list_del_init(&bd->bd_le.le_list);
395 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
396 sdp->sd_log_num_buf--;
397 gfs2_log_unlock(sdp);
398 if (bh_ip->i_inode.i_private != NULL)
399 tr->tr_num_databuf_rm++;
400 else
401 tr->tr_num_buf_rm++;
402 brelse(bh);
403 }
404 if (bd) {
405 gfs2_log_lock(sdp);
406 if (bd->bd_ail) {
407 u64 blkno = bh->b_blocknr;
408 bd->bd_ail = NULL;
409 list_del(&bd->bd_ail_st_list);
410 list_del(&bd->bd_ail_gl_list);
411 atomic_dec(&bd->bd_gl->gl_ail_count);
412 brelse(bh);
413 gfs2_log_unlock(sdp);
414 gfs2_trans_add_revoke(sdp, blkno);
415 } else
416 gfs2_log_unlock(sdp);
417 }
418
419 lock_buffer(bh); 347 lock_buffer(bh);
420 clear_buffer_dirty(bh); 348 gfs2_log_lock(sdp);
421 clear_buffer_uptodate(bh); 349 gfs2_remove_from_journal(bh, current->journal_info, 1);
350 gfs2_log_unlock(sdp);
422 unlock_buffer(bh); 351 unlock_buffer(bh);
423
424 brelse(bh); 352 brelse(bh);
425 } 353 }
426 354
@@ -446,10 +374,10 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip)
446 374
447 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { 375 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
448 bh_slot = &ip->i_cache[x]; 376 bh_slot = &ip->i_cache[x];
449 if (!*bh_slot) 377 if (*bh_slot) {
450 break; 378 brelse(*bh_slot);
451 brelse(*bh_slot); 379 *bh_slot = NULL;
452 *bh_slot = NULL; 380 }
453 } 381 }
454 382
455 spin_unlock(&ip->i_spin); 383 spin_unlock(&ip->i_spin);
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 527bf19d9690..b7048222ebb4 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -50,9 +50,9 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
50 50
51void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, 51void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
52 int meta); 52 int meta);
53void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); 53
54void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, 54void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
55 struct gfs2_ail *ai); 55 int meta);
56 56
57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); 57void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
58 58
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d4..b941f9f9f958 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -42,6 +42,7 @@ enum {
42 Opt_nosuiddir, 42 Opt_nosuiddir,
43 Opt_data_writeback, 43 Opt_data_writeback,
44 Opt_data_ordered, 44 Opt_data_ordered,
45 Opt_err,
45}; 46};
46 47
47static match_table_t tokens = { 48static match_table_t tokens = {
@@ -64,7 +65,8 @@ static match_table_t tokens = {
64 {Opt_suiddir, "suiddir"}, 65 {Opt_suiddir, "suiddir"},
65 {Opt_nosuiddir, "nosuiddir"}, 66 {Opt_nosuiddir, "nosuiddir"},
66 {Opt_data_writeback, "data=writeback"}, 67 {Opt_data_writeback, "data=writeback"},
67 {Opt_data_ordered, "data=ordered"} 68 {Opt_data_ordered, "data=ordered"},
69 {Opt_err, NULL}
68}; 70};
69 71
70/** 72/**
@@ -237,6 +239,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
237 case Opt_data_ordered: 239 case Opt_data_ordered:
238 args->ar_data = GFS2_DATA_ORDERED; 240 args->ar_data = GFS2_DATA_ORDERED;
239 break; 241 break;
242 case Opt_err:
240 default: 243 default:
241 fs_info(sdp, "unknown option: %s\n", o); 244 fs_info(sdp, "unknown option: %s\n", o);
242 error = -EINVAL; 245 error = -EINVAL;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 42a5f58f6fca..873a511ef2be 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -90,7 +90,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
90 error = gfs2_block_map(inode, lblock, 0, bh_result); 90 error = gfs2_block_map(inode, lblock, 0, bh_result);
91 if (error) 91 if (error)
92 return error; 92 return error;
93 if (bh_result->b_blocknr == 0) 93 if (!buffer_mapped(bh_result))
94 return -EIO; 94 return -EIO;
95 return 0; 95 return 0;
96} 96}
@@ -414,7 +414,8 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
414 if (ind_blocks || data_blocks) 414 if (ind_blocks || data_blocks)
415 rblocks += RES_STATFS + RES_QUOTA; 415 rblocks += RES_STATFS + RES_QUOTA;
416 416
417 error = gfs2_trans_begin(sdp, rblocks, 0); 417 error = gfs2_trans_begin(sdp, rblocks,
418 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
418 if (error) 419 if (error)
419 goto out_trans_fail; 420 goto out_trans_fail;
420 421
@@ -616,58 +617,50 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
616 return dblock; 617 return dblock;
617} 618}
618 619
619static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) 620static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
620{ 621{
621 struct gfs2_bufdata *bd; 622 struct gfs2_bufdata *bd;
622 623
624 lock_buffer(bh);
623 gfs2_log_lock(sdp); 625 gfs2_log_lock(sdp);
626 clear_buffer_dirty(bh);
624 bd = bh->b_private; 627 bd = bh->b_private;
625 if (bd) { 628 if (bd) {
626 bd->bd_bh = NULL; 629 if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
627 bh->b_private = NULL; 630 list_del_init(&bd->bd_le.le_list);
628 if (!bd->bd_ail && list_empty(&bd->bd_le.le_list)) 631 else
629 kmem_cache_free(gfs2_bufdata_cachep, bd); 632 gfs2_remove_from_journal(bh, current->journal_info, 0);
630 } 633 }
631 gfs2_log_unlock(sdp);
632
633 lock_buffer(bh);
634 clear_buffer_dirty(bh);
635 bh->b_bdev = NULL; 634 bh->b_bdev = NULL;
636 clear_buffer_mapped(bh); 635 clear_buffer_mapped(bh);
637 clear_buffer_req(bh); 636 clear_buffer_req(bh);
638 clear_buffer_new(bh); 637 clear_buffer_new(bh);
639 clear_buffer_delay(bh); 638 gfs2_log_unlock(sdp);
640 unlock_buffer(bh); 639 unlock_buffer(bh);
641} 640}
642 641
643static void gfs2_invalidatepage(struct page *page, unsigned long offset) 642static void gfs2_invalidatepage(struct page *page, unsigned long offset)
644{ 643{
645 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); 644 struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
646 struct buffer_head *head, *bh, *next; 645 struct buffer_head *bh, *head;
647 unsigned int curr_off = 0; 646 unsigned long pos = 0;
648 647
649 BUG_ON(!PageLocked(page)); 648 BUG_ON(!PageLocked(page));
650 if (offset == 0) 649 if (offset == 0)
651 ClearPageChecked(page); 650 ClearPageChecked(page);
652 if (!page_has_buffers(page)) 651 if (!page_has_buffers(page))
653 return; 652 goto out;
654 653
655 bh = head = page_buffers(page); 654 bh = head = page_buffers(page);
656 do { 655 do {
657 unsigned int next_off = curr_off + bh->b_size; 656 if (offset <= pos)
658 next = bh->b_this_page; 657 gfs2_discard(sdp, bh);
659 658 pos += bh->b_size;
660 if (offset <= curr_off) 659 bh = bh->b_this_page;
661 discard_buffer(sdp, bh);
662
663 curr_off = next_off;
664 bh = next;
665 } while (bh != head); 660 } while (bh != head);
666 661out:
667 if (!offset) 662 if (offset == 0)
668 try_to_release_page(page, 0); 663 try_to_release_page(page, 0);
669
670 return;
671} 664}
672 665
673/** 666/**
@@ -736,59 +729,6 @@ out:
736} 729}
737 730
738/** 731/**
739 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
740 * @bh: the buffer we're stuck on
741 *
742 */
743
744static void stuck_releasepage(struct buffer_head *bh)
745{
746 struct inode *inode = bh->b_page->mapping->host;
747 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
748 struct gfs2_bufdata *bd = bh->b_private;
749 struct gfs2_glock *gl;
750static unsigned limit = 0;
751
752 if (limit > 3)
753 return;
754 limit++;
755
756 fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
757 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
758 (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
759 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
760 fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
761
762 if (!bd)
763 return;
764
765 gl = bd->bd_gl;
766
767 fs_warn(sdp, "gl = (%u, %llu)\n",
768 gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
769
770 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
771 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
772 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
773
774 if (gl->gl_ops == &gfs2_inode_glops) {
775 struct gfs2_inode *ip = gl->gl_object;
776 unsigned int x;
777
778 if (!ip)
779 return;
780
781 fs_warn(sdp, "ip = %llu %llu\n",
782 (unsigned long long)ip->i_no_formal_ino,
783 (unsigned long long)ip->i_no_addr);
784
785 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
786 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
787 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
788 }
789}
790
791/**
792 * gfs2_releasepage - free the metadata associated with a page 732 * gfs2_releasepage - free the metadata associated with a page
793 * @page: the page that's being released 733 * @page: the page that's being released
794 * @gfp_mask: passed from Linux VFS, ignored by us 734 * @gfp_mask: passed from Linux VFS, ignored by us
@@ -805,41 +745,39 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
805 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; 745 struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
806 struct buffer_head *bh, *head; 746 struct buffer_head *bh, *head;
807 struct gfs2_bufdata *bd; 747 struct gfs2_bufdata *bd;
808 unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
809 748
810 if (!page_has_buffers(page)) 749 if (!page_has_buffers(page))
811 goto out; 750 return 0;
812 751
752 gfs2_log_lock(sdp);
813 head = bh = page_buffers(page); 753 head = bh = page_buffers(page);
814 do { 754 do {
815 while (atomic_read(&bh->b_count)) { 755 if (atomic_read(&bh->b_count))
816 if (!atomic_read(&aspace->i_writecount)) 756 goto cannot_release;
817 return 0; 757 bd = bh->b_private;
818 758 if (bd && bd->bd_ail)
819 if (!(gfp_mask & __GFP_WAIT)) 759 goto cannot_release;
820 return 0;
821
822 if (time_after_eq(jiffies, t)) {
823 stuck_releasepage(bh);
824 /* should we withdraw here? */
825 return 0;
826 }
827
828 yield();
829 }
830
831 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 760 gfs2_assert_warn(sdp, !buffer_pinned(bh));
832 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 761 gfs2_assert_warn(sdp, !buffer_dirty(bh));
762 bh = bh->b_this_page;
763 } while(bh != head);
764 gfs2_log_unlock(sdp);
833 765
766 head = bh = page_buffers(page);
767 do {
834 gfs2_log_lock(sdp); 768 gfs2_log_lock(sdp);
835 bd = bh->b_private; 769 bd = bh->b_private;
836 if (bd) { 770 if (bd) {
837 gfs2_assert_warn(sdp, bd->bd_bh == bh); 771 gfs2_assert_warn(sdp, bd->bd_bh == bh);
838 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); 772 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
839 gfs2_assert_warn(sdp, !bd->bd_ail); 773 if (!list_empty(&bd->bd_le.le_list)) {
840 bd->bd_bh = NULL; 774 if (!buffer_pinned(bh))
841 if (!list_empty(&bd->bd_le.le_list)) 775 list_del_init(&bd->bd_le.le_list);
842 bd = NULL; 776 else
777 bd = NULL;
778 }
779 if (bd)
780 bd->bd_bh = NULL;
843 bh->b_private = NULL; 781 bh->b_private = NULL;
844 } 782 }
845 gfs2_log_unlock(sdp); 783 gfs2_log_unlock(sdp);
@@ -849,8 +787,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
849 bh = bh->b_this_page; 787 bh = bh->b_this_page;
850 } while (bh != head); 788 } while (bh != head);
851 789
852out:
853 return try_to_free_buffers(page); 790 return try_to_free_buffers(page);
791cannot_release:
792 gfs2_log_unlock(sdp);
793 return 0;
854} 794}
855 795
856const struct address_space_operations gfs2_file_aops = { 796const struct address_space_operations gfs2_file_aops = {
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b8312edee0e4..e2d1347796a9 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
237 237
238 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, 238 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
239 inum->no_addr, 239 inum->no_addr,
240 0); 240 0, 0);
241 if (!inode) 241 if (!inode)
242 goto fail; 242 goto fail;
243 if (IS_ERR(inode)) { 243 if (IS_ERR(inode)) {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 28773cab4a3d..7eb4b280ac66 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -571,7 +571,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
571 int error = 0; 571 int error = 0;
572 572
573 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 573 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
574 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; 574 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE
575 | GL_FLOCK;
575 576
576 mutex_lock(&fp->f_fl_mutex); 577 mutex_lock(&fp->f_fl_mutex);
577 578
@@ -579,21 +580,19 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
579 if (gl) { 580 if (gl) {
580 if (fl_gh->gh_state == state) 581 if (fl_gh->gh_state == state)
581 goto out; 582 goto out;
582 gfs2_glock_hold(gl);
583 flock_lock_file_wait(file, 583 flock_lock_file_wait(file,
584 &(struct file_lock){.fl_type = F_UNLCK}); 584 &(struct file_lock){.fl_type = F_UNLCK});
585 gfs2_glock_dq_uninit(fl_gh); 585 gfs2_glock_dq_wait(fl_gh);
586 gfs2_holder_reinit(state, flags, fl_gh);
586 } else { 587 } else {
587 error = gfs2_glock_get(GFS2_SB(&ip->i_inode), 588 error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
588 ip->i_no_addr, &gfs2_flock_glops, 589 ip->i_no_addr, &gfs2_flock_glops,
589 CREATE, &gl); 590 CREATE, &gl);
590 if (error) 591 if (error)
591 goto out; 592 goto out;
593 gfs2_holder_init(gl, state, flags, fl_gh);
594 gfs2_glock_put(gl);
592 } 595 }
593
594 gfs2_holder_init(gl, state, flags, fl_gh);
595 gfs2_glock_put(gl);
596
597 error = gfs2_glock_nq(fl_gh); 596 error = gfs2_glock_nq(fl_gh);
598 if (error) { 597 if (error) {
599 gfs2_holder_uninit(fl_gh); 598 gfs2_holder_uninit(fl_gh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cf5aa5050548..17de58e83d92 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -28,18 +28,18 @@
28#include "lm.h" 28#include "lm.h"
29#include "mount.h" 29#include "mount.h"
30#include "ops_fstype.h" 30#include "ops_fstype.h"
31#include "ops_dentry.h"
31#include "ops_super.h" 32#include "ops_super.h"
32#include "recovery.h" 33#include "recovery.h"
33#include "rgrp.h" 34#include "rgrp.h"
34#include "super.h" 35#include "super.h"
35#include "sys.h" 36#include "sys.h"
36#include "util.h" 37#include "util.h"
38#include "log.h"
37 39
38#define DO 0 40#define DO 0
39#define UNDO 1 41#define UNDO 1
40 42
41extern struct dentry_operations gfs2_dops;
42
43static struct gfs2_sbd *init_sbd(struct super_block *sb) 43static struct gfs2_sbd *init_sbd(struct super_block *sb)
44{ 44{
45 struct gfs2_sbd *sdp; 45 struct gfs2_sbd *sdp;
@@ -82,13 +82,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
82 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 82 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
83 INIT_LIST_HEAD(&sdp->sd_log_le_rg); 83 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
84 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 84 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
85 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
85 86
86 mutex_init(&sdp->sd_log_reserve_mutex); 87 mutex_init(&sdp->sd_log_reserve_mutex);
87 INIT_LIST_HEAD(&sdp->sd_ail1_list); 88 INIT_LIST_HEAD(&sdp->sd_ail1_list);
88 INIT_LIST_HEAD(&sdp->sd_ail2_list); 89 INIT_LIST_HEAD(&sdp->sd_ail2_list);
89 90
90 init_rwsem(&sdp->sd_log_flush_lock); 91 init_rwsem(&sdp->sd_log_flush_lock);
91 INIT_LIST_HEAD(&sdp->sd_log_flush_list); 92 atomic_set(&sdp->sd_log_in_flight, 0);
93 init_waitqueue_head(&sdp->sd_log_flush_wait);
92 94
93 INIT_LIST_HEAD(&sdp->sd_revoke_list); 95 INIT_LIST_HEAD(&sdp->sd_revoke_list);
94 96
@@ -145,7 +147,8 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
145 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); 147 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
146 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); 148 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
147 149
148 while ((table = strchr(sdp->sd_table_name, '/'))) 150 table = sdp->sd_table_name;
151 while ((table = strchr(table, '/')))
149 *table = '_'; 152 *table = '_';
150 153
151out: 154out:
@@ -161,14 +164,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
161 if (undo) 164 if (undo)
162 goto fail_trans; 165 goto fail_trans;
163 166
164 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
165 error = IS_ERR(p);
166 if (error) {
167 fs_err(sdp, "can't start scand thread: %d\n", error);
168 return error;
169 }
170 sdp->sd_scand_process = p;
171
172 for (sdp->sd_glockd_num = 0; 167 for (sdp->sd_glockd_num = 0;
173 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; 168 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
174 sdp->sd_glockd_num++) { 169 sdp->sd_glockd_num++) {
@@ -229,14 +224,13 @@ fail:
229 while (sdp->sd_glockd_num--) 224 while (sdp->sd_glockd_num--)
230 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); 225 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
231 226
232 kthread_stop(sdp->sd_scand_process);
233 return error; 227 return error;
234} 228}
235 229
236static inline struct inode *gfs2_lookup_root(struct super_block *sb, 230static inline struct inode *gfs2_lookup_root(struct super_block *sb,
237 u64 no_addr) 231 u64 no_addr)
238{ 232{
239 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); 233 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
240} 234}
241 235
242static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) 236static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
@@ -301,8 +295,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
301 fs_err(sdp, "can't get root dentry\n"); 295 fs_err(sdp, "can't get root dentry\n");
302 error = -ENOMEM; 296 error = -ENOMEM;
303 iput(inode); 297 iput(inode);
304 } 298 } else
305 sb->s_root->d_op = &gfs2_dops; 299 sb->s_root->d_op = &gfs2_dops;
300
306out: 301out:
307 gfs2_glock_dq_uninit(&sb_gh); 302 gfs2_glock_dq_uninit(&sb_gh);
308 return error; 303 return error;
@@ -368,7 +363,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
368 363
369 ip = GFS2_I(sdp->sd_jdesc->jd_inode); 364 ip = GFS2_I(sdp->sd_jdesc->jd_inode);
370 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 365 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
371 LM_FLAG_NOEXP | GL_EXACT, 366 LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
372 &sdp->sd_jinode_gh); 367 &sdp->sd_jinode_gh);
373 if (error) { 368 if (error) {
374 fs_err(sdp, "can't acquire journal inode glock: %d\n", 369 fs_err(sdp, "can't acquire journal inode glock: %d\n",
@@ -818,7 +813,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
818 struct nameidata nd; 813 struct nameidata nd;
819 struct file_system_type *fstype; 814 struct file_system_type *fstype;
820 struct super_block *sb = NULL, *s; 815 struct super_block *sb = NULL, *s;
821 struct list_head *l;
822 int error; 816 int error;
823 817
824 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); 818 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
@@ -830,8 +824,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name)
830 error = vfs_getattr(nd.mnt, nd.dentry, &stat); 824 error = vfs_getattr(nd.mnt, nd.dentry, &stat);
831 825
832 fstype = get_fs_type("gfs2"); 826 fstype = get_fs_type("gfs2");
833 list_for_each(l, &fstype->fs_supers) { 827 list_for_each_entry(s, &fstype->fs_supers, s_instances) {
834 s = list_entry(l, struct super_block, s_instances);
835 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || 828 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
836 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) { 829 (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
837 sb = s; 830 sb = s;
@@ -861,7 +854,7 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
861 error = -ENOENT; 854 error = -ENOENT;
862 goto error; 855 goto error;
863 } 856 }
864 sdp = (struct gfs2_sbd*) sb->s_fs_info; 857 sdp = sb->s_fs_info;
865 if (sdp->sd_vfs_meta) { 858 if (sdp->sd_vfs_meta) {
866 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); 859 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
867 error = -EBUSY; 860 error = -EBUSY;
@@ -896,7 +889,10 @@ error:
896 889
897static void gfs2_kill_sb(struct super_block *sb) 890static void gfs2_kill_sb(struct super_block *sb)
898{ 891{
899 gfs2_delete_debugfs_file(sb->s_fs_info); 892 if (sb->s_fs_info) {
893 gfs2_delete_debugfs_file(sb->s_fs_info);
894 gfs2_meta_syncfs(sb->s_fs_info);
895 }
900 kill_block_super(sb); 896 kill_block_super(sb);
901} 897}
902 898
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 911c115b5c6c..291f0c7eaa3b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
69 mark_inode_dirty(inode); 69 mark_inode_dirty(inode);
70 break; 70 break;
71 } else if (PTR_ERR(inode) != -EEXIST || 71 } else if (PTR_ERR(inode) != -EEXIST ||
72 (nd->intent.open.flags & O_EXCL)) { 72 (nd && (nd->intent.open.flags & O_EXCL))) {
73 gfs2_holder_uninit(ghs); 73 gfs2_holder_uninit(ghs);
74 return PTR_ERR(inode); 74 return PTR_ERR(inode);
75 } 75 }
@@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 278 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
279 279
280 280
281 error = gfs2_glock_nq_m(3, ghs); 281 error = gfs2_glock_nq(ghs); /* parent */
282 if (error) 282 if (error)
283 goto out; 283 goto out_parent;
284
285 error = gfs2_glock_nq(ghs + 1); /* child */
286 if (error)
287 goto out_child;
288
289 error = gfs2_glock_nq(ghs + 2); /* rgrp */
290 if (error)
291 goto out_rgrp;
284 292
285 error = gfs2_unlink_ok(dip, &dentry->d_name, ip); 293 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
286 if (error) 294 if (error)
287 goto out_gunlock; 295 goto out_rgrp;
288 296
289 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); 297 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
290 if (error) 298 if (error)
291 goto out_gunlock; 299 goto out_rgrp;
292 300
293 error = gfs2_dir_del(dip, &dentry->d_name); 301 error = gfs2_dir_del(dip, &dentry->d_name);
294 if (error) 302 if (error)
@@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
298 306
299out_end_trans: 307out_end_trans:
300 gfs2_trans_end(sdp); 308 gfs2_trans_end(sdp);
301out_gunlock: 309 gfs2_glock_dq(ghs + 2);
302 gfs2_glock_dq_m(3, ghs); 310out_rgrp:
303out:
304 gfs2_holder_uninit(ghs);
305 gfs2_holder_uninit(ghs + 1);
306 gfs2_holder_uninit(ghs + 2); 311 gfs2_holder_uninit(ghs + 2);
312 gfs2_glock_dq(ghs + 1);
313out_child:
314 gfs2_holder_uninit(ghs + 1);
315 gfs2_glock_dq(ghs);
316out_parent:
317 gfs2_holder_uninit(ghs);
307 gfs2_glock_dq_uninit(&ri_gh); 318 gfs2_glock_dq_uninit(&ri_gh);
308 return error; 319 return error;
309} 320}
@@ -894,12 +905,17 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
894static int setattr_size(struct inode *inode, struct iattr *attr) 905static int setattr_size(struct inode *inode, struct iattr *attr)
895{ 906{
896 struct gfs2_inode *ip = GFS2_I(inode); 907 struct gfs2_inode *ip = GFS2_I(inode);
908 struct gfs2_sbd *sdp = GFS2_SB(inode);
897 int error; 909 int error;
898 910
899 if (attr->ia_size != ip->i_di.di_size) { 911 if (attr->ia_size != ip->i_di.di_size) {
900 error = vmtruncate(inode, attr->ia_size); 912 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
901 if (error) 913 if (error)
902 return error; 914 return error;
915 error = vmtruncate(inode, attr->ia_size);
916 gfs2_trans_end(sdp);
917 if (error)
918 return error;
903 } 919 }
904 920
905 error = gfs2_truncatei(ip, attr->ia_size); 921 error = gfs2_truncatei(ip, attr->ia_size);
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 603d940f1159..950f31460e8b 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -92,7 +92,6 @@ static void gfs2_put_super(struct super_block *sb)
92 kthread_stop(sdp->sd_recoverd_process); 92 kthread_stop(sdp->sd_recoverd_process);
93 while (sdp->sd_glockd_num--) 93 while (sdp->sd_glockd_num--)
94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); 94 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
95 kthread_stop(sdp->sd_scand_process);
96 95
97 if (!(sb->s_flags & MS_RDONLY)) { 96 if (!(sb->s_flags & MS_RDONLY)) {
98 error = gfs2_make_fs_ro(sdp); 97 error = gfs2_make_fs_ro(sdp);
@@ -456,12 +455,15 @@ static void gfs2_delete_inode(struct inode *inode)
456 } 455 }
457 456
458 error = gfs2_dinode_dealloc(ip); 457 error = gfs2_dinode_dealloc(ip);
459 /* 458 if (error)
460 * Must do this before unlock to avoid trying to write back 459 goto out_unlock;
461 * potentially dirty data now that inode no longer exists 460
462 * on disk. 461 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
463 */ 462 if (error)
463 goto out_unlock;
464 /* Needs to be done before glock release & also in a transaction */
464 truncate_inode_pages(&inode->i_data, 0); 465 truncate_inode_pages(&inode->i_data, 0);
466 gfs2_trans_end(sdp);
465 467
466out_unlock: 468out_unlock:
467 gfs2_glock_dq(&ip->i_iopen_gh); 469 gfs2_glock_dq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6e546ee8f3d4..addb51e0f135 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -70,6 +70,7 @@ struct gfs2_quota_host {
70 u64 qu_limit; 70 u64 qu_limit;
71 u64 qu_warn; 71 u64 qu_warn;
72 s64 qu_value; 72 s64 qu_value;
73 u32 qu_ll_next;
73}; 74};
74 75
75struct gfs2_quota_change_host { 76struct gfs2_quota_change_host {
@@ -580,6 +581,7 @@ static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
580 qu->qu_limit = be64_to_cpu(str->qu_limit); 581 qu->qu_limit = be64_to_cpu(str->qu_limit);
581 qu->qu_warn = be64_to_cpu(str->qu_warn); 582 qu->qu_warn = be64_to_cpu(str->qu_warn);
582 qu->qu_value = be64_to_cpu(str->qu_value); 583 qu->qu_value = be64_to_cpu(str->qu_value);
584 qu->qu_ll_next = be32_to_cpu(str->qu_ll_next);
583} 585}
584 586
585static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) 587static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
@@ -589,6 +591,7 @@ static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
589 str->qu_limit = cpu_to_be64(qu->qu_limit); 591 str->qu_limit = cpu_to_be64(qu->qu_limit);
590 str->qu_warn = cpu_to_be64(qu->qu_warn); 592 str->qu_warn = cpu_to_be64(qu->qu_warn);
591 str->qu_value = cpu_to_be64(qu->qu_value); 593 str->qu_value = cpu_to_be64(qu->qu_value);
594 str->qu_ll_next = cpu_to_be32(qu->qu_ll_next);
592 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); 595 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
593} 596}
594 597
@@ -614,6 +617,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
614 s64 value; 617 s64 value;
615 int err = -EIO; 618 int err = -EIO;
616 619
620 if (gfs2_is_stuffed(ip)) {
621 struct gfs2_alloc *al = NULL;
622 al = gfs2_alloc_get(ip);
623 /* just request 1 blk */
624 al->al_requested = 1;
625 gfs2_inplace_reserve(ip);
626 gfs2_unstuff_dinode(ip, NULL);
627 gfs2_inplace_release(ip);
628 gfs2_alloc_put(ip);
629 }
617 page = grab_cache_page(mapping, index); 630 page = grab_cache_page(mapping, index);
618 if (!page) 631 if (!page)
619 return -ENOMEM; 632 return -ENOMEM;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 5ada38c99a2c..beb6c7ac0086 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -469,7 +469,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
469 }; 469 };
470 470
471 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 471 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
472 LM_FLAG_NOEXP, &ji_gh); 472 LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
473 if (error) 473 if (error)
474 goto fail_gunlock_j; 474 goto fail_gunlock_j;
475 } else { 475 } else {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ce48c4594ec8..708c287e1d0e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -31,6 +31,7 @@
31#include "inode.h" 31#include "inode.h"
32 32
33#define BFITNOENT ((u32)~0) 33#define BFITNOENT ((u32)~0)
34#define NO_BLOCK ((u64)~0)
34 35
35/* 36/*
36 * These routines are used by the resource group routines (rgrp.c) 37 * These routines are used by the resource group routines (rgrp.c)
@@ -116,8 +117,7 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
116 * @buffer: the buffer that holds the bitmaps 117 * @buffer: the buffer that holds the bitmaps
117 * @buflen: the length (in bytes) of the buffer 118 * @buflen: the length (in bytes) of the buffer
118 * @goal: start search at this block's bit-pair (within @buffer) 119 * @goal: start search at this block's bit-pair (within @buffer)
119 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for; 120 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
120 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
121 * 121 *
122 * Scope of @goal and returned block number is only within this bitmap buffer, 122 * Scope of @goal and returned block number is only within this bitmap buffer,
123 * not entire rgrp or filesystem. @buffer will be offset from the actual 123 * not entire rgrp or filesystem. @buffer will be offset from the actual
@@ -137,9 +137,13 @@ static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
137 byte = buffer + (goal / GFS2_NBBY); 137 byte = buffer + (goal / GFS2_NBBY);
138 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 138 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
139 end = buffer + buflen; 139 end = buffer + buflen;
140 alloc = (old_state & 1) ? 0 : 0x55; 140 alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0;
141 141
142 while (byte < end) { 142 while (byte < end) {
143 /* If we're looking for a free block we can eliminate all
144 bitmap settings with 0x55, which represents four data
145 blocks in a row. If we're looking for a data block, we can
146 eliminate 0x00 which corresponds to four free blocks. */
143 if ((*byte & 0x55) == alloc) { 147 if ((*byte & 0x55) == alloc) {
144 blk += (8 - bit) >> 1; 148 blk += (8 - bit) >> 1;
145 149
@@ -859,23 +863,28 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
859static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) 863static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
860{ 864{
861 struct inode *inode; 865 struct inode *inode;
862 u32 goal = 0; 866 u32 goal = 0, block;
863 u64 no_addr; 867 u64 no_addr;
868 struct gfs2_sbd *sdp = rgd->rd_sbd;
864 869
865 for(;;) { 870 for(;;) {
866 if (goal >= rgd->rd_data) 871 if (goal >= rgd->rd_data)
867 break; 872 break;
868 goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 873 down_write(&sdp->sd_log_flush_lock);
869 GFS2_BLKST_UNLINKED); 874 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
870 if (goal == BFITNOENT) 875 GFS2_BLKST_UNLINKED);
876 up_write(&sdp->sd_log_flush_lock);
877 if (block == BFITNOENT)
871 break; 878 break;
872 no_addr = goal + rgd->rd_data0; 879 /* rgblk_search can return a block < goal, so we need to
880 keep it marching forward. */
881 no_addr = block + rgd->rd_data0;
873 goal++; 882 goal++;
874 if (no_addr < *last_unlinked) 883 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
875 continue; 884 continue;
876 *last_unlinked = no_addr; 885 *last_unlinked = no_addr;
877 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, 886 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
878 no_addr, -1); 887 no_addr, -1, 1);
879 if (!IS_ERR(inode)) 888 if (!IS_ERR(inode))
880 return inode; 889 return inode;
881 } 890 }
@@ -1152,7 +1161,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
1152 struct gfs2_alloc *al = &ip->i_alloc; 1161 struct gfs2_alloc *al = &ip->i_alloc;
1153 struct inode *inode; 1162 struct inode *inode;
1154 int error = 0; 1163 int error = 0;
1155 u64 last_unlinked = 0; 1164 u64 last_unlinked = NO_BLOCK;
1156 1165
1157 if (gfs2_assert_warn(sdp, al->al_requested)) 1166 if (gfs2_assert_warn(sdp, al->al_requested))
1158 return -EINVAL; 1167 return -EINVAL;
@@ -1289,7 +1298,9 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1289 allocatable block anywhere else, we want to be able wrap around and 1298 allocatable block anywhere else, we want to be able wrap around and
1290 search in the first part of our first-searched bit block. */ 1299 search in the first part of our first-searched bit block. */
1291 for (x = 0; x <= length; x++) { 1300 for (x = 0; x <= length; x++) {
1292 if (bi->bi_clone) 1301 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1302 bitmaps, so we must search the originals for that. */
1303 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1293 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, 1304 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
1294 bi->bi_len, goal, old_state); 1305 bi->bi_len, goal, old_state);
1295 else 1306 else
@@ -1305,9 +1316,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1305 goal = 0; 1316 goal = 0;
1306 } 1317 }
1307 1318
1308 if (old_state != new_state) { 1319 if (blk != BFITNOENT && old_state != new_state) {
1309 gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
1310
1311 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1320 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1312 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1321 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1313 bi->bi_len, blk, new_state); 1322 bi->bi_len, blk, new_state);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index f916b9740c75..dd3e737f528e 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -58,7 +58,6 @@ void gfs2_tune_init(struct gfs2_tune *gt)
58 gt->gt_incore_log_blocks = 1024; 58 gt->gt_incore_log_blocks = 1024;
59 gt->gt_log_flush_secs = 60; 59 gt->gt_log_flush_secs = 60;
60 gt->gt_jindex_refresh_secs = 60; 60 gt->gt_jindex_refresh_secs = 60;
61 gt->gt_scand_secs = 15;
62 gt->gt_recoverd_secs = 60; 61 gt->gt_recoverd_secs = 60;
63 gt->gt_logd_secs = 1; 62 gt->gt_logd_secs = 1;
64 gt->gt_quotad_secs = 5; 63 gt->gt_quotad_secs = 5;
@@ -160,18 +159,15 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
160} 159}
161 160
162 161
163static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error) 162static void end_bio_io_page(struct bio *bio, int error)
164{ 163{
165 struct page *page = bio->bi_private; 164 struct page *page = bio->bi_private;
166 if (bio->bi_size)
167 return 1;
168 165
169 if (!error) 166 if (!error)
170 SetPageUptodate(page); 167 SetPageUptodate(page);
171 else 168 else
172 printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); 169 printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
173 unlock_page(page); 170 unlock_page(page);
174 return 0;
175} 171}
176 172
177static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) 173static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index c26c21b53c19..06e0b7768d97 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -222,7 +222,6 @@ static struct kobj_type gfs2_ktype = {
222}; 222};
223 223
224static struct kset gfs2_kset = { 224static struct kset gfs2_kset = {
225 .kobj = {.name = "gfs2"},
226 .ktype = &gfs2_ktype, 225 .ktype = &gfs2_ktype,
227}; 226};
228 227
@@ -442,7 +441,6 @@ TUNE_ATTR(quota_simul_sync, 1);
442TUNE_ATTR(quota_cache_secs, 1); 441TUNE_ATTR(quota_cache_secs, 1);
443TUNE_ATTR(stall_secs, 1); 442TUNE_ATTR(stall_secs, 1);
444TUNE_ATTR(statfs_quantum, 1); 443TUNE_ATTR(statfs_quantum, 1);
445TUNE_ATTR_DAEMON(scand_secs, scand_process);
446TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); 444TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
447TUNE_ATTR_DAEMON(logd_secs, logd_process); 445TUNE_ATTR_DAEMON(logd_secs, logd_process);
448TUNE_ATTR_DAEMON(quotad_secs, quotad_process); 446TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
@@ -464,7 +462,6 @@ static struct attribute *tune_attrs[] = {
464 &tune_attr_quota_cache_secs.attr, 462 &tune_attr_quota_cache_secs.attr,
465 &tune_attr_stall_secs.attr, 463 &tune_attr_stall_secs.attr,
466 &tune_attr_statfs_quantum.attr, 464 &tune_attr_statfs_quantum.attr,
467 &tune_attr_scand_secs.attr,
468 &tune_attr_recoverd_secs.attr, 465 &tune_attr_recoverd_secs.attr,
469 &tune_attr_logd_secs.attr, 466 &tune_attr_logd_secs.attr,
470 &tune_attr_quotad_secs.attr, 467 &tune_attr_quotad_secs.attr,
@@ -553,6 +550,7 @@ int gfs2_sys_init(void)
553{ 550{
554 gfs2_sys_margs = NULL; 551 gfs2_sys_margs = NULL;
555 spin_lock_init(&gfs2_sys_margs_lock); 552 spin_lock_init(&gfs2_sys_margs_lock);
553 kobject_set_name(&gfs2_kset.kobj, "gfs2");
556 kobj_set_kset_s(&gfs2_kset, fs_subsys); 554 kobj_set_kset_s(&gfs2_kset, fs_subsys);
557 return kset_register(&gfs2_kset); 555 return kset_register(&gfs2_kset);
558} 556}
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f8dabf8446bb..717983e2c2ae 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -142,25 +142,25 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
142 lops_add(sdp, &bd->bd_le); 142 lops_add(sdp, &bd->bd_le);
143} 143}
144 144
145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno) 145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
146{ 146{
147 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), 147 BUG_ON(!list_empty(&bd->bd_le.le_list));
148 GFP_NOFS | __GFP_NOFAIL); 148 BUG_ON(!list_empty(&bd->bd_ail_st_list));
149 lops_init_le(&rv->rv_le, &gfs2_revoke_lops); 149 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
150 rv->rv_blkno = blkno; 150 lops_init_le(&bd->bd_le, &gfs2_revoke_lops);
151 lops_add(sdp, &rv->rv_le); 151 lops_add(sdp, &bd->bd_le);
152} 152}
153 153
154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) 154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
155{ 155{
156 struct gfs2_revoke *rv; 156 struct gfs2_bufdata *bd;
157 int found = 0; 157 int found = 0;
158 158
159 gfs2_log_lock(sdp); 159 gfs2_log_lock(sdp);
160 160
161 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { 161 list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) {
162 if (rv->rv_blkno == blkno) { 162 if (bd->bd_blkno == blkno) {
163 list_del(&rv->rv_le.le_list); 163 list_del_init(&bd->bd_le.le_list);
164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); 164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
165 sdp->sd_log_num_revoke--; 165 sdp->sd_log_num_revoke--;
166 found = 1; 166 found = 1;
@@ -172,7 +172,7 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
172 172
173 if (found) { 173 if (found) {
174 struct gfs2_trans *tr = current->journal_info; 174 struct gfs2_trans *tr = current->journal_info;
175 kfree(rv); 175 kmem_cache_free(gfs2_bufdata_cachep, bd);
176 tr->tr_num_revoke_rm++; 176 tr->tr_num_revoke_rm++;
177 } 177 }
178} 178}
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 23d4cbe1de5b..043d5f4b9c4c 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp);
32 32
33void gfs2_trans_add_gl(struct gfs2_glock *gl); 33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno); 35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); 36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); 37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38 38
diff --git a/fs/inode.c b/fs/inode.c
index 29f5068f819b..f97de0aeb3b6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,15 @@ static struct inode *alloc_inode(struct super_block *sb)
142 return NULL; 142 return NULL;
143 } 143 }
144 144
145 spin_lock_init(&inode->i_lock);
146 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
147
148 mutex_init(&inode->i_mutex);
149 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
150
151 init_rwsem(&inode->i_alloc_sem);
152 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
153
145 mapping->a_ops = &empty_aops; 154 mapping->a_ops = &empty_aops;
146 mapping->host = inode; 155 mapping->host = inode;
147 mapping->flags = 0; 156 mapping->flags = 0;
@@ -190,8 +199,6 @@ void inode_init_once(struct inode *inode)
190 INIT_HLIST_NODE(&inode->i_hash); 199 INIT_HLIST_NODE(&inode->i_hash);
191 INIT_LIST_HEAD(&inode->i_dentry); 200 INIT_LIST_HEAD(&inode->i_dentry);
192 INIT_LIST_HEAD(&inode->i_devices); 201 INIT_LIST_HEAD(&inode->i_devices);
193 mutex_init(&inode->i_mutex);
194 init_rwsem(&inode->i_alloc_sem);
195 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 202 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
196 rwlock_init(&inode->i_data.tree_lock); 203 rwlock_init(&inode->i_data.tree_lock);
197 spin_lock_init(&inode->i_data.i_mmap_lock); 204 spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -199,7 +206,6 @@ void inode_init_once(struct inode *inode)
199 spin_lock_init(&inode->i_data.private_lock); 206 spin_lock_init(&inode->i_data.private_lock);
200 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); 207 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
201 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); 208 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
202 spin_lock_init(&inode->i_lock);
203 i_size_ordered_init(inode); 209 i_size_ordered_init(inode);
204#ifdef CONFIG_INOTIFY 210#ifdef CONFIG_INOTIFY
205 INIT_LIST_HEAD(&inode->inotify_watches); 211 INIT_LIST_HEAD(&inode->inotify_watches);
@@ -561,6 +567,18 @@ EXPORT_SYMBOL(new_inode);
561 567
562void unlock_new_inode(struct inode *inode) 568void unlock_new_inode(struct inode *inode)
563{ 569{
570#ifdef CONFIG_DEBUG_LOCK_ALLOC
571 struct file_system_type *type = inode->i_sb->s_type;
572 /*
573 * ensure nobody is actually holding i_mutex
574 */
575 mutex_destroy(&inode->i_mutex);
576 mutex_init(&inode->i_mutex);
577 if (inode->i_mode & S_IFDIR)
578 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
579 else
580 lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
581#endif
564 /* 582 /*
565 * This is special! We do not need the spinlock 583 * This is special! We do not need the spinlock
566 * when clearing I_LOCK, because we're guaranteed 584 * when clearing I_LOCK, because we're guaranteed
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 772b6531a2a2..8df5bac0b7a5 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -233,6 +233,8 @@ out:
233 return ret; 233 return ret;
234} 234}
235 235
236static struct lock_class_key jbd_handle_key;
237
236/* Allocate a new handle. This should probably be in a slab... */ 238/* Allocate a new handle. This should probably be in a slab... */
237static handle_t *new_handle(int nblocks) 239static handle_t *new_handle(int nblocks)
238{ 240{
@@ -243,6 +245,8 @@ static handle_t *new_handle(int nblocks)
243 handle->h_buffer_credits = nblocks; 245 handle->h_buffer_credits = nblocks;
244 handle->h_ref = 1; 246 handle->h_ref = 1;
245 247
248 lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);
249
246 return handle; 250 return handle;
247} 251}
248 252
@@ -286,6 +290,9 @@ handle_t *journal_start(journal_t *journal, int nblocks)
286 current->journal_info = NULL; 290 current->journal_info = NULL;
287 handle = ERR_PTR(err); 291 handle = ERR_PTR(err);
288 } 292 }
293
294 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
295
289 return handle; 296 return handle;
290} 297}
291 298
@@ -1411,6 +1418,8 @@ int journal_stop(handle_t *handle)
1411 spin_unlock(&journal->j_state_lock); 1418 spin_unlock(&journal->j_state_lock);
1412 } 1419 }
1413 1420
1421 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
1422
1414 jbd_free_handle(handle); 1423 jbd_free_handle(handle);
1415 return err; 1424 return err;
1416} 1425}
diff --git a/fs/jffs2/Makefile b/fs/jffs2/Makefile
index c32b241e3d91..60e5d49ca03e 100644
--- a/fs/jffs2/Makefile
+++ b/fs/jffs2/Makefile
@@ -17,4 +17,5 @@ jffs2-$(CONFIG_JFFS2_FS_POSIX_ACL) += acl.o
17jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o 17jffs2-$(CONFIG_JFFS2_RUBIN) += compr_rubin.o
18jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o 18jffs2-$(CONFIG_JFFS2_RTIME) += compr_rtime.o
19jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o 19jffs2-$(CONFIG_JFFS2_ZLIB) += compr_zlib.o
20jffs2-$(CONFIG_JFFS2_LZO) += compr_lzo.o
20jffs2-$(CONFIG_JFFS2_SUMMARY) += summary.o 21jffs2-$(CONFIG_JFFS2_SUMMARY) += summary.o
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 65b3a1b5b88d..8ec9323e830a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -176,7 +176,7 @@ static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct
176 spin_unlock(&inode->i_lock); 176 spin_unlock(&inode->i_lock);
177} 177}
178 178
179static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) 179struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
180{ 180{
181 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 181 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
182 struct posix_acl *acl; 182 struct posix_acl *acl;
@@ -247,8 +247,13 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
247 if (rc < 0) 247 if (rc < 0)
248 return rc; 248 return rc;
249 if (inode->i_mode != mode) { 249 if (inode->i_mode != mode) {
250 inode->i_mode = mode; 250 struct iattr attr;
251 jffs2_dirty_inode(inode); 251
252 attr.ia_valid = ATTR_MODE;
253 attr.ia_mode = mode;
254 rc = jffs2_do_setattr(inode, &attr);
255 if (rc < 0)
256 return rc;
252 } 257 }
253 if (rc == 0) 258 if (rc == 0)
254 acl = NULL; 259 acl = NULL;
@@ -307,22 +312,16 @@ int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd)
307 return generic_permission(inode, mask, jffs2_check_acl); 312 return generic_permission(inode, mask, jffs2_check_acl);
308} 313}
309 314
310int jffs2_init_acl(struct inode *inode, struct inode *dir) 315int jffs2_init_acl(struct inode *inode, struct posix_acl *acl)
311{ 316{
312 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 317 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
313 struct posix_acl *acl = NULL, *clone; 318 struct posix_acl *clone;
314 mode_t mode; 319 mode_t mode;
315 int rc = 0; 320 int rc = 0;
316 321
317 f->i_acl_access = JFFS2_ACL_NOT_CACHED; 322 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
318 f->i_acl_default = JFFS2_ACL_NOT_CACHED; 323 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
319 if (!S_ISLNK(inode->i_mode)) { 324
320 acl = jffs2_get_acl(dir, ACL_TYPE_DEFAULT);
321 if (IS_ERR(acl))
322 return PTR_ERR(acl);
323 if (!acl)
324 inode->i_mode &= ~current->fs->umask;
325 }
326 if (acl) { 325 if (acl) {
327 if (S_ISDIR(inode->i_mode)) { 326 if (S_ISDIR(inode->i_mode)) {
328 rc = jffs2_set_acl(inode, ACL_TYPE_DEFAULT, acl); 327 rc = jffs2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index c84378cee82a..90a2dbf59051 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,9 +28,10 @@ struct jffs2_acl_header {
28 28
29#define JFFS2_ACL_NOT_CACHED ((void *)-1) 29#define JFFS2_ACL_NOT_CACHED ((void *)-1)
30 30
31extern struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
31extern int jffs2_permission(struct inode *, int, struct nameidata *); 32extern int jffs2_permission(struct inode *, int, struct nameidata *);
32extern int jffs2_acl_chmod(struct inode *); 33extern int jffs2_acl_chmod(struct inode *);
33extern int jffs2_init_acl(struct inode *, struct inode *); 34extern int jffs2_init_acl(struct inode *, struct posix_acl *);
34extern void jffs2_clear_acl(struct jffs2_inode_info *); 35extern void jffs2_clear_acl(struct jffs2_inode_info *);
35 36
36extern struct xattr_handler jffs2_acl_access_xattr_handler; 37extern struct xattr_handler jffs2_acl_access_xattr_handler;
@@ -38,6 +39,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
38 39
39#else 40#else
40 41
42#define jffs2_get_acl(inode, type) (NULL)
41#define jffs2_permission NULL 43#define jffs2_permission NULL
42#define jffs2_acl_chmod(inode) (0) 44#define jffs2_acl_chmod(inode) (0)
43#define jffs2_init_acl(inode,dir) (0) 45#define jffs2_init_acl(inode,dir) (0)
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 504643f2e98b..d568ae846741 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -23,8 +23,8 @@ static int jffs2_garbage_collect_thread(void *);
23void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c) 23void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c)
24{ 24{
25 spin_lock(&c->erase_completion_lock); 25 spin_lock(&c->erase_completion_lock);
26 if (c->gc_task && jffs2_thread_should_wake(c)) 26 if (c->gc_task && jffs2_thread_should_wake(c))
27 send_sig(SIGHUP, c->gc_task, 1); 27 send_sig(SIGHUP, c->gc_task, 1);
28 spin_unlock(&c->erase_completion_lock); 28 spin_unlock(&c->erase_completion_lock);
29} 29}
30 30
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 0ca2fff2617f..722a6b682951 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -285,6 +285,14 @@ static void jffs2_calc_trigger_levels(struct jffs2_sb_info *c)
285 than actually making progress? */ 285 than actually making progress? */
286 c->resv_blocks_gcbad = 0;//c->resv_blocks_deletion + 2; 286 c->resv_blocks_gcbad = 0;//c->resv_blocks_deletion + 2;
287 287
288 /* What number of 'very dirty' eraseblocks do we allow before we
289 trigger the GC thread even if we don't _need_ the space. When we
290 can't mark nodes obsolete on the medium, the old dirty nodes cause
291 performance problems because we have to inspect and discard them. */
292 c->vdirty_blocks_gctrigger = c->resv_blocks_gctrigger;
293 if (jffs2_can_mark_obsolete(c))
294 c->vdirty_blocks_gctrigger *= 10;
295
288 /* If there's less than this amount of dirty space, don't bother 296 /* If there's less than this amount of dirty space, don't bother
289 trying to GC to make more space. It'll be a fruitless task */ 297 trying to GC to make more space. It'll be a fruitless task */
290 c->nospc_dirty_size = c->sector_size + (c->flash_size / 100); 298 c->nospc_dirty_size = c->sector_size + (c->flash_size / 100);
@@ -303,6 +311,8 @@ static void jffs2_calc_trigger_levels(struct jffs2_sb_info *c)
303 c->resv_blocks_gcbad, c->resv_blocks_gcbad*c->sector_size/1024); 311 c->resv_blocks_gcbad, c->resv_blocks_gcbad*c->sector_size/1024);
304 dbg_fsbuild("Amount of dirty space required to GC: %d bytes\n", 312 dbg_fsbuild("Amount of dirty space required to GC: %d bytes\n",
305 c->nospc_dirty_size); 313 c->nospc_dirty_size);
314 dbg_fsbuild("Very dirty blocks before GC triggered: %d\n",
315 c->vdirty_blocks_gctrigger);
306} 316}
307 317
308int jffs2_do_mount_fs(struct jffs2_sb_info *c) 318int jffs2_do_mount_fs(struct jffs2_sb_info *c)
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 485d065de41f..86739ee53b37 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -5,7 +5,7 @@
5 * Created by Arjan van de Ven <arjanv@redhat.com> 5 * Created by Arjan van de Ven <arjanv@redhat.com>
6 * 6 *
7 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, 7 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
8 * University of Szeged, Hungary 8 * University of Szeged, Hungary
9 * 9 *
10 * For licensing information, see the file 'LICENCE' in this directory. 10 * For licensing information, see the file 'LICENCE' in this directory.
11 * 11 *
@@ -24,6 +24,34 @@ static int jffs2_compression_mode = JFFS2_COMPR_MODE_PRIORITY;
24/* Statistics for blocks stored without compression */ 24/* Statistics for blocks stored without compression */
25static uint32_t none_stat_compr_blocks=0,none_stat_decompr_blocks=0,none_stat_compr_size=0; 25static uint32_t none_stat_compr_blocks=0,none_stat_decompr_blocks=0,none_stat_compr_size=0;
26 26
27
28/*
29 * Return 1 to use this compression
30 */
31static int jffs2_is_best_compression(struct jffs2_compressor *this,
32 struct jffs2_compressor *best, uint32_t size, uint32_t bestsize)
33{
34 switch (jffs2_compression_mode) {
35 case JFFS2_COMPR_MODE_SIZE:
36 if (bestsize > size)
37 return 1;
38 return 0;
39 case JFFS2_COMPR_MODE_FAVOURLZO:
40 if ((this->compr == JFFS2_COMPR_LZO) && (bestsize > size))
41 return 1;
42 if ((best->compr != JFFS2_COMPR_LZO) && (bestsize > size))
43 return 1;
44 if ((this->compr == JFFS2_COMPR_LZO) && (bestsize > (size * FAVOUR_LZO_PERCENT / 100)))
45 return 1;
46 if ((bestsize * FAVOUR_LZO_PERCENT / 100) > size)
47 return 1;
48
49 return 0;
50 }
51 /* Shouldn't happen */
52 return 0;
53}
54
27/* jffs2_compress: 55/* jffs2_compress:
28 * @data: Pointer to uncompressed data 56 * @data: Pointer to uncompressed data
29 * @cdata: Pointer to returned pointer to buffer for compressed data 57 * @cdata: Pointer to returned pointer to buffer for compressed data
@@ -43,121 +71,124 @@ static uint32_t none_stat_compr_blocks=0,none_stat_decompr_blocks=0,none_stat_co
43 * *datalen accordingly to show the amount of data which were compressed. 71 * *datalen accordingly to show the amount of data which were compressed.
44 */ 72 */
45uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 73uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
46 unsigned char *data_in, unsigned char **cpage_out, 74 unsigned char *data_in, unsigned char **cpage_out,
47 uint32_t *datalen, uint32_t *cdatalen) 75 uint32_t *datalen, uint32_t *cdatalen)
48{ 76{
49 int ret = JFFS2_COMPR_NONE; 77 int ret = JFFS2_COMPR_NONE;
50 int compr_ret; 78 int compr_ret;
51 struct jffs2_compressor *this, *best=NULL; 79 struct jffs2_compressor *this, *best=NULL;
52 unsigned char *output_buf = NULL, *tmp_buf; 80 unsigned char *output_buf = NULL, *tmp_buf;
53 uint32_t orig_slen, orig_dlen; 81 uint32_t orig_slen, orig_dlen;
54 uint32_t best_slen=0, best_dlen=0; 82 uint32_t best_slen=0, best_dlen=0;
55 83
56 switch (jffs2_compression_mode) { 84 switch (jffs2_compression_mode) {
57 case JFFS2_COMPR_MODE_NONE: 85 case JFFS2_COMPR_MODE_NONE:
58 break; 86 break;
59 case JFFS2_COMPR_MODE_PRIORITY: 87 case JFFS2_COMPR_MODE_PRIORITY:
60 output_buf = kmalloc(*cdatalen,GFP_KERNEL); 88 output_buf = kmalloc(*cdatalen,GFP_KERNEL);
61 if (!output_buf) { 89 if (!output_buf) {
62 printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n"); 90 printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n");
63 goto out; 91 goto out;
64 } 92 }
65 orig_slen = *datalen; 93 orig_slen = *datalen;
66 orig_dlen = *cdatalen; 94 orig_dlen = *cdatalen;
67 spin_lock(&jffs2_compressor_list_lock); 95 spin_lock(&jffs2_compressor_list_lock);
68 list_for_each_entry(this, &jffs2_compressor_list, list) { 96 list_for_each_entry(this, &jffs2_compressor_list, list) {
69 /* Skip decompress-only backwards-compatibility and disabled modules */ 97 /* Skip decompress-only backwards-compatibility and disabled modules */
70 if ((!this->compress)||(this->disabled)) 98 if ((!this->compress)||(this->disabled))
71 continue; 99 continue;
72 100
73 this->usecount++; 101 this->usecount++;
74 spin_unlock(&jffs2_compressor_list_lock); 102 spin_unlock(&jffs2_compressor_list_lock);
75 *datalen = orig_slen; 103 *datalen = orig_slen;
76 *cdatalen = orig_dlen; 104 *cdatalen = orig_dlen;
77 compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL); 105 compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL);
78 spin_lock(&jffs2_compressor_list_lock); 106 spin_lock(&jffs2_compressor_list_lock);
79 this->usecount--; 107 this->usecount--;
80 if (!compr_ret) { 108 if (!compr_ret) {
81 ret = this->compr; 109 ret = this->compr;
82 this->stat_compr_blocks++; 110 this->stat_compr_blocks++;
83 this->stat_compr_orig_size += *datalen; 111 this->stat_compr_orig_size += *datalen;
84 this->stat_compr_new_size += *cdatalen; 112 this->stat_compr_new_size += *cdatalen;
85 break; 113 break;
86 } 114 }
87 } 115 }
88 spin_unlock(&jffs2_compressor_list_lock); 116 spin_unlock(&jffs2_compressor_list_lock);
89 if (ret == JFFS2_COMPR_NONE) kfree(output_buf); 117 if (ret == JFFS2_COMPR_NONE)
90 break; 118 kfree(output_buf);
91 case JFFS2_COMPR_MODE_SIZE: 119 break;
92 orig_slen = *datalen; 120 case JFFS2_COMPR_MODE_SIZE:
93 orig_dlen = *cdatalen; 121 case JFFS2_COMPR_MODE_FAVOURLZO:
94 spin_lock(&jffs2_compressor_list_lock); 122 orig_slen = *datalen;
95 list_for_each_entry(this, &jffs2_compressor_list, list) { 123 orig_dlen = *cdatalen;
96 /* Skip decompress-only backwards-compatibility and disabled modules */ 124 spin_lock(&jffs2_compressor_list_lock);
97 if ((!this->compress)||(this->disabled)) 125 list_for_each_entry(this, &jffs2_compressor_list, list) {
98 continue; 126 /* Skip decompress-only backwards-compatibility and disabled modules */
99 /* Allocating memory for output buffer if necessary */ 127 if ((!this->compress)||(this->disabled))
100 if ((this->compr_buf_size<orig_dlen)&&(this->compr_buf)) { 128 continue;
101 spin_unlock(&jffs2_compressor_list_lock); 129 /* Allocating memory for output buffer if necessary */
102 kfree(this->compr_buf); 130 if ((this->compr_buf_size < orig_slen) && (this->compr_buf)) {
103 spin_lock(&jffs2_compressor_list_lock); 131 spin_unlock(&jffs2_compressor_list_lock);
104 this->compr_buf_size=0; 132 kfree(this->compr_buf);
105 this->compr_buf=NULL; 133 spin_lock(&jffs2_compressor_list_lock);
106 } 134 this->compr_buf_size=0;
107 if (!this->compr_buf) { 135 this->compr_buf=NULL;
108 spin_unlock(&jffs2_compressor_list_lock); 136 }
109 tmp_buf = kmalloc(orig_dlen,GFP_KERNEL); 137 if (!this->compr_buf) {
110 spin_lock(&jffs2_compressor_list_lock); 138 spin_unlock(&jffs2_compressor_list_lock);
111 if (!tmp_buf) { 139 tmp_buf = kmalloc(orig_slen, GFP_KERNEL);
112 printk(KERN_WARNING "JFFS2: No memory for compressor allocation. (%d bytes)\n",orig_dlen); 140 spin_lock(&jffs2_compressor_list_lock);
113 continue; 141 if (!tmp_buf) {
114 } 142 printk(KERN_WARNING "JFFS2: No memory for compressor allocation. (%d bytes)\n", orig_slen);
115 else { 143 continue;
116 this->compr_buf = tmp_buf; 144 }
117 this->compr_buf_size = orig_dlen; 145 else {
118 } 146 this->compr_buf = tmp_buf;
119 } 147 this->compr_buf_size = orig_slen;
120 this->usecount++; 148 }
121 spin_unlock(&jffs2_compressor_list_lock); 149 }
122 *datalen = orig_slen; 150 this->usecount++;
123 *cdatalen = orig_dlen; 151 spin_unlock(&jffs2_compressor_list_lock);
124 compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL); 152 *datalen = orig_slen;
125 spin_lock(&jffs2_compressor_list_lock); 153 *cdatalen = orig_dlen;
126 this->usecount--; 154 compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL);
127 if (!compr_ret) { 155 spin_lock(&jffs2_compressor_list_lock);
128 if ((!best_dlen)||(best_dlen>*cdatalen)) { 156 this->usecount--;
129 best_dlen = *cdatalen; 157 if (!compr_ret) {
130 best_slen = *datalen; 158 if (((!best_dlen) || jffs2_is_best_compression(this, best, *cdatalen, best_dlen))
131 best = this; 159 && (*cdatalen < *datalen)) {
132 } 160 best_dlen = *cdatalen;
133 } 161 best_slen = *datalen;
134 } 162 best = this;
135 if (best_dlen) { 163 }
136 *cdatalen = best_dlen; 164 }
137 *datalen = best_slen; 165 }
138 output_buf = best->compr_buf; 166 if (best_dlen) {
139 best->compr_buf = NULL; 167 *cdatalen = best_dlen;
140 best->compr_buf_size = 0; 168 *datalen = best_slen;
141 best->stat_compr_blocks++; 169 output_buf = best->compr_buf;
142 best->stat_compr_orig_size += best_slen; 170 best->compr_buf = NULL;
143 best->stat_compr_new_size += best_dlen; 171 best->compr_buf_size = 0;
144 ret = best->compr; 172 best->stat_compr_blocks++;
145 } 173 best->stat_compr_orig_size += best_slen;
146 spin_unlock(&jffs2_compressor_list_lock); 174 best->stat_compr_new_size += best_dlen;
147 break; 175 ret = best->compr;
148 default: 176 }
149 printk(KERN_ERR "JFFS2: unknow compression mode.\n"); 177 spin_unlock(&jffs2_compressor_list_lock);
150 } 178 break;
179 default:
180 printk(KERN_ERR "JFFS2: unknow compression mode.\n");
181 }
151 out: 182 out:
152 if (ret == JFFS2_COMPR_NONE) { 183 if (ret == JFFS2_COMPR_NONE) {
153 *cpage_out = data_in; 184 *cpage_out = data_in;
154 *datalen = *cdatalen; 185 *datalen = *cdatalen;
155 none_stat_compr_blocks++; 186 none_stat_compr_blocks++;
156 none_stat_compr_size += *datalen; 187 none_stat_compr_size += *datalen;
157 } 188 }
158 else { 189 else {
159 *cpage_out = output_buf; 190 *cpage_out = output_buf;
160 } 191 }
161 return ret; 192 return ret;
162} 193}
163 194
@@ -165,8 +196,8 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
165 uint16_t comprtype, unsigned char *cdata_in, 196 uint16_t comprtype, unsigned char *cdata_in,
166 unsigned char *data_out, uint32_t cdatalen, uint32_t datalen) 197 unsigned char *data_out, uint32_t cdatalen, uint32_t datalen)
167{ 198{
168 struct jffs2_compressor *this; 199 struct jffs2_compressor *this;
169 int ret; 200 int ret;
170 201
171 /* Older code had a bug where it would write non-zero 'usercompr' 202 /* Older code had a bug where it would write non-zero 'usercompr'
172 fields. Deal with it. */ 203 fields. Deal with it. */
@@ -177,32 +208,32 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
177 case JFFS2_COMPR_NONE: 208 case JFFS2_COMPR_NONE:
178 /* This should be special-cased elsewhere, but we might as well deal with it */ 209 /* This should be special-cased elsewhere, but we might as well deal with it */
179 memcpy(data_out, cdata_in, datalen); 210 memcpy(data_out, cdata_in, datalen);
180 none_stat_decompr_blocks++; 211 none_stat_decompr_blocks++;
181 break; 212 break;
182 case JFFS2_COMPR_ZERO: 213 case JFFS2_COMPR_ZERO:
183 memset(data_out, 0, datalen); 214 memset(data_out, 0, datalen);
184 break; 215 break;
185 default: 216 default:
186 spin_lock(&jffs2_compressor_list_lock); 217 spin_lock(&jffs2_compressor_list_lock);
187 list_for_each_entry(this, &jffs2_compressor_list, list) { 218 list_for_each_entry(this, &jffs2_compressor_list, list) {
188 if (comprtype == this->compr) { 219 if (comprtype == this->compr) {
189 this->usecount++; 220 this->usecount++;
190 spin_unlock(&jffs2_compressor_list_lock); 221 spin_unlock(&jffs2_compressor_list_lock);
191 ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL); 222 ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL);
192 spin_lock(&jffs2_compressor_list_lock); 223 spin_lock(&jffs2_compressor_list_lock);
193 if (ret) { 224 if (ret) {
194 printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret); 225 printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
195 } 226 }
196 else { 227 else {
197 this->stat_decompr_blocks++; 228 this->stat_decompr_blocks++;
198 } 229 }
199 this->usecount--; 230 this->usecount--;
200 spin_unlock(&jffs2_compressor_list_lock); 231 spin_unlock(&jffs2_compressor_list_lock);
201 return ret; 232 return ret;
202 } 233 }
203 } 234 }
204 printk(KERN_WARNING "JFFS2 compression type 0x%02x not available.\n", comprtype); 235 printk(KERN_WARNING "JFFS2 compression type 0x%02x not available.\n", comprtype);
205 spin_unlock(&jffs2_compressor_list_lock); 236 spin_unlock(&jffs2_compressor_list_lock);
206 return -EIO; 237 return -EIO;
207 } 238 }
208 return 0; 239 return 0;
@@ -210,108 +241,119 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
210 241
211int jffs2_register_compressor(struct jffs2_compressor *comp) 242int jffs2_register_compressor(struct jffs2_compressor *comp)
212{ 243{
213 struct jffs2_compressor *this; 244 struct jffs2_compressor *this;
214 245
215 if (!comp->name) { 246 if (!comp->name) {
216 printk(KERN_WARNING "NULL compressor name at registering JFFS2 compressor. Failed.\n"); 247 printk(KERN_WARNING "NULL compressor name at registering JFFS2 compressor. Failed.\n");
217 return -1; 248 return -1;
218 } 249 }
219 comp->compr_buf_size=0; 250 comp->compr_buf_size=0;
220 comp->compr_buf=NULL; 251 comp->compr_buf=NULL;
221 comp->usecount=0; 252 comp->usecount=0;
222 comp->stat_compr_orig_size=0; 253 comp->stat_compr_orig_size=0;
223 comp->stat_compr_new_size=0; 254 comp->stat_compr_new_size=0;
224 comp->stat_compr_blocks=0; 255 comp->stat_compr_blocks=0;
225 comp->stat_decompr_blocks=0; 256 comp->stat_decompr_blocks=0;
226 D1(printk(KERN_DEBUG "Registering JFFS2 compressor \"%s\"\n", comp->name)); 257 D1(printk(KERN_DEBUG "Registering JFFS2 compressor \"%s\"\n", comp->name));
227 258
228 spin_lock(&jffs2_compressor_list_lock); 259 spin_lock(&jffs2_compressor_list_lock);
229 260
230 list_for_each_entry(this, &jffs2_compressor_list, list) { 261 list_for_each_entry(this, &jffs2_compressor_list, list) {
231 if (this->priority < comp->priority) { 262 if (this->priority < comp->priority) {
232 list_add(&comp->list, this->list.prev); 263 list_add(&comp->list, this->list.prev);
233 goto out; 264 goto out;
234 } 265 }
235 } 266 }
236 list_add_tail(&comp->list, &jffs2_compressor_list); 267 list_add_tail(&comp->list, &jffs2_compressor_list);
237out: 268out:
238 D2(list_for_each_entry(this, &jffs2_compressor_list, list) { 269 D2(list_for_each_entry(this, &jffs2_compressor_list, list) {
239 printk(KERN_DEBUG "Compressor \"%s\", prio %d\n", this->name, this->priority); 270 printk(KERN_DEBUG "Compressor \"%s\", prio %d\n", this->name, this->priority);
240 }) 271 })
241 272
242 spin_unlock(&jffs2_compressor_list_lock); 273 spin_unlock(&jffs2_compressor_list_lock);
243 274
244 return 0; 275 return 0;
245} 276}
246 277
247int jffs2_unregister_compressor(struct jffs2_compressor *comp) 278int jffs2_unregister_compressor(struct jffs2_compressor *comp)
248{ 279{
249 D2(struct jffs2_compressor *this;) 280 D2(struct jffs2_compressor *this;)
250 281
251 D1(printk(KERN_DEBUG "Unregistering JFFS2 compressor \"%s\"\n", comp->name)); 282 D1(printk(KERN_DEBUG "Unregistering JFFS2 compressor \"%s\"\n", comp->name));
252 283
253 spin_lock(&jffs2_compressor_list_lock); 284 spin_lock(&jffs2_compressor_list_lock);
254 285
255 if (comp->usecount) { 286 if (comp->usecount) {
256 spin_unlock(&jffs2_compressor_list_lock); 287 spin_unlock(&jffs2_compressor_list_lock);
257 printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n"); 288 printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n");
258 return -1; 289 return -1;
259 } 290 }
260 list_del(&comp->list); 291 list_del(&comp->list);
261 292
262 D2(list_for_each_entry(this, &jffs2_compressor_list, list) { 293 D2(list_for_each_entry(this, &jffs2_compressor_list, list) {
263 printk(KERN_DEBUG "Compressor \"%s\", prio %d\n", this->name, this->priority); 294 printk(KERN_DEBUG "Compressor \"%s\", prio %d\n", this->name, this->priority);
264 }) 295 })
265 spin_unlock(&jffs2_compressor_list_lock); 296 spin_unlock(&jffs2_compressor_list_lock);
266 return 0; 297 return 0;
267} 298}
268 299
269void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig) 300void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig)
270{ 301{
271 if (orig != comprbuf) 302 if (orig != comprbuf)
272 kfree(comprbuf); 303 kfree(comprbuf);
273} 304}
274 305
275int __init jffs2_compressors_init(void) 306int __init jffs2_compressors_init(void)
276{ 307{
277/* Registering compressors */ 308/* Registering compressors */
278#ifdef CONFIG_JFFS2_ZLIB 309#ifdef CONFIG_JFFS2_ZLIB
279 jffs2_zlib_init(); 310 jffs2_zlib_init();
280#endif 311#endif
281#ifdef CONFIG_JFFS2_RTIME 312#ifdef CONFIG_JFFS2_RTIME
282 jffs2_rtime_init(); 313 jffs2_rtime_init();
283#endif 314#endif
284#ifdef CONFIG_JFFS2_RUBIN 315#ifdef CONFIG_JFFS2_RUBIN
285 jffs2_rubinmips_init(); 316 jffs2_rubinmips_init();
286 jffs2_dynrubin_init(); 317 jffs2_dynrubin_init();
318#endif
319#ifdef CONFIG_JFFS2_LZO
320 jffs2_lzo_init();
287#endif 321#endif
288/* Setting default compression mode */ 322/* Setting default compression mode */
289#ifdef CONFIG_JFFS2_CMODE_NONE 323#ifdef CONFIG_JFFS2_CMODE_NONE
290 jffs2_compression_mode = JFFS2_COMPR_MODE_NONE; 324 jffs2_compression_mode = JFFS2_COMPR_MODE_NONE;
291 D1(printk(KERN_INFO "JFFS2: default compression mode: none\n");) 325 D1(printk(KERN_INFO "JFFS2: default compression mode: none\n");)
292#else 326#else
293#ifdef CONFIG_JFFS2_CMODE_SIZE 327#ifdef CONFIG_JFFS2_CMODE_SIZE
294 jffs2_compression_mode = JFFS2_COMPR_MODE_SIZE; 328 jffs2_compression_mode = JFFS2_COMPR_MODE_SIZE;
295 D1(printk(KERN_INFO "JFFS2: default compression mode: size\n");) 329 D1(printk(KERN_INFO "JFFS2: default compression mode: size\n");)
330#else
331#ifdef CONFIG_JFFS2_CMODE_FAVOURLZO
332 jffs2_compression_mode = JFFS2_COMPR_MODE_FAVOURLZO;
333 D1(printk(KERN_INFO "JFFS2: default compression mode: favourlzo\n");)
296#else 334#else
297 D1(printk(KERN_INFO "JFFS2: default compression mode: priority\n");) 335 D1(printk(KERN_INFO "JFFS2: default compression mode: priority\n");)
336#endif
298#endif 337#endif
299#endif 338#endif
300 return 0; 339 return 0;
301} 340}
302 341
303int jffs2_compressors_exit(void) 342int jffs2_compressors_exit(void)
304{ 343{
305/* Unregistering compressors */ 344/* Unregistering compressors */
345#ifdef CONFIG_JFFS2_LZO
346 jffs2_lzo_exit();
347#endif
306#ifdef CONFIG_JFFS2_RUBIN 348#ifdef CONFIG_JFFS2_RUBIN
307 jffs2_dynrubin_exit(); 349 jffs2_dynrubin_exit();
308 jffs2_rubinmips_exit(); 350 jffs2_rubinmips_exit();
309#endif 351#endif
310#ifdef CONFIG_JFFS2_RTIME 352#ifdef CONFIG_JFFS2_RTIME
311 jffs2_rtime_exit(); 353 jffs2_rtime_exit();
312#endif 354#endif
313#ifdef CONFIG_JFFS2_ZLIB 355#ifdef CONFIG_JFFS2_ZLIB
314 jffs2_zlib_exit(); 356 jffs2_zlib_exit();
315#endif 357#endif
316 return 0; 358 return 0;
317} 359}
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index 68cc7010dbdf..7d1d72faa774 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -2,7 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, 4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
5 * University of Szeged, Hungary 5 * University of Szeged, Hungary
6 * 6 *
7 * For licensing information, see the file 'LICENCE' in this directory. 7 * For licensing information, see the file 'LICENCE' in this directory.
8 * 8 *
@@ -27,34 +27,38 @@
27#define JFFS2_RUBINMIPS_PRIORITY 10 27#define JFFS2_RUBINMIPS_PRIORITY 10
28#define JFFS2_DYNRUBIN_PRIORITY 20 28#define JFFS2_DYNRUBIN_PRIORITY 20
29#define JFFS2_LZARI_PRIORITY 30 29#define JFFS2_LZARI_PRIORITY 30
30#define JFFS2_LZO_PRIORITY 40
31#define JFFS2_RTIME_PRIORITY 50 30#define JFFS2_RTIME_PRIORITY 50
32#define JFFS2_ZLIB_PRIORITY 60 31#define JFFS2_ZLIB_PRIORITY 60
32#define JFFS2_LZO_PRIORITY 80
33
33 34
34#define JFFS2_RUBINMIPS_DISABLED /* RUBINs will be used only */ 35#define JFFS2_RUBINMIPS_DISABLED /* RUBINs will be used only */
35#define JFFS2_DYNRUBIN_DISABLED /* for decompression */ 36#define JFFS2_DYNRUBIN_DISABLED /* for decompression */
36 37
37#define JFFS2_COMPR_MODE_NONE 0 38#define JFFS2_COMPR_MODE_NONE 0
38#define JFFS2_COMPR_MODE_PRIORITY 1 39#define JFFS2_COMPR_MODE_PRIORITY 1
39#define JFFS2_COMPR_MODE_SIZE 2 40#define JFFS2_COMPR_MODE_SIZE 2
41#define JFFS2_COMPR_MODE_FAVOURLZO 3
42
43#define FAVOUR_LZO_PERCENT 80
40 44
41struct jffs2_compressor { 45struct jffs2_compressor {
42 struct list_head list; 46 struct list_head list;
43 int priority; /* used by prirority comr. mode */ 47 int priority; /* used by prirority comr. mode */
44 char *name; 48 char *name;
45 char compr; /* JFFS2_COMPR_XXX */ 49 char compr; /* JFFS2_COMPR_XXX */
46 int (*compress)(unsigned char *data_in, unsigned char *cpage_out, 50 int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
47 uint32_t *srclen, uint32_t *destlen, void *model); 51 uint32_t *srclen, uint32_t *destlen, void *model);
48 int (*decompress)(unsigned char *cdata_in, unsigned char *data_out, 52 int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
49 uint32_t cdatalen, uint32_t datalen, void *model); 53 uint32_t cdatalen, uint32_t datalen, void *model);
50 int usecount; 54 int usecount;
51 int disabled; /* if seted the compressor won't compress */ 55 int disabled; /* if set the compressor won't compress */
52 unsigned char *compr_buf; /* used by size compr. mode */ 56 unsigned char *compr_buf; /* used by size compr. mode */
53 uint32_t compr_buf_size; /* used by size compr. mode */ 57 uint32_t compr_buf_size; /* used by size compr. mode */
54 uint32_t stat_compr_orig_size; 58 uint32_t stat_compr_orig_size;
55 uint32_t stat_compr_new_size; 59 uint32_t stat_compr_new_size;
56 uint32_t stat_compr_blocks; 60 uint32_t stat_compr_blocks;
57 uint32_t stat_decompr_blocks; 61 uint32_t stat_decompr_blocks;
58}; 62};
59 63
60int jffs2_register_compressor(struct jffs2_compressor *comp); 64int jffs2_register_compressor(struct jffs2_compressor *comp);
@@ -64,12 +68,12 @@ int jffs2_compressors_init(void);
64int jffs2_compressors_exit(void); 68int jffs2_compressors_exit(void);
65 69
66uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 70uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
67 unsigned char *data_in, unsigned char **cpage_out, 71 unsigned char *data_in, unsigned char **cpage_out,
68 uint32_t *datalen, uint32_t *cdatalen); 72 uint32_t *datalen, uint32_t *cdatalen);
69 73
70int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 74int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
71 uint16_t comprtype, unsigned char *cdata_in, 75 uint16_t comprtype, unsigned char *cdata_in,
72 unsigned char *data_out, uint32_t cdatalen, uint32_t datalen); 76 unsigned char *data_out, uint32_t cdatalen, uint32_t datalen);
73 77
74void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig); 78void jffs2_free_comprbuf(unsigned char *comprbuf, unsigned char *orig);
75 79
@@ -90,5 +94,9 @@ void jffs2_rtime_exit(void);
90int jffs2_zlib_init(void); 94int jffs2_zlib_init(void);
91void jffs2_zlib_exit(void); 95void jffs2_zlib_exit(void);
92#endif 96#endif
97#ifdef CONFIG_JFFS2_LZO
98int jffs2_lzo_init(void);
99void jffs2_lzo_exit(void);
100#endif
93 101
94#endif /* __JFFS2_COMPR_H__ */ 102#endif /* __JFFS2_COMPR_H__ */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
new file mode 100644
index 000000000000..47b045797e42
--- /dev/null
+++ b/fs/jffs2/compr_lzo.c
@@ -0,0 +1,108 @@
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright © 2007 Nokia Corporation. All rights reserved.
5 *
6 * Created by Richard Purdie <rpurdie@openedhand.com>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
10 */
11
12#include <linux/kernel.h>
13#include <linux/sched.h>
14#include <linux/slab.h>
15#include <linux/vmalloc.h>
16#include <linux/init.h>
17#include <linux/lzo.h>
18#include "compr.h"
19
/* Scratch memory handed to lzo1x_1_compress() as its work area. */
static void *lzo_mem;
/* Staging buffer that receives the compressor's output. */
static void *lzo_compress_buf;
/* Taken by jffs2_lzo_compress() while the shared buffers are in use. */
static DEFINE_MUTEX(deflate_mutex);

/*
 * Release both workspace buffers.
 *
 * The pointers are reset to NULL afterwards so that a repeated call (for
 * instance the exit path running after the allocation-failure path has
 * already cleaned up) degrades to vfree(NULL), which is a no-op, instead
 * of freeing the same region twice.
 */
static void free_workspace(void)
{
	vfree(lzo_mem);
	lzo_mem = NULL;
	vfree(lzo_compress_buf);
	lzo_compress_buf = NULL;
}
29
30static int __init alloc_workspace(void)
31{
32 lzo_mem = vmalloc(LZO1X_MEM_COMPRESS);
33 lzo_compress_buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
34
35 if (!lzo_mem || !lzo_compress_buf) {
36 printk(KERN_WARNING "Failed to allocate lzo deflate workspace\n");
37 free_workspace();
38 return -ENOMEM;
39 }
40
41 return 0;
42}
43
44static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
45 uint32_t *sourcelen, uint32_t *dstlen, void *model)
46{
47 size_t compress_size;
48 int ret;
49
50 mutex_lock(&deflate_mutex);
51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem);
52 mutex_unlock(&deflate_mutex);
53
54 if (ret != LZO_E_OK)
55 return -1;
56
57 if (compress_size > *dstlen)
58 return -1;
59
60 memcpy(cpage_out, lzo_compress_buf, compress_size);
61 *dstlen = compress_size;
62
63 return 0;
64}
65
66static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
67 uint32_t srclen, uint32_t destlen, void *model)
68{
69 size_t dl = destlen;
70 int ret;
71
72 ret = lzo1x_decompress_safe(data_in, srclen, cpage_out, &dl);
73
74 if (ret != LZO_E_OK || dl != destlen)
75 return -1;
76
77 return 0;
78}
79
80static struct jffs2_compressor jffs2_lzo_comp = {
81 .priority = JFFS2_LZO_PRIORITY,
82 .name = "lzo",
83 .compr = JFFS2_COMPR_LZO,
84 .compress = &jffs2_lzo_compress,
85 .decompress = &jffs2_lzo_decompress,
86 .disabled = 0,
87};
88
89int __init jffs2_lzo_init(void)
90{
91 int ret;
92
93 ret = alloc_workspace();
94 if (ret < 0)
95 return ret;
96
97 ret = jffs2_register_compressor(&jffs2_lzo_comp);
98 if (ret)
99 free_workspace();
100
101 return ret;
102}
103
104void jffs2_lzo_exit(void)
105{
106 jffs2_unregister_compressor(&jffs2_lzo_comp);
107 free_workspace();
108}
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 0d0bfd2e4e0d..546d1538d076 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -104,7 +104,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
104 } 104 }
105 } 105 }
106 } 106 }
107 return 0; 107 return 0;
108} 108}
109 109
110static struct jffs2_compressor jffs2_rtime_comp = { 110static struct jffs2_compressor jffs2_rtime_comp = {
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index ea0431e047d5..c73fa89b5f8a 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -384,7 +384,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
384 void *model) 384 void *model)
385{ 385{
386 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in, cpage_out, sourcelen, dstlen); 386 rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in, cpage_out, sourcelen, dstlen);
387 return 0; 387 return 0;
388} 388}
389 389
390static int jffs2_dynrubin_decompress(unsigned char *data_in, 390static int jffs2_dynrubin_decompress(unsigned char *data_in,
@@ -399,7 +399,7 @@ static int jffs2_dynrubin_decompress(unsigned char *data_in,
399 bits[c] = data_in[c]; 399 bits[c] = data_in[c];
400 400
401 rubin_do_decompress(256, bits, data_in+8, cpage_out, sourcelen-8, dstlen); 401 rubin_do_decompress(256, bits, data_in+8, cpage_out, sourcelen-8, dstlen);
402 return 0; 402 return 0;
403} 403}
404 404
405static struct jffs2_compressor jffs2_rubinmips_comp = { 405static struct jffs2_compressor jffs2_rubinmips_comp = {
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 2b87fccc1557..cfd301a5edfc 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -181,7 +181,7 @@ static int jffs2_zlib_decompress(unsigned char *data_in,
181 } 181 }
182 zlib_inflateEnd(&inf_strm); 182 zlib_inflateEnd(&inf_strm);
183 mutex_unlock(&inflate_mutex); 183 mutex_unlock(&inflate_mutex);
184 return 0; 184 return 0;
185} 185}
186 186
187static struct jffs2_compressor jffs2_zlib_comp = { 187static struct jffs2_compressor jffs2_zlib_comp = {
@@ -203,11 +203,11 @@ int __init jffs2_zlib_init(void)
203 203
204 ret = alloc_workspaces(); 204 ret = alloc_workspaces();
205 if (ret) 205 if (ret)
206 return ret; 206 return ret;
207 207
208 ret = jffs2_register_compressor(&jffs2_zlib_comp); 208 ret = jffs2_register_compressor(&jffs2_zlib_comp);
209 if (ret) 209 if (ret)
210 free_workspaces(); 210 free_workspaces();
211 211
212 return ret; 212 return ret;
213} 213}
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c1dfca310dd6..8353eb9c1799 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -32,7 +32,7 @@ static int jffs2_mkdir (struct inode *,struct dentry *,int);
32static int jffs2_rmdir (struct inode *,struct dentry *); 32static int jffs2_rmdir (struct inode *,struct dentry *);
33static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t); 33static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
34static int jffs2_rename (struct inode *, struct dentry *, 34static int jffs2_rename (struct inode *, struct dentry *,
35 struct inode *, struct dentry *); 35 struct inode *, struct dentry *);
36 36
37const struct file_operations jffs2_dir_operations = 37const struct file_operations jffs2_dir_operations =
38{ 38{
@@ -182,6 +182,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
182 struct jffs2_inode_info *f, *dir_f; 182 struct jffs2_inode_info *f, *dir_f;
183 struct jffs2_sb_info *c; 183 struct jffs2_sb_info *c;
184 struct inode *inode; 184 struct inode *inode;
185 struct posix_acl *acl;
185 int ret; 186 int ret;
186 187
187 ri = jffs2_alloc_raw_inode(); 188 ri = jffs2_alloc_raw_inode();
@@ -192,7 +193,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
192 193
193 D1(printk(KERN_DEBUG "jffs2_create()\n")); 194 D1(printk(KERN_DEBUG "jffs2_create()\n"));
194 195
195 inode = jffs2_new_inode(dir_i, mode, ri); 196 inode = jffs2_new_inode(dir_i, mode, ri, &acl);
196 197
197 if (IS_ERR(inode)) { 198 if (IS_ERR(inode)) {
198 D1(printk(KERN_DEBUG "jffs2_new_inode() failed\n")); 199 D1(printk(KERN_DEBUG "jffs2_new_inode() failed\n"));
@@ -212,12 +213,12 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
212 dentry->d_name.name, dentry->d_name.len); 213 dentry->d_name.name, dentry->d_name.len);
213 214
214 if (ret) 215 if (ret)
215 goto fail; 216 goto fail_acl;
216 217
217 ret = jffs2_init_security(inode, dir_i); 218 ret = jffs2_init_security(inode, dir_i);
218 if (ret) 219 if (ret)
219 goto fail; 220 goto fail_acl;
220 ret = jffs2_init_acl(inode, dir_i); 221 ret = jffs2_init_acl(inode, acl);
221 if (ret) 222 if (ret)
222 goto fail; 223 goto fail;
223 224
@@ -230,6 +231,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
230 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); 231 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages));
231 return 0; 232 return 0;
232 233
234 fail_acl:
235 posix_acl_release(acl);
233 fail: 236 fail:
234 make_bad_inode(inode); 237 make_bad_inode(inode);
235 iput(inode); 238 iput(inode);
@@ -306,6 +309,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
306 struct jffs2_full_dirent *fd; 309 struct jffs2_full_dirent *fd;
307 int namelen; 310 int namelen;
308 uint32_t alloclen; 311 uint32_t alloclen;
312 struct posix_acl *acl;
309 int ret, targetlen = strlen(target); 313 int ret, targetlen = strlen(target);
310 314
311 /* FIXME: If you care. We'd need to use frags for the target 315 /* FIXME: If you care. We'd need to use frags for the target
@@ -332,7 +336,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
332 return ret; 336 return ret;
333 } 337 }
334 338
335 inode = jffs2_new_inode(dir_i, S_IFLNK | S_IRWXUGO, ri); 339 inode = jffs2_new_inode(dir_i, S_IFLNK | S_IRWXUGO, ri, &acl);
336 340
337 if (IS_ERR(inode)) { 341 if (IS_ERR(inode)) {
338 jffs2_free_raw_inode(ri); 342 jffs2_free_raw_inode(ri);
@@ -362,6 +366,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
362 up(&f->sem); 366 up(&f->sem);
363 jffs2_complete_reservation(c); 367 jffs2_complete_reservation(c);
364 jffs2_clear_inode(inode); 368 jffs2_clear_inode(inode);
369 posix_acl_release(acl);
365 return PTR_ERR(fn); 370 return PTR_ERR(fn);
366 } 371 }
367 372
@@ -372,6 +377,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
372 up(&f->sem); 377 up(&f->sem);
373 jffs2_complete_reservation(c); 378 jffs2_complete_reservation(c);
374 jffs2_clear_inode(inode); 379 jffs2_clear_inode(inode);
380 posix_acl_release(acl);
375 return -ENOMEM; 381 return -ENOMEM;
376 } 382 }
377 383
@@ -389,9 +395,10 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
389 ret = jffs2_init_security(inode, dir_i); 395 ret = jffs2_init_security(inode, dir_i);
390 if (ret) { 396 if (ret) {
391 jffs2_clear_inode(inode); 397 jffs2_clear_inode(inode);
398 posix_acl_release(acl);
392 return ret; 399 return ret;
393 } 400 }
394 ret = jffs2_init_acl(inode, dir_i); 401 ret = jffs2_init_acl(inode, acl);
395 if (ret) { 402 if (ret) {
396 jffs2_clear_inode(inode); 403 jffs2_clear_inode(inode);
397 return ret; 404 return ret;
@@ -469,6 +476,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
469 struct jffs2_full_dirent *fd; 476 struct jffs2_full_dirent *fd;
470 int namelen; 477 int namelen;
471 uint32_t alloclen; 478 uint32_t alloclen;
479 struct posix_acl *acl;
472 int ret; 480 int ret;
473 481
474 mode |= S_IFDIR; 482 mode |= S_IFDIR;
@@ -491,7 +499,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
491 return ret; 499 return ret;
492 } 500 }
493 501
494 inode = jffs2_new_inode(dir_i, mode, ri); 502 inode = jffs2_new_inode(dir_i, mode, ri, &acl);
495 503
496 if (IS_ERR(inode)) { 504 if (IS_ERR(inode)) {
497 jffs2_free_raw_inode(ri); 505 jffs2_free_raw_inode(ri);
@@ -518,6 +526,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
518 up(&f->sem); 526 up(&f->sem);
519 jffs2_complete_reservation(c); 527 jffs2_complete_reservation(c);
520 jffs2_clear_inode(inode); 528 jffs2_clear_inode(inode);
529 posix_acl_release(acl);
521 return PTR_ERR(fn); 530 return PTR_ERR(fn);
522 } 531 }
523 /* No data here. Only a metadata node, which will be 532 /* No data here. Only a metadata node, which will be
@@ -531,9 +540,10 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
531 ret = jffs2_init_security(inode, dir_i); 540 ret = jffs2_init_security(inode, dir_i);
532 if (ret) { 541 if (ret) {
533 jffs2_clear_inode(inode); 542 jffs2_clear_inode(inode);
543 posix_acl_release(acl);
534 return ret; 544 return ret;
535 } 545 }
536 ret = jffs2_init_acl(inode, dir_i); 546 ret = jffs2_init_acl(inode, acl);
537 if (ret) { 547 if (ret) {
538 jffs2_clear_inode(inode); 548 jffs2_clear_inode(inode);
539 return ret; 549 return ret;
@@ -629,6 +639,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
629 union jffs2_device_node dev; 639 union jffs2_device_node dev;
630 int devlen = 0; 640 int devlen = 0;
631 uint32_t alloclen; 641 uint32_t alloclen;
642 struct posix_acl *acl;
632 int ret; 643 int ret;
633 644
634 if (!new_valid_dev(rdev)) 645 if (!new_valid_dev(rdev))
@@ -655,7 +666,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
655 return ret; 666 return ret;
656 } 667 }
657 668
658 inode = jffs2_new_inode(dir_i, mode, ri); 669 inode = jffs2_new_inode(dir_i, mode, ri, &acl);
659 670
660 if (IS_ERR(inode)) { 671 if (IS_ERR(inode)) {
661 jffs2_free_raw_inode(ri); 672 jffs2_free_raw_inode(ri);
@@ -684,6 +695,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
684 up(&f->sem); 695 up(&f->sem);
685 jffs2_complete_reservation(c); 696 jffs2_complete_reservation(c);
686 jffs2_clear_inode(inode); 697 jffs2_clear_inode(inode);
698 posix_acl_release(acl);
687 return PTR_ERR(fn); 699 return PTR_ERR(fn);
688 } 700 }
689 /* No data here. Only a metadata node, which will be 701 /* No data here. Only a metadata node, which will be
@@ -697,9 +709,10 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
697 ret = jffs2_init_security(inode, dir_i); 709 ret = jffs2_init_security(inode, dir_i);
698 if (ret) { 710 if (ret) {
699 jffs2_clear_inode(inode); 711 jffs2_clear_inode(inode);
712 posix_acl_release(acl);
700 return ret; 713 return ret;
701 } 714 }
702 ret = jffs2_init_acl(inode, dir_i); 715 ret = jffs2_init_acl(inode, acl);
703 if (ret) { 716 if (ret) {
704 jffs2_clear_inode(inode); 717 jffs2_clear_inode(inode);
705 return ret; 718 return ret;
@@ -770,7 +783,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
770} 783}
771 784
772static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, 785static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
773 struct inode *new_dir_i, struct dentry *new_dentry) 786 struct inode *new_dir_i, struct dentry *new_dentry)
774{ 787{
775 int ret; 788 int ret;
776 struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dir_i->i_sb); 789 struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dir_i->i_sb);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 66e7c2f1e644..a1db9180633f 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -38,8 +38,8 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
38#ifdef __ECOS 38#ifdef __ECOS
39 ret = jffs2_flash_erase(c, jeb); 39 ret = jffs2_flash_erase(c, jeb);
40 if (!ret) { 40 if (!ret) {
41 jffs2_erase_succeeded(c, jeb); 41 jffs2_erase_succeeded(c, jeb);
42 return; 42 return;
43 } 43 }
44 bad_offset = jeb->offset; 44 bad_offset = jeb->offset;
45#else /* Linux */ 45#else /* Linux */
@@ -50,12 +50,14 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
50 instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL); 50 instr = kmalloc(sizeof(struct erase_info) + sizeof(struct erase_priv_struct), GFP_KERNEL);
51 if (!instr) { 51 if (!instr) {
52 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n"); 52 printk(KERN_WARNING "kmalloc for struct erase_info in jffs2_erase_block failed. Refiling block for later\n");
53 down(&c->erase_free_sem);
53 spin_lock(&c->erase_completion_lock); 54 spin_lock(&c->erase_completion_lock);
54 list_move(&jeb->list, &c->erase_pending_list); 55 list_move(&jeb->list, &c->erase_pending_list);
55 c->erasing_size -= c->sector_size; 56 c->erasing_size -= c->sector_size;
56 c->dirty_size += c->sector_size; 57 c->dirty_size += c->sector_size;
57 jeb->dirty_size = c->sector_size; 58 jeb->dirty_size = c->sector_size;
58 spin_unlock(&c->erase_completion_lock); 59 spin_unlock(&c->erase_completion_lock);
60 up(&c->erase_free_sem);
59 return; 61 return;
60 } 62 }
61 63
@@ -82,12 +84,14 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
82 if (ret == -ENOMEM || ret == -EAGAIN) { 84 if (ret == -ENOMEM || ret == -EAGAIN) {
83 /* Erase failed immediately. Refile it on the list */ 85 /* Erase failed immediately. Refile it on the list */
84 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret)); 86 D1(printk(KERN_DEBUG "Erase at 0x%08x failed: %d. Refiling on erase_pending_list\n", jeb->offset, ret));
87 down(&c->erase_free_sem);
85 spin_lock(&c->erase_completion_lock); 88 spin_lock(&c->erase_completion_lock);
86 list_move(&jeb->list, &c->erase_pending_list); 89 list_move(&jeb->list, &c->erase_pending_list);
87 c->erasing_size -= c->sector_size; 90 c->erasing_size -= c->sector_size;
88 c->dirty_size += c->sector_size; 91 c->dirty_size += c->sector_size;
89 jeb->dirty_size = c->sector_size; 92 jeb->dirty_size = c->sector_size;
90 spin_unlock(&c->erase_completion_lock); 93 spin_unlock(&c->erase_completion_lock);
94 up(&c->erase_free_sem);
91 return; 95 return;
92 } 96 }
93 97
@@ -114,6 +118,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
114 jeb = list_entry(c->erase_complete_list.next, struct jffs2_eraseblock, list); 118 jeb = list_entry(c->erase_complete_list.next, struct jffs2_eraseblock, list);
115 list_del(&jeb->list); 119 list_del(&jeb->list);
116 spin_unlock(&c->erase_completion_lock); 120 spin_unlock(&c->erase_completion_lock);
121 up(&c->erase_free_sem);
117 jffs2_mark_erased_block(c, jeb); 122 jffs2_mark_erased_block(c, jeb);
118 123
119 if (!--count) { 124 if (!--count) {
@@ -134,6 +139,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
134 jffs2_free_jeb_node_refs(c, jeb); 139 jffs2_free_jeb_node_refs(c, jeb);
135 list_add(&jeb->list, &c->erasing_list); 140 list_add(&jeb->list, &c->erasing_list);
136 spin_unlock(&c->erase_completion_lock); 141 spin_unlock(&c->erase_completion_lock);
142 up(&c->erase_free_sem);
137 143
138 jffs2_erase_block(c, jeb); 144 jffs2_erase_block(c, jeb);
139 145
@@ -142,23 +148,25 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
142 } 148 }
143 149
144 /* Be nice */ 150 /* Be nice */
145 cond_resched(); 151 yield();
152 down(&c->erase_free_sem);
146 spin_lock(&c->erase_completion_lock); 153 spin_lock(&c->erase_completion_lock);
147 } 154 }
148 155
149 spin_unlock(&c->erase_completion_lock); 156 spin_unlock(&c->erase_completion_lock);
157 up(&c->erase_free_sem);
150 done: 158 done:
151 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n")); 159 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n"));
152
153 up(&c->erase_free_sem);
154} 160}
155 161
156static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 162static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
157{ 163{
158 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset)); 164 D1(printk(KERN_DEBUG "Erase completed successfully at 0x%08x\n", jeb->offset));
165 down(&c->erase_free_sem);
159 spin_lock(&c->erase_completion_lock); 166 spin_lock(&c->erase_completion_lock);
160 list_move_tail(&jeb->list, &c->erase_complete_list); 167 list_move_tail(&jeb->list, &c->erase_complete_list);
161 spin_unlock(&c->erase_completion_lock); 168 spin_unlock(&c->erase_completion_lock);
169 up(&c->erase_free_sem);
162 /* Ensure that kupdated calls us again to mark them clean */ 170 /* Ensure that kupdated calls us again to mark them clean */
163 jffs2_erase_pending_trigger(c); 171 jffs2_erase_pending_trigger(c);
164} 172}
@@ -172,22 +180,26 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock
172 failed too many times. */ 180 failed too many times. */
173 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { 181 if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) {
174 /* We'd like to give this block another try. */ 182 /* We'd like to give this block another try. */
183 down(&c->erase_free_sem);
175 spin_lock(&c->erase_completion_lock); 184 spin_lock(&c->erase_completion_lock);
176 list_move(&jeb->list, &c->erase_pending_list); 185 list_move(&jeb->list, &c->erase_pending_list);
177 c->erasing_size -= c->sector_size; 186 c->erasing_size -= c->sector_size;
178 c->dirty_size += c->sector_size; 187 c->dirty_size += c->sector_size;
179 jeb->dirty_size = c->sector_size; 188 jeb->dirty_size = c->sector_size;
180 spin_unlock(&c->erase_completion_lock); 189 spin_unlock(&c->erase_completion_lock);
190 up(&c->erase_free_sem);
181 return; 191 return;
182 } 192 }
183 } 193 }
184 194
195 down(&c->erase_free_sem);
185 spin_lock(&c->erase_completion_lock); 196 spin_lock(&c->erase_completion_lock);
186 c->erasing_size -= c->sector_size; 197 c->erasing_size -= c->sector_size;
187 c->bad_size += c->sector_size; 198 c->bad_size += c->sector_size;
188 list_move(&jeb->list, &c->bad_list); 199 list_move(&jeb->list, &c->bad_list);
189 c->nr_erasing_blocks--; 200 c->nr_erasing_blocks--;
190 spin_unlock(&c->erase_completion_lock); 201 spin_unlock(&c->erase_completion_lock);
202 up(&c->erase_free_sem);
191 wake_up(&c->erase_wait); 203 wake_up(&c->erase_wait);
192} 204}
193 205
@@ -317,6 +329,33 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
317 size_t retlen; 329 size_t retlen;
318 int ret = -EIO; 330 int ret = -EIO;
319 331
332 if (c->mtd->point) {
333 unsigned long *wordebuf;
334
335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf);
336 if (ret) {
337 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
338 goto do_flash_read;
339 }
340 if (retlen < c->sector_size) {
341 /* Don't muck about if it won't let us point to the whole erase sector */
342 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
343 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen);
344 goto do_flash_read;
345 }
346 wordebuf = ebuf-sizeof(*wordebuf);
347 retlen /= sizeof(*wordebuf);
348 do {
349 if (*++wordebuf != ~0)
350 break;
351 } while(--retlen);
352 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size);
353 if (retlen)
354 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
355 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
356 return 0;
357 }
358 do_flash_read:
320 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL); 359 ebuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
321 if (!ebuf) { 360 if (!ebuf) {
322 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n", jeb->offset); 361 printk(KERN_WARNING "Failed to allocate page buffer for verifying erase at 0x%08x. Refiling\n", jeb->offset);
@@ -362,7 +401,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
362{ 401{
363 size_t retlen; 402 size_t retlen;
364 int ret; 403 int ret;
365 uint32_t bad_offset; 404 uint32_t uninitialized_var(bad_offset);
366 405
367 switch (jffs2_block_check_erase(c, jeb, &bad_offset)) { 406 switch (jffs2_block_check_erase(c, jeb, &bad_offset)) {
368 case -EAGAIN: goto refile; 407 case -EAGAIN: goto refile;
@@ -417,6 +456,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
417 jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, c->cleanmarker_size, NULL); 456 jffs2_link_node_ref(c, jeb, jeb->offset | REF_NORMAL, c->cleanmarker_size, NULL);
418 } 457 }
419 458
459 down(&c->erase_free_sem);
420 spin_lock(&c->erase_completion_lock); 460 spin_lock(&c->erase_completion_lock);
421 c->erasing_size -= c->sector_size; 461 c->erasing_size -= c->sector_size;
422 c->free_size += jeb->free_size; 462 c->free_size += jeb->free_size;
@@ -429,23 +469,28 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
429 c->nr_erasing_blocks--; 469 c->nr_erasing_blocks--;
430 c->nr_free_blocks++; 470 c->nr_free_blocks++;
431 spin_unlock(&c->erase_completion_lock); 471 spin_unlock(&c->erase_completion_lock);
472 up(&c->erase_free_sem);
432 wake_up(&c->erase_wait); 473 wake_up(&c->erase_wait);
433 return; 474 return;
434 475
435filebad: 476filebad:
477 down(&c->erase_free_sem);
436 spin_lock(&c->erase_completion_lock); 478 spin_lock(&c->erase_completion_lock);
437 /* Stick it on a list (any list) so erase_failed can take it 479 /* Stick it on a list (any list) so erase_failed can take it
438 right off again. Silly, but shouldn't happen often. */ 480 right off again. Silly, but shouldn't happen often. */
439 list_add(&jeb->list, &c->erasing_list); 481 list_add(&jeb->list, &c->erasing_list);
440 spin_unlock(&c->erase_completion_lock); 482 spin_unlock(&c->erase_completion_lock);
483 up(&c->erase_free_sem);
441 jffs2_erase_failed(c, jeb, bad_offset); 484 jffs2_erase_failed(c, jeb, bad_offset);
442 return; 485 return;
443 486
444refile: 487refile:
445 /* Stick it back on the list from whence it came and come back later */ 488 /* Stick it back on the list from whence it came and come back later */
446 jffs2_erase_pending_trigger(c); 489 jffs2_erase_pending_trigger(c);
490 down(&c->erase_free_sem);
447 spin_lock(&c->erase_completion_lock); 491 spin_lock(&c->erase_completion_lock);
448 list_add(&jeb->list, &c->erase_complete_list); 492 list_add(&jeb->list, &c->erase_complete_list);
449 spin_unlock(&c->erase_completion_lock); 493 spin_unlock(&c->erase_completion_lock);
494 up(&c->erase_free_sem);
450 return; 495 return;
451} 496}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 8bc727b71696..ed85f9afdbc8 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -24,7 +24,7 @@
24 24
25static int jffs2_flash_setup(struct jffs2_sb_info *c); 25static int jffs2_flash_setup(struct jffs2_sb_info *c);
26 26
27static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) 27int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
28{ 28{
29 struct jffs2_full_dnode *old_metadata, *new_metadata; 29 struct jffs2_full_dnode *old_metadata, *new_metadata;
30 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 30 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
@@ -36,10 +36,8 @@ static int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
36 unsigned int ivalid; 36 unsigned int ivalid;
37 uint32_t alloclen; 37 uint32_t alloclen;
38 int ret; 38 int ret;
39
39 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino)); 40 D1(printk(KERN_DEBUG "jffs2_setattr(): ino #%lu\n", inode->i_ino));
40 ret = inode_change_ok(inode, iattr);
41 if (ret)
42 return ret;
43 41
44 /* Special cases - we don't want more than one data node 42 /* Special cases - we don't want more than one data node
45 for these types on the medium at any time. So setattr 43 for these types on the medium at any time. So setattr
@@ -183,9 +181,14 @@ int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
183{ 181{
184 int rc; 182 int rc;
185 183
184 rc = inode_change_ok(dentry->d_inode, iattr);
185 if (rc)
186 return rc;
187
186 rc = jffs2_do_setattr(dentry->d_inode, iattr); 188 rc = jffs2_do_setattr(dentry->d_inode, iattr);
187 if (!rc && (iattr->ia_valid & ATTR_MODE)) 189 if (!rc && (iattr->ia_valid & ATTR_MODE))
188 rc = jffs2_acl_chmod(dentry->d_inode); 190 rc = jffs2_acl_chmod(dentry->d_inode);
191
189 return rc; 192 return rc;
190} 193}
191 194
@@ -399,7 +402,8 @@ void jffs2_write_super (struct super_block *sb)
399 402
400/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, 403/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
401 fill in the raw_inode while you're at it. */ 404 fill in the raw_inode while you're at it. */
402struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri) 405struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri,
406 struct posix_acl **acl)
403{ 407{
404 struct inode *inode; 408 struct inode *inode;
405 struct super_block *sb = dir_i->i_sb; 409 struct super_block *sb = dir_i->i_sb;
@@ -431,7 +435,23 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
431 } else { 435 } else {
432 ri->gid = cpu_to_je16(current->fsgid); 436 ri->gid = cpu_to_je16(current->fsgid);
433 } 437 }
434 ri->mode = cpu_to_jemode(mode); 438
439 /* POSIX ACLs have to be processed now, at least partly.
440 The umask is only applied if there's no default ACL */
441 if (!S_ISLNK(mode)) {
442 *acl = jffs2_get_acl(dir_i, ACL_TYPE_DEFAULT);
443 if (IS_ERR(*acl)) {
444 make_bad_inode(inode);
445 iput(inode);
446 inode = (void *)*acl;
447 *acl = NULL;
448 return inode;
449 }
450 if (!(*acl))
451 mode &= ~current->fs->umask;
452 } else {
453 *acl = NULL;
454 }
435 ret = jffs2_do_new_inode (c, f, mode, ri); 455 ret = jffs2_do_new_inode (c, f, mode, ri);
436 if (ret) { 456 if (ret) {
437 make_bad_inode(inode); 457 make_bad_inode(inode);
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 2d99e06ab223..32ff0373aa04 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -122,6 +122,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 struct jffs2_inode_cache *ic; 122 struct jffs2_inode_cache *ic;
123 struct jffs2_eraseblock *jeb; 123 struct jffs2_eraseblock *jeb;
124 struct jffs2_raw_node_ref *raw; 124 struct jffs2_raw_node_ref *raw;
125 uint32_t gcblock_dirty;
125 int ret = 0, inum, nlink; 126 int ret = 0, inum, nlink;
126 int xattr = 0; 127 int xattr = 0;
127 128
@@ -236,6 +237,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
236 } 237 }
237 238
238 raw = jeb->gc_node; 239 raw = jeb->gc_node;
240 gcblock_dirty = jeb->dirty_size;
239 241
240 while(ref_obsolete(raw)) { 242 while(ref_obsolete(raw)) {
241 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw))); 243 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
@@ -282,7 +284,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
282 } else { 284 } else {
283 ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw); 285 ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
284 } 286 }
285 goto release_sem; 287 goto test_gcnode;
286 } 288 }
287#endif 289#endif
288 290
@@ -376,7 +378,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
376 378
377 if (ret != -EBADFD) { 379 if (ret != -EBADFD) {
378 spin_unlock(&c->inocache_lock); 380 spin_unlock(&c->inocache_lock);
379 goto release_sem; 381 goto test_gcnode;
380 } 382 }
381 383
382 /* Fall through if it wanted us to, with inocache_lock held */ 384 /* Fall through if it wanted us to, with inocache_lock held */
@@ -407,6 +409,12 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
407 409
408 jffs2_gc_release_inode(c, f); 410 jffs2_gc_release_inode(c, f);
409 411
412 test_gcnode:
413 if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
414 /* Eep. This really should never happen. GC is broken */
415 printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
416 ret = -ENOSPC;
417 }
410 release_sem: 418 release_sem:
411 up(&c->alloc_sem); 419 up(&c->alloc_sem);
412 420
@@ -556,7 +564,7 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
556 564
557 node = kmalloc(rawlen, GFP_KERNEL); 565 node = kmalloc(rawlen, GFP_KERNEL);
558 if (!node) 566 if (!node)
559 return -ENOMEM; 567 return -ENOMEM;
560 568
561 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node); 569 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
562 if (!ret && retlen != rawlen) 570 if (!ret && retlen != rawlen)
@@ -598,10 +606,15 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
598 goto bail; 606 goto bail;
599 } 607 }
600 608
609 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
610 printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
611 goto bail;
612 }
613
601 if (node->d.nsize) { 614 if (node->d.nsize) {
602 crc = crc32(0, node->d.name, node->d.nsize); 615 crc = crc32(0, node->d.name, node->d.nsize);
603 if (je32_to_cpu(node->d.name_crc) != crc) { 616 if (je32_to_cpu(node->d.name_crc) != crc) {
604 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n", 617 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
605 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc); 618 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
606 goto bail; 619 goto bail;
607 } 620 }
@@ -624,7 +637,7 @@ static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
624 637
625 if (ret || (retlen != rawlen)) { 638 if (ret || (retlen != rawlen)) {
626 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n", 639 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
627 rawlen, phys_ofs, ret, retlen); 640 rawlen, phys_ofs, ret, retlen);
628 if (retlen) { 641 if (retlen) {
629 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL); 642 jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
630 } else { 643 } else {
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index b13298a824ed..3a2197f3c812 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -69,6 +69,8 @@ struct jffs2_sb_info {
69 uint8_t resv_blocks_gctrigger; /* ... wake up the GC thread */ 69 uint8_t resv_blocks_gctrigger; /* ... wake up the GC thread */
70 uint8_t resv_blocks_gcbad; /* ... pick a block from the bad_list to GC */ 70 uint8_t resv_blocks_gcbad; /* ... pick a block from the bad_list to GC */
71 uint8_t resv_blocks_gcmerge; /* ... merge pages when garbage collecting */ 71 uint8_t resv_blocks_gcmerge; /* ... merge pages when garbage collecting */
72 /* Number of 'very dirty' blocks before we trigger immediate GC */
73 uint8_t vdirty_blocks_gctrigger;
72 74
73 uint32_t nospc_dirty_size; 75 uint32_t nospc_dirty_size;
74 76
@@ -106,6 +108,9 @@ struct jffs2_sb_info {
106 108
107 uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */ 109 uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */
108 110
111#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
112 unsigned char *wbuf_verify; /* read-back buffer for verification */
113#endif
109#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 114#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
110 unsigned char *wbuf; /* Write-behind buffer for NAND flash */ 115 unsigned char *wbuf; /* Write-behind buffer for NAND flash */
111 uint32_t wbuf_ofs; 116 uint32_t wbuf_ofs;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index bc5509fe577b..ec1aae9e695e 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -127,7 +127,7 @@ static inline struct jffs2_inode_cache *jffs2_raw_ref_to_ic(struct jffs2_raw_nod
127 return ((struct jffs2_inode_cache *)raw); 127 return ((struct jffs2_inode_cache *)raw);
128} 128}
129 129
130 /* flash_offset & 3 always has to be zero, because nodes are 130 /* flash_offset & 3 always has to be zero, because nodes are
131 always aligned at 4 bytes. So we have a couple of extra bits 131 always aligned at 4 bytes. So we have a couple of extra bits
132 to play with, which indicate the node's status; see below: */ 132 to play with, which indicate the node's status; see below: */
133#define REF_UNCHECKED 0 /* We haven't yet checked the CRC or built its inode */ 133#define REF_UNCHECKED 0 /* We haven't yet checked the CRC or built its inode */
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index dbc908ad622b..a0313fa8748e 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -154,7 +154,7 @@ int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
154 while(ret == -EAGAIN) { 154 while(ret == -EAGAIN) {
155 ret = jffs2_do_reserve_space(c, minsize, len, sumsize); 155 ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
156 if (ret) { 156 if (ret) {
157 D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret)); 157 D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret));
158 } 158 }
159 } 159 }
160 spin_unlock(&c->erase_completion_lock); 160 spin_unlock(&c->erase_completion_lock);
@@ -423,7 +423,12 @@ struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
423 even after refiling c->nextblock */ 423 even after refiling c->nextblock */
424 if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE)) 424 if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE))
425 && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) { 425 && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) {
426 printk(KERN_WARNING "argh. node added in wrong place\n"); 426 printk(KERN_WARNING "argh. node added in wrong place at 0x%08x(%d)\n", ofs & ~3, ofs & 3);
427 if (c->nextblock)
428 printk(KERN_WARNING "nextblock 0x%08x", c->nextblock->offset);
429 else
430 printk(KERN_WARNING "No nextblock");
431 printk(", expected at %08x\n", jeb->offset + (c->sector_size - jeb->free_size));
427 return ERR_PTR(-EINVAL); 432 return ERR_PTR(-EINVAL);
428 } 433 }
429#endif 434#endif
@@ -717,6 +722,8 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
717{ 722{
718 int ret = 0; 723 int ret = 0;
719 uint32_t dirty; 724 uint32_t dirty;
725 int nr_very_dirty = 0;
726 struct jffs2_eraseblock *jeb;
720 727
721 if (c->unchecked_size) { 728 if (c->unchecked_size) {
722 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n", 729 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
@@ -738,8 +745,18 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
738 (dirty > c->nospc_dirty_size)) 745 (dirty > c->nospc_dirty_size))
739 ret = 1; 746 ret = 1;
740 747
741 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): nr_free_blocks %d, nr_erasing_blocks %d, dirty_size 0x%x: %s\n", 748 list_for_each_entry(jeb, &c->very_dirty_list, list) {
742 c->nr_free_blocks, c->nr_erasing_blocks, c->dirty_size, ret?"yes":"no")); 749 nr_very_dirty++;
750 if (nr_very_dirty == c->vdirty_blocks_gctrigger) {
751 ret = 1;
752 /* In debug mode, actually go through and count them all */
753 D1(continue);
754 break;
755 }
756 }
757
758 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): nr_free_blocks %d, nr_erasing_blocks %d, dirty_size 0x%x, vdirty_blocks %d: %s\n",
759 c->nr_free_blocks, c->nr_erasing_blocks, c->dirty_size, nr_very_dirty, ret?"yes":"no"));
743 760
744 return ret; 761 return ret;
745} 762}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 80daea96bbc2..f6743a915cf3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -173,12 +173,15 @@ int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
173extern const struct inode_operations jffs2_symlink_inode_operations; 173extern const struct inode_operations jffs2_symlink_inode_operations;
174 174
175/* fs.c */ 175/* fs.c */
176struct posix_acl;
177
176int jffs2_setattr (struct dentry *, struct iattr *); 178int jffs2_setattr (struct dentry *, struct iattr *);
179int jffs2_do_setattr (struct inode *, struct iattr *);
177void jffs2_read_inode (struct inode *); 180void jffs2_read_inode (struct inode *);
178void jffs2_clear_inode (struct inode *); 181void jffs2_clear_inode (struct inode *);
179void jffs2_dirty_inode(struct inode *inode); 182void jffs2_dirty_inode(struct inode *inode);
180struct inode *jffs2_new_inode (struct inode *dir_i, int mode, 183struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
181 struct jffs2_raw_inode *ri); 184 struct jffs2_raw_inode *ri, struct posix_acl **acl);
182int jffs2_statfs (struct dentry *, struct kstatfs *); 185int jffs2_statfs (struct dentry *, struct kstatfs *);
183void jffs2_write_super (struct super_block *); 186void jffs2_write_super (struct super_block *);
184int jffs2_remount_fs (struct super_block *, int *, char *); 187int jffs2_remount_fs (struct super_block *, int *, char *);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index b5baa356fed2..2eae5d2dbebe 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -65,7 +65,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
65 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 65 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
66 if (!err && retlen < tn->csize) { 66 if (!err && retlen < tn->csize) {
67 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); 67 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
68 c->mtd->unpoint(c->mtd, buffer, ofs, len); 68 c->mtd->unpoint(c->mtd, buffer, ofs, retlen);
69 } else if (err) 69 } else if (err)
70 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 70 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
71 else 71 else
@@ -211,7 +211,7 @@ static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *
211 * ordering. 211 * ordering.
212 * 212 *
213 * Returns 0 if the node was handled (including marking it obsolete) 213 * Returns 0 if the node was handled (including marking it obsolete)
214 * < 0 an if error occurred 214 * < 0 an if error occurred
215 */ 215 */
216static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c, 216static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
217 struct jffs2_readinode_info *rii, 217 struct jffs2_readinode_info *rii,
@@ -862,8 +862,8 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
862 JFFS2_ERROR("REF_UNCHECKED but unknown node at %#08x\n", 862 JFFS2_ERROR("REF_UNCHECKED but unknown node at %#08x\n",
863 ref_offset(ref)); 863 ref_offset(ref));
864 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n", 864 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n",
865 je16_to_cpu(un->magic), je16_to_cpu(un->nodetype), 865 je16_to_cpu(un->magic), je16_to_cpu(un->nodetype),
866 je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc)); 866 je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc));
867 jffs2_mark_node_obsolete(c, ref); 867 jffs2_mark_node_obsolete(c, ref);
868 return 0; 868 return 0;
869 } 869 }
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 6c75cd433342..272872d27fd5 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -101,7 +101,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
101 if (!ret && pointlen < c->mtd->size) { 101 if (!ret && pointlen < c->mtd->size) {
102 /* Don't muck about if it won't let us point to the whole flash */ 102 /* Don't muck about if it won't let us point to the whole flash */
103 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 103 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
104 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); 104 c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen);
105 flashbuf = NULL; 105 flashbuf = NULL;
106 } 106 }
107 if (ret) 107 if (ret)
@@ -863,7 +863,7 @@ scan_more:
863 switch (je16_to_cpu(node->nodetype) & JFFS2_COMPAT_MASK) { 863 switch (je16_to_cpu(node->nodetype) & JFFS2_COMPAT_MASK) {
864 case JFFS2_FEATURE_ROCOMPAT: 864 case JFFS2_FEATURE_ROCOMPAT:
865 printk(KERN_NOTICE "Read-only compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs); 865 printk(KERN_NOTICE "Read-only compatible feature node (0x%04x) found at offset 0x%08x\n", je16_to_cpu(node->nodetype), ofs);
866 c->flags |= JFFS2_SB_FLAG_RO; 866 c->flags |= JFFS2_SB_FLAG_RO;
867 if (!(jffs2_is_readonly(c))) 867 if (!(jffs2_is_readonly(c)))
868 return -EROFS; 868 return -EROFS;
869 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen))))) 869 if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(node->totlen)))))
@@ -1004,6 +1004,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
1004{ 1004{
1005 struct jffs2_full_dirent *fd; 1005 struct jffs2_full_dirent *fd;
1006 struct jffs2_inode_cache *ic; 1006 struct jffs2_inode_cache *ic;
1007 uint32_t checkedlen;
1007 uint32_t crc; 1008 uint32_t crc;
1008 int err; 1009 int err;
1009 1010
@@ -1024,12 +1025,18 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
1024 1025
1025 pseudo_random += je32_to_cpu(rd->version); 1026 pseudo_random += je32_to_cpu(rd->version);
1026 1027
1027 fd = jffs2_alloc_full_dirent(rd->nsize+1); 1028 /* Should never happen. Did. (OLPC trac #4184)*/
1029 checkedlen = strnlen(rd->name, rd->nsize);
1030 if (checkedlen < rd->nsize) {
1031 printk(KERN_ERR "Dirent at %08x has zeroes in name. Truncating to %d chars\n",
1032 ofs, checkedlen);
1033 }
1034 fd = jffs2_alloc_full_dirent(checkedlen+1);
1028 if (!fd) { 1035 if (!fd) {
1029 return -ENOMEM; 1036 return -ENOMEM;
1030 } 1037 }
1031 memcpy(&fd->name, rd->name, rd->nsize); 1038 memcpy(&fd->name, rd->name, checkedlen);
1032 fd->name[rd->nsize] = 0; 1039 fd->name[checkedlen] = 0;
1033 1040
1034 crc = crc32(0, fd->name, rd->nsize); 1041 crc = crc32(0, fd->name, rd->nsize);
1035 if (crc != je32_to_cpu(rd->name_crc)) { 1042 if (crc != je32_to_cpu(rd->name_crc)) {
@@ -1055,7 +1062,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
1055 fd->next = NULL; 1062 fd->next = NULL;
1056 fd->version = je32_to_cpu(rd->version); 1063 fd->version = je32_to_cpu(rd->version);
1057 fd->ino = je32_to_cpu(rd->ino); 1064 fd->ino = je32_to_cpu(rd->ino);
1058 fd->nhash = full_name_hash(fd->name, rd->nsize); 1065 fd->nhash = full_name_hash(fd->name, checkedlen);
1059 fd->type = rd->type; 1066 fd->type = rd->type;
1060 jffs2_add_fd_to_list(c, fd, &ic->scan_dents); 1067 jffs2_add_fd_to_list(c, fd, &ic->scan_dents);
1061 1068
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index bc9f6ba10823..02c39c64ecb3 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -38,9 +38,9 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
38 } 38 }
39 rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0); 39 rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0);
40 40
41 kfree(name); 41 kfree(name);
42 kfree(value); 42 kfree(value);
43 return rc; 43 return rc;
44} 44}
45 45
46/* ---- XATTR Handler for "security.*" ----------------- */ 46/* ---- XATTR Handler for "security.*" ----------------- */
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index d828b296392a..629af01e5ade 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -2,10 +2,10 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, 4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
5 * Zoltan Sogor <weth@inf.u-szeged.hu>, 5 * Zoltan Sogor <weth@inf.u-szeged.hu>,
6 * Patrik Kluba <pajko@halom.u-szeged.hu>, 6 * Patrik Kluba <pajko@halom.u-szeged.hu>,
7 * University of Szeged, Hungary 7 * University of Szeged, Hungary
8 * 2006 KaiGai Kohei <kaigai@ak.jp.nec.com> 8 * 2006 KaiGai Kohei <kaigai@ak.jp.nec.com>
9 * 9 *
10 * For licensing information, see the file 'LICENCE' in this directory. 10 * For licensing information, see the file 'LICENCE' in this directory.
11 * 11 *
@@ -429,6 +429,7 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
429 429
430 case JFFS2_NODETYPE_DIRENT: { 430 case JFFS2_NODETYPE_DIRENT: {
431 struct jffs2_sum_dirent_flash *spd; 431 struct jffs2_sum_dirent_flash *spd;
432 int checkedlen;
432 spd = sp; 433 spd = sp;
433 434
434 dbg_summary("Dirent at 0x%08x-0x%08x\n", 435 dbg_summary("Dirent at 0x%08x-0x%08x\n",
@@ -436,12 +437,25 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
436 jeb->offset + je32_to_cpu(spd->offset) + je32_to_cpu(spd->totlen)); 437 jeb->offset + je32_to_cpu(spd->offset) + je32_to_cpu(spd->totlen));
437 438
438 439
439 fd = jffs2_alloc_full_dirent(spd->nsize+1); 440 /* This should never happen, but https://dev.laptop.org/ticket/4184 */
441 checkedlen = strnlen(spd->name, spd->nsize);
442 if (!checkedlen) {
443 printk(KERN_ERR "Dirent at %08x has zero at start of name. Aborting mount.\n",
444 jeb->offset + je32_to_cpu(spd->offset));
445 return -EIO;
446 }
447 if (checkedlen < spd->nsize) {
448 printk(KERN_ERR "Dirent at %08x has zeroes in name. Truncating to %d chars\n",
449 jeb->offset + je32_to_cpu(spd->offset), checkedlen);
450 }
451
452
453 fd = jffs2_alloc_full_dirent(checkedlen+1);
440 if (!fd) 454 if (!fd)
441 return -ENOMEM; 455 return -ENOMEM;
442 456
443 memcpy(&fd->name, spd->name, spd->nsize); 457 memcpy(&fd->name, spd->name, checkedlen);
444 fd->name[spd->nsize] = 0; 458 fd->name[checkedlen] = 0;
445 459
446 ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(spd->pino)); 460 ic = jffs2_scan_make_ino_cache(c, je32_to_cpu(spd->pino));
447 if (!ic) { 461 if (!ic) {
@@ -455,7 +469,7 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras
455 fd->next = NULL; 469 fd->next = NULL;
456 fd->version = je32_to_cpu(spd->version); 470 fd->version = je32_to_cpu(spd->version);
457 fd->ino = je32_to_cpu(spd->ino); 471 fd->ino = je32_to_cpu(spd->ino);
458 fd->nhash = full_name_hash(fd->name, spd->nsize); 472 fd->nhash = full_name_hash(fd->name, checkedlen);
459 fd->type = spd->type; 473 fd->type = spd->type;
460 474
461 jffs2_add_fd_to_list(c, fd, &ic->scan_dents); 475 jffs2_add_fd_to_list(c, fd, &ic->scan_dents);
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 0c6669e21390..8bf34f2fa5ce 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -2,9 +2,9 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, 4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
5 * Zoltan Sogor <weth@inf.u-szeged.hu>, 5 * Zoltan Sogor <weth@inf.u-szeged.hu>,
6 * Patrik Kluba <pajko@halom.u-szeged.hu>, 6 * Patrik Kluba <pajko@halom.u-szeged.hu>,
7 * University of Szeged, Hungary 7 * University of Szeged, Hungary
8 * 8 *
9 * For licensing information, see the file 'LICENCE' in this directory. 9 * For licensing information, see the file 'LICENCE' in this directory.
10 * 10 *
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 91d1d0f1c66c..d1d4f27464ba 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -220,6 +220,47 @@ static struct jffs2_raw_node_ref **jffs2_incore_replace_raw(struct jffs2_sb_info
220 return NULL; 220 return NULL;
221} 221}
222 222
223#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
224static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf,
225 uint32_t ofs)
226{
227 int ret;
228 size_t retlen;
229 char *eccstr;
230
231 ret = c->mtd->read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
232 if (ret && ret != -EUCLEAN && ret != -EBADMSG) {
233 printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret);
234 return ret;
235 } else if (retlen != c->wbuf_pagesize) {
236 printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x gave short read: %zd not %d.\n", ofs, retlen, c->wbuf_pagesize);
237 return -EIO;
238 }
239 if (!memcmp(buf, c->wbuf_verify, c->wbuf_pagesize))
240 return 0;
241
242 if (ret == -EUCLEAN)
243 eccstr = "corrected";
244 else if (ret == -EBADMSG)
245 eccstr = "correction failed";
246 else
247 eccstr = "OK or unused";
248
249 printk(KERN_WARNING "Write verify error (ECC %s) at %08x. Wrote:\n",
250 eccstr, c->wbuf_ofs);
251 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1,
252 c->wbuf, c->wbuf_pagesize, 0);
253
254 printk(KERN_WARNING "Read back:\n");
255 print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1,
256 c->wbuf_verify, c->wbuf_pagesize, 0);
257
258 return -EIO;
259}
260#else
261#define jffs2_verify_write(c,b,o) (0)
262#endif
263
223/* Recover from failure to write wbuf. Recover the nodes up to the 264/* Recover from failure to write wbuf. Recover the nodes up to the
224 * wbuf, not the one which we were starting to try to write. */ 265 * wbuf, not the one which we were starting to try to write. */
225 266
@@ -380,7 +421,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
380 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen, 421 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen,
381 rewrite_buf); 422 rewrite_buf);
382 423
383 if (ret || retlen != towrite) { 424 if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) {
384 /* Argh. We tried. Really we did. */ 425 /* Argh. We tried. Really we did. */
385 printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n"); 426 printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n");
386 kfree(buf); 427 kfree(buf);
@@ -587,15 +628,16 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
587 628
588 ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf); 629 ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf);
589 630
590 if (ret || retlen != c->wbuf_pagesize) { 631 if (ret) {
591 if (ret) 632 printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret);
592 printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n",ret); 633 goto wfail;
593 else { 634 } else if (retlen != c->wbuf_pagesize) {
594 printk(KERN_WARNING "jffs2_flush_wbuf(): Write was short: %zd instead of %d\n", 635 printk(KERN_WARNING "jffs2_flush_wbuf(): Write was short: %zd instead of %d\n",
595 retlen, c->wbuf_pagesize); 636 retlen, c->wbuf_pagesize);
596 ret = -EIO; 637 ret = -EIO;
597 } 638 goto wfail;
598 639 } else if ((ret = jffs2_verify_write(c, c->wbuf, c->wbuf_ofs))) {
640 wfail:
599 jffs2_wbuf_recover(c); 641 jffs2_wbuf_recover(c);
600 642
601 return ret; 643 return ret;
@@ -966,8 +1008,8 @@ exit:
966 1008
967#define NR_OOB_SCAN_PAGES 4 1009#define NR_OOB_SCAN_PAGES 4
968 1010
969/* For historical reasons we use only 12 bytes for OOB clean marker */ 1011/* For historical reasons we use only 8 bytes for OOB clean marker */
970#define OOB_CM_SIZE 12 1012#define OOB_CM_SIZE 8
971 1013
972static const struct jffs2_unknown_node oob_cleanmarker = 1014static const struct jffs2_unknown_node oob_cleanmarker =
973{ 1015{
@@ -1021,8 +1063,8 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
1021/* 1063/*
1022 * Check for a valid cleanmarker. 1064 * Check for a valid cleanmarker.
1023 * Returns: 0 if a valid cleanmarker was found 1065 * Returns: 0 if a valid cleanmarker was found
1024 * 1 if no cleanmarker was found 1066 * 1 if no cleanmarker was found
1025 * negative error code if an error occurred 1067 * negative error code if an error occurred
1026 */ 1068 */
1027int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, 1069int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
1028 struct jffs2_eraseblock *jeb) 1070 struct jffs2_eraseblock *jeb)
@@ -1138,11 +1180,22 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
1138 return -ENOMEM; 1180 return -ENOMEM;
1139 } 1181 }
1140 1182
1183#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1184 c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1185 if (!c->wbuf_verify) {
1186 kfree(c->oobbuf);
1187 kfree(c->wbuf);
1188 return -ENOMEM;
1189 }
1190#endif
1141 return 0; 1191 return 0;
1142} 1192}
1143 1193
1144void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c) 1194void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c)
1145{ 1195{
1196#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1197 kfree(c->wbuf_verify);
1198#endif
1146 kfree(c->wbuf); 1199 kfree(c->wbuf);
1147 kfree(c->oobbuf); 1200 kfree(c->oobbuf);
1148} 1201}
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 664c164aa67c..2f5695446d0f 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -215,6 +215,17 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
215 BUG(); 215 BUG();
216 }); 216 });
217 217
218 if (strnlen(name, namelen) != namelen) {
219 /* This should never happen, but seems to have done on at least one
220 occasion: https://dev.laptop.org/ticket/4184 */
221 printk(KERN_CRIT "Error in jffs2_write_dirent() -- name contains zero bytes!\n");
222 printk(KERN_CRIT "Directory inode #%u, name at *0x%p \"%s\"->ino #%u, name_crc 0x%08x\n",
223 je32_to_cpu(rd->pino), name, name, je32_to_cpu(rd->ino),
224 je32_to_cpu(rd->name_crc));
225 WARN_ON(1);
226 return ERR_PTR(-EIO);
227 }
228
218 vecs[0].iov_base = rd; 229 vecs[0].iov_base = rd;
219 vecs[0].iov_len = sizeof(*rd); 230 vecs[0].iov_len = sizeof(*rd);
220 vecs[1].iov_base = (unsigned char *)name; 231 vecs[1].iov_base = (unsigned char *)name;
@@ -226,7 +237,7 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff
226 237
227 fd->version = je32_to_cpu(rd->version); 238 fd->version = je32_to_cpu(rd->version);
228 fd->ino = je32_to_cpu(rd->ino); 239 fd->ino = je32_to_cpu(rd->ino);
229 fd->nhash = full_name_hash(name, strlen(name)); 240 fd->nhash = full_name_hash(name, namelen);
230 fd->type = rd->type; 241 fd->type = rd->type;
231 memcpy(fd->name, name, namelen); 242 memcpy(fd->name, name, namelen);
232 fd->name[namelen]=0; 243 fd->name[namelen]=0;
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index 3b0ff2925937..6e3b5ddfb7ab 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -75,7 +75,7 @@ extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
75extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c); 75extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
76 76
77extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, 77extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
78 uint32_t xid, uint32_t version); 78 uint32_t xid, uint32_t version);
79 79
80extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); 80extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
81extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); 81extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic);
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 40942bc516bb..8bbeab90ada1 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -17,7 +17,7 @@
17#include "nodelist.h" 17#include "nodelist.h"
18 18
19static int jffs2_user_getxattr(struct inode *inode, const char *name, 19static int jffs2_user_getxattr(struct inode *inode, const char *name,
20 void *buffer, size_t size) 20 void *buffer, size_t size)
21{ 21{
22 if (!strcmp(name, "")) 22 if (!strcmp(name, ""))
23 return -EINVAL; 23 return -EINVAL;
@@ -25,7 +25,7 @@ static int jffs2_user_getxattr(struct inode *inode, const char *name,
25} 25}
26 26
27static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer, 27static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer,
28 size_t size, int flags) 28 size_t size, int flags)
29{ 29{
30 if (!strcmp(name, "")) 30 if (!strcmp(name, ""))
31 return -EINVAL; 31 return -EINVAL;
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index c14ba3cfa818..df0b8535de84 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -520,7 +520,7 @@ static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
520 * Changes an entry in the directory index table 520 * Changes an entry in the directory index table
521 */ 521 */
522static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 522static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
523 int slot, struct metapage ** mp, u64 *lblock) 523 int slot, struct metapage ** mp, s64 *lblock)
524{ 524{
525 struct dir_table_slot *dirtab_slot; 525 struct dir_table_slot *dirtab_slot;
526 526
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index cb8f30985ad1..439901d205fe 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -49,7 +49,7 @@ struct jfs_inode_info {
49 short btorder; /* access order */ 49 short btorder; /* access order */
50 short btindex; /* btpage entry index*/ 50 short btindex; /* btpage entry index*/
51 struct inode *ipimap; /* inode map */ 51 struct inode *ipimap; /* inode map */
52 long cflag; /* commit flags */ 52 unsigned long cflag; /* commit flags */
53 u16 bxflag; /* xflag of pseudo buffer? */ 53 u16 bxflag; /* xflag of pseudo buffer? */
54 unchar agno; /* ag number */ 54 unchar agno; /* ag number */
55 signed char active_ag; /* ag currently allocating from */ 55 signed char active_ag; /* ag currently allocating from */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index de3e4a506dbc..15a3974cdeeb 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2162,7 +2162,7 @@ static void lbmStartIO(struct lbuf * bp)
2162 /* check if journaling to disk has been disabled */ 2162 /* check if journaling to disk has been disabled */
2163 if (log->no_integrity) { 2163 if (log->no_integrity) {
2164 bio->bi_size = 0; 2164 bio->bi_size = 0;
2165 lbmIODone(bio, 0, 0); 2165 lbmIODone(bio, 0);
2166 } else { 2166 } else {
2167 submit_bio(WRITE_SYNC, bio); 2167 submit_bio(WRITE_SYNC, bio);
2168 INCREMENT(lmStat.submitted); 2168 INCREMENT(lmStat.submitted);
@@ -2200,16 +2200,13 @@ static int lbmIOWait(struct lbuf * bp, int flag)
2200 * 2200 *
2201 * executed at INTIODONE level 2201 * executed at INTIODONE level
2202 */ 2202 */
2203static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) 2203static void lbmIODone(struct bio *bio, int error)
2204{ 2204{
2205 struct lbuf *bp = bio->bi_private; 2205 struct lbuf *bp = bio->bi_private;
2206 struct lbuf *nextbp, *tail; 2206 struct lbuf *nextbp, *tail;
2207 struct jfs_log *log; 2207 struct jfs_log *log;
2208 unsigned long flags; 2208 unsigned long flags;
2209 2209
2210 if (bio->bi_size)
2211 return 1;
2212
2213 /* 2210 /*
2214 * get back jfs buffer bound to the i/o buffer 2211 * get back jfs buffer bound to the i/o buffer
2215 */ 2212 */
@@ -2238,7 +2235,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2238 /* wakeup I/O initiator */ 2235 /* wakeup I/O initiator */
2239 LCACHE_WAKEUP(&bp->l_ioevent); 2236 LCACHE_WAKEUP(&bp->l_ioevent);
2240 2237
2241 return 0; 2238 return;
2242 } 2239 }
2243 2240
2244 /* 2241 /*
@@ -2263,7 +2260,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2263 if (bp->l_flag & lbmDIRECT) { 2260 if (bp->l_flag & lbmDIRECT) {
2264 LCACHE_WAKEUP(&bp->l_ioevent); 2261 LCACHE_WAKEUP(&bp->l_ioevent);
2265 LCACHE_UNLOCK(flags); 2262 LCACHE_UNLOCK(flags);
2266 return 0; 2263 return;
2267 } 2264 }
2268 2265
2269 tail = log->wqueue; 2266 tail = log->wqueue;
@@ -2342,8 +2339,6 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
2342 2339
2343 LCACHE_UNLOCK(flags); /* unlock+enable */ 2340 LCACHE_UNLOCK(flags); /* unlock+enable */
2344 } 2341 }
2345
2346 return 0;
2347} 2342}
2348 2343
2349int jfsIOWait(void *arg) 2344int jfsIOWait(void *arg)
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 1f85ef0ec045..9236bc49ae7f 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -376,7 +376,7 @@ struct jfs_log {
376 int size; /* 4: log size in log page (in page) */ 376 int size; /* 4: log size in log page (in page) */
377 int l2bsize; /* 4: log2 of bsize */ 377 int l2bsize; /* 4: log2 of bsize */
378 378
379 long flag; /* 4: flag */ 379 unsigned long flag; /* 4: flag */
380 380
381 struct lbuf *lbuf_free; /* 4: free lbufs */ 381 struct lbuf *lbuf_free; /* 4: free lbufs */
382 wait_queue_head_t free_wait; /* 4: */ 382 wait_queue_head_t free_wait; /* 4: */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 62e96be02acf..941369c1ac8d 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,14 +280,10 @@ static void last_read_complete(struct page *page)
280 unlock_page(page); 280 unlock_page(page);
281} 281}
282 282
283static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done, 283static void metapage_read_end_io(struct bio *bio, int err)
284 int err)
285{ 284{
286 struct page *page = bio->bi_private; 285 struct page *page = bio->bi_private;
287 286
288 if (bio->bi_size)
289 return 1;
290
291 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 287 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
292 printk(KERN_ERR "metapage_read_end_io: I/O error\n"); 288 printk(KERN_ERR "metapage_read_end_io: I/O error\n");
293 SetPageError(page); 289 SetPageError(page);
@@ -295,8 +291,6 @@ static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
295 291
296 dec_io(page, last_read_complete); 292 dec_io(page, last_read_complete);
297 bio_put(bio); 293 bio_put(bio);
298
299 return 0;
300} 294}
301 295
302static void remove_from_logsync(struct metapage *mp) 296static void remove_from_logsync(struct metapage *mp)
@@ -341,23 +335,18 @@ static void last_write_complete(struct page *page)
341 end_page_writeback(page); 335 end_page_writeback(page);
342} 336}
343 337
344static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done, 338static void metapage_write_end_io(struct bio *bio, int err)
345 int err)
346{ 339{
347 struct page *page = bio->bi_private; 340 struct page *page = bio->bi_private;
348 341
349 BUG_ON(!PagePrivate(page)); 342 BUG_ON(!PagePrivate(page));
350 343
351 if (bio->bi_size)
352 return 1;
353
354 if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) { 344 if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
355 printk(KERN_ERR "metapage_write_end_io: I/O error\n"); 345 printk(KERN_ERR "metapage_write_end_io: I/O error\n");
356 SetPageError(page); 346 SetPageError(page);
357 } 347 }
358 dec_io(page, last_write_complete); 348 dec_io(page, last_write_complete);
359 bio_put(bio); 349 bio_put(bio);
360 return 0;
361} 350}
362 351
363static int metapage_writepage(struct page *page, struct writeback_control *wbc) 352static int metapage_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3353ed8421a7..908b23fadd05 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -10,6 +10,7 @@
10#include <linux/utsname.h> 10#include <linux/utsname.h>
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/sunrpc/clnt.h> 12#include <linux/sunrpc/clnt.h>
13#include <linux/sunrpc/xprtsock.h>
13#include <linux/sunrpc/svc.h> 14#include <linux/sunrpc/svc.h>
14#include <linux/lockd/lockd.h> 15#include <linux/lockd/lockd.h>
15#include <linux/lockd/sm_inter.h> 16#include <linux/lockd/sm_inter.h>
@@ -132,7 +133,7 @@ nsm_create(void)
132 .sin_port = 0, 133 .sin_port = 0,
133 }; 134 };
134 struct rpc_create_args args = { 135 struct rpc_create_args args = {
135 .protocol = IPPROTO_UDP, 136 .protocol = XPRT_TRANSPORT_UDP,
136 .address = (struct sockaddr *)&sin, 137 .address = (struct sockaddr *)&sin,
137 .addrsize = sizeof(sin), 138 .addrsize = sizeof(sin),
138 .servername = "localhost", 139 .servername = "localhost",
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5316e307a49d..633653bff944 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -62,8 +62,9 @@ static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c)
62 } 62 }
63 else 63 else
64 { 64 {
65 printk(KERN_NOTICE 65 dprintk("lockd: bad cookie size %d (only cookies under "
66 "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); 66 "%d bytes are supported.)\n",
67 len, NLM_MAXCOOKIELEN);
67 return NULL; 68 return NULL;
68 } 69 }
69 return p; 70 return p;
@@ -84,8 +85,7 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f)
84 unsigned int len; 85 unsigned int len;
85 86
86 if ((len = ntohl(*p++)) != NFS2_FHSIZE) { 87 if ((len = ntohl(*p++)) != NFS2_FHSIZE) {
87 printk(KERN_NOTICE 88 dprintk("lockd: bad fhandle size %d (should be %d)\n",
88 "lockd: bad fhandle size %d (should be %d)\n",
89 len, NFS2_FHSIZE); 89 len, NFS2_FHSIZE);
90 return NULL; 90 return NULL;
91 } 91 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 846fc1d639dd..43ff9397e6c6 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -64,8 +64,9 @@ nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c)
64 } 64 }
65 else 65 else
66 { 66 {
67 printk(KERN_NOTICE 67 dprintk("lockd: bad cookie size %d (only cookies under "
68 "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); 68 "%d bytes are supported.)\n",
69 len, NLM_MAXCOOKIELEN);
69 return NULL; 70 return NULL;
70 } 71 }
71 return p; 72 return p;
@@ -86,8 +87,7 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
86 memset(f->data, 0, sizeof(f->data)); 87 memset(f->data, 0, sizeof(f->data));
87 f->size = ntohl(*p++); 88 f->size = ntohl(*p++);
88 if (f->size > NFS_MAXFHSIZE) { 89 if (f->size > NFS_MAXFHSIZE) {
89 printk(KERN_NOTICE 90 dprintk("lockd: bad fhandle size %d (should be <=%d)\n",
90 "lockd: bad fhandle size %d (should be <=%d)\n",
91 f->size, NFS_MAXFHSIZE); 91 f->size, NFS_MAXFHSIZE);
92 return NULL; 92 return NULL;
93 } 93 }
diff --git a/fs/mpage.c b/fs/mpage.c
index c1698f2291aa..b1c3e5890508 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -39,14 +39,11 @@
39 * status of that page is hard. See end_buffer_async_read() for the details. 39 * status of that page is hard. See end_buffer_async_read() for the details.
40 * There is no point in duplicating all that complexity. 40 * There is no point in duplicating all that complexity.
41 */ 41 */
42static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err) 42static void mpage_end_io_read(struct bio *bio, int err)
43{ 43{
44 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 44 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
45 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 45 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
46 46
47 if (bio->bi_size)
48 return 1;
49
50 do { 47 do {
51 struct page *page = bvec->bv_page; 48 struct page *page = bvec->bv_page;
52 49
@@ -62,17 +59,13 @@ static int mpage_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
62 unlock_page(page); 59 unlock_page(page);
63 } while (bvec >= bio->bi_io_vec); 60 } while (bvec >= bio->bi_io_vec);
64 bio_put(bio); 61 bio_put(bio);
65 return 0;
66} 62}
67 63
68static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err) 64static void mpage_end_io_write(struct bio *bio, int err)
69{ 65{
70 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 66 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
71 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 67 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
72 68
73 if (bio->bi_size)
74 return 1;
75
76 do { 69 do {
77 struct page *page = bvec->bv_page; 70 struct page *page = bvec->bv_page;
78 71
@@ -87,7 +80,6 @@ static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
87 end_page_writeback(page); 80 end_page_writeback(page);
88 } while (bvec >= bio->bi_io_vec); 81 } while (bvec >= bio->bi_io_vec);
89 bio_put(bio); 82 bio_put(bio);
90 return 0;
91} 83}
92 84
93static struct bio *mpage_bio_submit(int rw, struct bio *bio) 85static struct bio *mpage_bio_submit(int rw, struct bio *bio)
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b55cb236cf74..df0f41e09885 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -16,4 +16,3 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
16 nfs4namespace.o 16 nfs4namespace.o
17nfs-$(CONFIG_NFS_DIRECTIO) += direct.o 17nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
18nfs-$(CONFIG_SYSCTL) += sysctl.o 18nfs-$(CONFIG_SYSCTL) += sysctl.o
19nfs-objs := $(nfs-y)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a204484072f3..a532ee12740a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -23,6 +23,8 @@
23#include <linux/sunrpc/clnt.h> 23#include <linux/sunrpc/clnt.h>
24#include <linux/sunrpc/stats.h> 24#include <linux/sunrpc/stats.h>
25#include <linux/sunrpc/metrics.h> 25#include <linux/sunrpc/metrics.h>
26#include <linux/sunrpc/xprtsock.h>
27#include <linux/sunrpc/xprtrdma.h>
26#include <linux/nfs_fs.h> 28#include <linux/nfs_fs.h>
27#include <linux/nfs_mount.h> 29#include <linux/nfs_mount.h>
28#include <linux/nfs4_mount.h> 30#include <linux/nfs4_mount.h>
@@ -340,7 +342,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
340 to->to_retries = 2; 342 to->to_retries = 2;
341 343
342 switch (proto) { 344 switch (proto) {
343 case IPPROTO_TCP: 345 case XPRT_TRANSPORT_TCP:
346 case XPRT_TRANSPORT_RDMA:
344 if (!to->to_initval) 347 if (!to->to_initval)
345 to->to_initval = 60 * HZ; 348 to->to_initval = 60 * HZ;
346 if (to->to_initval > NFS_MAX_TCP_TIMEOUT) 349 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
@@ -349,7 +352,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
349 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); 352 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
350 to->to_exponential = 0; 353 to->to_exponential = 0;
351 break; 354 break;
352 case IPPROTO_UDP: 355 case XPRT_TRANSPORT_UDP:
353 default: 356 default:
354 if (!to->to_initval) 357 if (!to->to_initval)
355 to->to_initval = 11 * HZ / 10; 358 to->to_initval = 11 * HZ / 10;
@@ -501,9 +504,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t
501/* 504/*
502 * Initialise an NFS2 or NFS3 client 505 * Initialise an NFS2 or NFS3 client
503 */ 506 */
504static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) 507static int nfs_init_client(struct nfs_client *clp,
508 const struct nfs_parsed_mount_data *data)
505{ 509{
506 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
507 int error; 510 int error;
508 511
509 if (clp->cl_cons_state == NFS_CS_READY) { 512 if (clp->cl_cons_state == NFS_CS_READY) {
@@ -522,8 +525,8 @@ static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *
522 * Create a client RPC handle for doing FSSTAT with UNIX auth only 525 * Create a client RPC handle for doing FSSTAT with UNIX auth only
523 * - RFC 2623, sec 2.3.2 526 * - RFC 2623, sec 2.3.2
524 */ 527 */
525 error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, 528 error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
526 RPC_AUTH_UNIX, 0); 529 data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
527 if (error < 0) 530 if (error < 0)
528 goto error; 531 goto error;
529 nfs_mark_client_ready(clp, NFS_CS_READY); 532 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -538,7 +541,8 @@ error:
538/* 541/*
539 * Create a version 2 or 3 client 542 * Create a version 2 or 3 client
540 */ 543 */
541static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) 544static int nfs_init_server(struct nfs_server *server,
545 const struct nfs_parsed_mount_data *data)
542{ 546{
543 struct nfs_client *clp; 547 struct nfs_client *clp;
544 int error, nfsvers = 2; 548 int error, nfsvers = 2;
@@ -551,7 +555,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
551#endif 555#endif
552 556
553 /* Allocate or find a client reference we can use */ 557 /* Allocate or find a client reference we can use */
554 clp = nfs_get_client(data->hostname, &data->addr, nfsvers); 558 clp = nfs_get_client(data->nfs_server.hostname,
559 &data->nfs_server.address, nfsvers);
555 if (IS_ERR(clp)) { 560 if (IS_ERR(clp)) {
556 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 561 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
557 return PTR_ERR(clp); 562 return PTR_ERR(clp);
@@ -581,7 +586,7 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
581 if (error < 0) 586 if (error < 0)
582 goto error; 587 goto error;
583 588
584 error = nfs_init_server_rpcclient(server, data->pseudoflavor); 589 error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
585 if (error < 0) 590 if (error < 0)
586 goto error; 591 goto error;
587 592
@@ -760,7 +765,7 @@ void nfs_free_server(struct nfs_server *server)
760 * Create a version 2 or 3 volume record 765 * Create a version 2 or 3 volume record
761 * - keyed on server and FSID 766 * - keyed on server and FSID
762 */ 767 */
763struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, 768struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
764 struct nfs_fh *mntfh) 769 struct nfs_fh *mntfh)
765{ 770{
766 struct nfs_server *server; 771 struct nfs_server *server;
@@ -906,7 +911,7 @@ error:
906 * Create a version 4 volume record 911 * Create a version 4 volume record
907 */ 912 */
908static int nfs4_init_server(struct nfs_server *server, 913static int nfs4_init_server(struct nfs_server *server,
909 const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) 914 const struct nfs_parsed_mount_data *data)
910{ 915{
911 int error; 916 int error;
912 917
@@ -926,7 +931,7 @@ static int nfs4_init_server(struct nfs_server *server,
926 server->acdirmin = data->acdirmin * HZ; 931 server->acdirmin = data->acdirmin * HZ;
927 server->acdirmax = data->acdirmax * HZ; 932 server->acdirmax = data->acdirmax * HZ;
928 933
929 error = nfs_init_server_rpcclient(server, authflavour); 934 error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
930 935
931 /* Done */ 936 /* Done */
932 dprintk("<-- nfs4_init_server() = %d\n", error); 937 dprintk("<-- nfs4_init_server() = %d\n", error);
@@ -937,12 +942,7 @@ static int nfs4_init_server(struct nfs_server *server,
937 * Create a version 4 volume record 942 * Create a version 4 volume record
938 * - keyed on server and FSID 943 * - keyed on server and FSID
939 */ 944 */
940struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, 945struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
941 const char *hostname,
942 const struct sockaddr_in *addr,
943 const char *mntpath,
944 const char *ip_addr,
945 rpc_authflavor_t authflavour,
946 struct nfs_fh *mntfh) 946 struct nfs_fh *mntfh)
947{ 947{
948 struct nfs_fattr fattr; 948 struct nfs_fattr fattr;
@@ -956,13 +956,18 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
956 return ERR_PTR(-ENOMEM); 956 return ERR_PTR(-ENOMEM);
957 957
958 /* Get a client record */ 958 /* Get a client record */
959 error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour, 959 error = nfs4_set_client(server,
960 data->proto, data->timeo, data->retrans); 960 data->nfs_server.hostname,
961 &data->nfs_server.address,
962 data->client_address,
963 data->auth_flavors[0],
964 data->nfs_server.protocol,
965 data->timeo, data->retrans);
961 if (error < 0) 966 if (error < 0)
962 goto error; 967 goto error;
963 968
964 /* set up the general RPC client */ 969 /* set up the general RPC client */
965 error = nfs4_init_server(server, data, authflavour); 970 error = nfs4_init_server(server, data);
966 if (error < 0) 971 if (error < 0)
967 goto error; 972 goto error;
968 973
@@ -971,7 +976,7 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
971 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 976 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
972 977
973 /* Probe the root fh to retrieve its FSID */ 978 /* Probe the root fh to retrieve its FSID */
974 error = nfs4_path_walk(server, mntfh, mntpath); 979 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
975 if (error < 0) 980 if (error < 0)
976 goto error; 981 goto error;
977 982
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c55a761c22bb..af8b235d405d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
52 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { 52 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
53 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 53 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
54 continue; 54 continue;
55 if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) 55 if (nfs_file_open_context(fl->fl_file) != ctx)
56 continue; 56 continue;
57 status = nfs4_lock_delegation_recall(state, fl); 57 status = nfs4_lock_delegation_recall(state, fl);
58 if (status >= 0) 58 if (status >= 0)
@@ -109,6 +109,7 @@ again:
109void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 109void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
110{ 110{
111 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 111 struct nfs_delegation *delegation = NFS_I(inode)->delegation;
112 struct rpc_cred *oldcred;
112 113
113 if (delegation == NULL) 114 if (delegation == NULL)
114 return; 115 return;
@@ -116,11 +117,12 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
116 sizeof(delegation->stateid.data)); 117 sizeof(delegation->stateid.data));
117 delegation->type = res->delegation_type; 118 delegation->type = res->delegation_type;
118 delegation->maxsize = res->maxsize; 119 delegation->maxsize = res->maxsize;
119 put_rpccred(cred); 120 oldcred = delegation->cred;
120 delegation->cred = get_rpccred(cred); 121 delegation->cred = get_rpccred(cred);
121 delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; 122 delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
122 NFS_I(inode)->delegation_state = delegation->type; 123 NFS_I(inode)->delegation_state = delegation->type;
123 smp_wmb(); 124 smp_wmb();
125 put_rpccred(oldcred);
124} 126}
125 127
126/* 128/*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e4a04d16b8b0..8ec7fbd8240c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -200,9 +200,6 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
200 desc->timestamp = timestamp; 200 desc->timestamp = timestamp;
201 desc->timestamp_valid = 1; 201 desc->timestamp_valid = 1;
202 SetPageUptodate(page); 202 SetPageUptodate(page);
203 spin_lock(&inode->i_lock);
204 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
205 spin_unlock(&inode->i_lock);
206 /* Ensure consistent page alignment of the data. 203 /* Ensure consistent page alignment of the data.
207 * Note: assumes we have exclusive access to this mapping either 204 * Note: assumes we have exclusive access to this mapping either
208 * through inode->i_mutex or some other mechanism. 205 * through inode->i_mutex or some other mechanism.
@@ -214,9 +211,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
214 unlock_page(page); 211 unlock_page(page);
215 return 0; 212 return 0;
216 error: 213 error:
217 SetPageError(page);
218 unlock_page(page); 214 unlock_page(page);
219 nfs_zap_caches(inode);
220 desc->error = error; 215 desc->error = error;
221 return -EIO; 216 return -EIO;
222} 217}
@@ -407,7 +402,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
407 struct file *file = desc->file; 402 struct file *file = desc->file;
408 struct nfs_entry *entry = desc->entry; 403 struct nfs_entry *entry = desc->entry;
409 struct dentry *dentry = NULL; 404 struct dentry *dentry = NULL;
410 unsigned long fileid; 405 u64 fileid;
411 int loop_count = 0, 406 int loop_count = 0,
412 res; 407 res;
413 408
@@ -418,7 +413,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
418 unsigned d_type = DT_UNKNOWN; 413 unsigned d_type = DT_UNKNOWN;
419 /* Note: entry->prev_cookie contains the cookie for 414 /* Note: entry->prev_cookie contains the cookie for
420 * retrieving the current dirent on the server */ 415 * retrieving the current dirent on the server */
421 fileid = nfs_fileid_to_ino_t(entry->ino); 416 fileid = entry->ino;
422 417
423 /* Get a dentry if we have one */ 418 /* Get a dentry if we have one */
424 if (dentry != NULL) 419 if (dentry != NULL)
@@ -428,11 +423,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
428 /* Use readdirplus info */ 423 /* Use readdirplus info */
429 if (dentry != NULL && dentry->d_inode != NULL) { 424 if (dentry != NULL && dentry->d_inode != NULL) {
430 d_type = dt_type(dentry->d_inode); 425 d_type = dt_type(dentry->d_inode);
431 fileid = dentry->d_inode->i_ino; 426 fileid = NFS_FILEID(dentry->d_inode);
432 } 427 }
433 428
434 res = filldir(dirent, entry->name, entry->len, 429 res = filldir(dirent, entry->name, entry->len,
435 file->f_pos, fileid, d_type); 430 file->f_pos, nfs_compat_user_ino64(fileid),
431 d_type);
436 if (res < 0) 432 if (res < 0)
437 break; 433 break;
438 file->f_pos++; 434 file->f_pos++;
@@ -490,9 +486,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
490 page, 486 page,
491 NFS_SERVER(inode)->dtsize, 487 NFS_SERVER(inode)->dtsize,
492 desc->plus); 488 desc->plus);
493 spin_lock(&inode->i_lock);
494 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
495 spin_unlock(&inode->i_lock);
496 desc->page = page; 489 desc->page = page;
497 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 490 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
498 if (desc->error >= 0) { 491 if (desc->error >= 0) {
@@ -558,7 +551,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
558 memset(desc, 0, sizeof(*desc)); 551 memset(desc, 0, sizeof(*desc));
559 552
560 desc->file = filp; 553 desc->file = filp;
561 desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; 554 desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
562 desc->decode = NFS_PROTO(inode)->decode_dirent; 555 desc->decode = NFS_PROTO(inode)->decode_dirent;
563 desc->plus = NFS_USE_READDIRPLUS(inode); 556 desc->plus = NFS_USE_READDIRPLUS(inode);
564 557
@@ -623,7 +616,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
623 } 616 }
624 if (offset != filp->f_pos) { 617 if (offset != filp->f_pos) {
625 filp->f_pos = offset; 618 filp->f_pos = offset;
626 ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; 619 nfs_file_open_context(filp)->dir_cookie = 0;
627 } 620 }
628out: 621out:
629 mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); 622 mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
@@ -650,36 +643,18 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
650 */ 643 */
651static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) 644static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
652{ 645{
653 unsigned long verf;
654
655 if (IS_ROOT(dentry)) 646 if (IS_ROOT(dentry))
656 return 1; 647 return 1;
657 verf = dentry->d_time; 648 if (!nfs_verify_change_attribute(dir, dentry->d_time))
658 if (nfs_caches_unstable(dir) 649 return 0;
659 || verf != NFS_I(dir)->cache_change_attribute) 650 /* Revalidate nfsi->cache_change_attribute before we declare a match */
651 if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
652 return 0;
653 if (!nfs_verify_change_attribute(dir, dentry->d_time))
660 return 0; 654 return 0;
661 return 1; 655 return 1;
662} 656}
663 657
664static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
665{
666 dentry->d_time = verf;
667}
668
669static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf)
670{
671 nfs_set_verifier(dentry, verf);
672}
673
674/*
675 * Whenever an NFS operation succeeds, we know that the dentry
676 * is valid, so we update the revalidation timestamp.
677 */
678static inline void nfs_renew_times(struct dentry * dentry)
679{
680 dentry->d_time = jiffies;
681}
682
683/* 658/*
684 * Return the intent data that applies to this particular path component 659 * Return the intent data that applies to this particular path component
685 * 660 *
@@ -695,6 +670,19 @@ static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigne
695} 670}
696 671
697/* 672/*
673 * Use intent information to check whether or not we're going to do
674 * an O_EXCL create using this path component.
675 */
676static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
677{
678 if (NFS_PROTO(dir)->version == 2)
679 return 0;
680 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
681 return 0;
682 return (nd->intent.open.flags & O_EXCL) != 0;
683}
684
685/*
698 * Inode and filehandle revalidation for lookups. 686 * Inode and filehandle revalidation for lookups.
699 * 687 *
700 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL, 688 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
@@ -717,6 +705,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
717 (S_ISREG(inode->i_mode) || 705 (S_ISREG(inode->i_mode) ||
718 S_ISDIR(inode->i_mode))) 706 S_ISDIR(inode->i_mode)))
719 goto out_force; 707 goto out_force;
708 return 0;
720 } 709 }
721 return nfs_revalidate_inode(server, inode); 710 return nfs_revalidate_inode(server, inode);
722out_force: 711out_force:
@@ -759,7 +748,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
759 int error; 748 int error;
760 struct nfs_fh fhandle; 749 struct nfs_fh fhandle;
761 struct nfs_fattr fattr; 750 struct nfs_fattr fattr;
762 unsigned long verifier;
763 751
764 parent = dget_parent(dentry); 752 parent = dget_parent(dentry);
765 lock_kernel(); 753 lock_kernel();
@@ -767,10 +755,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
767 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 755 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
768 inode = dentry->d_inode; 756 inode = dentry->d_inode;
769 757
770 /* Revalidate parent directory attribute cache */
771 if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
772 goto out_zap_parent;
773
774 if (!inode) { 758 if (!inode) {
775 if (nfs_neg_need_reval(dir, dentry, nd)) 759 if (nfs_neg_need_reval(dir, dentry, nd))
776 goto out_bad; 760 goto out_bad;
@@ -785,7 +769,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
785 } 769 }
786 770
787 /* Force a full look up iff the parent directory has changed */ 771 /* Force a full look up iff the parent directory has changed */
788 if (nfs_check_verifier(dir, dentry)) { 772 if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
789 if (nfs_lookup_verify_inode(inode, nd)) 773 if (nfs_lookup_verify_inode(inode, nd))
790 goto out_zap_parent; 774 goto out_zap_parent;
791 goto out_valid; 775 goto out_valid;
@@ -794,7 +778,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
794 if (NFS_STALE(inode)) 778 if (NFS_STALE(inode))
795 goto out_bad; 779 goto out_bad;
796 780
797 verifier = nfs_save_change_attribute(dir);
798 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 781 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
799 if (error) 782 if (error)
800 goto out_bad; 783 goto out_bad;
@@ -803,8 +786,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
803 if ((error = nfs_refresh_inode(inode, &fattr)) != 0) 786 if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
804 goto out_bad; 787 goto out_bad;
805 788
806 nfs_renew_times(dentry); 789 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
807 nfs_refresh_verifier(dentry, verifier);
808 out_valid: 790 out_valid:
809 unlock_kernel(); 791 unlock_kernel();
810 dput(parent); 792 dput(parent);
@@ -815,7 +797,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
815out_zap_parent: 797out_zap_parent:
816 nfs_zap_caches(dir); 798 nfs_zap_caches(dir);
817 out_bad: 799 out_bad:
818 NFS_CACHEINV(dir); 800 nfs_mark_for_revalidate(dir);
819 if (inode && S_ISDIR(inode->i_mode)) { 801 if (inode && S_ISDIR(inode->i_mode)) {
820 /* Purge readdir caches. */ 802 /* Purge readdir caches. */
821 nfs_zap_caches(inode); 803 nfs_zap_caches(inode);
@@ -872,8 +854,6 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
872 nfs_complete_unlink(dentry, inode); 854 nfs_complete_unlink(dentry, inode);
873 unlock_kernel(); 855 unlock_kernel();
874 } 856 }
875 /* When creating a negative dentry, we want to renew d_time */
876 nfs_renew_times(dentry);
877 iput(inode); 857 iput(inode);
878} 858}
879 859
@@ -883,30 +863,6 @@ struct dentry_operations nfs_dentry_operations = {
883 .d_iput = nfs_dentry_iput, 863 .d_iput = nfs_dentry_iput,
884}; 864};
885 865
886/*
887 * Use intent information to check whether or not we're going to do
888 * an O_EXCL create using this path component.
889 */
890static inline
891int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
892{
893 if (NFS_PROTO(dir)->version == 2)
894 return 0;
895 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0)
896 return 0;
897 return (nd->intent.open.flags & O_EXCL) != 0;
898}
899
900static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
901{
902 struct nfs_server *server = NFS_SERVER(dir);
903
904 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
905 /* Revalidate fsid using the parent directory */
906 return __nfs_revalidate_inode(server, dir);
907 return 0;
908}
909
910static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 866static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
911{ 867{
912 struct dentry *res; 868 struct dentry *res;
@@ -945,11 +901,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
945 res = ERR_PTR(error); 901 res = ERR_PTR(error);
946 goto out_unlock; 902 goto out_unlock;
947 } 903 }
948 error = nfs_reval_fsid(dir, &fattr);
949 if (error < 0) {
950 res = ERR_PTR(error);
951 goto out_unlock;
952 }
953 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); 904 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
954 res = (struct dentry *)inode; 905 res = (struct dentry *)inode;
955 if (IS_ERR(res)) 906 if (IS_ERR(res))
@@ -958,17 +909,10 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
958no_entry: 909no_entry:
959 res = d_materialise_unique(dentry, inode); 910 res = d_materialise_unique(dentry, inode);
960 if (res != NULL) { 911 if (res != NULL) {
961 struct dentry *parent;
962 if (IS_ERR(res)) 912 if (IS_ERR(res))
963 goto out_unlock; 913 goto out_unlock;
964 /* Was a directory renamed! */
965 parent = dget_parent(res);
966 if (!IS_ROOT(parent))
967 nfs_mark_for_revalidate(parent->d_inode);
968 dput(parent);
969 dentry = res; 914 dentry = res;
970 } 915 }
971 nfs_renew_times(dentry);
972 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 916 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
973out_unlock: 917out_unlock:
974 unlock_kernel(); 918 unlock_kernel();
@@ -1020,28 +964,16 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1020 } 964 }
1021 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 965 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
1022 966
1023 /* Let vfs_create() deal with O_EXCL */ 967 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
968 * the dentry. */
1024 if (nd->intent.open.flags & O_EXCL) { 969 if (nd->intent.open.flags & O_EXCL) {
1025 d_add(dentry, NULL); 970 d_instantiate(dentry, NULL);
1026 goto out; 971 goto out;
1027 } 972 }
1028 973
1029 /* Open the file on the server */ 974 /* Open the file on the server */
1030 lock_kernel(); 975 lock_kernel();
1031 /* Revalidate parent directory attribute cache */ 976 res = nfs4_atomic_open(dir, dentry, nd);
1032 error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
1033 if (error < 0) {
1034 res = ERR_PTR(error);
1035 unlock_kernel();
1036 goto out;
1037 }
1038
1039 if (nd->intent.open.flags & O_CREAT) {
1040 nfs_begin_data_update(dir);
1041 res = nfs4_atomic_open(dir, dentry, nd);
1042 nfs_end_data_update(dir);
1043 } else
1044 res = nfs4_atomic_open(dir, dentry, nd);
1045 unlock_kernel(); 977 unlock_kernel();
1046 if (IS_ERR(res)) { 978 if (IS_ERR(res)) {
1047 error = PTR_ERR(res); 979 error = PTR_ERR(res);
@@ -1063,8 +995,6 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1063 } 995 }
1064 } else if (res != NULL) 996 } else if (res != NULL)
1065 dentry = res; 997 dentry = res;
1066 nfs_renew_times(dentry);
1067 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1068out: 998out:
1069 return res; 999 return res;
1070no_open: 1000no_open:
@@ -1076,7 +1006,6 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1076 struct dentry *parent = NULL; 1006 struct dentry *parent = NULL;
1077 struct inode *inode = dentry->d_inode; 1007 struct inode *inode = dentry->d_inode;
1078 struct inode *dir; 1008 struct inode *dir;
1079 unsigned long verifier;
1080 int openflags, ret = 0; 1009 int openflags, ret = 0;
1081 1010
1082 parent = dget_parent(dentry); 1011 parent = dget_parent(dentry);
@@ -1086,8 +1015,12 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1086 /* We can't create new files in nfs_open_revalidate(), so we 1015 /* We can't create new files in nfs_open_revalidate(), so we
1087 * optimize away revalidation of negative dentries. 1016 * optimize away revalidation of negative dentries.
1088 */ 1017 */
1089 if (inode == NULL) 1018 if (inode == NULL) {
1019 if (!nfs_neg_need_reval(dir, dentry, nd))
1020 ret = 1;
1090 goto out; 1021 goto out;
1022 }
1023
1091 /* NFS only supports OPEN on regular files */ 1024 /* NFS only supports OPEN on regular files */
1092 if (!S_ISREG(inode->i_mode)) 1025 if (!S_ISREG(inode->i_mode))
1093 goto no_open; 1026 goto no_open;
@@ -1104,10 +1037,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1104 * change attribute *before* we do the RPC call. 1037 * change attribute *before* we do the RPC call.
1105 */ 1038 */
1106 lock_kernel(); 1039 lock_kernel();
1107 verifier = nfs_save_change_attribute(dir);
1108 ret = nfs4_open_revalidate(dir, dentry, openflags, nd); 1040 ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
1109 if (!ret)
1110 nfs_refresh_verifier(dentry, verifier);
1111 unlock_kernel(); 1041 unlock_kernel();
1112out: 1042out:
1113 dput(parent); 1043 dput(parent);
@@ -1133,6 +1063,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1133 .len = entry->len, 1063 .len = entry->len,
1134 }; 1064 };
1135 struct inode *inode; 1065 struct inode *inode;
1066 unsigned long verf = nfs_save_change_attribute(dir);
1136 1067
1137 switch (name.len) { 1068 switch (name.len) {
1138 case 2: 1069 case 2:
@@ -1143,6 +1074,14 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1143 if (name.name[0] == '.') 1074 if (name.name[0] == '.')
1144 return dget(parent); 1075 return dget(parent);
1145 } 1076 }
1077
1078 spin_lock(&dir->i_lock);
1079 if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) {
1080 spin_unlock(&dir->i_lock);
1081 return NULL;
1082 }
1083 spin_unlock(&dir->i_lock);
1084
1146 name.hash = full_name_hash(name.name, name.len); 1085 name.hash = full_name_hash(name.name, name.len);
1147 dentry = d_lookup(parent, &name); 1086 dentry = d_lookup(parent, &name);
1148 if (dentry != NULL) { 1087 if (dentry != NULL) {
@@ -1183,12 +1122,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1183 dentry = alias; 1122 dentry = alias;
1184 } 1123 }
1185 1124
1186 nfs_renew_times(dentry);
1187 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1188 return dentry;
1189out_renew: 1125out_renew:
1190 nfs_renew_times(dentry); 1126 nfs_set_verifier(dentry, verf);
1191 nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir));
1192 return dentry; 1127 return dentry;
1193} 1128}
1194 1129
@@ -1198,32 +1133,40 @@ out_renew:
1198int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, 1133int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1199 struct nfs_fattr *fattr) 1134 struct nfs_fattr *fattr)
1200{ 1135{
1136 struct dentry *parent = dget_parent(dentry);
1137 struct inode *dir = parent->d_inode;
1201 struct inode *inode; 1138 struct inode *inode;
1202 int error = -EACCES; 1139 int error = -EACCES;
1203 1140
1141 d_drop(dentry);
1142
1204 /* We may have been initialized further down */ 1143 /* We may have been initialized further down */
1205 if (dentry->d_inode) 1144 if (dentry->d_inode)
1206 return 0; 1145 goto out;
1207 if (fhandle->size == 0) { 1146 if (fhandle->size == 0) {
1208 struct inode *dir = dentry->d_parent->d_inode;
1209 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1147 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1210 if (error) 1148 if (error)
1211 return error; 1149 goto out_error;
1212 } 1150 }
1151 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1213 if (!(fattr->valid & NFS_ATTR_FATTR)) { 1152 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1214 struct nfs_server *server = NFS_SB(dentry->d_sb); 1153 struct nfs_server *server = NFS_SB(dentry->d_sb);
1215 error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); 1154 error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
1216 if (error < 0) 1155 if (error < 0)
1217 return error; 1156 goto out_error;
1218 } 1157 }
1219 inode = nfs_fhget(dentry->d_sb, fhandle, fattr); 1158 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1220 error = PTR_ERR(inode); 1159 error = PTR_ERR(inode);
1221 if (IS_ERR(inode)) 1160 if (IS_ERR(inode))
1222 return error; 1161 goto out_error;
1223 d_instantiate(dentry, inode); 1162 d_add(dentry, inode);
1224 if (d_unhashed(dentry)) 1163out:
1225 d_rehash(dentry); 1164 dput(parent);
1226 return 0; 1165 return 0;
1166out_error:
1167 nfs_mark_for_revalidate(dir);
1168 dput(parent);
1169 return error;
1227} 1170}
1228 1171
1229/* 1172/*
@@ -1249,13 +1192,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1249 open_flags = nd->intent.open.flags; 1192 open_flags = nd->intent.open.flags;
1250 1193
1251 lock_kernel(); 1194 lock_kernel();
1252 nfs_begin_data_update(dir);
1253 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); 1195 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
1254 nfs_end_data_update(dir);
1255 if (error != 0) 1196 if (error != 0)
1256 goto out_err; 1197 goto out_err;
1257 nfs_renew_times(dentry);
1258 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1259 unlock_kernel(); 1198 unlock_kernel();
1260 return 0; 1199 return 0;
1261out_err: 1200out_err:
@@ -1283,13 +1222,9 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1283 attr.ia_valid = ATTR_MODE; 1222 attr.ia_valid = ATTR_MODE;
1284 1223
1285 lock_kernel(); 1224 lock_kernel();
1286 nfs_begin_data_update(dir);
1287 status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); 1225 status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
1288 nfs_end_data_update(dir);
1289 if (status != 0) 1226 if (status != 0)
1290 goto out_err; 1227 goto out_err;
1291 nfs_renew_times(dentry);
1292 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1293 unlock_kernel(); 1228 unlock_kernel();
1294 return 0; 1229 return 0;
1295out_err: 1230out_err:
@@ -1313,13 +1248,9 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1313 attr.ia_mode = mode | S_IFDIR; 1248 attr.ia_mode = mode | S_IFDIR;
1314 1249
1315 lock_kernel(); 1250 lock_kernel();
1316 nfs_begin_data_update(dir);
1317 error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); 1251 error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
1318 nfs_end_data_update(dir);
1319 if (error != 0) 1252 if (error != 0)
1320 goto out_err; 1253 goto out_err;
1321 nfs_renew_times(dentry);
1322 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1323 unlock_kernel(); 1254 unlock_kernel();
1324 return 0; 1255 return 0;
1325out_err: 1256out_err:
@@ -1336,12 +1267,10 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1336 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1267 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1337 1268
1338 lock_kernel(); 1269 lock_kernel();
1339 nfs_begin_data_update(dir);
1340 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); 1270 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
1341 /* Ensure the VFS deletes this inode */ 1271 /* Ensure the VFS deletes this inode */
1342 if (error == 0 && dentry->d_inode != NULL) 1272 if (error == 0 && dentry->d_inode != NULL)
1343 clear_nlink(dentry->d_inode); 1273 clear_nlink(dentry->d_inode);
1344 nfs_end_data_update(dir);
1345 unlock_kernel(); 1274 unlock_kernel();
1346 1275
1347 return error; 1276 return error;
@@ -1350,9 +1279,9 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1350static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) 1279static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1351{ 1280{
1352 static unsigned int sillycounter; 1281 static unsigned int sillycounter;
1353 const int i_inosize = sizeof(dir->i_ino)*2; 1282 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
1354 const int countersize = sizeof(sillycounter)*2; 1283 const int countersize = sizeof(sillycounter)*2;
1355 const int slen = sizeof(".nfs") + i_inosize + countersize - 1; 1284 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
1356 char silly[slen+1]; 1285 char silly[slen+1];
1357 struct qstr qsilly; 1286 struct qstr qsilly;
1358 struct dentry *sdentry; 1287 struct dentry *sdentry;
@@ -1370,8 +1299,9 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1370 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) 1299 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1371 goto out; 1300 goto out;
1372 1301
1373 sprintf(silly, ".nfs%*.*lx", 1302 sprintf(silly, ".nfs%*.*Lx",
1374 i_inosize, i_inosize, dentry->d_inode->i_ino); 1303 fileidsize, fileidsize,
1304 (unsigned long long)NFS_FILEID(dentry->d_inode));
1375 1305
1376 /* Return delegation in anticipation of the rename */ 1306 /* Return delegation in anticipation of the rename */
1377 nfs_inode_return_delegation(dentry->d_inode); 1307 nfs_inode_return_delegation(dentry->d_inode);
@@ -1398,19 +1328,14 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1398 1328
1399 qsilly.name = silly; 1329 qsilly.name = silly;
1400 qsilly.len = strlen(silly); 1330 qsilly.len = strlen(silly);
1401 nfs_begin_data_update(dir);
1402 if (dentry->d_inode) { 1331 if (dentry->d_inode) {
1403 nfs_begin_data_update(dentry->d_inode);
1404 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, 1332 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1405 dir, &qsilly); 1333 dir, &qsilly);
1406 nfs_mark_for_revalidate(dentry->d_inode); 1334 nfs_mark_for_revalidate(dentry->d_inode);
1407 nfs_end_data_update(dentry->d_inode);
1408 } else 1335 } else
1409 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, 1336 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1410 dir, &qsilly); 1337 dir, &qsilly);
1411 nfs_end_data_update(dir);
1412 if (!error) { 1338 if (!error) {
1413 nfs_renew_times(dentry);
1414 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1339 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1415 d_move(dentry, sdentry); 1340 d_move(dentry, sdentry);
1416 error = nfs_async_unlink(dir, dentry); 1341 error = nfs_async_unlink(dir, dentry);
@@ -1443,19 +1368,15 @@ static int nfs_safe_remove(struct dentry *dentry)
1443 goto out; 1368 goto out;
1444 } 1369 }
1445 1370
1446 nfs_begin_data_update(dir);
1447 if (inode != NULL) { 1371 if (inode != NULL) {
1448 nfs_inode_return_delegation(inode); 1372 nfs_inode_return_delegation(inode);
1449 nfs_begin_data_update(inode);
1450 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); 1373 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
1451 /* The VFS may want to delete this inode */ 1374 /* The VFS may want to delete this inode */
1452 if (error == 0) 1375 if (error == 0)
1453 drop_nlink(inode); 1376 drop_nlink(inode);
1454 nfs_mark_for_revalidate(inode); 1377 nfs_mark_for_revalidate(inode);
1455 nfs_end_data_update(inode);
1456 } else 1378 } else
1457 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); 1379 error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
1458 nfs_end_data_update(dir);
1459out: 1380out:
1460 return error; 1381 return error;
1461} 1382}
@@ -1493,7 +1414,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1493 spin_unlock(&dcache_lock); 1414 spin_unlock(&dcache_lock);
1494 error = nfs_safe_remove(dentry); 1415 error = nfs_safe_remove(dentry);
1495 if (!error) { 1416 if (!error) {
1496 nfs_renew_times(dentry);
1497 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1417 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1498 } else if (need_rehash) 1418 } else if (need_rehash)
1499 d_rehash(dentry); 1419 d_rehash(dentry);
@@ -1548,9 +1468,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
1548 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); 1468 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
1549 kunmap_atomic(kaddr, KM_USER0); 1469 kunmap_atomic(kaddr, KM_USER0);
1550 1470
1551 nfs_begin_data_update(dir);
1552 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); 1471 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1553 nfs_end_data_update(dir);
1554 if (error != 0) { 1472 if (error != 0) {
1555 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", 1473 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
1556 dir->i_sb->s_id, dir->i_ino, 1474 dir->i_sb->s_id, dir->i_ino,
@@ -1590,15 +1508,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1590 dentry->d_parent->d_name.name, dentry->d_name.name); 1508 dentry->d_parent->d_name.name, dentry->d_name.name);
1591 1509
1592 lock_kernel(); 1510 lock_kernel();
1593 nfs_begin_data_update(dir); 1511 d_drop(dentry);
1594 nfs_begin_data_update(inode);
1595 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); 1512 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
1596 if (error == 0) { 1513 if (error == 0) {
1597 atomic_inc(&inode->i_count); 1514 atomic_inc(&inode->i_count);
1598 d_instantiate(dentry, inode); 1515 d_add(dentry, inode);
1599 } 1516 }
1600 nfs_end_data_update(inode);
1601 nfs_end_data_update(dir);
1602 unlock_kernel(); 1517 unlock_kernel();
1603 return error; 1518 return error;
1604} 1519}
@@ -1701,22 +1616,16 @@ go_ahead:
1701 d_delete(new_dentry); 1616 d_delete(new_dentry);
1702 } 1617 }
1703 1618
1704 nfs_begin_data_update(old_dir);
1705 nfs_begin_data_update(new_dir);
1706 nfs_begin_data_update(old_inode);
1707 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, 1619 error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
1708 new_dir, &new_dentry->d_name); 1620 new_dir, &new_dentry->d_name);
1709 nfs_mark_for_revalidate(old_inode); 1621 nfs_mark_for_revalidate(old_inode);
1710 nfs_end_data_update(old_inode);
1711 nfs_end_data_update(new_dir);
1712 nfs_end_data_update(old_dir);
1713out: 1622out:
1714 if (rehash) 1623 if (rehash)
1715 d_rehash(rehash); 1624 d_rehash(rehash);
1716 if (!error) { 1625 if (!error) {
1717 d_move(old_dentry, new_dentry); 1626 d_move(old_dentry, new_dentry);
1718 nfs_renew_times(new_dentry); 1627 nfs_set_verifier(new_dentry,
1719 nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir)); 1628 nfs_save_change_attribute(new_dir));
1720 } 1629 }
1721 1630
1722 /* new dentry created? */ 1631 /* new dentry created? */
@@ -1842,7 +1751,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st
1842 return NULL; 1751 return NULL;
1843} 1752}
1844 1753
1845int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) 1754static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
1846{ 1755{
1847 struct nfs_inode *nfsi = NFS_I(inode); 1756 struct nfs_inode *nfsi = NFS_I(inode);
1848 struct nfs_access_entry *cache; 1757 struct nfs_access_entry *cache;
@@ -1854,7 +1763,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
1854 cache = nfs_access_search_rbtree(inode, cred); 1763 cache = nfs_access_search_rbtree(inode, cred);
1855 if (cache == NULL) 1764 if (cache == NULL)
1856 goto out; 1765 goto out;
1857 if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) 1766 if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
1858 goto out_stale; 1767 goto out_stale;
1859 res->jiffies = cache->jiffies; 1768 res->jiffies = cache->jiffies;
1860 res->cred = cache->cred; 1769 res->cred = cache->cred;
@@ -1909,7 +1818,7 @@ found:
1909 nfs_access_free_entry(entry); 1818 nfs_access_free_entry(entry);
1910} 1819}
1911 1820
1912void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) 1821static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1913{ 1822{
1914 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); 1823 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
1915 if (cache == NULL) 1824 if (cache == NULL)
@@ -1957,6 +1866,24 @@ out:
1957 return -EACCES; 1866 return -EACCES;
1958} 1867}
1959 1868
1869static int nfs_open_permission_mask(int openflags)
1870{
1871 int mask = 0;
1872
1873 if (openflags & FMODE_READ)
1874 mask |= MAY_READ;
1875 if (openflags & FMODE_WRITE)
1876 mask |= MAY_WRITE;
1877 if (openflags & FMODE_EXEC)
1878 mask |= MAY_EXEC;
1879 return mask;
1880}
1881
1882int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
1883{
1884 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
1885}
1886
1960int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) 1887int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1961{ 1888{
1962 struct rpc_cred *cred; 1889 struct rpc_cred *cred;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fcf4d384610e..32fe97211eea 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
368 return -ENOMEM; 368 return -ENOMEM;
369 369
370 dreq->inode = inode; 370 dreq->inode = inode;
371 dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); 371 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
372 if (!is_sync_kiocb(iocb)) 372 if (!is_sync_kiocb(iocb))
373 dreq->iocb = iocb; 373 dreq->iocb = iocb;
374 374
@@ -510,7 +510,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
510 nfs_direct_write_reschedule(dreq); 510 nfs_direct_write_reschedule(dreq);
511 break; 511 break;
512 default: 512 default:
513 nfs_end_data_update(inode);
514 if (dreq->commit_data != NULL) 513 if (dreq->commit_data != NULL)
515 nfs_commit_free(dreq->commit_data); 514 nfs_commit_free(dreq->commit_data);
516 nfs_direct_free_writedata(dreq); 515 nfs_direct_free_writedata(dreq);
@@ -533,7 +532,6 @@ static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
533 532
534static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 533static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
535{ 534{
536 nfs_end_data_update(inode);
537 nfs_direct_free_writedata(dreq); 535 nfs_direct_free_writedata(dreq);
538 nfs_zap_mapping(inode, inode->i_mapping); 536 nfs_zap_mapping(inode, inode->i_mapping);
539 nfs_direct_complete(dreq); 537 nfs_direct_complete(dreq);
@@ -718,14 +716,12 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
718 sync = FLUSH_STABLE; 716 sync = FLUSH_STABLE;
719 717
720 dreq->inode = inode; 718 dreq->inode = inode;
721 dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); 719 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
722 if (!is_sync_kiocb(iocb)) 720 if (!is_sync_kiocb(iocb))
723 dreq->iocb = iocb; 721 dreq->iocb = iocb;
724 722
725 nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); 723 nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
726 724
727 nfs_begin_data_update(inode);
728
729 rpc_clnt_sigmask(clnt, &oldset); 725 rpc_clnt_sigmask(clnt, &oldset);
730 result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); 726 result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
731 if (!result) 727 if (!result)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9c98ccbf9de0..08c7c7387fce 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -33,6 +33,7 @@
33#include <asm/system.h> 33#include <asm/system.h>
34 34
35#include "delegation.h" 35#include "delegation.h"
36#include "internal.h"
36#include "iostat.h" 37#include "iostat.h"
37 38
38#define NFSDBG_FACILITY NFSDBG_FILE 39#define NFSDBG_FACILITY NFSDBG_FILE
@@ -55,6 +56,8 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
55static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); 56static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
56static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); 57static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
57 58
59static struct vm_operations_struct nfs_file_vm_ops;
60
58const struct file_operations nfs_file_operations = { 61const struct file_operations nfs_file_operations = {
59 .llseek = nfs_file_llseek, 62 .llseek = nfs_file_llseek,
60 .read = do_sync_read, 63 .read = do_sync_read,
@@ -174,13 +177,38 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
174} 177}
175 178
176/* 179/*
180 * Helper for nfs_file_flush() and nfs_fsync()
181 *
182 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
183 * disk, but it retrieves and clears ctx->error after synching, despite
184 * the two being set at the same time in nfs_context_set_write_error().
185 * This is because the former is used to notify the _next_ call to
186 * nfs_file_write() that a write error occurred, and hence cause it to
187 * fall back to doing a synchronous write.
188 */
189static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
190{
191 int have_error, status;
192 int ret = 0;
193
194 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
195 status = nfs_wb_all(inode);
196 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
197 if (have_error)
198 ret = xchg(&ctx->error, 0);
199 if (!ret)
200 ret = status;
201 return ret;
202}
203
204/*
177 * Flush all dirty pages, and check for write errors. 205 * Flush all dirty pages, and check for write errors.
178 * 206 *
179 */ 207 */
180static int 208static int
181nfs_file_flush(struct file *file, fl_owner_t id) 209nfs_file_flush(struct file *file, fl_owner_t id)
182{ 210{
183 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 211 struct nfs_open_context *ctx = nfs_file_open_context(file);
184 struct inode *inode = file->f_path.dentry->d_inode; 212 struct inode *inode = file->f_path.dentry->d_inode;
185 int status; 213 int status;
186 214
@@ -189,16 +217,11 @@ nfs_file_flush(struct file *file, fl_owner_t id)
189 if ((file->f_mode & FMODE_WRITE) == 0) 217 if ((file->f_mode & FMODE_WRITE) == 0)
190 return 0; 218 return 0;
191 nfs_inc_stats(inode, NFSIOS_VFSFLUSH); 219 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
192 lock_kernel(); 220
193 /* Ensure that data+attribute caches are up to date after close() */ 221 /* Ensure that data+attribute caches are up to date after close() */
194 status = nfs_wb_all(inode); 222 status = nfs_do_fsync(ctx, inode);
195 if (!status) { 223 if (!status)
196 status = ctx->error; 224 nfs_revalidate_inode(NFS_SERVER(inode), inode);
197 ctx->error = 0;
198 if (!status)
199 nfs_revalidate_inode(NFS_SERVER(inode), inode);
200 }
201 unlock_kernel();
202 return status; 225 return status;
203} 226}
204 227
@@ -257,8 +280,11 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
257 dentry->d_parent->d_name.name, dentry->d_name.name); 280 dentry->d_parent->d_name.name, dentry->d_name.name);
258 281
259 status = nfs_revalidate_mapping(inode, file->f_mapping); 282 status = nfs_revalidate_mapping(inode, file->f_mapping);
260 if (!status) 283 if (!status) {
261 status = generic_file_mmap(file, vma); 284 vma->vm_ops = &nfs_file_vm_ops;
285 vma->vm_flags |= VM_CAN_NONLINEAR;
286 file_accessed(file);
287 }
262 return status; 288 return status;
263} 289}
264 290
@@ -270,21 +296,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
270static int 296static int
271nfs_fsync(struct file *file, struct dentry *dentry, int datasync) 297nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
272{ 298{
273 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 299 struct nfs_open_context *ctx = nfs_file_open_context(file);
274 struct inode *inode = dentry->d_inode; 300 struct inode *inode = dentry->d_inode;
275 int status;
276 301
277 dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 302 dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
278 303
279 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 304 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
280 lock_kernel(); 305 return nfs_do_fsync(ctx, inode);
281 status = nfs_wb_all(inode);
282 if (!status) {
283 status = ctx->error;
284 ctx->error = 0;
285 }
286 unlock_kernel();
287 return status;
288} 306}
289 307
290/* 308/*
@@ -333,7 +351,7 @@ static int nfs_launder_page(struct page *page)
333const struct address_space_operations nfs_file_aops = { 351const struct address_space_operations nfs_file_aops = {
334 .readpage = nfs_readpage, 352 .readpage = nfs_readpage,
335 .readpages = nfs_readpages, 353 .readpages = nfs_readpages,
336 .set_page_dirty = nfs_set_page_dirty, 354 .set_page_dirty = __set_page_dirty_nobuffers,
337 .writepage = nfs_writepage, 355 .writepage = nfs_writepage,
338 .writepages = nfs_writepages, 356 .writepages = nfs_writepages,
339 .prepare_write = nfs_prepare_write, 357 .prepare_write = nfs_prepare_write,
@@ -346,6 +364,43 @@ const struct address_space_operations nfs_file_aops = {
346 .launder_page = nfs_launder_page, 364 .launder_page = nfs_launder_page,
347}; 365};
348 366
367static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
368{
369 struct file *filp = vma->vm_file;
370 unsigned pagelen;
371 int ret = -EINVAL;
372
373 lock_page(page);
374 if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
375 goto out_unlock;
376 pagelen = nfs_page_length(page);
377 if (pagelen == 0)
378 goto out_unlock;
379 ret = nfs_prepare_write(filp, page, 0, pagelen);
380 if (!ret)
381 ret = nfs_commit_write(filp, page, 0, pagelen);
382out_unlock:
383 unlock_page(page);
384 return ret;
385}
386
387static struct vm_operations_struct nfs_file_vm_ops = {
388 .fault = filemap_fault,
389 .page_mkwrite = nfs_vm_page_mkwrite,
390};
391
392static int nfs_need_sync_write(struct file *filp, struct inode *inode)
393{
394 struct nfs_open_context *ctx;
395
396 if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
397 return 1;
398 ctx = nfs_file_open_context(filp);
399 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
400 return 1;
401 return 0;
402}
403
349static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, 404static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
350 unsigned long nr_segs, loff_t pos) 405 unsigned long nr_segs, loff_t pos)
351{ 406{
@@ -382,8 +437,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
382 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); 437 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
383 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 438 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
384 /* Return error values for O_SYNC and IS_SYNC() */ 439 /* Return error values for O_SYNC and IS_SYNC() */
385 if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { 440 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
386 int err = nfs_fsync(iocb->ki_filp, dentry, 1); 441 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
387 if (err < 0) 442 if (err < 0)
388 result = err; 443 result = err;
389 } 444 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 71a49c3acabd..035c769b715e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -49,6 +49,11 @@
49 49
50#define NFSDBG_FACILITY NFSDBG_VFS 50#define NFSDBG_FACILITY NFSDBG_VFS
51 51
52#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1
53
54/* Default is to see 64-bit inode numbers */
55static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
56
52static void nfs_invalidate_inode(struct inode *); 57static void nfs_invalidate_inode(struct inode *);
53static int nfs_update_inode(struct inode *, struct nfs_fattr *); 58static int nfs_update_inode(struct inode *, struct nfs_fattr *);
54 59
@@ -62,6 +67,25 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
62 return nfs_fileid_to_ino_t(fattr->fileid); 67 return nfs_fileid_to_ino_t(fattr->fileid);
63} 68}
64 69
70/**
71 * nfs_compat_user_ino64 - returns the user-visible inode number
72 * @fileid: 64-bit fileid
73 *
74 * This function returns a 32-bit inode number if the boot parameter
75 * nfs.enable_ino64 is zero.
76 */
77u64 nfs_compat_user_ino64(u64 fileid)
78{
79 int ino;
80
81 if (enable_ino64)
82 return fileid;
83 ino = fileid;
84 if (sizeof(ino) < sizeof(fileid))
85 ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8;
86 return ino;
87}
88
65int nfs_write_inode(struct inode *inode, int sync) 89int nfs_write_inode(struct inode *inode, int sync)
66{ 90{
67 int ret; 91 int ret;
@@ -85,7 +109,6 @@ void nfs_clear_inode(struct inode *inode)
85 */ 109 */
86 BUG_ON(nfs_have_writebacks(inode)); 110 BUG_ON(nfs_have_writebacks(inode));
87 BUG_ON(!list_empty(&NFS_I(inode)->open_files)); 111 BUG_ON(!list_empty(&NFS_I(inode)->open_files));
88 BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
89 nfs_zap_acl_cache(inode); 112 nfs_zap_acl_cache(inode);
90 nfs_access_zap_cache(inode); 113 nfs_access_zap_cache(inode);
91} 114}
@@ -118,8 +141,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
118 141
119 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); 142 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
120 143
121 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); 144 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
122 NFS_ATTRTIMEO_UPDATE(inode) = jiffies; 145 nfsi->attrtimeo_timestamp = jiffies;
123 146
124 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); 147 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
125 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) 148 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
@@ -156,6 +179,13 @@ static void nfs_zap_acl_cache(struct inode *inode)
156 spin_unlock(&inode->i_lock); 179 spin_unlock(&inode->i_lock);
157} 180}
158 181
182void nfs_invalidate_atime(struct inode *inode)
183{
184 spin_lock(&inode->i_lock);
185 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
186 spin_unlock(&inode->i_lock);
187}
188
159/* 189/*
160 * Invalidate, but do not unhash, the inode. 190 * Invalidate, but do not unhash, the inode.
161 * NB: must be called with inode->i_lock held! 191 * NB: must be called with inode->i_lock held!
@@ -338,7 +368,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
338 return 0; 368 return 0;
339 369
340 lock_kernel(); 370 lock_kernel();
341 nfs_begin_data_update(inode);
342 /* Write all dirty data */ 371 /* Write all dirty data */
343 if (S_ISREG(inode->i_mode)) { 372 if (S_ISREG(inode->i_mode)) {
344 filemap_write_and_wait(inode->i_mapping); 373 filemap_write_and_wait(inode->i_mapping);
@@ -352,7 +381,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
352 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); 381 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
353 if (error == 0) 382 if (error == 0)
354 nfs_refresh_inode(inode, &fattr); 383 nfs_refresh_inode(inode, &fattr);
355 nfs_end_data_update(inode);
356 unlock_kernel(); 384 unlock_kernel();
357 return error; 385 return error;
358} 386}
@@ -431,7 +459,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
431 459
432 /* Flush out writes to the server in order to update c/mtime */ 460 /* Flush out writes to the server in order to update c/mtime */
433 if (S_ISREG(inode->i_mode)) 461 if (S_ISREG(inode->i_mode))
434 nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); 462 nfs_wb_nocommit(inode);
435 463
436 /* 464 /*
437 * We may force a getattr if the user cares about atime. 465 * We may force a getattr if the user cares about atime.
@@ -450,8 +478,10 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
450 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 478 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
451 else 479 else
452 err = nfs_revalidate_inode(NFS_SERVER(inode), inode); 480 err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
453 if (!err) 481 if (!err) {
454 generic_fillattr(inode, stat); 482 generic_fillattr(inode, stat);
483 stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
484 }
455 return err; 485 return err;
456} 486}
457 487
@@ -536,7 +566,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
536static void nfs_file_clear_open_context(struct file *filp) 566static void nfs_file_clear_open_context(struct file *filp)
537{ 567{
538 struct inode *inode = filp->f_path.dentry->d_inode; 568 struct inode *inode = filp->f_path.dentry->d_inode;
539 struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; 569 struct nfs_open_context *ctx = nfs_file_open_context(filp);
540 570
541 if (ctx) { 571 if (ctx) {
542 filp->private_data = NULL; 572 filp->private_data = NULL;
@@ -598,16 +628,10 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
598 status = nfs_wait_on_inode(inode); 628 status = nfs_wait_on_inode(inode);
599 if (status < 0) 629 if (status < 0)
600 goto out; 630 goto out;
601 if (NFS_STALE(inode)) { 631
602 status = -ESTALE; 632 status = -ESTALE;
603 /* Do we trust the cached ESTALE? */ 633 if (NFS_STALE(inode))
604 if (NFS_ATTRTIMEO(inode) != 0) { 634 goto out;
605 if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) {
606 /* no */
607 } else
608 goto out;
609 }
610 }
611 635
612 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); 636 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
613 if (status != 0) { 637 if (status != 0) {
@@ -654,7 +678,7 @@ int nfs_attribute_timeout(struct inode *inode)
654 678
655 if (nfs_have_delegation(inode, FMODE_READ)) 679 if (nfs_have_delegation(inode, FMODE_READ))
656 return 0; 680 return 0;
657 return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); 681 return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
658} 682}
659 683
660/** 684/**
@@ -683,11 +707,8 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
683 } 707 }
684 spin_lock(&inode->i_lock); 708 spin_lock(&inode->i_lock);
685 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; 709 nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
686 if (S_ISDIR(inode->i_mode)) { 710 if (S_ISDIR(inode->i_mode))
687 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 711 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
688 /* This ensures we revalidate child dentries */
689 nfsi->cache_change_attribute = jiffies;
690 }
691 spin_unlock(&inode->i_lock); 712 spin_unlock(&inode->i_lock);
692 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); 713 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
693 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", 714 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
@@ -756,56 +777,27 @@ out:
756 return ret; 777 return ret;
757} 778}
758 779
759/**
760 * nfs_begin_data_update
761 * @inode - pointer to inode
762 * Declare that a set of operations will update file data on the server
763 */
764void nfs_begin_data_update(struct inode *inode)
765{
766 atomic_inc(&NFS_I(inode)->data_updates);
767}
768
769/**
770 * nfs_end_data_update
771 * @inode - pointer to inode
772 * Declare end of the operations that will update file data
773 * This will mark the inode as immediately needing revalidation
774 * of its attribute cache.
775 */
776void nfs_end_data_update(struct inode *inode)
777{
778 struct nfs_inode *nfsi = NFS_I(inode);
779
780 /* Directories: invalidate page cache */
781 if (S_ISDIR(inode->i_mode)) {
782 spin_lock(&inode->i_lock);
783 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
784 spin_unlock(&inode->i_lock);
785 }
786 nfsi->cache_change_attribute = jiffies;
787 atomic_dec(&nfsi->data_updates);
788}
789
790static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 780static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
791{ 781{
792 struct nfs_inode *nfsi = NFS_I(inode); 782 struct nfs_inode *nfsi = NFS_I(inode);
793 unsigned long now = jiffies;
794 783
784 if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 &&
785 nfsi->change_attr == fattr->pre_change_attr) {
786 nfsi->change_attr = fattr->change_attr;
787 if (S_ISDIR(inode->i_mode))
788 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
789 }
795 /* If we have atomic WCC data, we may update some attributes */ 790 /* If we have atomic WCC data, we may update some attributes */
796 if ((fattr->valid & NFS_ATTR_WCC) != 0) { 791 if ((fattr->valid & NFS_ATTR_WCC) != 0) {
797 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { 792 if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
798 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 793 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
799 nfsi->cache_change_attribute = now;
800 }
801 if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 794 if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
802 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 795 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
803 nfsi->cache_change_attribute = now; 796 if (S_ISDIR(inode->i_mode))
797 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
804 } 798 }
805 if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { 799 if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
806 inode->i_size = fattr->size; 800 inode->i_size = fattr->size;
807 nfsi->cache_change_attribute = now;
808 }
809 } 801 }
810} 802}
811 803
@@ -822,7 +814,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
822{ 814{
823 struct nfs_inode *nfsi = NFS_I(inode); 815 struct nfs_inode *nfsi = NFS_I(inode);
824 loff_t cur_size, new_isize; 816 loff_t cur_size, new_isize;
825 int data_unstable; 817 unsigned long invalid = 0;
826 818
827 819
828 /* Has the inode gone and changed behind our back? */ 820 /* Has the inode gone and changed behind our back? */
@@ -831,37 +823,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
831 return -EIO; 823 return -EIO;
832 } 824 }
833 825
834 /* Are we in the process of updating data on the server? */
835 data_unstable = nfs_caches_unstable(inode);
836
837 /* Do atomic weak cache consistency updates */ 826 /* Do atomic weak cache consistency updates */
838 nfs_wcc_update_inode(inode, fattr); 827 nfs_wcc_update_inode(inode, fattr);
839 828
840 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 829 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
841 nfsi->change_attr != fattr->change_attr) 830 nfsi->change_attr != fattr->change_attr)
842 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 831 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
843 832
844 /* Verify a few of the more important attributes */ 833 /* Verify a few of the more important attributes */
845 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) 834 if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
846 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 835 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
847 836
848 cur_size = i_size_read(inode); 837 cur_size = i_size_read(inode);
849 new_isize = nfs_size_to_loff_t(fattr->size); 838 new_isize = nfs_size_to_loff_t(fattr->size);
850 if (cur_size != new_isize && nfsi->npages == 0) 839 if (cur_size != new_isize && nfsi->npages == 0)
851 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 840 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
852 841
853 /* Have any file permissions changed? */ 842 /* Have any file permissions changed? */
854 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) 843 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
855 || inode->i_uid != fattr->uid 844 || inode->i_uid != fattr->uid
856 || inode->i_gid != fattr->gid) 845 || inode->i_gid != fattr->gid)
857 nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 846 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
858 847
859 /* Has the link count changed? */ 848 /* Has the link count changed? */
860 if (inode->i_nlink != fattr->nlink) 849 if (inode->i_nlink != fattr->nlink)
861 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 850 invalid |= NFS_INO_INVALID_ATTR;
862 851
863 if (!timespec_equal(&inode->i_atime, &fattr->atime)) 852 if (!timespec_equal(&inode->i_atime, &fattr->atime))
864 nfsi->cache_validity |= NFS_INO_INVALID_ATIME; 853 invalid |= NFS_INO_INVALID_ATIME;
854
855 if (invalid != 0)
856 nfsi->cache_validity |= invalid;
857 else
858 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
859 | NFS_INO_INVALID_ATIME
860 | NFS_INO_REVAL_PAGECACHE);
865 861
866 nfsi->read_cache_jiffies = fattr->time_start; 862 nfsi->read_cache_jiffies = fattr->time_start;
867 return 0; 863 return 0;
@@ -911,17 +907,41 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
911int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) 907int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
912{ 908{
913 struct nfs_inode *nfsi = NFS_I(inode); 909 struct nfs_inode *nfsi = NFS_I(inode);
914 int status = 0;
915 910
916 spin_lock(&inode->i_lock); 911 spin_lock(&inode->i_lock);
917 if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { 912 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
918 nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 913 if (S_ISDIR(inode->i_mode))
919 goto out; 914 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
920 }
921 status = nfs_update_inode(inode, fattr);
922out:
923 spin_unlock(&inode->i_lock); 915 spin_unlock(&inode->i_lock);
924 return status; 916 return nfs_refresh_inode(inode, fattr);
917}
918
919/**
920 * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
921 * @inode - pointer to inode
922 * @fattr - updated attributes
923 *
924 * After an operation that has changed the inode metadata, mark the
925 * attribute cache as being invalid, then try to update it. Fake up
926 * weak cache consistency data, if none exist.
927 *
928 * This function is mainly designed to be used by the ->write_done() functions.
929 */
930int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
931{
932 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
933 (fattr->valid & NFS_ATTR_WCC_V4) == 0) {
934 fattr->pre_change_attr = NFS_I(inode)->change_attr;
935 fattr->valid |= NFS_ATTR_WCC_V4;
936 }
937 if ((fattr->valid & NFS_ATTR_FATTR) != 0 &&
938 (fattr->valid & NFS_ATTR_WCC) == 0) {
939 memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
940 memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
941 fattr->pre_size = inode->i_size;
942 fattr->valid |= NFS_ATTR_WCC;
943 }
944 return nfs_post_op_update_inode(inode, fattr);
925} 945}
926 946
927/* 947/*
@@ -941,9 +961,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
941 struct nfs_server *server; 961 struct nfs_server *server;
942 struct nfs_inode *nfsi = NFS_I(inode); 962 struct nfs_inode *nfsi = NFS_I(inode);
943 loff_t cur_isize, new_isize; 963 loff_t cur_isize, new_isize;
944 unsigned int invalid = 0; 964 unsigned long invalid = 0;
945 unsigned long now = jiffies; 965 unsigned long now = jiffies;
946 int data_stable;
947 966
948 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", 967 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
949 __FUNCTION__, inode->i_sb->s_id, inode->i_ino, 968 __FUNCTION__, inode->i_sb->s_id, inode->i_ino,
@@ -968,57 +987,51 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
968 * Update the read time so we don't revalidate too often. 987 * Update the read time so we don't revalidate too often.
969 */ 988 */
970 nfsi->read_cache_jiffies = fattr->time_start; 989 nfsi->read_cache_jiffies = fattr->time_start;
971 nfsi->last_updated = now;
972 990
973 /* Fix a wraparound issue with nfsi->cache_change_attribute */ 991 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
974 if (time_before(now, nfsi->cache_change_attribute)) 992 | NFS_INO_REVAL_PAGECACHE);
975 nfsi->cache_change_attribute = now - 600*HZ;
976
977 /* Are we racing with known updates of the metadata on the server? */
978 data_stable = nfs_verify_change_attribute(inode, fattr->time_start);
979 if (data_stable)
980 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME);
981 993
982 /* Do atomic weak cache consistency updates */ 994 /* Do atomic weak cache consistency updates */
983 nfs_wcc_update_inode(inode, fattr); 995 nfs_wcc_update_inode(inode, fattr);
984 996
997 /* More cache consistency checks */
998 if (!(fattr->valid & NFS_ATTR_FATTR_V4)) {
999 /* NFSv2/v3: Check if the mtime agrees */
1000 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1001 dprintk("NFS: mtime change on server for file %s/%ld\n",
1002 inode->i_sb->s_id, inode->i_ino);
1003 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1004 nfsi->cache_change_attribute = now;
1005 }
1006 /* If ctime has changed we should definitely clear access+acl caches */
1007 if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
1008 invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1009 } else if (nfsi->change_attr != fattr->change_attr) {
1010 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1011 inode->i_sb->s_id, inode->i_ino);
1012 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1013 nfsi->cache_change_attribute = now;
1014 }
1015
985 /* Check if our cached file size is stale */ 1016 /* Check if our cached file size is stale */
986 new_isize = nfs_size_to_loff_t(fattr->size); 1017 new_isize = nfs_size_to_loff_t(fattr->size);
987 cur_isize = i_size_read(inode); 1018 cur_isize = i_size_read(inode);
988 if (new_isize != cur_isize) { 1019 if (new_isize != cur_isize) {
989 /* Do we perhaps have any outstanding writes? */ 1020 /* Do we perhaps have any outstanding writes, or has
990 if (nfsi->npages == 0) { 1021 * the file grown beyond our last write? */
991 /* No, but did we race with nfs_end_data_update()? */ 1022 if (nfsi->npages == 0 || new_isize > cur_isize) {
992 if (data_stable) {
993 inode->i_size = new_isize;
994 invalid |= NFS_INO_INVALID_DATA;
995 }
996 invalid |= NFS_INO_INVALID_ATTR;
997 } else if (new_isize > cur_isize) {
998 inode->i_size = new_isize; 1023 inode->i_size = new_isize;
999 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1024 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1000 } 1025 }
1001 nfsi->cache_change_attribute = now;
1002 dprintk("NFS: isize change on server for file %s/%ld\n", 1026 dprintk("NFS: isize change on server for file %s/%ld\n",
1003 inode->i_sb->s_id, inode->i_ino); 1027 inode->i_sb->s_id, inode->i_ino);
1004 } 1028 }
1005 1029
1006 /* Check if the mtime agrees */
1007 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1008 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1009 dprintk("NFS: mtime change on server for file %s/%ld\n",
1010 inode->i_sb->s_id, inode->i_ino);
1011 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1012 nfsi->cache_change_attribute = now;
1013 }
1014 1030
1015 /* If ctime has changed we should definitely clear access+acl caches */ 1031 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1016 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { 1032 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1017 invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1018 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1019 nfsi->cache_change_attribute = now;
1020 }
1021 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); 1033 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1034 nfsi->change_attr = fattr->change_attr;
1022 1035
1023 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || 1036 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
1024 inode->i_uid != fattr->uid || 1037 inode->i_uid != fattr->uid ||
@@ -1039,31 +1052,29 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1039 inode->i_blocks = fattr->du.nfs2.blocks; 1052 inode->i_blocks = fattr->du.nfs2.blocks;
1040 } 1053 }
1041 1054
1042 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
1043 nfsi->change_attr != fattr->change_attr) {
1044 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1045 inode->i_sb->s_id, inode->i_ino);
1046 nfsi->change_attr = fattr->change_attr;
1047 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1048 nfsi->cache_change_attribute = now;
1049 }
1050
1051 /* Update attrtimeo value if we're out of the unstable period */ 1055 /* Update attrtimeo value if we're out of the unstable period */
1052 if (invalid & NFS_INO_INVALID_ATTR) { 1056 if (invalid & NFS_INO_INVALID_ATTR) {
1053 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); 1057 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
1054 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 1058 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
1055 nfsi->attrtimeo_timestamp = now; 1059 nfsi->attrtimeo_timestamp = now;
1056 } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { 1060 nfsi->last_updated = now;
1057 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) 1061 } else {
1058 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); 1062 if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
1059 nfsi->attrtimeo_timestamp = now; 1063 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
1064 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1065 nfsi->attrtimeo_timestamp = now;
1066 }
1067 /*
1068 * Avoid jiffy wraparound issues with nfsi->last_updated
1069 */
1070 if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now))
1071 nfsi->last_updated = nfsi->read_cache_jiffies;
1060 } 1072 }
1073 invalid &= ~NFS_INO_INVALID_ATTR;
1061 /* Don't invalidate the data if we were to blame */ 1074 /* Don't invalidate the data if we were to blame */
1062 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 1075 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1063 || S_ISLNK(inode->i_mode))) 1076 || S_ISLNK(inode->i_mode)))
1064 invalid &= ~NFS_INO_INVALID_DATA; 1077 invalid &= ~NFS_INO_INVALID_DATA;
1065 if (data_stable)
1066 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
1067 if (!nfs_have_delegation(inode, FMODE_READ) || 1078 if (!nfs_have_delegation(inode, FMODE_READ) ||
1068 (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) 1079 (nfsi->cache_validity & NFS_INO_REVAL_FORCED))
1069 nfsi->cache_validity |= invalid; 1080 nfsi->cache_validity |= invalid;
@@ -1152,7 +1163,6 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1152 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1163 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1153 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1164 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1154 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1165 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1155 atomic_set(&nfsi->data_updates, 0);
1156 nfsi->ncommit = 0; 1166 nfsi->ncommit = 0;
1157 nfsi->npages = 0; 1167 nfsi->npages = 0;
1158 nfs4_init_once(nfsi); 1168 nfs4_init_once(nfsi);
@@ -1249,6 +1259,7 @@ static void __exit exit_nfs_fs(void)
1249/* Not quite true; I just maintain it */ 1259/* Not quite true; I just maintain it */
1250MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); 1260MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
1251MODULE_LICENSE("GPL"); 1261MODULE_LICENSE("GPL");
1262module_param(enable_ino64, bool, 0644);
1252 1263
1253module_init(init_nfs_fs) 1264module_init(init_nfs_fs)
1254module_exit(exit_nfs_fs) 1265module_exit(exit_nfs_fs)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 76cf55d57101..f3acf48412be 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,8 +5,6 @@
5#include <linux/mount.h> 5#include <linux/mount.h>
6 6
7struct nfs_string; 7struct nfs_string;
8struct nfs_mount_data;
9struct nfs4_mount_data;
10 8
11/* Maximum number of readahead requests 9/* Maximum number of readahead requests
12 * FIXME: this should really be a sysctl so that users may tune it to suit 10 * FIXME: this should really be a sysctl so that users may tune it to suit
@@ -27,20 +25,50 @@ struct nfs_clone_mount {
27 rpc_authflavor_t authflavor; 25 rpc_authflavor_t authflavor;
28}; 26};
29 27
28/*
29 * In-kernel mount arguments
30 */
31struct nfs_parsed_mount_data {
32 int flags;
33 int rsize, wsize;
34 int timeo, retrans;
35 int acregmin, acregmax,
36 acdirmin, acdirmax;
37 int namlen;
38 unsigned int bsize;
39 unsigned int auth_flavor_len;
40 rpc_authflavor_t auth_flavors[1];
41 char *client_address;
42
43 struct {
44 struct sockaddr_in address;
45 char *hostname;
46 unsigned int program;
47 unsigned int version;
48 unsigned short port;
49 int protocol;
50 } mount_server;
51
52 struct {
53 struct sockaddr_in address;
54 char *hostname;
55 char *export_path;
56 unsigned int program;
57 int protocol;
58 } nfs_server;
59};
60
30/* client.c */ 61/* client.c */
31extern struct rpc_program nfs_program; 62extern struct rpc_program nfs_program;
32 63
33extern void nfs_put_client(struct nfs_client *); 64extern void nfs_put_client(struct nfs_client *);
34extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); 65extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
35extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, 66extern struct nfs_server *nfs_create_server(
36 struct nfs_fh *); 67 const struct nfs_parsed_mount_data *,
37extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, 68 struct nfs_fh *);
38 const char *, 69extern struct nfs_server *nfs4_create_server(
39 const struct sockaddr_in *, 70 const struct nfs_parsed_mount_data *,
40 const char *, 71 struct nfs_fh *);
41 const char *,
42 rpc_authflavor_t,
43 struct nfs_fh *);
44extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, 72extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
45 struct nfs_fh *); 73 struct nfs_fh *);
46extern void nfs_free_server(struct nfs_server *server); 74extern void nfs_free_server(struct nfs_server *server);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index c5fce7567200..668ab96c7b59 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -251,6 +251,7 @@ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
251 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; 251 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2;
252 xdr_inline_pages(&req->rq_rcv_buf, replen, 252 xdr_inline_pages(&req->rq_rcv_buf, replen,
253 args->pages, args->pgbase, count); 253 args->pages, args->pgbase, count);
254 req->rq_rcv_buf.flags |= XDRBUF_READ;
254 return 0; 255 return 0;
255} 256}
256 257
@@ -271,7 +272,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
271 res->eof = 0; 272 res->eof = 0;
272 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 273 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
273 if (iov->iov_len < hdrlen) { 274 if (iov->iov_len < hdrlen) {
274 printk(KERN_WARNING "NFS: READ reply header overflowed:" 275 dprintk("NFS: READ reply header overflowed:"
275 "length %d > %Zu\n", hdrlen, iov->iov_len); 276 "length %d > %Zu\n", hdrlen, iov->iov_len);
276 return -errno_NFSERR_IO; 277 return -errno_NFSERR_IO;
277 } else if (iov->iov_len != hdrlen) { 278 } else if (iov->iov_len != hdrlen) {
@@ -281,7 +282,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
281 282
282 recvd = req->rq_rcv_buf.len - hdrlen; 283 recvd = req->rq_rcv_buf.len - hdrlen;
283 if (count > recvd) { 284 if (count > recvd) {
284 printk(KERN_WARNING "NFS: server cheating in read reply: " 285 dprintk("NFS: server cheating in read reply: "
285 "count %d > recvd %d\n", count, recvd); 286 "count %d > recvd %d\n", count, recvd);
286 count = recvd; 287 count = recvd;
287 } 288 }
@@ -313,6 +314,7 @@ nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
313 314
314 /* Copy the page array */ 315 /* Copy the page array */
315 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); 316 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
317 sndbuf->flags |= XDRBUF_WRITE;
316 return 0; 318 return 0;
317} 319}
318 320
@@ -431,7 +433,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
431 433
432 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 434 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
433 if (iov->iov_len < hdrlen) { 435 if (iov->iov_len < hdrlen) {
434 printk(KERN_WARNING "NFS: READDIR reply header overflowed:" 436 dprintk("NFS: READDIR reply header overflowed:"
435 "length %d > %Zu\n", hdrlen, iov->iov_len); 437 "length %d > %Zu\n", hdrlen, iov->iov_len);
436 return -errno_NFSERR_IO; 438 return -errno_NFSERR_IO;
437 } else if (iov->iov_len != hdrlen) { 439 } else if (iov->iov_len != hdrlen) {
@@ -454,7 +456,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
454 len = ntohl(*p++); 456 len = ntohl(*p++);
455 p += XDR_QUADLEN(len) + 1; /* name plus cookie */ 457 p += XDR_QUADLEN(len) + 1; /* name plus cookie */
456 if (len > NFS2_MAXNAMLEN) { 458 if (len > NFS2_MAXNAMLEN) {
457 printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", 459 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
458 len); 460 len);
459 goto err_unmap; 461 goto err_unmap;
460 } 462 }
@@ -471,7 +473,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
471 entry[0] = entry[1] = 0; 473 entry[0] = entry[1] = 0;
472 /* truncate listing ? */ 474 /* truncate listing ? */
473 if (!nr) { 475 if (!nr) {
474 printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); 476 dprintk("NFS: readdir reply truncated!\n");
475 entry[1] = 1; 477 entry[1] = 1;
476 } 478 }
477 goto out; 479 goto out;
@@ -583,12 +585,12 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
583 /* Convert length of symlink */ 585 /* Convert length of symlink */
584 len = ntohl(*p++); 586 len = ntohl(*p++);
585 if (len >= rcvbuf->page_len || len <= 0) { 587 if (len >= rcvbuf->page_len || len <= 0) {
586 dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); 588 dprintk("nfs: server returned giant symlink!\n");
587 return -ENAMETOOLONG; 589 return -ENAMETOOLONG;
588 } 590 }
589 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 591 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
590 if (iov->iov_len < hdrlen) { 592 if (iov->iov_len < hdrlen) {
591 printk(KERN_WARNING "NFS: READLINK reply header overflowed:" 593 dprintk("NFS: READLINK reply header overflowed:"
592 "length %d > %Zu\n", hdrlen, iov->iov_len); 594 "length %d > %Zu\n", hdrlen, iov->iov_len);
593 return -errno_NFSERR_IO; 595 return -errno_NFSERR_IO;
594 } else if (iov->iov_len != hdrlen) { 596 } else if (iov->iov_len != hdrlen) {
@@ -597,7 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
597 } 599 }
598 recvd = req->rq_rcv_buf.len - hdrlen; 600 recvd = req->rq_rcv_buf.len - hdrlen;
599 if (recvd < len) { 601 if (recvd < len) {
600 printk(KERN_WARNING "NFS: server cheating in readlink reply: " 602 dprintk("NFS: server cheating in readlink reply: "
601 "count %u > recvd %u\n", len, recvd); 603 "count %u > recvd %u\n", len, recvd);
602 return -EIO; 604 return -EIO;
603 } 605 }
@@ -695,7 +697,7 @@ nfs_stat_to_errno(int stat)
695 if (nfs_errtbl[i].stat == stat) 697 if (nfs_errtbl[i].stat == stat)
696 return nfs_errtbl[i].errno; 698 return nfs_errtbl[i].errno;
697 } 699 }
698 printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat); 700 dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat);
699 return nfs_errtbl[i].errno; 701 return nfs_errtbl[i].errno;
700} 702}
701 703
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 7322da4d2055..9b7362565c0c 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -317,13 +317,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
317 } 317 }
318 318
319 dprintk("NFS call setacl\n"); 319 dprintk("NFS call setacl\n");
320 nfs_begin_data_update(inode);
321 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; 320 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
322 status = rpc_call_sync(server->client_acl, &msg, 0); 321 status = rpc_call_sync(server->client_acl, &msg, 0);
323 spin_lock(&inode->i_lock); 322 spin_lock(&inode->i_lock);
324 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; 323 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
325 spin_unlock(&inode->i_lock); 324 spin_unlock(&inode->i_lock);
326 nfs_end_data_update(inode);
327 dprintk("NFS reply setacl: %d\n", status); 325 dprintk("NFS reply setacl: %d\n", status);
328 326
329 /* pages may have been allocated at the xdr layer. */ 327 /* pages may have been allocated at the xdr layer. */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index c7ca5d70870b..4cdc2361a669 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -166,6 +166,7 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
166 nfs_fattr_init(&dir_attr); 166 nfs_fattr_init(&dir_attr);
167 nfs_fattr_init(fattr); 167 nfs_fattr_init(fattr);
168 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 168 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
169 nfs_refresh_inode(dir, &dir_attr);
169 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { 170 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
170 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; 171 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
171 msg.rpc_argp = fhandle; 172 msg.rpc_argp = fhandle;
@@ -173,8 +174,6 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
173 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 174 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
174 } 175 }
175 dprintk("NFS reply lookup: %d\n", status); 176 dprintk("NFS reply lookup: %d\n", status);
176 if (status >= 0)
177 status = nfs_refresh_inode(dir, &dir_attr);
178 return status; 177 return status;
179} 178}
180 179
@@ -607,6 +606,9 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
607 606
608 nfs_fattr_init(&dir_attr); 607 nfs_fattr_init(&dir_attr);
609 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 608 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
609
610 nfs_invalidate_atime(dir);
611
610 nfs_refresh_inode(dir, &dir_attr); 612 nfs_refresh_inode(dir, &dir_attr);
611 dprintk("NFS reply readdir: %d\n", status); 613 dprintk("NFS reply readdir: %d\n", status);
612 return status; 614 return status;
@@ -724,9 +726,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
724{ 726{
725 if (nfs3_async_handle_jukebox(task, data->inode)) 727 if (nfs3_async_handle_jukebox(task, data->inode))
726 return -EAGAIN; 728 return -EAGAIN;
727 /* Call back common NFS readpage processing */ 729
728 if (task->tk_status >= 0) 730 nfs_invalidate_atime(data->inode);
729 nfs_refresh_inode(data->inode, &data->fattr); 731 nfs_refresh_inode(data->inode, &data->fattr);
730 return 0; 732 return 0;
731} 733}
732 734
@@ -747,7 +749,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
747 if (nfs3_async_handle_jukebox(task, data->inode)) 749 if (nfs3_async_handle_jukebox(task, data->inode))
748 return -EAGAIN; 750 return -EAGAIN;
749 if (task->tk_status >= 0) 751 if (task->tk_status >= 0)
750 nfs_post_op_update_inode(data->inode, data->res.fattr); 752 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
751 return 0; 753 return 0;
752} 754}
753 755
@@ -775,8 +777,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
775{ 777{
776 if (nfs3_async_handle_jukebox(task, data->inode)) 778 if (nfs3_async_handle_jukebox(task, data->inode))
777 return -EAGAIN; 779 return -EAGAIN;
778 if (task->tk_status >= 0) 780 nfs_refresh_inode(data->inode, data->res.fattr);
779 nfs_post_op_update_inode(data->inode, data->res.fattr);
780 return 0; 781 return 0;
781} 782}
782 783
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9e08f0cf2a0..616d3267b7e7 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -346,6 +346,7 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
346 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; 346 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2;
347 xdr_inline_pages(&req->rq_rcv_buf, replen, 347 xdr_inline_pages(&req->rq_rcv_buf, replen,
348 args->pages, args->pgbase, count); 348 args->pages, args->pgbase, count);
349 req->rq_rcv_buf.flags |= XDRBUF_READ;
349 return 0; 350 return 0;
350} 351}
351 352
@@ -367,6 +368,7 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
367 368
368 /* Copy the page array */ 369 /* Copy the page array */
369 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); 370 xdr_encode_pages(sndbuf, args->pages, args->pgbase, count);
371 sndbuf->flags |= XDRBUF_WRITE;
370 return 0; 372 return 0;
371} 373}
372 374
@@ -524,7 +526,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
524 526
525 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 527 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
526 if (iov->iov_len < hdrlen) { 528 if (iov->iov_len < hdrlen) {
527 printk(KERN_WARNING "NFS: READDIR reply header overflowed:" 529 dprintk("NFS: READDIR reply header overflowed:"
528 "length %d > %Zu\n", hdrlen, iov->iov_len); 530 "length %d > %Zu\n", hdrlen, iov->iov_len);
529 return -errno_NFSERR_IO; 531 return -errno_NFSERR_IO;
530 } else if (iov->iov_len != hdrlen) { 532 } else if (iov->iov_len != hdrlen) {
@@ -547,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
547 len = ntohl(*p++); /* string length */ 549 len = ntohl(*p++); /* string length */
548 p += XDR_QUADLEN(len) + 2; /* name + cookie */ 550 p += XDR_QUADLEN(len) + 2; /* name + cookie */
549 if (len > NFS3_MAXNAMLEN) { 551 if (len > NFS3_MAXNAMLEN) {
550 printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", 552 dprintk("NFS: giant filename in readdir (len %x)!\n",
551 len); 553 len);
552 goto err_unmap; 554 goto err_unmap;
553 } 555 }
@@ -567,7 +569,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
567 goto short_pkt; 569 goto short_pkt;
568 len = ntohl(*p++); 570 len = ntohl(*p++);
569 if (len > NFS3_FHSIZE) { 571 if (len > NFS3_FHSIZE) {
570 printk(KERN_WARNING "NFS: giant filehandle in " 572 dprintk("NFS: giant filehandle in "
571 "readdir (len %x)!\n", len); 573 "readdir (len %x)!\n", len);
572 goto err_unmap; 574 goto err_unmap;
573 } 575 }
@@ -588,7 +590,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
588 entry[0] = entry[1] = 0; 590 entry[0] = entry[1] = 0;
589 /* truncate listing ? */ 591 /* truncate listing ? */
590 if (!nr) { 592 if (!nr) {
591 printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); 593 dprintk("NFS: readdir reply truncated!\n");
592 entry[1] = 1; 594 entry[1] = 1;
593 } 595 }
594 goto out; 596 goto out;
@@ -826,22 +828,23 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
826 /* Convert length of symlink */ 828 /* Convert length of symlink */
827 len = ntohl(*p++); 829 len = ntohl(*p++);
828 if (len >= rcvbuf->page_len || len <= 0) { 830 if (len >= rcvbuf->page_len || len <= 0) {
829 dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); 831 dprintk("nfs: server returned giant symlink!\n");
830 return -ENAMETOOLONG; 832 return -ENAMETOOLONG;
831 } 833 }
832 834
833 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 835 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
834 if (iov->iov_len < hdrlen) { 836 if (iov->iov_len < hdrlen) {
835 printk(KERN_WARNING "NFS: READLINK reply header overflowed:" 837 dprintk("NFS: READLINK reply header overflowed:"
836 "length %d > %Zu\n", hdrlen, iov->iov_len); 838 "length %d > %Zu\n", hdrlen, iov->iov_len);
837 return -errno_NFSERR_IO; 839 return -errno_NFSERR_IO;
838 } else if (iov->iov_len != hdrlen) { 840 } else if (iov->iov_len != hdrlen) {
839 dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); 841 dprintk("NFS: READLINK header is short. "
842 "iovec will be shifted.\n");
840 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); 843 xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen);
841 } 844 }
842 recvd = req->rq_rcv_buf.len - hdrlen; 845 recvd = req->rq_rcv_buf.len - hdrlen;
843 if (recvd < len) { 846 if (recvd < len) {
844 printk(KERN_WARNING "NFS: server cheating in readlink reply: " 847 dprintk("NFS: server cheating in readlink reply: "
845 "count %u > recvd %u\n", len, recvd); 848 "count %u > recvd %u\n", len, recvd);
846 return -EIO; 849 return -EIO;
847 } 850 }
@@ -876,13 +879,13 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
876 ocount = ntohl(*p++); 879 ocount = ntohl(*p++);
877 880
878 if (ocount != count) { 881 if (ocount != count) {
879 printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n"); 882 dprintk("NFS: READ count doesn't match RPC opaque count.\n");
880 return -errno_NFSERR_IO; 883 return -errno_NFSERR_IO;
881 } 884 }
882 885
883 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 886 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
884 if (iov->iov_len < hdrlen) { 887 if (iov->iov_len < hdrlen) {
885 printk(KERN_WARNING "NFS: READ reply header overflowed:" 888 dprintk("NFS: READ reply header overflowed:"
886 "length %d > %Zu\n", hdrlen, iov->iov_len); 889 "length %d > %Zu\n", hdrlen, iov->iov_len);
887 return -errno_NFSERR_IO; 890 return -errno_NFSERR_IO;
888 } else if (iov->iov_len != hdrlen) { 891 } else if (iov->iov_len != hdrlen) {
@@ -892,7 +895,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
892 895
893 recvd = req->rq_rcv_buf.len - hdrlen; 896 recvd = req->rq_rcv_buf.len - hdrlen;
894 if (count > recvd) { 897 if (count > recvd) {
895 printk(KERN_WARNING "NFS: server cheating in read reply: " 898 dprintk("NFS: server cheating in read reply: "
896 "count %d > recvd %d\n", count, recvd); 899 "count %d > recvd %d\n", count, recvd);
897 count = recvd; 900 count = recvd;
898 res->eof = 0; 901 res->eof = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b90e17555a9..cb99fd90a9ac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -62,10 +62,8 @@ struct nfs4_opendata;
62static int _nfs4_proc_open(struct nfs4_opendata *data); 62static int _nfs4_proc_open(struct nfs4_opendata *data);
63static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 63static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
64static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); 64static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 65static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); 66static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
69static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 67static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
70static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 68static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
71 69
@@ -177,7 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
177 *p++ = xdr_one; /* bitmap length */ 175 *p++ = xdr_one; /* bitmap length */
178 *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ 176 *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
179 *p++ = htonl(8); /* attribute buffer length */ 177 *p++ = htonl(8); /* attribute buffer length */
180 p = xdr_encode_hyper(p, dentry->d_inode->i_ino); 178 p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
181 } 179 }
182 180
183 *p++ = xdr_one; /* next */ 181 *p++ = xdr_one; /* next */
@@ -189,7 +187,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
189 *p++ = xdr_one; /* bitmap length */ 187 *p++ = xdr_one; /* bitmap length */
190 *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ 188 *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */
191 *p++ = htonl(8); /* attribute buffer length */ 189 *p++ = htonl(8); /* attribute buffer length */
192 p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); 190 p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
193 191
194 readdir->pgbase = (char *)p - (char *)start; 192 readdir->pgbase = (char *)p - (char *)start;
195 readdir->count -= readdir->pgbase; 193 readdir->count -= readdir->pgbase;
@@ -211,8 +209,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
211 209
212 spin_lock(&dir->i_lock); 210 spin_lock(&dir->i_lock);
213 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; 211 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
214 if (cinfo->before == nfsi->change_attr && cinfo->atomic) 212 if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
215 nfsi->change_attr = cinfo->after; 213 nfsi->cache_change_attribute = jiffies;
214 nfsi->change_attr = cinfo->after;
216 spin_unlock(&dir->i_lock); 215 spin_unlock(&dir->i_lock);
217} 216}
218 217
@@ -454,7 +453,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
454 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); 453 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
455 rcu_read_unlock(); 454 rcu_read_unlock();
456 lock_kernel(); 455 lock_kernel();
457 ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); 456 ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
458 unlock_kernel(); 457 unlock_kernel();
459 if (ret != 0) 458 if (ret != 0)
460 goto out; 459 goto out;
@@ -948,36 +947,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
948 return 0; 947 return 0;
949} 948}
950 949
951static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags)
952{
953 struct nfs_access_entry cache;
954 int mask = 0;
955 int status;
956
957 if (openflags & FMODE_READ)
958 mask |= MAY_READ;
959 if (openflags & FMODE_WRITE)
960 mask |= MAY_WRITE;
961 if (openflags & FMODE_EXEC)
962 mask |= MAY_EXEC;
963 status = nfs_access_get_cached(inode, cred, &cache);
964 if (status == 0)
965 goto out;
966
967 /* Be clever: ask server to check for all possible rights */
968 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
969 cache.cred = cred;
970 cache.jiffies = jiffies;
971 status = _nfs4_proc_access(inode, &cache);
972 if (status != 0)
973 return status;
974 nfs_access_add_cache(inode, &cache);
975out:
976 if ((cache.mask & mask) == mask)
977 return 0;
978 return -EACCES;
979}
980
981static int nfs4_recover_expired_lease(struct nfs_server *server) 950static int nfs4_recover_expired_lease(struct nfs_server *server)
982{ 951{
983 struct nfs_client *clp = server->nfs_client; 952 struct nfs_client *clp = server->nfs_client;
@@ -1381,7 +1350,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
1381 1350
1382 /* If the open_intent is for execute, we have an extra check to make */ 1351 /* If the open_intent is for execute, we have an extra check to make */
1383 if (nd->intent.open.flags & FMODE_EXEC) { 1352 if (nd->intent.open.flags & FMODE_EXEC) {
1384 ret = _nfs4_do_access(state->inode, 1353 ret = nfs_may_open(state->inode,
1385 state->owner->so_cred, 1354 state->owner->so_cred,
1386 nd->intent.open.flags); 1355 nd->intent.open.flags);
1387 if (ret < 0) 1356 if (ret < 0)
@@ -1390,7 +1359,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
1390 filp = lookup_instantiate_filp(nd, path->dentry, NULL); 1359 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
1391 if (!IS_ERR(filp)) { 1360 if (!IS_ERR(filp)) {
1392 struct nfs_open_context *ctx; 1361 struct nfs_open_context *ctx;
1393 ctx = (struct nfs_open_context *)filp->private_data; 1362 ctx = nfs_file_open_context(filp);
1394 ctx->state = state; 1363 ctx->state = state;
1395 return 0; 1364 return 0;
1396 } 1365 }
@@ -1428,13 +1397,16 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1428 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); 1397 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
1429 put_rpccred(cred); 1398 put_rpccred(cred);
1430 if (IS_ERR(state)) { 1399 if (IS_ERR(state)) {
1431 if (PTR_ERR(state) == -ENOENT) 1400 if (PTR_ERR(state) == -ENOENT) {
1432 d_add(dentry, NULL); 1401 d_add(dentry, NULL);
1402 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1403 }
1433 return (struct dentry *)state; 1404 return (struct dentry *)state;
1434 } 1405 }
1435 res = d_add_unique(dentry, igrab(state->inode)); 1406 res = d_add_unique(dentry, igrab(state->inode));
1436 if (res != NULL) 1407 if (res != NULL)
1437 path.dentry = res; 1408 path.dentry = res;
1409 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
1438 nfs4_intent_set_file(nd, &path, state); 1410 nfs4_intent_set_file(nd, &path, state);
1439 return res; 1411 return res;
1440} 1412}
@@ -1468,6 +1440,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1468 } 1440 }
1469 } 1441 }
1470 if (state->inode == dentry->d_inode) { 1442 if (state->inode == dentry->d_inode) {
1443 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1471 nfs4_intent_set_file(nd, &path, state); 1444 nfs4_intent_set_file(nd, &path, state);
1472 return 1; 1445 return 1;
1473 } 1446 }
@@ -1757,10 +1730,16 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh
1757 1730
1758static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) 1731static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
1759{ 1732{
1733 struct nfs_server *server = NFS_SERVER(inode);
1734 struct nfs_fattr fattr;
1760 struct nfs4_accessargs args = { 1735 struct nfs4_accessargs args = {
1761 .fh = NFS_FH(inode), 1736 .fh = NFS_FH(inode),
1737 .bitmask = server->attr_bitmask,
1738 };
1739 struct nfs4_accessres res = {
1740 .server = server,
1741 .fattr = &fattr,
1762 }; 1742 };
1763 struct nfs4_accessres res = { 0 };
1764 struct rpc_message msg = { 1743 struct rpc_message msg = {
1765 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], 1744 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
1766 .rpc_argp = &args, 1745 .rpc_argp = &args,
@@ -1786,6 +1765,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
1786 if (mode & MAY_EXEC) 1765 if (mode & MAY_EXEC)
1787 args.access |= NFS4_ACCESS_EXECUTE; 1766 args.access |= NFS4_ACCESS_EXECUTE;
1788 } 1767 }
1768 nfs_fattr_init(&fattr);
1789 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 1769 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
1790 if (!status) { 1770 if (!status) {
1791 entry->mask = 0; 1771 entry->mask = 0;
@@ -1795,6 +1775,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
1795 entry->mask |= MAY_WRITE; 1775 entry->mask |= MAY_WRITE;
1796 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) 1776 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
1797 entry->mask |= MAY_EXEC; 1777 entry->mask |= MAY_EXEC;
1778 nfs_refresh_inode(inode, &fattr);
1798 } 1779 }
1799 return status; 1780 return status;
1800} 1781}
@@ -1900,11 +1881,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1900 } 1881 }
1901 state = nfs4_do_open(dir, &path, flags, sattr, cred); 1882 state = nfs4_do_open(dir, &path, flags, sattr, cred);
1902 put_rpccred(cred); 1883 put_rpccred(cred);
1884 d_drop(dentry);
1903 if (IS_ERR(state)) { 1885 if (IS_ERR(state)) {
1904 status = PTR_ERR(state); 1886 status = PTR_ERR(state);
1905 goto out; 1887 goto out;
1906 } 1888 }
1907 d_instantiate(dentry, igrab(state->inode)); 1889 d_add(dentry, igrab(state->inode));
1890 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1908 if (flags & O_EXCL) { 1891 if (flags & O_EXCL) {
1909 struct nfs_fattr fattr; 1892 struct nfs_fattr fattr;
1910 status = nfs4_do_setattr(state->inode, &fattr, sattr, state); 1893 status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
@@ -2218,6 +2201,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2218 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 2201 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
2219 if (status == 0) 2202 if (status == 0)
2220 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2203 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2204
2205 nfs_invalidate_atime(dir);
2206
2221 dprintk("%s: returns %d\n", __FUNCTION__, status); 2207 dprintk("%s: returns %d\n", __FUNCTION__, status);
2222 return status; 2208 return status;
2223} 2209}
@@ -2414,6 +2400,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
2414 rpc_restart_call(task); 2400 rpc_restart_call(task);
2415 return -EAGAIN; 2401 return -EAGAIN;
2416 } 2402 }
2403
2404 nfs_invalidate_atime(data->inode);
2417 if (task->tk_status > 0) 2405 if (task->tk_status > 0)
2418 renew_lease(server, data->timestamp); 2406 renew_lease(server, data->timestamp);
2419 return 0; 2407 return 0;
@@ -2443,7 +2431,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
2443 } 2431 }
2444 if (task->tk_status >= 0) { 2432 if (task->tk_status >= 0) {
2445 renew_lease(NFS_SERVER(inode), data->timestamp); 2433 renew_lease(NFS_SERVER(inode), data->timestamp);
2446 nfs_post_op_update_inode(inode, data->res.fattr); 2434 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
2447 } 2435 }
2448 return 0; 2436 return 0;
2449} 2437}
@@ -2485,8 +2473,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
2485 rpc_restart_call(task); 2473 rpc_restart_call(task);
2486 return -EAGAIN; 2474 return -EAGAIN;
2487 } 2475 }
2488 if (task->tk_status >= 0) 2476 nfs_refresh_inode(inode, data->res.fattr);
2489 nfs_post_op_update_inode(inode, data->res.fattr);
2490 return 0; 2477 return 0;
2491} 2478}
2492 2479
@@ -3056,7 +3043,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3056 if (status == 0) { 3043 if (status == 0) {
3057 status = data->rpc_status; 3044 status = data->rpc_status;
3058 if (status == 0) 3045 if (status == 0)
3059 nfs_post_op_update_inode(inode, &data->fattr); 3046 nfs_refresh_inode(inode, &data->fattr);
3060 } 3047 }
3061 rpc_put_task(task); 3048 rpc_put_task(task);
3062 return status; 3049 return status;
@@ -3303,7 +3290,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3303 status = -ENOMEM; 3290 status = -ENOMEM;
3304 if (seqid == NULL) 3291 if (seqid == NULL)
3305 goto out; 3292 goto out;
3306 task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); 3293 task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
3307 status = PTR_ERR(task); 3294 status = PTR_ERR(task);
3308 if (IS_ERR(task)) 3295 if (IS_ERR(task))
3309 goto out; 3296 goto out;
@@ -3447,7 +3434,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
3447 int ret; 3434 int ret;
3448 3435
3449 dprintk("%s: begin!\n", __FUNCTION__); 3436 dprintk("%s: begin!\n", __FUNCTION__);
3450 data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, 3437 data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
3451 fl->fl_u.nfs4_fl.owner); 3438 fl->fl_u.nfs4_fl.owner);
3452 if (data == NULL) 3439 if (data == NULL)
3453 return -ENOMEM; 3440 return -ENOMEM;
@@ -3573,7 +3560,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
3573 int status; 3560 int status;
3574 3561
3575 /* verify open state */ 3562 /* verify open state */
3576 ctx = (struct nfs_open_context *)filp->private_data; 3563 ctx = nfs_file_open_context(filp);
3577 state = ctx->state; 3564 state = ctx->state;
3578 3565
3579 if (request->fl_start < 0 || request->fl_end < 0) 3566 if (request->fl_start < 0 || request->fl_end < 0)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 3e4adf8c8312..bfb36261cecb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
774 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { 774 for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) {
775 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 775 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
776 continue; 776 continue;
777 if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) 777 if (nfs_file_open_context(fl->fl_file)->state != state)
778 continue; 778 continue;
779 status = ops->recover_lock(state, fl); 779 status = ops->recover_lock(state, fl);
780 if (status >= 0) 780 if (status >= 0)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index badd73b7ca12..51dd3804866f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int);
376 decode_locku_maxsz) 376 decode_locku_maxsz)
377#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ 377#define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \
378 encode_putfh_maxsz + \ 378 encode_putfh_maxsz + \
379 encode_access_maxsz) 379 encode_access_maxsz + \
380 encode_getattr_maxsz)
380#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ 381#define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \
381 decode_putfh_maxsz + \ 382 decode_putfh_maxsz + \
382 decode_access_maxsz) 383 decode_access_maxsz + \
384 decode_getattr_maxsz)
383#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ 385#define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \
384 encode_putfh_maxsz + \ 386 encode_putfh_maxsz + \
385 encode_getattr_maxsz) 387 encode_getattr_maxsz)
@@ -562,7 +564,6 @@ struct compound_hdr {
562 564
563#define RESERVE_SPACE(nbytes) do { \ 565#define RESERVE_SPACE(nbytes) do { \
564 p = xdr_reserve_space(xdr, nbytes); \ 566 p = xdr_reserve_space(xdr, nbytes); \
565 if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
566 BUG_ON(!p); \ 567 BUG_ON(!p); \
567} while (0) 568} while (0)
568 569
@@ -628,8 +629,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
628 if (iap->ia_valid & ATTR_UID) { 629 if (iap->ia_valid & ATTR_UID) {
629 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); 630 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
630 if (owner_namelen < 0) { 631 if (owner_namelen < 0) {
631 printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", 632 dprintk("nfs: couldn't resolve uid %d to string\n",
632 iap->ia_uid); 633 iap->ia_uid);
633 /* XXX */ 634 /* XXX */
634 strcpy(owner_name, "nobody"); 635 strcpy(owner_name, "nobody");
635 owner_namelen = sizeof("nobody") - 1; 636 owner_namelen = sizeof("nobody") - 1;
@@ -640,8 +641,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
640 if (iap->ia_valid & ATTR_GID) { 641 if (iap->ia_valid & ATTR_GID) {
641 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); 642 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
642 if (owner_grouplen < 0) { 643 if (owner_grouplen < 0) {
643 printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", 644 dprintk("nfs: couldn't resolve gid %d to string\n",
644 iap->ia_gid); 645 iap->ia_gid);
645 strcpy(owner_group, "nobody"); 646 strcpy(owner_group, "nobody");
646 owner_grouplen = sizeof("nobody") - 1; 647 owner_grouplen = sizeof("nobody") - 1;
647 /* goto out; */ 648 /* goto out; */
@@ -711,7 +712,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
711 * Now we backfill the bitmap and the attribute buffer length. 712 * Now we backfill the bitmap and the attribute buffer length.
712 */ 713 */
713 if (len != ((char *)p - (char *)q) + 4) { 714 if (len != ((char *)p - (char *)q) + 4) {
714 printk ("encode_attr: Attr length calculation error! %u != %Zu\n", 715 printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n",
715 len, ((char *)p - (char *)q) + 4); 716 len, ((char *)p - (char *)q) + 4);
716 BUG(); 717 BUG();
717 } 718 }
@@ -1376,14 +1377,20 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs
1376{ 1377{
1377 struct xdr_stream xdr; 1378 struct xdr_stream xdr;
1378 struct compound_hdr hdr = { 1379 struct compound_hdr hdr = {
1379 .nops = 2, 1380 .nops = 3,
1380 }; 1381 };
1381 int status; 1382 int status;
1382 1383
1383 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1384 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1384 encode_compound_hdr(&xdr, &hdr); 1385 encode_compound_hdr(&xdr, &hdr);
1385 if ((status = encode_putfh(&xdr, args->fh)) == 0) 1386 status = encode_putfh(&xdr, args->fh);
1386 status = encode_access(&xdr, args->access); 1387 if (status != 0)
1388 goto out;
1389 status = encode_access(&xdr, args->access);
1390 if (status != 0)
1391 goto out;
1392 status = encode_getfattr(&xdr, args->bitmask);
1393out:
1387 return status; 1394 return status;
1388} 1395}
1389 1396
@@ -1857,6 +1864,7 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg
1857 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; 1864 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2;
1858 xdr_inline_pages(&req->rq_rcv_buf, replen, 1865 xdr_inline_pages(&req->rq_rcv_buf, replen,
1859 args->pages, args->pgbase, args->count); 1866 args->pages, args->pgbase, args->count);
1867 req->rq_rcv_buf.flags |= XDRBUF_READ;
1860out: 1868out:
1861 return status; 1869 return status;
1862} 1870}
@@ -1933,6 +1941,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea
1933 status = encode_write(&xdr, args); 1941 status = encode_write(&xdr, args);
1934 if (status) 1942 if (status)
1935 goto out; 1943 goto out;
1944 req->rq_snd_buf.flags |= XDRBUF_WRITE;
1936 status = encode_getfattr(&xdr, args->bitmask); 1945 status = encode_getfattr(&xdr, args->bitmask);
1937out: 1946out:
1938 return status; 1947 return status;
@@ -2180,9 +2189,9 @@ out:
2180#define READ_BUF(nbytes) do { \ 2189#define READ_BUF(nbytes) do { \
2181 p = xdr_inline_decode(xdr, nbytes); \ 2190 p = xdr_inline_decode(xdr, nbytes); \
2182 if (unlikely(!p)) { \ 2191 if (unlikely(!p)) { \
2183 printk(KERN_INFO "%s: prematurely hit end of receive" \ 2192 dprintk("nfs: %s: prematurely hit end of receive" \
2184 " buffer\n", __FUNCTION__); \ 2193 " buffer\n", __FUNCTION__); \
2185 printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ 2194 dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
2186 __FUNCTION__, xdr->p, nbytes, xdr->end); \ 2195 __FUNCTION__, xdr->p, nbytes, xdr->end); \
2187 return -EIO; \ 2196 return -EIO; \
2188 } \ 2197 } \
@@ -2223,9 +2232,8 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2223 READ_BUF(8); 2232 READ_BUF(8);
2224 READ32(opnum); 2233 READ32(opnum);
2225 if (opnum != expected) { 2234 if (opnum != expected) {
2226 printk(KERN_NOTICE 2235 dprintk("nfs: Server returned operation"
2227 "nfs4_decode_op_hdr: Server returned operation" 2236 " %d but we issued a request for %d\n",
2228 " %d but we issued a request for %d\n",
2229 opnum, expected); 2237 opnum, expected);
2230 return -EIO; 2238 return -EIO;
2231 } 2239 }
@@ -2758,7 +2766,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2758 dprintk("%s: nfs_map_name_to_uid failed!\n", 2766 dprintk("%s: nfs_map_name_to_uid failed!\n",
2759 __FUNCTION__); 2767 __FUNCTION__);
2760 } else 2768 } else
2761 printk(KERN_WARNING "%s: name too long (%u)!\n", 2769 dprintk("%s: name too long (%u)!\n",
2762 __FUNCTION__, len); 2770 __FUNCTION__, len);
2763 bitmap[1] &= ~FATTR4_WORD1_OWNER; 2771 bitmap[1] &= ~FATTR4_WORD1_OWNER;
2764 } 2772 }
@@ -2783,7 +2791,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2783 dprintk("%s: nfs_map_group_to_gid failed!\n", 2791 dprintk("%s: nfs_map_group_to_gid failed!\n",
2784 __FUNCTION__); 2792 __FUNCTION__);
2785 } else 2793 } else
2786 printk(KERN_WARNING "%s: name too long (%u)!\n", 2794 dprintk("%s: name too long (%u)!\n",
2787 __FUNCTION__, len); 2795 __FUNCTION__, len);
2788 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 2796 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
2789 } 2797 }
@@ -2950,7 +2958,8 @@ static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrl
2950 unsigned int nwords = xdr->p - savep; 2958 unsigned int nwords = xdr->p - savep;
2951 2959
2952 if (unlikely(attrwords != nwords)) { 2960 if (unlikely(attrwords != nwords)) {
2953 printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n", 2961 dprintk("%s: server returned incorrect attribute length: "
2962 "%u %c %u\n",
2954 __FUNCTION__, 2963 __FUNCTION__,
2955 attrwords << 2, 2964 attrwords << 2,
2956 (attrwords < nwords) ? '<' : '>', 2965 (attrwords < nwords) ? '<' : '>',
@@ -3451,7 +3460,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3451 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 3460 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
3452 recvd = req->rq_rcv_buf.len - hdrlen; 3461 recvd = req->rq_rcv_buf.len - hdrlen;
3453 if (count > recvd) { 3462 if (count > recvd) {
3454 printk(KERN_WARNING "NFS: server cheating in read reply: " 3463 dprintk("NFS: server cheating in read reply: "
3455 "count %u > recvd %u\n", count, recvd); 3464 "count %u > recvd %u\n", count, recvd);
3456 count = recvd; 3465 count = recvd;
3457 eof = 0; 3466 eof = 0;
@@ -3500,7 +3509,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3500 p += 2; /* cookie */ 3509 p += 2; /* cookie */
3501 len = ntohl(*p++); /* filename length */ 3510 len = ntohl(*p++); /* filename length */
3502 if (len > NFS4_MAXNAMLEN) { 3511 if (len > NFS4_MAXNAMLEN) {
3503 printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); 3512 dprintk("NFS: giant filename in readdir (len 0x%x)\n",
3513 len);
3504 goto err_unmap; 3514 goto err_unmap;
3505 } 3515 }
3506 xlen = XDR_QUADLEN(len); 3516 xlen = XDR_QUADLEN(len);
@@ -3528,7 +3538,7 @@ short_pkt:
3528 entry[0] = entry[1] = 0; 3538 entry[0] = entry[1] = 0;
3529 /* truncate listing ? */ 3539 /* truncate listing ? */
3530 if (!nr) { 3540 if (!nr) {
3531 printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); 3541 dprintk("NFS: readdir reply truncated!\n");
3532 entry[1] = 1; 3542 entry[1] = 1;
3533 } 3543 }
3534 goto out; 3544 goto out;
@@ -3554,13 +3564,13 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
3554 READ_BUF(4); 3564 READ_BUF(4);
3555 READ32(len); 3565 READ32(len);
3556 if (len >= rcvbuf->page_len || len <= 0) { 3566 if (len >= rcvbuf->page_len || len <= 0) {
3557 dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); 3567 dprintk("nfs: server returned giant symlink!\n");
3558 return -ENAMETOOLONG; 3568 return -ENAMETOOLONG;
3559 } 3569 }
3560 hdrlen = (char *) xdr->p - (char *) iov->iov_base; 3570 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
3561 recvd = req->rq_rcv_buf.len - hdrlen; 3571 recvd = req->rq_rcv_buf.len - hdrlen;
3562 if (recvd < len) { 3572 if (recvd < len) {
3563 printk(KERN_WARNING "NFS: server cheating in readlink reply: " 3573 dprintk("NFS: server cheating in readlink reply: "
3564 "count %u > recvd %u\n", len, recvd); 3574 "count %u > recvd %u\n", len, recvd);
3565 return -EIO; 3575 return -EIO;
3566 } 3576 }
@@ -3643,7 +3653,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
3643 hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; 3653 hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
3644 recvd = req->rq_rcv_buf.len - hdrlen; 3654 recvd = req->rq_rcv_buf.len - hdrlen;
3645 if (attrlen > recvd) { 3655 if (attrlen > recvd) {
3646 printk(KERN_WARNING "NFS: server cheating in getattr" 3656 dprintk("NFS: server cheating in getattr"
3647 " acl reply: attrlen %u > recvd %u\n", 3657 " acl reply: attrlen %u > recvd %u\n",
3648 attrlen, recvd); 3658 attrlen, recvd);
3649 return -EINVAL; 3659 return -EINVAL;
@@ -3688,8 +3698,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3688 READ_BUF(8); 3698 READ_BUF(8);
3689 READ32(opnum); 3699 READ32(opnum);
3690 if (opnum != OP_SETCLIENTID) { 3700 if (opnum != OP_SETCLIENTID) {
3691 printk(KERN_NOTICE 3701 dprintk("nfs: decode_setclientid: Server returned operation"
3692 "nfs4_decode_setclientid: Server returned operation"
3693 " %d\n", opnum); 3702 " %d\n", opnum);
3694 return -EIO; 3703 return -EIO;
3695 } 3704 }
@@ -3783,8 +3792,13 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
3783 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3792 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3784 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3793 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3785 goto out; 3794 goto out;
3786 if ((status = decode_putfh(&xdr)) == 0) 3795 status = decode_putfh(&xdr);
3787 status = decode_access(&xdr, res); 3796 if (status != 0)
3797 goto out;
3798 status = decode_access(&xdr, res);
3799 if (status != 0)
3800 goto out;
3801 decode_getfattr(&xdr, res->fattr, res->server);
3788out: 3802out:
3789 return status; 3803 return status;
3790} 3804}
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 3490322d1145..e87b44ee9ac9 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -76,6 +76,7 @@
76#include <linux/fs.h> 76#include <linux/fs.h>
77#include <linux/init.h> 77#include <linux/init.h>
78#include <linux/sunrpc/clnt.h> 78#include <linux/sunrpc/clnt.h>
79#include <linux/sunrpc/xprtsock.h>
79#include <linux/nfs.h> 80#include <linux/nfs.h>
80#include <linux/nfs_fs.h> 81#include <linux/nfs_fs.h>
81#include <linux/nfs_mount.h> 82#include <linux/nfs_mount.h>
@@ -491,7 +492,7 @@ static int __init root_nfs_get_handle(void)
491 struct sockaddr_in sin; 492 struct sockaddr_in sin;
492 int status; 493 int status;
493 int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? 494 int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
494 IPPROTO_TCP : IPPROTO_UDP; 495 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP;
495 int version = (nfs_data.flags & NFS_MOUNT_VER3) ? 496 int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
496 NFS_MNT3_VERSION : NFS_MNT_VERSION; 497 NFS_MNT3_VERSION : NFS_MNT_VERSION;
497 498
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 845cdde1d8b7..97669ed05500 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -476,6 +476,8 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
476 dprintk("NFS call readdir %d\n", (unsigned int)cookie); 476 dprintk("NFS call readdir %d\n", (unsigned int)cookie);
477 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 477 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
478 478
479 nfs_invalidate_atime(dir);
480
479 dprintk("NFS reply readdir: %d\n", status); 481 dprintk("NFS reply readdir: %d\n", status);
480 return status; 482 return status;
481} 483}
@@ -550,6 +552,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
550 552
551static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) 553static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
552{ 554{
555 nfs_invalidate_atime(data->inode);
553 if (task->tk_status >= 0) { 556 if (task->tk_status >= 0) {
554 nfs_refresh_inode(data->inode, data->res.fattr); 557 nfs_refresh_inode(data->inode, data->res.fattr);
555 /* Emulate the eof flag, which isn't normally needed in NFSv2 558 /* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -576,7 +579,7 @@ static void nfs_proc_read_setup(struct nfs_read_data *data)
576static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) 579static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
577{ 580{
578 if (task->tk_status >= 0) 581 if (task->tk_status >= 0)
579 nfs_post_op_update_inode(data->inode, data->res.fattr); 582 nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
580 return 0; 583 return 0;
581} 584}
582 585
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 19e05633f4e3..4587a86adaac 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -341,9 +341,6 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
341 set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); 341 set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
342 nfs_mark_for_revalidate(data->inode); 342 nfs_mark_for_revalidate(data->inode);
343 } 343 }
344 spin_lock(&data->inode->i_lock);
345 NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
346 spin_unlock(&data->inode->i_lock);
347 return 0; 344 return 0;
348} 345}
349 346
@@ -497,8 +494,7 @@ int nfs_readpage(struct file *file, struct page *page)
497 if (ctx == NULL) 494 if (ctx == NULL)
498 goto out_unlock; 495 goto out_unlock;
499 } else 496 } else
500 ctx = get_nfs_open_context((struct nfs_open_context *) 497 ctx = get_nfs_open_context(nfs_file_open_context(file));
501 file->private_data);
502 498
503 error = nfs_readpage_async(ctx, inode, page); 499 error = nfs_readpage_async(ctx, inode, page);
504 500
@@ -576,8 +572,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
576 if (desc.ctx == NULL) 572 if (desc.ctx == NULL)
577 return -EBADF; 573 return -EBADF;
578 } else 574 } else
579 desc.ctx = get_nfs_open_context((struct nfs_open_context *) 575 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
580 filp->private_data);
581 if (rsize < PAGE_CACHE_SIZE) 576 if (rsize < PAGE_CACHE_SIZE)
582 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 577 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
583 else 578 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b878528b64c1..fa517ae9207f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -33,6 +33,8 @@
33#include <linux/sunrpc/clnt.h> 33#include <linux/sunrpc/clnt.h>
34#include <linux/sunrpc/stats.h> 34#include <linux/sunrpc/stats.h>
35#include <linux/sunrpc/metrics.h> 35#include <linux/sunrpc/metrics.h>
36#include <linux/sunrpc/xprtsock.h>
37#include <linux/sunrpc/xprtrdma.h>
36#include <linux/nfs_fs.h> 38#include <linux/nfs_fs.h>
37#include <linux/nfs_mount.h> 39#include <linux/nfs_mount.h>
38#include <linux/nfs4_mount.h> 40#include <linux/nfs4_mount.h>
@@ -58,36 +60,6 @@
58 60
59#define NFSDBG_FACILITY NFSDBG_VFS 61#define NFSDBG_FACILITY NFSDBG_VFS
60 62
61
62struct nfs_parsed_mount_data {
63 int flags;
64 int rsize, wsize;
65 int timeo, retrans;
66 int acregmin, acregmax,
67 acdirmin, acdirmax;
68 int namlen;
69 unsigned int bsize;
70 unsigned int auth_flavor_len;
71 rpc_authflavor_t auth_flavors[1];
72 char *client_address;
73
74 struct {
75 struct sockaddr_in address;
76 unsigned int program;
77 unsigned int version;
78 unsigned short port;
79 int protocol;
80 } mount_server;
81
82 struct {
83 struct sockaddr_in address;
84 char *hostname;
85 char *export_path;
86 unsigned int program;
87 int protocol;
88 } nfs_server;
89};
90
91enum { 63enum {
92 /* Mount options that take no arguments */ 64 /* Mount options that take no arguments */
93 Opt_soft, Opt_hard, 65 Opt_soft, Opt_hard,
@@ -97,7 +69,7 @@ enum {
97 Opt_ac, Opt_noac, 69 Opt_ac, Opt_noac,
98 Opt_lock, Opt_nolock, 70 Opt_lock, Opt_nolock,
99 Opt_v2, Opt_v3, 71 Opt_v2, Opt_v3,
100 Opt_udp, Opt_tcp, 72 Opt_udp, Opt_tcp, Opt_rdma,
101 Opt_acl, Opt_noacl, 73 Opt_acl, Opt_noacl,
102 Opt_rdirplus, Opt_nordirplus, 74 Opt_rdirplus, Opt_nordirplus,
103 Opt_sharecache, Opt_nosharecache, 75 Opt_sharecache, Opt_nosharecache,
@@ -116,7 +88,7 @@ enum {
116 88
117 /* Mount options that take string arguments */ 89 /* Mount options that take string arguments */
118 Opt_sec, Opt_proto, Opt_mountproto, 90 Opt_sec, Opt_proto, Opt_mountproto,
119 Opt_addr, Opt_mounthost, Opt_clientaddr, 91 Opt_addr, Opt_mountaddr, Opt_clientaddr,
120 92
121 /* Mount options that are ignored */ 93 /* Mount options that are ignored */
122 Opt_userspace, Opt_deprecated, 94 Opt_userspace, Opt_deprecated,
@@ -143,6 +115,7 @@ static match_table_t nfs_mount_option_tokens = {
143 { Opt_v3, "v3" }, 115 { Opt_v3, "v3" },
144 { Opt_udp, "udp" }, 116 { Opt_udp, "udp" },
145 { Opt_tcp, "tcp" }, 117 { Opt_tcp, "tcp" },
118 { Opt_rdma, "rdma" },
146 { Opt_acl, "acl" }, 119 { Opt_acl, "acl" },
147 { Opt_noacl, "noacl" }, 120 { Opt_noacl, "noacl" },
148 { Opt_rdirplus, "rdirplus" }, 121 { Opt_rdirplus, "rdirplus" },
@@ -175,13 +148,14 @@ static match_table_t nfs_mount_option_tokens = {
175 { Opt_mountproto, "mountproto=%s" }, 148 { Opt_mountproto, "mountproto=%s" },
176 { Opt_addr, "addr=%s" }, 149 { Opt_addr, "addr=%s" },
177 { Opt_clientaddr, "clientaddr=%s" }, 150 { Opt_clientaddr, "clientaddr=%s" },
178 { Opt_mounthost, "mounthost=%s" }, 151 { Opt_userspace, "mounthost=%s" },
152 { Opt_mountaddr, "mountaddr=%s" },
179 153
180 { Opt_err, NULL } 154 { Opt_err, NULL }
181}; 155};
182 156
183enum { 157enum {
184 Opt_xprt_udp, Opt_xprt_tcp, 158 Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma,
185 159
186 Opt_xprt_err 160 Opt_xprt_err
187}; 161};
@@ -189,6 +163,7 @@ enum {
189static match_table_t nfs_xprt_protocol_tokens = { 163static match_table_t nfs_xprt_protocol_tokens = {
190 { Opt_xprt_udp, "udp" }, 164 { Opt_xprt_udp, "udp" },
191 { Opt_xprt_tcp, "tcp" }, 165 { Opt_xprt_tcp, "tcp" },
166 { Opt_xprt_rdma, "rdma" },
192 167
193 { Opt_xprt_err, NULL } 168 { Opt_xprt_err, NULL }
194}; 169};
@@ -449,7 +424,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
449 const char *nostr; 424 const char *nostr;
450 } nfs_info[] = { 425 } nfs_info[] = {
451 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 426 { NFS_MOUNT_SOFT, ",soft", ",hard" },
452 { NFS_MOUNT_INTR, ",intr", "" }, 427 { NFS_MOUNT_INTR, ",intr", ",nointr" },
453 { NFS_MOUNT_NOCTO, ",nocto", "" }, 428 { NFS_MOUNT_NOCTO, ",nocto", "" },
454 { NFS_MOUNT_NOAC, ",noac", "" }, 429 { NFS_MOUNT_NOAC, ",noac", "" },
455 { NFS_MOUNT_NONLM, ",nolock", "" }, 430 { NFS_MOUNT_NONLM, ",nolock", "" },
@@ -460,8 +435,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
460 }; 435 };
461 const struct proc_nfs_info *nfs_infop; 436 const struct proc_nfs_info *nfs_infop;
462 struct nfs_client *clp = nfss->nfs_client; 437 struct nfs_client *clp = nfss->nfs_client;
463 char buf[12];
464 const char *proto;
465 438
466 seq_printf(m, ",vers=%d", clp->rpc_ops->version); 439 seq_printf(m, ",vers=%d", clp->rpc_ops->version);
467 seq_printf(m, ",rsize=%d", nfss->rsize); 440 seq_printf(m, ",rsize=%d", nfss->rsize);
@@ -480,18 +453,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
480 else 453 else
481 seq_puts(m, nfs_infop->nostr); 454 seq_puts(m, nfs_infop->nostr);
482 } 455 }
483 switch (nfss->client->cl_xprt->prot) { 456 seq_printf(m, ",proto=%s",
484 case IPPROTO_TCP: 457 rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
485 proto = "tcp";
486 break;
487 case IPPROTO_UDP:
488 proto = "udp";
489 break;
490 default:
491 snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
492 proto = buf;
493 }
494 seq_printf(m, ",proto=%s", proto);
495 seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); 458 seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
496 seq_printf(m, ",retrans=%u", clp->retrans_count); 459 seq_printf(m, ",retrans=%u", clp->retrans_count);
497 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); 460 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
@@ -506,8 +469,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
506 469
507 nfs_show_mount_options(m, nfss, 0); 470 nfs_show_mount_options(m, nfss, 0);
508 471
509 seq_puts(m, ",addr="); 472 seq_printf(m, ",addr="NIPQUAD_FMT,
510 seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); 473 NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
511 474
512 return 0; 475 return 0;
513} 476}
@@ -698,13 +661,19 @@ static int nfs_parse_mount_options(char *raw,
698 break; 661 break;
699 case Opt_udp: 662 case Opt_udp:
700 mnt->flags &= ~NFS_MOUNT_TCP; 663 mnt->flags &= ~NFS_MOUNT_TCP;
701 mnt->nfs_server.protocol = IPPROTO_UDP; 664 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
702 mnt->timeo = 7; 665 mnt->timeo = 7;
703 mnt->retrans = 5; 666 mnt->retrans = 5;
704 break; 667 break;
705 case Opt_tcp: 668 case Opt_tcp:
706 mnt->flags |= NFS_MOUNT_TCP; 669 mnt->flags |= NFS_MOUNT_TCP;
707 mnt->nfs_server.protocol = IPPROTO_TCP; 670 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
671 mnt->timeo = 600;
672 mnt->retrans = 2;
673 break;
674 case Opt_rdma:
675 mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
676 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
708 mnt->timeo = 600; 677 mnt->timeo = 600;
709 mnt->retrans = 2; 678 mnt->retrans = 2;
710 break; 679 break;
@@ -913,13 +882,20 @@ static int nfs_parse_mount_options(char *raw,
913 switch (token) { 882 switch (token) {
914 case Opt_xprt_udp: 883 case Opt_xprt_udp:
915 mnt->flags &= ~NFS_MOUNT_TCP; 884 mnt->flags &= ~NFS_MOUNT_TCP;
916 mnt->nfs_server.protocol = IPPROTO_UDP; 885 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
917 mnt->timeo = 7; 886 mnt->timeo = 7;
918 mnt->retrans = 5; 887 mnt->retrans = 5;
919 break; 888 break;
920 case Opt_xprt_tcp: 889 case Opt_xprt_tcp:
921 mnt->flags |= NFS_MOUNT_TCP; 890 mnt->flags |= NFS_MOUNT_TCP;
922 mnt->nfs_server.protocol = IPPROTO_TCP; 891 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
892 mnt->timeo = 600;
893 mnt->retrans = 2;
894 break;
895 case Opt_xprt_rdma:
896 /* vector side protocols to TCP */
897 mnt->flags |= NFS_MOUNT_TCP;
898 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
923 mnt->timeo = 600; 899 mnt->timeo = 600;
924 mnt->retrans = 2; 900 mnt->retrans = 2;
925 break; 901 break;
@@ -937,11 +913,12 @@ static int nfs_parse_mount_options(char *raw,
937 913
938 switch (token) { 914 switch (token) {
939 case Opt_xprt_udp: 915 case Opt_xprt_udp:
940 mnt->mount_server.protocol = IPPROTO_UDP; 916 mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
941 break; 917 break;
942 case Opt_xprt_tcp: 918 case Opt_xprt_tcp:
943 mnt->mount_server.protocol = IPPROTO_TCP; 919 mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
944 break; 920 break;
921 case Opt_xprt_rdma: /* not used for side protocols */
945 default: 922 default:
946 goto out_unrec_xprt; 923 goto out_unrec_xprt;
947 } 924 }
@@ -961,7 +938,7 @@ static int nfs_parse_mount_options(char *raw,
961 goto out_nomem; 938 goto out_nomem;
962 mnt->client_address = string; 939 mnt->client_address = string;
963 break; 940 break;
964 case Opt_mounthost: 941 case Opt_mountaddr:
965 string = match_strdup(args); 942 string = match_strdup(args);
966 if (string == NULL) 943 if (string == NULL)
967 goto out_nomem; 944 goto out_nomem;
@@ -1027,16 +1004,10 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1027 sin = args->mount_server.address; 1004 sin = args->mount_server.address;
1028 else 1005 else
1029 sin = args->nfs_server.address; 1006 sin = args->nfs_server.address;
1030 if (args->mount_server.port == 0) { 1007 /*
1031 status = rpcb_getport_sync(&sin, 1008 * autobind will be used if mount_server.port == 0
1032 args->mount_server.program, 1009 */
1033 args->mount_server.version, 1010 sin.sin_port = htons(args->mount_server.port);
1034 args->mount_server.protocol);
1035 if (status < 0)
1036 goto out_err;
1037 sin.sin_port = htons(status);
1038 } else
1039 sin.sin_port = htons(args->mount_server.port);
1040 1011
1041 /* 1012 /*
1042 * Now ask the mount server to map our export path 1013 * Now ask the mount server to map our export path
@@ -1049,14 +1020,11 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1049 args->mount_server.version, 1020 args->mount_server.version,
1050 args->mount_server.protocol, 1021 args->mount_server.protocol,
1051 root_fh); 1022 root_fh);
1052 if (status < 0) 1023 if (status == 0)
1053 goto out_err; 1024 return 0;
1054
1055 return status;
1056 1025
1057out_err: 1026 dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
1058 dfprintk(MOUNT, "NFS: unable to contact server on host " 1027 ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
1059 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
1060 return status; 1028 return status;
1061} 1029}
1062 1030
@@ -1079,15 +1047,31 @@ out_err:
1079 * XXX: as far as I can tell, changing the NFS program number is not 1047 * XXX: as far as I can tell, changing the NFS program number is not
1080 * supported in the NFS client. 1048 * supported in the NFS client.
1081 */ 1049 */
1082static int nfs_validate_mount_data(struct nfs_mount_data **options, 1050static int nfs_validate_mount_data(void *options,
1051 struct nfs_parsed_mount_data *args,
1083 struct nfs_fh *mntfh, 1052 struct nfs_fh *mntfh,
1084 const char *dev_name) 1053 const char *dev_name)
1085{ 1054{
1086 struct nfs_mount_data *data = *options; 1055 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1087 1056
1088 if (data == NULL) 1057 if (data == NULL)
1089 goto out_no_data; 1058 goto out_no_data;
1090 1059
1060 memset(args, 0, sizeof(*args));
1061 args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP);
1062 args->rsize = NFS_MAX_FILE_IO_SIZE;
1063 args->wsize = NFS_MAX_FILE_IO_SIZE;
1064 args->timeo = 600;
1065 args->retrans = 2;
1066 args->acregmin = 3;
1067 args->acregmax = 60;
1068 args->acdirmin = 30;
1069 args->acdirmax = 60;
1070 args->mount_server.protocol = XPRT_TRANSPORT_UDP;
1071 args->mount_server.program = NFS_MNT_PROGRAM;
1072 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1073 args->nfs_server.program = NFS_PROGRAM;
1074
1091 switch (data->version) { 1075 switch (data->version) {
1092 case 1: 1076 case 1:
1093 data->namlen = 0; 1077 data->namlen = 0;
@@ -1116,92 +1100,73 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options,
1116 if (mntfh->size < sizeof(mntfh->data)) 1100 if (mntfh->size < sizeof(mntfh->data))
1117 memset(mntfh->data + mntfh->size, 0, 1101 memset(mntfh->data + mntfh->size, 0,
1118 sizeof(mntfh->data) - mntfh->size); 1102 sizeof(mntfh->data) - mntfh->size);
1103
1104 if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
1105 goto out_no_address;
1106
1107 /*
1108 * Translate to nfs_parsed_mount_data, which nfs_fill_super
1109 * can deal with.
1110 */
1111 args->flags = data->flags;
1112 args->rsize = data->rsize;
1113 args->wsize = data->wsize;
1114 args->flags = data->flags;
1115 args->timeo = data->timeo;
1116 args->retrans = data->retrans;
1117 args->acregmin = data->acregmin;
1118 args->acregmax = data->acregmax;
1119 args->acdirmin = data->acdirmin;
1120 args->acdirmax = data->acdirmax;
1121 args->nfs_server.address = data->addr;
1122 if (!(data->flags & NFS_MOUNT_TCP))
1123 args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1124 /* N.B. caller will free nfs_server.hostname in all cases */
1125 args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
1126 args->namlen = data->namlen;
1127 args->bsize = data->bsize;
1128 args->auth_flavors[0] = data->pseudoflavor;
1119 break; 1129 break;
1120 default: { 1130 default: {
1121 unsigned int len; 1131 unsigned int len;
1122 char *c; 1132 char *c;
1123 int status; 1133 int status;
1124 struct nfs_parsed_mount_data args = {
1125 .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
1126 .rsize = NFS_MAX_FILE_IO_SIZE,
1127 .wsize = NFS_MAX_FILE_IO_SIZE,
1128 .timeo = 600,
1129 .retrans = 2,
1130 .acregmin = 3,
1131 .acregmax = 60,
1132 .acdirmin = 30,
1133 .acdirmax = 60,
1134 .mount_server.protocol = IPPROTO_UDP,
1135 .mount_server.program = NFS_MNT_PROGRAM,
1136 .nfs_server.protocol = IPPROTO_TCP,
1137 .nfs_server.program = NFS_PROGRAM,
1138 };
1139
1140 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1141 return -EINVAL;
1142 1134
1143 data = kzalloc(sizeof(*data), GFP_KERNEL); 1135 if (nfs_parse_mount_options((char *)options, args) == 0)
1144 if (data == NULL) 1136 return -EINVAL;
1145 return -ENOMEM;
1146 1137
1147 /* 1138 if (!nfs_verify_server_address((struct sockaddr *)
1148 * NB: after this point, caller will free "data" 1139 &args->nfs_server.address))
1149 * if we return an error 1140 goto out_no_address;
1150 */
1151 *options = data;
1152 1141
1153 c = strchr(dev_name, ':'); 1142 c = strchr(dev_name, ':');
1154 if (c == NULL) 1143 if (c == NULL)
1155 return -EINVAL; 1144 return -EINVAL;
1156 len = c - dev_name; 1145 len = c - dev_name;
1157 if (len > sizeof(data->hostname)) 1146 /* N.B. caller will free nfs_server.hostname in all cases */
1158 return -ENAMETOOLONG; 1147 args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
1159 strncpy(data->hostname, dev_name, len);
1160 args.nfs_server.hostname = data->hostname;
1161 1148
1162 c++; 1149 c++;
1163 if (strlen(c) > NFS_MAXPATHLEN) 1150 if (strlen(c) > NFS_MAXPATHLEN)
1164 return -ENAMETOOLONG; 1151 return -ENAMETOOLONG;
1165 args.nfs_server.export_path = c; 1152 args->nfs_server.export_path = c;
1166 1153
1167 status = nfs_try_mount(&args, mntfh); 1154 status = nfs_try_mount(args, mntfh);
1168 if (status) 1155 if (status)
1169 return status; 1156 return status;
1170 1157
1171 /*
1172 * Translate to nfs_mount_data, which nfs_fill_super
1173 * can deal with.
1174 */
1175 data->version = 6;
1176 data->flags = args.flags;
1177 data->rsize = args.rsize;
1178 data->wsize = args.wsize;
1179 data->timeo = args.timeo;
1180 data->retrans = args.retrans;
1181 data->acregmin = args.acregmin;
1182 data->acregmax = args.acregmax;
1183 data->acdirmin = args.acdirmin;
1184 data->acdirmax = args.acdirmax;
1185 data->addr = args.nfs_server.address;
1186 data->namlen = args.namlen;
1187 data->bsize = args.bsize;
1188 data->pseudoflavor = args.auth_flavors[0];
1189
1190 break; 1158 break;
1191 } 1159 }
1192 } 1160 }
1193 1161
1194 if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) 1162 if (!(args->flags & NFS_MOUNT_SECFLAVOUR))
1195 data->pseudoflavor = RPC_AUTH_UNIX; 1163 args->auth_flavors[0] = RPC_AUTH_UNIX;
1196 1164
1197#ifndef CONFIG_NFS_V3 1165#ifndef CONFIG_NFS_V3
1198 if (data->flags & NFS_MOUNT_VER3) 1166 if (args->flags & NFS_MOUNT_VER3)
1199 goto out_v3_not_compiled; 1167 goto out_v3_not_compiled;
1200#endif /* !CONFIG_NFS_V3 */ 1168#endif /* !CONFIG_NFS_V3 */
1201 1169
1202 if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
1203 goto out_no_address;
1204
1205 return 0; 1170 return 0;
1206 1171
1207out_no_data: 1172out_no_data:
@@ -1258,7 +1223,8 @@ static inline void nfs_initialise_sb(struct super_block *sb)
1258/* 1223/*
1259 * Finish setting up an NFS2/3 superblock 1224 * Finish setting up an NFS2/3 superblock
1260 */ 1225 */
1261static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) 1226static void nfs_fill_super(struct super_block *sb,
1227 struct nfs_parsed_mount_data *data)
1262{ 1228{
1263 struct nfs_server *server = NFS_SB(sb); 1229 struct nfs_server *server = NFS_SB(sb);
1264 1230
@@ -1379,7 +1345,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1379 struct nfs_server *server = NULL; 1345 struct nfs_server *server = NULL;
1380 struct super_block *s; 1346 struct super_block *s;
1381 struct nfs_fh mntfh; 1347 struct nfs_fh mntfh;
1382 struct nfs_mount_data *data = raw_data; 1348 struct nfs_parsed_mount_data data;
1383 struct dentry *mntroot; 1349 struct dentry *mntroot;
1384 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 1350 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1385 struct nfs_sb_mountdata sb_mntdata = { 1351 struct nfs_sb_mountdata sb_mntdata = {
@@ -1388,12 +1354,12 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1388 int error; 1354 int error;
1389 1355
1390 /* Validate the mount data */ 1356 /* Validate the mount data */
1391 error = nfs_validate_mount_data(&data, &mntfh, dev_name); 1357 error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
1392 if (error < 0) 1358 if (error < 0)
1393 goto out; 1359 goto out;
1394 1360
1395 /* Get a volume representation */ 1361 /* Get a volume representation */
1396 server = nfs_create_server(data, &mntfh); 1362 server = nfs_create_server(&data, &mntfh);
1397 if (IS_ERR(server)) { 1363 if (IS_ERR(server)) {
1398 error = PTR_ERR(server); 1364 error = PTR_ERR(server);
1399 goto out; 1365 goto out;
@@ -1417,7 +1383,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1417 1383
1418 if (!s->s_root) { 1384 if (!s->s_root) {
1419 /* initial superblock/root creation */ 1385 /* initial superblock/root creation */
1420 nfs_fill_super(s, data); 1386 nfs_fill_super(s, &data);
1421 } 1387 }
1422 1388
1423 mntroot = nfs_get_root(s, &mntfh); 1389 mntroot = nfs_get_root(s, &mntfh);
@@ -1432,8 +1398,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1432 error = 0; 1398 error = 0;
1433 1399
1434out: 1400out:
1435 if (data != raw_data) 1401 kfree(data.nfs_server.hostname);
1436 kfree(data);
1437 return error; 1402 return error;
1438 1403
1439out_err_nosb: 1404out_err_nosb:
@@ -1559,38 +1524,49 @@ static void nfs4_fill_super(struct super_block *sb)
1559/* 1524/*
1560 * Validate NFSv4 mount options 1525 * Validate NFSv4 mount options
1561 */ 1526 */
1562static int nfs4_validate_mount_data(struct nfs4_mount_data **options, 1527static int nfs4_validate_mount_data(void *options,
1563 const char *dev_name, 1528 struct nfs_parsed_mount_data *args,
1564 struct sockaddr_in *addr, 1529 const char *dev_name)
1565 rpc_authflavor_t *authflavour,
1566 char **hostname,
1567 char **mntpath,
1568 char **ip_addr)
1569{ 1530{
1570 struct nfs4_mount_data *data = *options; 1531 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
1571 char *c; 1532 char *c;
1572 1533
1573 if (data == NULL) 1534 if (data == NULL)
1574 goto out_no_data; 1535 goto out_no_data;
1575 1536
1537 memset(args, 0, sizeof(*args));
1538 args->rsize = NFS_MAX_FILE_IO_SIZE;
1539 args->wsize = NFS_MAX_FILE_IO_SIZE;
1540 args->timeo = 600;
1541 args->retrans = 2;
1542 args->acregmin = 3;
1543 args->acregmax = 60;
1544 args->acdirmin = 30;
1545 args->acdirmax = 60;
1546 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1547
1576 switch (data->version) { 1548 switch (data->version) {
1577 case 1: 1549 case 1:
1578 if (data->host_addrlen != sizeof(*addr)) 1550 if (data->host_addrlen != sizeof(args->nfs_server.address))
1579 goto out_no_address; 1551 goto out_no_address;
1580 if (copy_from_user(addr, data->host_addr, sizeof(*addr))) 1552 if (copy_from_user(&args->nfs_server.address,
1553 data->host_addr,
1554 sizeof(args->nfs_server.address)))
1581 return -EFAULT; 1555 return -EFAULT;
1582 if (addr->sin_port == 0) 1556 if (args->nfs_server.address.sin_port == 0)
1583 addr->sin_port = htons(NFS_PORT); 1557 args->nfs_server.address.sin_port = htons(NFS_PORT);
1584 if (!nfs_verify_server_address((struct sockaddr *) addr)) 1558 if (!nfs_verify_server_address((struct sockaddr *)
1559 &args->nfs_server.address))
1585 goto out_no_address; 1560 goto out_no_address;
1586 1561
1587 switch (data->auth_flavourlen) { 1562 switch (data->auth_flavourlen) {
1588 case 0: 1563 case 0:
1589 *authflavour = RPC_AUTH_UNIX; 1564 args->auth_flavors[0] = RPC_AUTH_UNIX;
1590 break; 1565 break;
1591 case 1: 1566 case 1:
1592 if (copy_from_user(authflavour, data->auth_flavours, 1567 if (copy_from_user(&args->auth_flavors[0],
1593 sizeof(*authflavour))) 1568 data->auth_flavours,
1569 sizeof(args->auth_flavors[0])))
1594 return -EFAULT; 1570 return -EFAULT;
1595 break; 1571 break;
1596 default: 1572 default:
@@ -1600,75 +1576,57 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
1600 c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); 1576 c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
1601 if (IS_ERR(c)) 1577 if (IS_ERR(c))
1602 return PTR_ERR(c); 1578 return PTR_ERR(c);
1603 *hostname = c; 1579 args->nfs_server.hostname = c;
1604 1580
1605 c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); 1581 c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
1606 if (IS_ERR(c)) 1582 if (IS_ERR(c))
1607 return PTR_ERR(c); 1583 return PTR_ERR(c);
1608 *mntpath = c; 1584 args->nfs_server.export_path = c;
1609 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); 1585 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
1610 1586
1611 c = strndup_user(data->client_addr.data, 16); 1587 c = strndup_user(data->client_addr.data, 16);
1612 if (IS_ERR(c)) 1588 if (IS_ERR(c))
1613 return PTR_ERR(c); 1589 return PTR_ERR(c);
1614 *ip_addr = c; 1590 args->client_address = c;
1591
1592 /*
1593 * Translate to nfs_parsed_mount_data, which nfs4_fill_super
1594 * can deal with.
1595 */
1596
1597 args->flags = data->flags & NFS4_MOUNT_FLAGMASK;
1598 args->rsize = data->rsize;
1599 args->wsize = data->wsize;
1600 args->timeo = data->timeo;
1601 args->retrans = data->retrans;
1602 args->acregmin = data->acregmin;
1603 args->acregmax = data->acregmax;
1604 args->acdirmin = data->acdirmin;
1605 args->acdirmax = data->acdirmax;
1606 args->nfs_server.protocol = data->proto;
1615 1607
1616 break; 1608 break;
1617 default: { 1609 default: {
1618 unsigned int len; 1610 unsigned int len;
1619 struct nfs_parsed_mount_data args = { 1611
1620 .rsize = NFS_MAX_FILE_IO_SIZE, 1612 if (nfs_parse_mount_options((char *)options, args) == 0)
1621 .wsize = NFS_MAX_FILE_IO_SIZE,
1622 .timeo = 600,
1623 .retrans = 2,
1624 .acregmin = 3,
1625 .acregmax = 60,
1626 .acdirmin = 30,
1627 .acdirmax = 60,
1628 .nfs_server.protocol = IPPROTO_TCP,
1629 };
1630
1631 if (nfs_parse_mount_options((char *) *options, &args) == 0)
1632 return -EINVAL; 1613 return -EINVAL;
1633 1614
1634 if (!nfs_verify_server_address((struct sockaddr *) 1615 if (!nfs_verify_server_address((struct sockaddr *)
1635 &args.nfs_server.address)) 1616 &args->nfs_server.address))
1636 return -EINVAL; 1617 return -EINVAL;
1637 *addr = args.nfs_server.address;
1638 1618
1639 switch (args.auth_flavor_len) { 1619 switch (args->auth_flavor_len) {
1640 case 0: 1620 case 0:
1641 *authflavour = RPC_AUTH_UNIX; 1621 args->auth_flavors[0] = RPC_AUTH_UNIX;
1642 break; 1622 break;
1643 case 1: 1623 case 1:
1644 *authflavour = (rpc_authflavor_t) args.auth_flavors[0];
1645 break; 1624 break;
1646 default: 1625 default:
1647 goto out_inval_auth; 1626 goto out_inval_auth;
1648 } 1627 }
1649 1628
1650 /* 1629 /*
1651 * Translate to nfs4_mount_data, which nfs4_fill_super
1652 * can deal with.
1653 */
1654 data = kzalloc(sizeof(*data), GFP_KERNEL);
1655 if (data == NULL)
1656 return -ENOMEM;
1657 *options = data;
1658
1659 data->version = 1;
1660 data->flags = args.flags & NFS4_MOUNT_FLAGMASK;
1661 data->rsize = args.rsize;
1662 data->wsize = args.wsize;
1663 data->timeo = args.timeo;
1664 data->retrans = args.retrans;
1665 data->acregmin = args.acregmin;
1666 data->acregmax = args.acregmax;
1667 data->acdirmin = args.acdirmin;
1668 data->acdirmax = args.acdirmax;
1669 data->proto = args.nfs_server.protocol;
1670
1671 /*
1672 * Split "dev_name" into "hostname:mntpath". 1630 * Split "dev_name" into "hostname:mntpath".
1673 */ 1631 */
1674 c = strchr(dev_name, ':'); 1632 c = strchr(dev_name, ':');
@@ -1678,27 +1636,25 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
1678 len = c - dev_name; 1636 len = c - dev_name;
1679 if (len > NFS4_MAXNAMLEN) 1637 if (len > NFS4_MAXNAMLEN)
1680 return -ENAMETOOLONG; 1638 return -ENAMETOOLONG;
1681 *hostname = kzalloc(len, GFP_KERNEL); 1639 args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
1682 if (*hostname == NULL) 1640 if (args->nfs_server.hostname == NULL)
1683 return -ENOMEM; 1641 return -ENOMEM;
1684 strncpy(*hostname, dev_name, len - 1); 1642 strncpy(args->nfs_server.hostname, dev_name, len - 1);
1685 1643
1686 c++; /* step over the ':' */ 1644 c++; /* step over the ':' */
1687 len = strlen(c); 1645 len = strlen(c);
1688 if (len > NFS4_MAXPATHLEN) 1646 if (len > NFS4_MAXPATHLEN)
1689 return -ENAMETOOLONG; 1647 return -ENAMETOOLONG;
1690 *mntpath = kzalloc(len + 1, GFP_KERNEL); 1648 args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
1691 if (*mntpath == NULL) 1649 if (args->nfs_server.export_path == NULL)
1692 return -ENOMEM; 1650 return -ENOMEM;
1693 strncpy(*mntpath, c, len); 1651 strncpy(args->nfs_server.export_path, c, len);
1694 1652
1695 dprintk("MNTPATH: %s\n", *mntpath); 1653 dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
1696 1654
1697 if (args.client_address == NULL) 1655 if (args->client_address == NULL)
1698 goto out_no_client_address; 1656 goto out_no_client_address;
1699 1657
1700 *ip_addr = args.client_address;
1701
1702 break; 1658 break;
1703 } 1659 }
1704 } 1660 }
@@ -1729,14 +1685,11 @@ out_no_client_address:
1729static int nfs4_get_sb(struct file_system_type *fs_type, 1685static int nfs4_get_sb(struct file_system_type *fs_type,
1730 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1686 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1731{ 1687{
1732 struct nfs4_mount_data *data = raw_data; 1688 struct nfs_parsed_mount_data data;
1733 struct super_block *s; 1689 struct super_block *s;
1734 struct nfs_server *server; 1690 struct nfs_server *server;
1735 struct sockaddr_in addr;
1736 rpc_authflavor_t authflavour;
1737 struct nfs_fh mntfh; 1691 struct nfs_fh mntfh;
1738 struct dentry *mntroot; 1692 struct dentry *mntroot;
1739 char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
1740 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 1693 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1741 struct nfs_sb_mountdata sb_mntdata = { 1694 struct nfs_sb_mountdata sb_mntdata = {
1742 .mntflags = flags, 1695 .mntflags = flags,
@@ -1744,14 +1697,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1744 int error; 1697 int error;
1745 1698
1746 /* Validate the mount data */ 1699 /* Validate the mount data */
1747 error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, 1700 error = nfs4_validate_mount_data(raw_data, &data, dev_name);
1748 &hostname, &mntpath, &ip_addr);
1749 if (error < 0) 1701 if (error < 0)
1750 goto out; 1702 goto out;
1751 1703
1752 /* Get a volume representation */ 1704 /* Get a volume representation */
1753 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, 1705 server = nfs4_create_server(&data, &mntfh);
1754 authflavour, &mntfh);
1755 if (IS_ERR(server)) { 1706 if (IS_ERR(server)) {
1756 error = PTR_ERR(server); 1707 error = PTR_ERR(server);
1757 goto out; 1708 goto out;
@@ -1790,9 +1741,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1790 error = 0; 1741 error = 0;
1791 1742
1792out: 1743out:
1793 kfree(ip_addr); 1744 kfree(data.client_address);
1794 kfree(mntpath); 1745 kfree(data.nfs_server.export_path);
1795 kfree(hostname); 1746 kfree(data.nfs_server.hostname);
1796 return error; 1747 return error;
1797 1748
1798out_free: 1749out_free:
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 045ab805c17f..1aed850d18f2 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -66,7 +66,6 @@ static void nfs_async_unlink_init(struct rpc_task *task, void *calldata)
66 .rpc_cred = data->cred, 66 .rpc_cred = data->cred,
67 }; 67 };
68 68
69 nfs_begin_data_update(dir);
70 NFS_PROTO(dir)->unlink_setup(&msg, dir); 69 NFS_PROTO(dir)->unlink_setup(&msg, dir);
71 rpc_call_setup(task, &msg, 0); 70 rpc_call_setup(task, &msg, 0);
72} 71}
@@ -84,8 +83,6 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
84 83
85 if (!NFS_PROTO(dir)->unlink_done(task, dir)) 84 if (!NFS_PROTO(dir)->unlink_done(task, dir))
86 rpc_restart_call(task); 85 rpc_restart_call(task);
87 else
88 nfs_end_data_update(dir);
89} 86}
90 87
91/** 88/**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0d7a77cc394b..e2bb66c34406 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata)
110 nfs_writedata_free(wdata); 110 nfs_writedata_free(wdata);
111} 111}
112 112
113static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
114{
115 ctx->error = error;
116 smp_wmb();
117 set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
118}
119
113static struct nfs_page *nfs_page_find_request_locked(struct page *page) 120static struct nfs_page *nfs_page_find_request_locked(struct page *page)
114{ 121{
115 struct nfs_page *req = NULL; 122 struct nfs_page *req = NULL;
@@ -243,10 +250,7 @@ static void nfs_end_page_writeback(struct page *page)
243 250
244/* 251/*
245 * Find an associated nfs write request, and prepare to flush it out 252 * Find an associated nfs write request, and prepare to flush it out
246 * Returns 1 if there was no write request, or if the request was 253 * May return an error if the user signalled nfs_wait_on_request().
247 * already tagged by nfs_set_page_dirty.Returns 0 if the request
248 * was not tagged.
249 * May also return an error if the user signalled nfs_wait_on_request().
250 */ 254 */
251static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 255static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
252 struct page *page) 256 struct page *page)
@@ -261,7 +265,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
261 req = nfs_page_find_request_locked(page); 265 req = nfs_page_find_request_locked(page);
262 if (req == NULL) { 266 if (req == NULL) {
263 spin_unlock(&inode->i_lock); 267 spin_unlock(&inode->i_lock);
264 return 1; 268 return 0;
265 } 269 }
266 if (nfs_lock_request_dontget(req)) 270 if (nfs_lock_request_dontget(req))
267 break; 271 break;
@@ -282,7 +286,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
282 spin_unlock(&inode->i_lock); 286 spin_unlock(&inode->i_lock);
283 nfs_unlock_request(req); 287 nfs_unlock_request(req);
284 nfs_pageio_complete(pgio); 288 nfs_pageio_complete(pgio);
285 return 1; 289 return 0;
286 } 290 }
287 if (nfs_set_page_writeback(page) != 0) { 291 if (nfs_set_page_writeback(page) != 0) {
288 spin_unlock(&inode->i_lock); 292 spin_unlock(&inode->i_lock);
@@ -290,70 +294,56 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
290 } 294 }
291 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, 295 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
292 NFS_PAGE_TAG_LOCKED); 296 NFS_PAGE_TAG_LOCKED);
293 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
294 spin_unlock(&inode->i_lock); 297 spin_unlock(&inode->i_lock);
295 nfs_pageio_add_request(pgio, req); 298 nfs_pageio_add_request(pgio, req);
296 return ret; 299 return 0;
297} 300}
298 301
299/* 302static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
300 * Write an mmapped page to the server.
301 */
302static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
303{ 303{
304 struct nfs_pageio_descriptor mypgio, *pgio;
305 struct nfs_open_context *ctx;
306 struct inode *inode = page->mapping->host; 304 struct inode *inode = page->mapping->host;
307 unsigned offset;
308 int err;
309 305
310 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 306 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
311 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 307 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
312 308
313 if (wbc->for_writepages)
314 pgio = wbc->fs_private;
315 else {
316 nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
317 pgio = &mypgio;
318 }
319
320 nfs_pageio_cond_complete(pgio, page->index); 309 nfs_pageio_cond_complete(pgio, page->index);
310 return nfs_page_async_flush(pgio, page);
311}
321 312
322 err = nfs_page_async_flush(pgio, page); 313/*
323 if (err <= 0) 314 * Write an mmapped page to the server.
324 goto out; 315 */
325 err = 0; 316static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
326 offset = nfs_page_length(page); 317{
327 if (!offset) 318 struct nfs_pageio_descriptor pgio;
328 goto out; 319 int err;
329
330 nfs_pageio_cond_complete(pgio, page->index);
331 320
332 ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); 321 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
333 if (ctx == NULL) { 322 err = nfs_do_writepage(page, wbc, &pgio);
334 err = -EBADF; 323 nfs_pageio_complete(&pgio);
335 goto out; 324 if (err < 0)
336 } 325 return err;
337 err = nfs_writepage_setup(ctx, page, 0, offset); 326 if (pgio.pg_error < 0)
338 put_nfs_open_context(ctx); 327 return pgio.pg_error;
339 if (err != 0) 328 return 0;
340 goto out;
341 err = nfs_page_async_flush(pgio, page);
342 if (err > 0)
343 err = 0;
344out:
345 if (!wbc->for_writepages)
346 nfs_pageio_complete(pgio);
347 return err;
348} 329}
349 330
350int nfs_writepage(struct page *page, struct writeback_control *wbc) 331int nfs_writepage(struct page *page, struct writeback_control *wbc)
351{ 332{
352 int err; 333 int ret;
334
335 ret = nfs_writepage_locked(page, wbc);
336 unlock_page(page);
337 return ret;
338}
339
340static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
341{
342 int ret;
353 343
354 err = nfs_writepage_locked(page, wbc); 344 ret = nfs_do_writepage(page, wbc, data);
355 unlock_page(page); 345 unlock_page(page);
356 return err; 346 return ret;
357} 347}
358 348
359int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 349int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
@@ -365,12 +355,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
365 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 355 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
366 356
367 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); 357 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
368 wbc->fs_private = &pgio; 358 err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
369 err = generic_writepages(mapping, wbc);
370 nfs_pageio_complete(&pgio); 359 nfs_pageio_complete(&pgio);
371 if (err) 360 if (err < 0)
372 return err; 361 return err;
373 if (pgio.pg_error) 362 if (pgio.pg_error < 0)
374 return pgio.pg_error; 363 return pgio.pg_error;
375 return 0; 364 return 0;
376} 365}
@@ -389,14 +378,11 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
389 return error; 378 return error;
390 if (!nfsi->npages) { 379 if (!nfsi->npages) {
391 igrab(inode); 380 igrab(inode);
392 nfs_begin_data_update(inode);
393 if (nfs_have_delegation(inode, FMODE_WRITE)) 381 if (nfs_have_delegation(inode, FMODE_WRITE))
394 nfsi->change_attr++; 382 nfsi->change_attr++;
395 } 383 }
396 SetPagePrivate(req->wb_page); 384 SetPagePrivate(req->wb_page);
397 set_page_private(req->wb_page, (unsigned long)req); 385 set_page_private(req->wb_page, (unsigned long)req);
398 if (PageDirty(req->wb_page))
399 set_bit(PG_NEED_FLUSH, &req->wb_flags);
400 nfsi->npages++; 386 nfsi->npages++;
401 kref_get(&req->wb_kref); 387 kref_get(&req->wb_kref);
402 return 0; 388 return 0;
@@ -416,12 +402,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
416 set_page_private(req->wb_page, 0); 402 set_page_private(req->wb_page, 0);
417 ClearPagePrivate(req->wb_page); 403 ClearPagePrivate(req->wb_page);
418 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 404 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
419 if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags))
420 __set_page_dirty_nobuffers(req->wb_page);
421 nfsi->npages--; 405 nfsi->npages--;
422 if (!nfsi->npages) { 406 if (!nfsi->npages) {
423 spin_unlock(&inode->i_lock); 407 spin_unlock(&inode->i_lock);
424 nfs_end_data_update(inode);
425 iput(inode); 408 iput(inode);
426 } else 409 } else
427 spin_unlock(&inode->i_lock); 410 spin_unlock(&inode->i_lock);
@@ -682,7 +665,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
682 665
683int nfs_flush_incompatible(struct file *file, struct page *page) 666int nfs_flush_incompatible(struct file *file, struct page *page)
684{ 667{
685 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 668 struct nfs_open_context *ctx = nfs_file_open_context(file);
686 struct nfs_page *req; 669 struct nfs_page *req;
687 int do_flush, status; 670 int do_flush, status;
688 /* 671 /*
@@ -716,7 +699,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
716int nfs_updatepage(struct file *file, struct page *page, 699int nfs_updatepage(struct file *file, struct page *page,
717 unsigned int offset, unsigned int count) 700 unsigned int offset, unsigned int count)
718{ 701{
719 struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; 702 struct nfs_open_context *ctx = nfs_file_open_context(file);
720 struct inode *inode = page->mapping->host; 703 struct inode *inode = page->mapping->host;
721 int status = 0; 704 int status = 0;
722 705
@@ -967,7 +950,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
967 950
968 if (task->tk_status < 0) { 951 if (task->tk_status < 0) {
969 nfs_set_pageerror(page); 952 nfs_set_pageerror(page);
970 req->wb_context->error = task->tk_status; 953 nfs_context_set_write_error(req->wb_context, task->tk_status);
971 dprintk(", error = %d\n", task->tk_status); 954 dprintk(", error = %d\n", task->tk_status);
972 goto out; 955 goto out;
973 } 956 }
@@ -1030,7 +1013,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1030 1013
1031 if (task->tk_status < 0) { 1014 if (task->tk_status < 0) {
1032 nfs_set_pageerror(page); 1015 nfs_set_pageerror(page);
1033 req->wb_context->error = task->tk_status; 1016 nfs_context_set_write_error(req->wb_context, task->tk_status);
1034 dprintk(", error = %d\n", task->tk_status); 1017 dprintk(", error = %d\n", task->tk_status);
1035 goto remove_request; 1018 goto remove_request;
1036 } 1019 }
@@ -1244,7 +1227,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1244 req->wb_bytes, 1227 req->wb_bytes,
1245 (long long)req_offset(req)); 1228 (long long)req_offset(req));
1246 if (task->tk_status < 0) { 1229 if (task->tk_status < 0) {
1247 req->wb_context->error = task->tk_status; 1230 nfs_context_set_write_error(req->wb_context, task->tk_status);
1248 nfs_inode_remove_request(req); 1231 nfs_inode_remove_request(req);
1249 dprintk(", error = %d\n", task->tk_status); 1232 dprintk(", error = %d\n", task->tk_status);
1250 goto next; 1233 goto next;
@@ -1347,53 +1330,52 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1347 return ret; 1330 return ret;
1348} 1331}
1349 1332
1350/* 1333static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
1351 * flush the inode to disk.
1352 */
1353int nfs_wb_all(struct inode *inode)
1354{ 1334{
1355 struct address_space *mapping = inode->i_mapping;
1356 struct writeback_control wbc = {
1357 .bdi = mapping->backing_dev_info,
1358 .sync_mode = WB_SYNC_ALL,
1359 .nr_to_write = LONG_MAX,
1360 .for_writepages = 1,
1361 .range_cyclic = 1,
1362 };
1363 int ret; 1335 int ret;
1364 1336
1365 ret = nfs_writepages(mapping, &wbc); 1337 ret = nfs_writepages(mapping, wbc);
1366 if (ret < 0) 1338 if (ret < 0)
1367 goto out; 1339 goto out;
1368 ret = nfs_sync_mapping_wait(mapping, &wbc, 0); 1340 ret = nfs_sync_mapping_wait(mapping, wbc, how);
1369 if (ret >= 0) 1341 if (ret < 0)
1370 return 0; 1342 goto out;
1343 return 0;
1371out: 1344out:
1372 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 1345 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1373 return ret; 1346 return ret;
1374} 1347}
1375 1348
1376int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) 1349/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
1350static int nfs_write_mapping(struct address_space *mapping, int how)
1377{ 1351{
1378 struct writeback_control wbc = { 1352 struct writeback_control wbc = {
1379 .bdi = mapping->backing_dev_info, 1353 .bdi = mapping->backing_dev_info,
1380 .sync_mode = WB_SYNC_ALL, 1354 .sync_mode = WB_SYNC_NONE,
1381 .nr_to_write = LONG_MAX, 1355 .nr_to_write = LONG_MAX,
1382 .range_start = range_start,
1383 .range_end = range_end,
1384 .for_writepages = 1, 1356 .for_writepages = 1,
1357 .range_cyclic = 1,
1385 }; 1358 };
1386 int ret; 1359 int ret;
1387 1360
1388 ret = nfs_writepages(mapping, &wbc); 1361 ret = __nfs_write_mapping(mapping, &wbc, how);
1389 if (ret < 0) 1362 if (ret < 0)
1390 goto out; 1363 return ret;
1391 ret = nfs_sync_mapping_wait(mapping, &wbc, how); 1364 wbc.sync_mode = WB_SYNC_ALL;
1392 if (ret >= 0) 1365 return __nfs_write_mapping(mapping, &wbc, how);
1393 return 0; 1366}
1394out: 1367
1395 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 1368/*
1396 return ret; 1369 * flush the inode to disk.
1370 */
1371int nfs_wb_all(struct inode *inode)
1372{
1373 return nfs_write_mapping(inode->i_mapping, 0);
1374}
1375
1376int nfs_wb_nocommit(struct inode *inode)
1377{
1378 return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
1397} 1379}
1398 1380
1399int nfs_wb_page_cancel(struct inode *inode, struct page *page) 1381int nfs_wb_page_cancel(struct inode *inode, struct page *page)
@@ -1477,35 +1459,6 @@ int nfs_wb_page(struct inode *inode, struct page* page)
1477 return nfs_wb_page_priority(inode, page, FLUSH_STABLE); 1459 return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1478} 1460}
1479 1461
1480int nfs_set_page_dirty(struct page *page)
1481{
1482 struct address_space *mapping = page->mapping;
1483 struct inode *inode;
1484 struct nfs_page *req;
1485 int ret;
1486
1487 if (!mapping)
1488 goto out_raced;
1489 inode = mapping->host;
1490 if (!inode)
1491 goto out_raced;
1492 spin_lock(&inode->i_lock);
1493 req = nfs_page_find_request_locked(page);
1494 if (req != NULL) {
1495 /* Mark any existing write requests for flushing */
1496 ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
1497 spin_unlock(&inode->i_lock);
1498 nfs_release_request(req);
1499 return ret;
1500 }
1501 ret = __set_page_dirty_nobuffers(page);
1502 spin_unlock(&inode->i_lock);
1503 return ret;
1504out_raced:
1505 return !TestSetPageDirty(page);
1506}
1507
1508
1509int __init nfs_init_writepagecache(void) 1462int __init nfs_init_writepagecache(void)
1510{ 1463{
1511 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1464 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 10f6e7dcf633..2d116d2298f8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -174,9 +174,6 @@ static __be32 *
174encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, 174encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
175 struct kstat *stat) 175 struct kstat *stat)
176{ 176{
177 struct dentry *dentry = fhp->fh_dentry;
178 struct timespec time;
179
180 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); 177 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
181 *p++ = htonl((u32) stat->mode); 178 *p++ = htonl((u32) stat->mode);
182 *p++ = htonl((u32) stat->nlink); 179 *p++ = htonl((u32) stat->nlink);
@@ -191,10 +188,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
191 *p++ = htonl((u32) MAJOR(stat->rdev)); 188 *p++ = htonl((u32) MAJOR(stat->rdev));
192 *p++ = htonl((u32) MINOR(stat->rdev)); 189 *p++ = htonl((u32) MINOR(stat->rdev));
193 p = encode_fsid(p, fhp); 190 p = encode_fsid(p, fhp);
194 p = xdr_encode_hyper(p, (u64) stat->ino); 191 p = xdr_encode_hyper(p, stat->ino);
195 p = encode_time3(p, &stat->atime); 192 p = encode_time3(p, &stat->atime);
196 lease_get_mtime(dentry->d_inode, &time); 193 p = encode_time3(p, &stat->mtime);
197 p = encode_time3(p, &time);
198 p = encode_time3(p, &stat->ctime); 194 p = encode_time3(p, &stat->ctime);
199 195
200 return p; 196 return p;
@@ -203,31 +199,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
203static __be32 * 199static __be32 *
204encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) 200encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
205{ 201{
206 struct inode *inode = fhp->fh_dentry->d_inode;
207
208 /* Attributes to follow */ 202 /* Attributes to follow */
209 *p++ = xdr_one; 203 *p++ = xdr_one;
210 204 return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr);
211 *p++ = htonl(nfs3_ftypes[(fhp->fh_post_mode & S_IFMT) >> 12]);
212 *p++ = htonl((u32) fhp->fh_post_mode);
213 *p++ = htonl((u32) fhp->fh_post_nlink);
214 *p++ = htonl((u32) nfsd_ruid(rqstp, fhp->fh_post_uid));
215 *p++ = htonl((u32) nfsd_rgid(rqstp, fhp->fh_post_gid));
216 if (S_ISLNK(fhp->fh_post_mode) && fhp->fh_post_size > NFS3_MAXPATHLEN) {
217 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
218 } else {
219 p = xdr_encode_hyper(p, (u64) fhp->fh_post_size);
220 }
221 p = xdr_encode_hyper(p, ((u64)fhp->fh_post_blocks) << 9);
222 *p++ = fhp->fh_post_rdev[0];
223 *p++ = fhp->fh_post_rdev[1];
224 p = encode_fsid(p, fhp);
225 p = xdr_encode_hyper(p, (u64) inode->i_ino);
226 p = encode_time3(p, &fhp->fh_post_atime);
227 p = encode_time3(p, &fhp->fh_post_mtime);
228 p = encode_time3(p, &fhp->fh_post_ctime);
229
230 return p;
231} 205}
232 206
233/* 207/*
@@ -246,6 +220,7 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
246 err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat); 220 err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat);
247 if (!err) { 221 if (!err) {
248 *p++ = xdr_one; /* attributes follow */ 222 *p++ = xdr_one; /* attributes follow */
223 lease_get_mtime(dentry->d_inode, &stat.mtime);
249 return encode_fattr3(rqstp, p, fhp, &stat); 224 return encode_fattr3(rqstp, p, fhp, &stat);
250 } 225 }
251 } 226 }
@@ -284,6 +259,23 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
284 return encode_post_op_attr(rqstp, p, fhp); 259 return encode_post_op_attr(rqstp, p, fhp);
285} 260}
286 261
262/*
263 * Fill in the post_op attr for the wcc data
264 */
265void fill_post_wcc(struct svc_fh *fhp)
266{
267 int err;
268
269 if (fhp->fh_post_saved)
270 printk("nfsd: inode locked twice during operation.\n");
271
272 err = vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry,
273 &fhp->fh_post_attr);
274 if (err)
275 fhp->fh_post_saved = 0;
276 else
277 fhp->fh_post_saved = 1;
278}
287 279
288/* 280/*
289 * XDR decode functions 281 * XDR decode functions
@@ -643,8 +635,11 @@ int
643nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p, 635nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
644 struct nfsd3_attrstat *resp) 636 struct nfsd3_attrstat *resp)
645{ 637{
646 if (resp->status == 0) 638 if (resp->status == 0) {
639 lease_get_mtime(resp->fh.fh_dentry->d_inode,
640 &resp->stat.mtime);
647 p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); 641 p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
642 }
648 return xdr_ressize_check(rqstp, p); 643 return xdr_ressize_check(rqstp, p);
649} 644}
650 645
@@ -802,7 +797,7 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
802 797
803static __be32 * 798static __be32 *
804encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, 799encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
805 int namlen, ino_t ino) 800 int namlen, u64 ino)
806{ 801{
807 *p++ = xdr_one; /* mark entry present */ 802 *p++ = xdr_one; /* mark entry present */
808 p = xdr_encode_hyper(p, ino); /* file id */ 803 p = xdr_encode_hyper(p, ino); /* file id */
@@ -873,7 +868,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
873#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) 868#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2))
874static int 869static int
875encode_entry(struct readdir_cd *ccd, const char *name, int namlen, 870encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
876 loff_t offset, ino_t ino, unsigned int d_type, int plus) 871 loff_t offset, u64 ino, unsigned int d_type, int plus)
877{ 872{
878 struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, 873 struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
879 common); 874 common);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 31d6633c7fe4..9d536a8cb379 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -39,6 +39,7 @@
39#include <linux/errno.h> 39#include <linux/errno.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/kthread.h>
42#include <linux/sunrpc/xdr.h> 43#include <linux/sunrpc/xdr.h>
43#include <linux/sunrpc/svc.h> 44#include <linux/sunrpc/svc.h>
44#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
@@ -343,26 +344,28 @@ static struct rpc_version * nfs_cb_version[] = {
343 &nfs_cb_version4, 344 &nfs_cb_version4,
344}; 345};
345 346
346/* 347/* Reference counting, callback cleanup, etc., all look racy as heck.
347 * Use the SETCLIENTID credential 348 * And why is cb_set an atomic? */
348 */ 349
349static struct rpc_cred * 350static int do_probe_callback(void *data)
350nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
351{ 351{
352 struct auth_cred acred; 352 struct nfs4_client *clp = data;
353 struct rpc_clnt *clnt = clp->cl_callback.cb_client; 353 struct nfs4_callback *cb = &clp->cl_callback;
354 struct rpc_cred *ret; 354 struct rpc_message msg = {
355 355 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
356 get_group_info(clp->cl_cred.cr_group_info); 356 .rpc_argp = clp,
357 acred.uid = clp->cl_cred.cr_uid; 357 };
358 acred.gid = clp->cl_cred.cr_gid; 358 int status;
359 acred.group_info = clp->cl_cred.cr_group_info; 359
360 360 status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT);
361 dprintk("NFSD: looking up %s cred\n", 361
362 clnt->cl_auth->au_ops->au_name); 362 if (status) {
363 ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags); 363 rpc_shutdown_client(cb->cb_client);
364 put_group_info(clp->cl_cred.cr_group_info); 364 cb->cb_client = NULL;
365 return ret; 365 } else
366 atomic_set(&cb->cb_set, 1);
367 put_nfs4_client(clp);
368 return 0;
366} 369}
367 370
368/* 371/*
@@ -390,11 +393,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
390 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ 393 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
391 .flags = (RPC_CLNT_CREATE_NOPING), 394 .flags = (RPC_CLNT_CREATE_NOPING),
392 }; 395 };
393 struct rpc_message msg = { 396 struct task_struct *t;
394 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
395 .rpc_argp = clp,
396 };
397 int status;
398 397
399 if (atomic_read(&cb->cb_set)) 398 if (atomic_read(&cb->cb_set))
400 return; 399 return;
@@ -426,16 +425,11 @@ nfsd4_probe_callback(struct nfs4_client *clp)
426 /* the task holds a reference to the nfs4_client struct */ 425 /* the task holds a reference to the nfs4_client struct */
427 atomic_inc(&clp->cl_count); 426 atomic_inc(&clp->cl_count);
428 427
429 msg.rpc_cred = nfsd4_lookupcred(clp,0); 428 t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe");
430 if (IS_ERR(msg.rpc_cred))
431 goto out_release_clp;
432 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
433 put_rpccred(msg.rpc_cred);
434 429
435 if (status != 0) { 430 if (IS_ERR(t))
436 dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
437 goto out_release_clp; 431 goto out_release_clp;
438 } 432
439 return; 433 return;
440 434
441out_release_clp: 435out_release_clp:
@@ -447,30 +441,6 @@ out_err:
447 (int)clp->cl_name.len, clp->cl_name.data); 441 (int)clp->cl_name.len, clp->cl_name.data);
448} 442}
449 443
450static void
451nfs4_cb_null(struct rpc_task *task, void *dummy)
452{
453 struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
454 struct nfs4_callback *cb = &clp->cl_callback;
455 __be32 addr = htonl(cb->cb_addr);
456
457 dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
458
459 if (task->tk_status < 0) {
460 dprintk("NFSD: callback establishment to client %.*s failed\n",
461 (int)clp->cl_name.len, clp->cl_name.data);
462 goto out;
463 }
464 atomic_set(&cb->cb_set, 1);
465 dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr));
466out:
467 put_nfs4_client(clp);
468}
469
470static const struct rpc_call_ops nfs4_cb_null_ops = {
471 .rpc_call_done = nfs4_cb_null,
472};
473
474/* 444/*
475 * called with dp->dl_count inc'ed. 445 * called with dp->dl_count inc'ed.
476 * nfs4_lock_state() may or may not have been called. 446 * nfs4_lock_state() may or may not have been called.
@@ -491,10 +461,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
491 if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt) 461 if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
492 return; 462 return;
493 463
494 msg.rpc_cred = nfsd4_lookupcred(clp, 0);
495 if (IS_ERR(msg.rpc_cred))
496 goto out;
497
498 cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ 464 cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
499 cbr->cbr_dp = dp; 465 cbr->cbr_dp = dp;
500 466
@@ -515,13 +481,12 @@ nfsd4_cb_recall(struct nfs4_delegation *dp)
515 status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); 481 status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
516 } 482 }
517out_put_cred: 483out_put_cred:
518 put_rpccred(msg.rpc_cred);
519out:
520 if (status == -EIO) 484 if (status == -EIO)
521 atomic_set(&clp->cl_callback.cb_set, 0); 485 atomic_set(&clp->cl_callback.cb_set, 0);
522 /* Success or failure, now we're either waiting for lease expiration 486 /* Success or failure, now we're either waiting for lease expiration
523 * or deleg_return. */ 487 * or deleg_return. */
524 dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count)); 488 dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count));
489 put_nfs4_client(clp);
525 nfs4_put_delegation(dp); 490 nfs4_put_delegation(dp);
526 return; 491 return;
527} 492}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 2ccffde81b84..4c0c683ce07a 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -207,6 +207,7 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
207{ 207{
208 struct ent ent, *res; 208 struct ent ent, *res;
209 char *buf1, *bp; 209 char *buf1, *bp;
210 int len;
210 int error = -EINVAL; 211 int error = -EINVAL;
211 212
212 if (buf[buflen - 1] != '\n') 213 if (buf[buflen - 1] != '\n')
@@ -248,10 +249,11 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
248 goto out; 249 goto out;
249 250
250 /* Name */ 251 /* Name */
251 error = qword_get(&buf, buf1, PAGE_SIZE); 252 error = -EINVAL;
252 if (error == -EINVAL) 253 len = qword_get(&buf, buf1, PAGE_SIZE);
254 if (len < 0)
253 goto out; 255 goto out;
254 if (error == -ENOENT) 256 if (len == 0)
255 set_bit(CACHE_NEGATIVE, &ent.h.flags); 257 set_bit(CACHE_NEGATIVE, &ent.h.flags);
256 else { 258 else {
257 if (error >= IDMAP_NAMESZ) { 259 if (error >= IDMAP_NAMESZ) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 29b7e63cb32c..18ead1790bb3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -238,12 +238,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
238 break; 238 break;
239 case NFS4_OPEN_CLAIM_DELEGATE_PREV: 239 case NFS4_OPEN_CLAIM_DELEGATE_PREV:
240 open->op_stateowner->so_confirmed = 1; 240 open->op_stateowner->so_confirmed = 1;
241 printk("NFSD: unsupported OPEN claim type %d\n", 241 dprintk("NFSD: unsupported OPEN claim type %d\n",
242 open->op_claim_type); 242 open->op_claim_type);
243 status = nfserr_notsupp; 243 status = nfserr_notsupp;
244 goto out; 244 goto out;
245 default: 245 default:
246 printk("NFSD: Invalid OPEN claim type %d\n", 246 dprintk("NFSD: Invalid OPEN claim type %d\n",
247 open->op_claim_type); 247 open->op_claim_type);
248 status = nfserr_inval; 248 status = nfserr_inval;
249 goto out; 249 goto out;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3c028b9c6e0e..31673cd251c3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -358,9 +358,22 @@ alloc_client(struct xdr_netobj name)
358 return clp; 358 return clp;
359} 359}
360 360
361static void
362shutdown_callback_client(struct nfs4_client *clp)
363{
364 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
365
366 /* shutdown rpc client, ending any outstanding recall rpcs */
367 if (clnt) {
368 clp->cl_callback.cb_client = NULL;
369 rpc_shutdown_client(clnt);
370 }
371}
372
361static inline void 373static inline void
362free_client(struct nfs4_client *clp) 374free_client(struct nfs4_client *clp)
363{ 375{
376 shutdown_callback_client(clp);
364 if (clp->cl_cred.cr_group_info) 377 if (clp->cl_cred.cr_group_info)
365 put_group_info(clp->cl_cred.cr_group_info); 378 put_group_info(clp->cl_cred.cr_group_info);
366 kfree(clp->cl_name.data); 379 kfree(clp->cl_name.data);
@@ -375,18 +388,6 @@ put_nfs4_client(struct nfs4_client *clp)
375} 388}
376 389
377static void 390static void
378shutdown_callback_client(struct nfs4_client *clp)
379{
380 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
381
382 /* shutdown rpc client, ending any outstanding recall rpcs */
383 if (clnt) {
384 clp->cl_callback.cb_client = NULL;
385 rpc_shutdown_client(clnt);
386 }
387}
388
389static void
390expire_client(struct nfs4_client *clp) 391expire_client(struct nfs4_client *clp)
391{ 392{
392 struct nfs4_stateowner *sop; 393 struct nfs4_stateowner *sop;
@@ -396,8 +397,6 @@ expire_client(struct nfs4_client *clp)
396 dprintk("NFSD: expire_client cl_count %d\n", 397 dprintk("NFSD: expire_client cl_count %d\n",
397 atomic_read(&clp->cl_count)); 398 atomic_read(&clp->cl_count));
398 399
399 shutdown_callback_client(clp);
400
401 INIT_LIST_HEAD(&reaplist); 400 INIT_LIST_HEAD(&reaplist);
402 spin_lock(&recall_lock); 401 spin_lock(&recall_lock);
403 while (!list_empty(&clp->cl_delegations)) { 402 while (!list_empty(&clp->cl_delegations)) {
@@ -462,26 +461,28 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
462} 461}
463 462
464static inline int 463static inline int
465same_name(const char *n1, const char *n2) { 464same_name(const char *n1, const char *n2)
465{
466 return 0 == memcmp(n1, n2, HEXDIR_LEN); 466 return 0 == memcmp(n1, n2, HEXDIR_LEN);
467} 467}
468 468
469static int 469static int
470cmp_verf(nfs4_verifier *v1, nfs4_verifier *v2) { 470same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
471 return(!memcmp(v1->data,v2->data,sizeof(v1->data))); 471{
472 return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
472} 473}
473 474
474static int 475static int
475cmp_clid(clientid_t * cl1, clientid_t * cl2) { 476same_clid(clientid_t *cl1, clientid_t *cl2)
476 return((cl1->cl_boot == cl2->cl_boot) && 477{
477 (cl1->cl_id == cl2->cl_id)); 478 return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
478} 479}
479 480
480/* XXX what about NGROUP */ 481/* XXX what about NGROUP */
481static int 482static int
482cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){ 483same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
483 return(cr1->cr_uid == cr2->cr_uid); 484{
484 485 return cr1->cr_uid == cr2->cr_uid;
485} 486}
486 487
487static void 488static void
@@ -507,7 +508,7 @@ check_name(struct xdr_netobj name) {
507 if (name.len == 0) 508 if (name.len == 0)
508 return 0; 509 return 0;
509 if (name.len > NFS4_OPAQUE_LIMIT) { 510 if (name.len > NFS4_OPAQUE_LIMIT) {
510 printk("NFSD: check_name: name too long(%d)!\n", name.len); 511 dprintk("NFSD: check_name: name too long(%d)!\n", name.len);
511 return 0; 512 return 0;
512 } 513 }
513 return 1; 514 return 1;
@@ -546,7 +547,7 @@ find_confirmed_client(clientid_t *clid)
546 unsigned int idhashval = clientid_hashval(clid->cl_id); 547 unsigned int idhashval = clientid_hashval(clid->cl_id);
547 548
548 list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { 549 list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
549 if (cmp_clid(&clp->cl_clientid, clid)) 550 if (same_clid(&clp->cl_clientid, clid))
550 return clp; 551 return clp;
551 } 552 }
552 return NULL; 553 return NULL;
@@ -559,7 +560,7 @@ find_unconfirmed_client(clientid_t *clid)
559 unsigned int idhashval = clientid_hashval(clid->cl_id); 560 unsigned int idhashval = clientid_hashval(clid->cl_id);
560 561
561 list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { 562 list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
562 if (cmp_clid(&clp->cl_clientid, clid)) 563 if (same_clid(&clp->cl_clientid, clid))
563 return clp; 564 return clp;
564 } 565 }
565 return NULL; 566 return NULL;
@@ -753,7 +754,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
753 * or different ip_address 754 * or different ip_address
754 */ 755 */
755 status = nfserr_clid_inuse; 756 status = nfserr_clid_inuse;
756 if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred) 757 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
757 || conf->cl_addr != sin->sin_addr.s_addr) { 758 || conf->cl_addr != sin->sin_addr.s_addr) {
758 dprintk("NFSD: setclientid: string in use by client" 759 dprintk("NFSD: setclientid: string in use by client"
759 "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr)); 760 "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr));
@@ -772,14 +773,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
772 new = create_client(clname, dname); 773 new = create_client(clname, dname);
773 if (new == NULL) 774 if (new == NULL)
774 goto out; 775 goto out;
775 copy_verf(new, &clverifier);
776 new->cl_addr = sin->sin_addr.s_addr;
777 copy_cred(&new->cl_cred,&rqstp->rq_cred);
778 gen_clid(new); 776 gen_clid(new);
779 gen_confirm(new); 777 } else if (same_verf(&conf->cl_verifier, &clverifier)) {
780 gen_callback(new, setclid);
781 add_to_unconfirmed(new, strhashval);
782 } else if (cmp_verf(&conf->cl_verifier, &clverifier)) {
783 /* 778 /*
784 * CASE 1: 779 * CASE 1:
785 * cl_name match, confirmed, principal match 780 * cl_name match, confirmed, principal match
@@ -804,13 +799,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
804 new = create_client(clname, dname); 799 new = create_client(clname, dname);
805 if (new == NULL) 800 if (new == NULL)
806 goto out; 801 goto out;
807 copy_verf(new,&conf->cl_verifier);
808 new->cl_addr = sin->sin_addr.s_addr;
809 copy_cred(&new->cl_cred,&rqstp->rq_cred);
810 copy_clid(new, conf); 802 copy_clid(new, conf);
811 gen_confirm(new);
812 gen_callback(new, setclid);
813 add_to_unconfirmed(new,strhashval);
814 } else if (!unconf) { 803 } else if (!unconf) {
815 /* 804 /*
816 * CASE 2: 805 * CASE 2:
@@ -823,14 +812,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
823 new = create_client(clname, dname); 812 new = create_client(clname, dname);
824 if (new == NULL) 813 if (new == NULL)
825 goto out; 814 goto out;
826 copy_verf(new,&clverifier);
827 new->cl_addr = sin->sin_addr.s_addr;
828 copy_cred(&new->cl_cred,&rqstp->rq_cred);
829 gen_clid(new); 815 gen_clid(new);
830 gen_confirm(new); 816 } else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
831 gen_callback(new, setclid);
832 add_to_unconfirmed(new, strhashval);
833 } else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
834 /* 817 /*
835 * CASE3: 818 * CASE3:
836 * confirmed found (name, principal match) 819 * confirmed found (name, principal match)
@@ -850,19 +833,19 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
850 new = create_client(clname, dname); 833 new = create_client(clname, dname);
851 if (new == NULL) 834 if (new == NULL)
852 goto out; 835 goto out;
853 copy_verf(new,&clverifier);
854 new->cl_addr = sin->sin_addr.s_addr;
855 copy_cred(&new->cl_cred,&rqstp->rq_cred);
856 gen_clid(new); 836 gen_clid(new);
857 gen_confirm(new);
858 gen_callback(new, setclid);
859 add_to_unconfirmed(new, strhashval);
860 } else { 837 } else {
861 /* No cases hit !!! */ 838 /* No cases hit !!! */
862 status = nfserr_inval; 839 status = nfserr_inval;
863 goto out; 840 goto out;
864 841
865 } 842 }
843 copy_verf(new, &clverifier);
844 new->cl_addr = sin->sin_addr.s_addr;
845 copy_cred(&new->cl_cred, &rqstp->rq_cred);
846 gen_confirm(new);
847 gen_callback(new, setclid);
848 add_to_unconfirmed(new, strhashval);
866 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 849 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
867 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 850 setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
868 memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); 851 memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
@@ -910,16 +893,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
910 goto out; 893 goto out;
911 894
912 if ((conf && unconf) && 895 if ((conf && unconf) &&
913 (cmp_verf(&unconf->cl_confirm, &confirm)) && 896 (same_verf(&unconf->cl_confirm, &confirm)) &&
914 (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) && 897 (same_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
915 (same_name(conf->cl_recdir,unconf->cl_recdir)) && 898 (same_name(conf->cl_recdir,unconf->cl_recdir)) &&
916 (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) { 899 (!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
917 /* CASE 1: 900 /* CASE 1:
918 * unconf record that matches input clientid and input confirm. 901 * unconf record that matches input clientid and input confirm.
919 * conf record that matches input clientid. 902 * conf record that matches input clientid.
920 * conf and unconf records match names, verifiers 903 * conf and unconf records match names, verifiers
921 */ 904 */
922 if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 905 if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
923 status = nfserr_clid_inuse; 906 status = nfserr_clid_inuse;
924 else { 907 else {
925 /* XXX: We just turn off callbacks until we can handle 908 /* XXX: We just turn off callbacks until we can handle
@@ -933,7 +916,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
933 } 916 }
934 } else if ((conf && !unconf) || 917 } else if ((conf && !unconf) ||
935 ((conf && unconf) && 918 ((conf && unconf) &&
936 (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) || 919 (!same_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
937 !same_name(conf->cl_recdir, unconf->cl_recdir)))) { 920 !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
938 /* CASE 2: 921 /* CASE 2:
939 * conf record that matches input clientid. 922 * conf record that matches input clientid.
@@ -941,18 +924,18 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
941 * unconf->cl_name or unconf->cl_verifier don't match the 924 * unconf->cl_name or unconf->cl_verifier don't match the
942 * conf record. 925 * conf record.
943 */ 926 */
944 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) 927 if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
945 status = nfserr_clid_inuse; 928 status = nfserr_clid_inuse;
946 else 929 else
947 status = nfs_ok; 930 status = nfs_ok;
948 } else if (!conf && unconf 931 } else if (!conf && unconf
949 && cmp_verf(&unconf->cl_confirm, &confirm)) { 932 && same_verf(&unconf->cl_confirm, &confirm)) {
950 /* CASE 3: 933 /* CASE 3:
951 * conf record not found. 934 * conf record not found.
952 * unconf record found. 935 * unconf record found.
953 * unconf->cl_confirm matches input confirm 936 * unconf->cl_confirm matches input confirm
954 */ 937 */
955 if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) { 938 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
956 status = nfserr_clid_inuse; 939 status = nfserr_clid_inuse;
957 } else { 940 } else {
958 unsigned int hash = 941 unsigned int hash =
@@ -967,8 +950,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
967 conf = unconf; 950 conf = unconf;
968 status = nfs_ok; 951 status = nfs_ok;
969 } 952 }
970 } else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) 953 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
971 && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, 954 && (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
972 &confirm)))) { 955 &confirm)))) {
973 /* CASE 4: 956 /* CASE 4:
974 * conf record not found, or if conf, conf->cl_confirm does not 957 * conf record not found, or if conf, conf->cl_confirm does not
@@ -1019,7 +1002,7 @@ nfsd4_free_slab(struct kmem_cache **slab)
1019 *slab = NULL; 1002 *slab = NULL;
1020} 1003}
1021 1004
1022static void 1005void
1023nfsd4_free_slabs(void) 1006nfsd4_free_slabs(void)
1024{ 1007{
1025 nfsd4_free_slab(&stateowner_slab); 1008 nfsd4_free_slab(&stateowner_slab);
@@ -1207,10 +1190,12 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1207} 1190}
1208 1191
1209static int 1192static int
1210cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) { 1193same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
1211 return ((sop->so_owner.len == owner->len) && 1194 clientid_t *clid)
1212 !memcmp(sop->so_owner.data, owner->data, owner->len) && 1195{
1213 (sop->so_client->cl_clientid.cl_id == clid->cl_id)); 1196 return (sop->so_owner.len == owner->len) &&
1197 0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
1198 (sop->so_client->cl_clientid.cl_id == clid->cl_id);
1214} 1199}
1215 1200
1216static struct nfs4_stateowner * 1201static struct nfs4_stateowner *
@@ -1219,7 +1204,7 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
1219 struct nfs4_stateowner *so = NULL; 1204 struct nfs4_stateowner *so = NULL;
1220 1205
1221 list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { 1206 list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
1222 if (cmp_owner_str(so, &open->op_owner, &open->op_clientid)) 1207 if (same_owner_str(so, &open->op_owner, &open->op_clientid))
1223 return so; 1208 return so;
1224 } 1209 }
1225 return NULL; 1210 return NULL;
@@ -1360,6 +1345,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
1360 * lock) we know the server hasn't removed the lease yet, we know 1345 * lock) we know the server hasn't removed the lease yet, we know
1361 * it's safe to take a reference: */ 1346 * it's safe to take a reference: */
1362 atomic_inc(&dp->dl_count); 1347 atomic_inc(&dp->dl_count);
1348 atomic_inc(&dp->dl_client->cl_count);
1363 1349
1364 spin_lock(&recall_lock); 1350 spin_lock(&recall_lock);
1365 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 1351 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
@@ -1368,8 +1354,12 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
1368 /* only place dl_time is set. protected by lock_kernel*/ 1354 /* only place dl_time is set. protected by lock_kernel*/
1369 dp->dl_time = get_seconds(); 1355 dp->dl_time = get_seconds();
1370 1356
1371 /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */ 1357 /*
1372 fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ; 1358 * We don't want the locks code to timeout the lease for us;
1359 * we'll remove it ourself if the delegation isn't returned
1360 * in time.
1361 */
1362 fl->fl_break_time = 0;
1373 1363
1374 t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); 1364 t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
1375 if (IS_ERR(t)) { 1365 if (IS_ERR(t)) {
@@ -1378,6 +1368,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
1378 printk(KERN_INFO "NFSD: Callback thread failed for " 1368 printk(KERN_INFO "NFSD: Callback thread failed for "
1379 "for client (clientid %08x/%08x)\n", 1369 "for client (clientid %08x/%08x)\n",
1380 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1370 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
1371 put_nfs4_client(dp->dl_client);
1381 nfs4_put_delegation(dp); 1372 nfs4_put_delegation(dp);
1382 } 1373 }
1383} 1374}
@@ -1738,7 +1729,7 @@ out:
1738 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS 1729 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
1739 && flag == NFS4_OPEN_DELEGATE_NONE 1730 && flag == NFS4_OPEN_DELEGATE_NONE
1740 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) 1731 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
1741 printk("NFSD: WARNING: refusing delegation reclaim\n"); 1732 dprintk("NFSD: WARNING: refusing delegation reclaim\n");
1742 open->op_delegate_type = flag; 1733 open->op_delegate_type = flag;
1743} 1734}
1744 1735
@@ -2147,7 +2138,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2147 *sopp = NULL; 2138 *sopp = NULL;
2148 2139
2149 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { 2140 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
2150 printk("NFSD: preprocess_seqid_op: magic stateid!\n"); 2141 dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
2151 return nfserr_bad_stateid; 2142 return nfserr_bad_stateid;
2152 } 2143 }
2153 2144
@@ -2181,25 +2172,24 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2181 lkflg = setlkflg(lock->lk_type); 2172 lkflg = setlkflg(lock->lk_type);
2182 2173
2183 if (lock->lk_is_new) { 2174 if (lock->lk_is_new) {
2184 if (!sop->so_is_open_owner) 2175 if (!sop->so_is_open_owner)
2185 return nfserr_bad_stateid; 2176 return nfserr_bad_stateid;
2186 if (!cmp_clid(&clp->cl_clientid, lockclid)) 2177 if (!same_clid(&clp->cl_clientid, lockclid))
2187 return nfserr_bad_stateid; 2178 return nfserr_bad_stateid;
2188 /* stp is the open stateid */ 2179 /* stp is the open stateid */
2189 status = nfs4_check_openmode(stp, lkflg); 2180 status = nfs4_check_openmode(stp, lkflg);
2190 if (status) 2181 if (status)
2191 return status; 2182 return status;
2192 } else { 2183 } else {
2193 /* stp is the lock stateid */ 2184 /* stp is the lock stateid */
2194 status = nfs4_check_openmode(stp->st_openstp, lkflg); 2185 status = nfs4_check_openmode(stp->st_openstp, lkflg);
2195 if (status) 2186 if (status)
2196 return status; 2187 return status;
2197 } 2188 }
2198
2199 } 2189 }
2200 2190
2201 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { 2191 if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
2202 printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); 2192 dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
2203 return nfserr_bad_stateid; 2193 return nfserr_bad_stateid;
2204 } 2194 }
2205 2195
@@ -2215,22 +2205,22 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
2215 goto check_replay; 2205 goto check_replay;
2216 2206
2217 if (sop->so_confirmed && flags & CONFIRM) { 2207 if (sop->so_confirmed && flags & CONFIRM) {
2218 printk("NFSD: preprocess_seqid_op: expected" 2208 dprintk("NFSD: preprocess_seqid_op: expected"
2219 " unconfirmed stateowner!\n"); 2209 " unconfirmed stateowner!\n");
2220 return nfserr_bad_stateid; 2210 return nfserr_bad_stateid;
2221 } 2211 }
2222 if (!sop->so_confirmed && !(flags & CONFIRM)) { 2212 if (!sop->so_confirmed && !(flags & CONFIRM)) {
2223 printk("NFSD: preprocess_seqid_op: stateowner not" 2213 dprintk("NFSD: preprocess_seqid_op: stateowner not"
2224 " confirmed yet!\n"); 2214 " confirmed yet!\n");
2225 return nfserr_bad_stateid; 2215 return nfserr_bad_stateid;
2226 } 2216 }
2227 if (stateid->si_generation > stp->st_stateid.si_generation) { 2217 if (stateid->si_generation > stp->st_stateid.si_generation) {
2228 printk("NFSD: preprocess_seqid_op: future stateid?!\n"); 2218 dprintk("NFSD: preprocess_seqid_op: future stateid?!\n");
2229 return nfserr_bad_stateid; 2219 return nfserr_bad_stateid;
2230 } 2220 }
2231 2221
2232 if (stateid->si_generation < stp->st_stateid.si_generation) { 2222 if (stateid->si_generation < stp->st_stateid.si_generation) {
2233 printk("NFSD: preprocess_seqid_op: old stateid!\n"); 2223 dprintk("NFSD: preprocess_seqid_op: old stateid!\n");
2234 return nfserr_old_stateid; 2224 return nfserr_old_stateid;
2235 } 2225 }
2236 renew_client(sop->so_client); 2226 renew_client(sop->so_client);
@@ -2242,7 +2232,7 @@ check_replay:
2242 /* indicate replay to calling function */ 2232 /* indicate replay to calling function */
2243 return nfserr_replay_me; 2233 return nfserr_replay_me;
2244 } 2234 }
2245 printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n", 2235 dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
2246 sop->so_seqid, seqid); 2236 sop->so_seqid, seqid);
2247 *sopp = NULL; 2237 *sopp = NULL;
2248 return nfserr_bad_seqid; 2238 return nfserr_bad_seqid;
@@ -2561,7 +2551,7 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
2561 struct nfs4_stateowner *op; 2551 struct nfs4_stateowner *op;
2562 2552
2563 list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { 2553 list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
2564 if (cmp_owner_str(op, owner, clid)) 2554 if (same_owner_str(op, owner, clid))
2565 return op; 2555 return op;
2566 } 2556 }
2567 return NULL; 2557 return NULL;
@@ -2855,7 +2845,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2855 file_lock.fl_type = F_WRLCK; 2845 file_lock.fl_type = F_WRLCK;
2856 break; 2846 break;
2857 default: 2847 default:
2858 printk("NFSD: nfs4_lockt: bad lock type!\n"); 2848 dprintk("NFSD: nfs4_lockt: bad lock type!\n");
2859 status = nfserr_inval; 2849 status = nfserr_inval;
2860 goto out; 2850 goto out;
2861 } 2851 }
@@ -3025,7 +3015,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
3025 INIT_LIST_HEAD(&matches); 3015 INIT_LIST_HEAD(&matches);
3026 for (i = 0; i < LOCK_HASH_SIZE; i++) { 3016 for (i = 0; i < LOCK_HASH_SIZE; i++) {
3027 list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) { 3017 list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
3028 if (!cmp_owner_str(sop, owner, clid)) 3018 if (!same_owner_str(sop, owner, clid))
3029 continue; 3019 continue;
3030 list_for_each_entry(stp, &sop->so_stateids, 3020 list_for_each_entry(stp, &sop->so_stateids,
3031 st_perstateowner) { 3021 st_perstateowner) {
@@ -3149,11 +3139,14 @@ nfs4_check_open_reclaim(clientid_t *clid)
3149 3139
3150/* initialization to perform at module load time: */ 3140/* initialization to perform at module load time: */
3151 3141
3152void 3142int
3153nfs4_state_init(void) 3143nfs4_state_init(void)
3154{ 3144{
3155 int i; 3145 int i, status;
3156 3146
3147 status = nfsd4_init_slabs();
3148 if (status)
3149 return status;
3157 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3150 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3158 INIT_LIST_HEAD(&conf_id_hashtbl[i]); 3151 INIT_LIST_HEAD(&conf_id_hashtbl[i]);
3159 INIT_LIST_HEAD(&conf_str_hashtbl[i]); 3152 INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3182,6 +3175,7 @@ nfs4_state_init(void)
3182 for (i = 0; i < CLIENT_HASH_SIZE; i++) 3175 for (i = 0; i < CLIENT_HASH_SIZE; i++)
3183 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); 3176 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3184 reclaim_str_hashtbl_size = 0; 3177 reclaim_str_hashtbl_size = 0;
3178 return 0;
3185} 3179}
3186 3180
3187static void 3181static void
@@ -3242,20 +3236,15 @@ __nfs4_state_start(void)
3242 set_max_delegations(); 3236 set_max_delegations();
3243} 3237}
3244 3238
3245int 3239void
3246nfs4_state_start(void) 3240nfs4_state_start(void)
3247{ 3241{
3248 int status;
3249
3250 if (nfs4_init) 3242 if (nfs4_init)
3251 return 0; 3243 return;
3252 status = nfsd4_init_slabs();
3253 if (status)
3254 return status;
3255 nfsd4_load_reboot_recovery_data(); 3244 nfsd4_load_reboot_recovery_data();
3256 __nfs4_state_start(); 3245 __nfs4_state_start();
3257 nfs4_init = 1; 3246 nfs4_init = 1;
3258 return 0; 3247 return;
3259} 3248}
3260 3249
3261int 3250int
@@ -3313,7 +3302,6 @@ nfs4_state_shutdown(void)
3313 nfs4_lock_state(); 3302 nfs4_lock_state();
3314 nfs4_release_reclaim(); 3303 nfs4_release_reclaim();
3315 __nfs4_state_shutdown(); 3304 __nfs4_state_shutdown();
3316 nfsd4_free_slabs();
3317 nfs4_unlock_state(); 3305 nfs4_unlock_state();
3318} 3306}
3319 3307
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 8ef0964179bc..57333944af7f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -102,7 +102,8 @@ check_filename(char *str, int len, __be32 err)
102out: \ 102out: \
103 return status; \ 103 return status; \
104xdr_error: \ 104xdr_error: \
105 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ 105 dprintk("NFSD: xdr error (%s:%d)\n", \
106 __FILE__, __LINE__); \
106 status = nfserr_bad_xdr; \ 107 status = nfserr_bad_xdr; \
107 goto out 108 goto out
108 109
@@ -124,7 +125,8 @@ xdr_error: \
124 if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ 125 if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
125 savemem(argp, p, nbytes) : \ 126 savemem(argp, p, nbytes) : \
126 (char *)p)) { \ 127 (char *)p)) { \
127 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ 128 dprintk("NFSD: xdr error (%s:%d)\n", \
129 __FILE__, __LINE__); \
128 goto xdr_error; \ 130 goto xdr_error; \
129 } \ 131 } \
130 p += XDR_QUADLEN(nbytes); \ 132 p += XDR_QUADLEN(nbytes); \
@@ -140,7 +142,8 @@ xdr_error: \
140 p = argp->p; \ 142 p = argp->p; \
141 argp->p += XDR_QUADLEN(nbytes); \ 143 argp->p += XDR_QUADLEN(nbytes); \
142 } else if (!(p = read_buf(argp, nbytes))) { \ 144 } else if (!(p = read_buf(argp, nbytes))) { \
143 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ 145 dprintk("NFSD: xdr error (%s:%d)\n", \
146 __FILE__, __LINE__); \
144 goto xdr_error; \ 147 goto xdr_error; \
145 } \ 148 } \
146} while (0) 149} while (0)
@@ -948,7 +951,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
948 */ 951 */
949 avail = (char*)argp->end - (char*)argp->p; 952 avail = (char*)argp->end - (char*)argp->p;
950 if (avail + argp->pagelen < write->wr_buflen) { 953 if (avail + argp->pagelen < write->wr_buflen) {
951 printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); 954 dprintk("NFSD: xdr error (%s:%d)\n",
955 __FILE__, __LINE__);
952 goto xdr_error; 956 goto xdr_error;
953 } 957 }
954 argp->rqstp->rq_vec[0].iov_base = p; 958 argp->rqstp->rq_vec[0].iov_base = p;
@@ -1019,7 +1023,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1019 argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); 1023 argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
1020 if (!argp->ops) { 1024 if (!argp->ops) {
1021 argp->ops = argp->iops; 1025 argp->ops = argp->iops;
1022 printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n"); 1026 dprintk("nfsd: couldn't allocate room for COMPOUND\n");
1023 goto xdr_error; 1027 goto xdr_error;
1024 } 1028 }
1025 } 1029 }
@@ -1326,7 +1330,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
1326 path = exp->ex_path; 1330 path = exp->ex_path;
1327 1331
1328 if (strncmp(path, rootpath, strlen(rootpath))) { 1332 if (strncmp(path, rootpath, strlen(rootpath))) {
1329 printk("nfsd: fs_locations failed;" 1333 dprintk("nfsd: fs_locations failed;"
1330 "%s is not contained in %s\n", path, rootpath); 1334 "%s is not contained in %s\n", path, rootpath);
1331 *stat = nfserr_notsupp; 1335 *stat = nfserr_notsupp;
1332 return NULL; 1336 return NULL;
@@ -1475,7 +1479,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1475 err = vfs_getattr(exp->ex_mnt, dentry, &stat); 1479 err = vfs_getattr(exp->ex_mnt, dentry, &stat);
1476 if (err) 1480 if (err)
1477 goto out_nfserr; 1481 goto out_nfserr;
1478 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) || 1482 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
1483 FATTR4_WORD0_MAXNAME)) ||
1479 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | 1484 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
1480 FATTR4_WORD1_SPACE_TOTAL))) { 1485 FATTR4_WORD1_SPACE_TOTAL))) {
1481 err = vfs_statfs(dentry, &statfs); 1486 err = vfs_statfs(dentry, &statfs);
@@ -1679,7 +1684,7 @@ out_acl:
1679 if (bmval0 & FATTR4_WORD0_FILEID) { 1684 if (bmval0 & FATTR4_WORD0_FILEID) {
1680 if ((buflen -= 8) < 0) 1685 if ((buflen -= 8) < 0)
1681 goto out_resource; 1686 goto out_resource;
1682 WRITE64((u64) stat.ino); 1687 WRITE64(stat.ino);
1683 } 1688 }
1684 if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { 1689 if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
1685 if ((buflen -= 8) < 0) 1690 if ((buflen -= 8) < 0)
@@ -1721,7 +1726,7 @@ out_acl:
1721 if (bmval0 & FATTR4_WORD0_MAXNAME) { 1726 if (bmval0 & FATTR4_WORD0_MAXNAME) {
1722 if ((buflen -= 4) < 0) 1727 if ((buflen -= 4) < 0)
1723 goto out_resource; 1728 goto out_resource;
1724 WRITE32(~(u32) 0); 1729 WRITE32(statfs.f_namelen);
1725 } 1730 }
1726 if (bmval0 & FATTR4_WORD0_MAXREAD) { 1731 if (bmval0 & FATTR4_WORD0_MAXREAD) {
1727 if ((buflen -= 8) < 0) 1732 if ((buflen -= 8) < 0)
@@ -1821,16 +1826,15 @@ out_acl:
1821 WRITE32(stat.mtime.tv_nsec); 1826 WRITE32(stat.mtime.tv_nsec);
1822 } 1827 }
1823 if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { 1828 if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
1824 struct dentry *mnt_pnt, *mnt_root;
1825
1826 if ((buflen -= 8) < 0) 1829 if ((buflen -= 8) < 0)
1827 goto out_resource; 1830 goto out_resource;
1828 mnt_root = exp->ex_mnt->mnt_root; 1831 if (exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) {
1829 if (mnt_root->d_inode == dentry->d_inode) { 1832 err = vfs_getattr(exp->ex_mnt->mnt_parent,
1830 mnt_pnt = exp->ex_mnt->mnt_mountpoint; 1833 exp->ex_mnt->mnt_mountpoint, &stat);
1831 WRITE64((u64) mnt_pnt->d_inode->i_ino); 1834 if (err)
1832 } else 1835 goto out_nfserr;
1833 WRITE64((u64) stat.ino); 1836 }
1837 WRITE64(stat.ino);
1834 } 1838 }
1835 *attrlenp = htonl((char *)p - (char *)attrlenp - 4); 1839 *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
1836 *countp = p - buffer; 1840 *countp = p - buffer;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index baac89d917ca..77dc9893b7ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -298,7 +298,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
298 * qword quoting is used, so filehandle will be \x.... 298 * qword quoting is used, so filehandle will be \x....
299 */ 299 */
300 char *dname, *path; 300 char *dname, *path;
301 int maxsize; 301 int uninitialized_var(maxsize);
302 char *mesg = buf; 302 char *mesg = buf;
303 int len; 303 int len;
304 struct auth_domain *dom; 304 struct auth_domain *dom;
@@ -679,11 +679,13 @@ static int __init init_nfsd(void)
679 int retval; 679 int retval;
680 printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); 680 printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
681 681
682 retval = nfs4_state_init(); /* nfs4 locking state */
683 if (retval)
684 return retval;
682 nfsd_stat_init(); /* Statistics */ 685 nfsd_stat_init(); /* Statistics */
683 nfsd_cache_init(); /* RPC reply cache */ 686 nfsd_cache_init(); /* RPC reply cache */
684 nfsd_export_init(); /* Exports table */ 687 nfsd_export_init(); /* Exports table */
685 nfsd_lockd_init(); /* lockd->nfsd callbacks */ 688 nfsd_lockd_init(); /* lockd->nfsd callbacks */
686 nfs4_state_init(); /* NFSv4 locking state */
687 nfsd_idmap_init(); /* Name to ID mapping */ 689 nfsd_idmap_init(); /* Name to ID mapping */
688 if (proc_mkdir("fs/nfs", NULL)) { 690 if (proc_mkdir("fs/nfs", NULL)) {
689 struct proc_dir_entry *entry; 691 struct proc_dir_entry *entry;
@@ -712,6 +714,7 @@ static void __exit exit_nfsd(void)
712 nfsd_stat_shutdown(); 714 nfsd_stat_shutdown();
713 nfsd_lockd_shutdown(); 715 nfsd_lockd_shutdown();
714 nfsd_idmap_shutdown(); 716 nfsd_idmap_shutdown();
717 nfsd4_free_slabs();
715 unregister_filesystem(&nfsd_fs_type); 718 unregister_filesystem(&nfsd_fs_type);
716} 719}
717 720
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a8c89ae4c743..1190aeaa92be 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -349,9 +349,7 @@ nfsd_svc(unsigned short port, int nrservs)
349 error = nfsd_racache_init(2*nrservs); 349 error = nfsd_racache_init(2*nrservs);
350 if (error<0) 350 if (error<0)
351 goto out; 351 goto out;
352 error = nfs4_state_start(); 352 nfs4_state_start();
353 if (error<0)
354 goto out;
355 353
356 nfsd_reset_versions(); 354 nfsd_reset_versions();
357 355
@@ -546,10 +544,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
546 /* Now call the procedure handler, and encode NFS status. */ 544 /* Now call the procedure handler, and encode NFS status. */
547 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); 545 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
548 nfserr = map_new_errors(rqstp->rq_vers, nfserr); 546 nfserr = map_new_errors(rqstp->rq_vers, nfserr);
549 if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2)
550 nfserr = nfserr_dropit;
551 if (nfserr == nfserr_dropit) { 547 if (nfserr == nfserr_dropit) {
552 dprintk("nfsd: Dropping request due to malloc failure!\n"); 548 dprintk("nfsd: Dropping request; may be revisited later\n");
553 nfsd_cache_update(rqstp, RC_NOCACHE, NULL); 549 nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
554 return 0; 550 return 0;
555 } 551 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index cb3e7fadb772..986f9b32083c 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -523,6 +523,10 @@ nfssvc_encode_entry(void *ccdv, const char *name,
523 cd->common.err = nfserr_toosmall; 523 cd->common.err = nfserr_toosmall;
524 return -EINVAL; 524 return -EINVAL;
525 } 525 }
526 if (ino > ~((u32) 0)) {
527 cd->common.err = nfserr_fbig;
528 return -EINVAL;
529 }
526 *p++ = xdr_one; /* mark entry present */ 530 *p++ = xdr_one; /* mark entry present */
527 *p++ = htonl((u32) ino); /* file id */ 531 *p++ = htonl((u32) ino); /* file id */
528 p = xdr_encode_array(p, name, namlen);/* name length & name */ 532 p = xdr_encode_array(p, name, namlen);/* name length & name */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 085ded6f6d3a..2a8d665b134b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -289,7 +289,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
289 if (!iap->ia_valid) 289 if (!iap->ia_valid)
290 goto out; 290 goto out;
291 291
292 /* NFSv2 does not differentiate between "set-[ac]time-to-now" 292 /*
293 * NFSv2 does not differentiate between "set-[ac]time-to-now"
293 * which only requires access, and "set-[ac]time-to-X" which 294 * which only requires access, and "set-[ac]time-to-X" which
294 * requires ownership. 295 * requires ownership.
295 * So if it looks like it might be "set both to the same time which 296 * So if it looks like it might be "set both to the same time which
@@ -302,25 +303,33 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
302 */ 303 */
303#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) 304#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
304#define MAX_TOUCH_TIME_ERROR (30*60) 305#define MAX_TOUCH_TIME_ERROR (30*60)
305 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET 306 if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
306 && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec 307 iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
307 ) { 308 /*
308 /* Looks probable. Now just make sure time is in the right ballpark. 309 * Looks probable.
309 * Solaris, at least, doesn't seem to care what the time request is. 310 *
310 * We require it be within 30 minutes of now. 311 * Now just make sure time is in the right ballpark.
311 */ 312 * Solaris, at least, doesn't seem to care what the time
312 time_t delta = iap->ia_atime.tv_sec - get_seconds(); 313 * request is. We require it be within 30 minutes of now.
313 if (delta<0) delta = -delta;
314 if (delta < MAX_TOUCH_TIME_ERROR &&
315 inode_change_ok(inode, iap) != 0) {
316 /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
317 * this will cause notify_change to set these times to "now"
318 */ 314 */
319 iap->ia_valid &= ~BOTH_TIME_SET; 315 time_t delta = iap->ia_atime.tv_sec - get_seconds();
320 } 316 if (delta < 0)
317 delta = -delta;
318 if (delta < MAX_TOUCH_TIME_ERROR &&
319 inode_change_ok(inode, iap) != 0) {
320 /*
321 * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
322 * This will cause notify_change to set these times
323 * to "now"
324 */
325 iap->ia_valid &= ~BOTH_TIME_SET;
326 }
321 } 327 }
322 328
323 /* The size case is special. It changes the file as well as the attributes. */ 329 /*
330 * The size case is special.
331 * It changes the file as well as the attributes.
332 */
324 if (iap->ia_valid & ATTR_SIZE) { 333 if (iap->ia_valid & ATTR_SIZE) {
325 if (iap->ia_size < inode->i_size) { 334 if (iap->ia_size < inode->i_size) {
326 err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); 335 err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index af4ef808fa94..345798ebd366 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -17,6 +17,18 @@ ToDo/Notes:
17 happen is unclear however so it is worth waiting until someone hits 17 happen is unclear however so it is worth waiting until someone hits
18 the problem. 18 the problem.
19 19
202.1.29 - Fix a deadlock at mount time.
21
22 - During mount the VFS holds s_umount lock on the superblock. So when
23 we try to empty the journal $LogFile contents by calling
24 ntfs_attr_set() when the machine does not have much memory and the
25 journal is large ntfs_attr_set() results in the VM trying to balance
26 dirty pages which in turn tries to take the s_umount lock and thus we
27 get a deadlock. The solution is to not use ntfs_attr_set() and
28 instead do the zeroing by hand at the block level rather than page
29 cache level.
30 - Fix sparse warnings.
31
202.1.28 - Fix a deadlock. 322.1.28 - Fix a deadlock.
21 33
22 - Fix deadlock in fs/ntfs/inode.c::ntfs_put_inode(). Thanks to Sergey 34 - Fix deadlock in fs/ntfs/inode.c::ntfs_put_inode(). Thanks to Sergey
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 825508385565..58b6be992544 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.28\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.29\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 6e5c2534f4bc..cfdc7900d271 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
2 * aops.c - NTFS kernel address space operations and page cache handling. 2 * aops.c - NTFS kernel address space operations and page cache handling.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2006 Anton Altaparmakov 5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -396,7 +396,7 @@ static int ntfs_readpage(struct file *file, struct page *page)
396 loff_t i_size; 396 loff_t i_size;
397 struct inode *vi; 397 struct inode *vi;
398 ntfs_inode *ni, *base_ni; 398 ntfs_inode *ni, *base_ni;
399 u8 *kaddr; 399 u8 *addr;
400 ntfs_attr_search_ctx *ctx; 400 ntfs_attr_search_ctx *ctx;
401 MFT_RECORD *mrec; 401 MFT_RECORD *mrec;
402 unsigned long flags; 402 unsigned long flags;
@@ -491,15 +491,15 @@ retry_readpage:
491 /* Race with shrinking truncate. */ 491 /* Race with shrinking truncate. */
492 attr_len = i_size; 492 attr_len = i_size;
493 } 493 }
494 kaddr = kmap_atomic(page, KM_USER0); 494 addr = kmap_atomic(page, KM_USER0);
495 /* Copy the data to the page. */ 495 /* Copy the data to the page. */
496 memcpy(kaddr, (u8*)ctx->attr + 496 memcpy(addr, (u8*)ctx->attr +
497 le16_to_cpu(ctx->attr->data.resident.value_offset), 497 le16_to_cpu(ctx->attr->data.resident.value_offset),
498 attr_len); 498 attr_len);
499 /* Zero the remainder of the page. */ 499 /* Zero the remainder of the page. */
500 memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 500 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
501 flush_dcache_page(page); 501 flush_dcache_page(page);
502 kunmap_atomic(kaddr, KM_USER0); 502 kunmap_atomic(addr, KM_USER0);
503put_unm_err_out: 503put_unm_err_out:
504 ntfs_attr_put_search_ctx(ctx); 504 ntfs_attr_put_search_ctx(ctx);
505unm_err_out: 505unm_err_out:
@@ -1344,7 +1344,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
1344 loff_t i_size; 1344 loff_t i_size;
1345 struct inode *vi = page->mapping->host; 1345 struct inode *vi = page->mapping->host;
1346 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); 1346 ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
1347 char *kaddr; 1347 char *addr;
1348 ntfs_attr_search_ctx *ctx = NULL; 1348 ntfs_attr_search_ctx *ctx = NULL;
1349 MFT_RECORD *m = NULL; 1349 MFT_RECORD *m = NULL;
1350 u32 attr_len; 1350 u32 attr_len;
@@ -1484,14 +1484,14 @@ retry_writepage:
1484 /* Shrinking cannot fail. */ 1484 /* Shrinking cannot fail. */
1485 BUG_ON(err); 1485 BUG_ON(err);
1486 } 1486 }
1487 kaddr = kmap_atomic(page, KM_USER0); 1487 addr = kmap_atomic(page, KM_USER0);
1488 /* Copy the data from the page to the mft record. */ 1488 /* Copy the data from the page to the mft record. */
1489 memcpy((u8*)ctx->attr + 1489 memcpy((u8*)ctx->attr +
1490 le16_to_cpu(ctx->attr->data.resident.value_offset), 1490 le16_to_cpu(ctx->attr->data.resident.value_offset),
1491 kaddr, attr_len); 1491 addr, attr_len);
1492 /* Zero out of bounds area in the page cache page. */ 1492 /* Zero out of bounds area in the page cache page. */
1493 memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); 1493 memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
1494 kunmap_atomic(kaddr, KM_USER0); 1494 kunmap_atomic(addr, KM_USER0);
1495 flush_dcache_page(page); 1495 flush_dcache_page(page);
1496 flush_dcache_mft_record_page(ctx->ntfs_ino); 1496 flush_dcache_mft_record_page(ctx->ntfs_ino);
1497 /* We are done with the page. */ 1497 /* We are done with the page. */
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 1c08fefe487a..92dabdcf2b80 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. 2 * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2006 Anton Altaparmakov 4 * Copyright (c) 2001-2007 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -2500,7 +2500,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2500 struct page *page; 2500 struct page *page;
2501 u8 *kaddr; 2501 u8 *kaddr;
2502 pgoff_t idx, end; 2502 pgoff_t idx, end;
2503 unsigned int start_ofs, end_ofs, size; 2503 unsigned start_ofs, end_ofs, size;
2504 2504
2505 ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.", 2505 ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.",
2506 (long long)ofs, (long long)cnt, val); 2506 (long long)ofs, (long long)cnt, val);
@@ -2548,6 +2548,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2548 kunmap_atomic(kaddr, KM_USER0); 2548 kunmap_atomic(kaddr, KM_USER0);
2549 set_page_dirty(page); 2549 set_page_dirty(page);
2550 page_cache_release(page); 2550 page_cache_release(page);
2551 balance_dirty_pages_ratelimited(mapping);
2552 cond_resched();
2551 if (idx == end) 2553 if (idx == end)
2552 goto done; 2554 goto done;
2553 idx++; 2555 idx++;
@@ -2604,6 +2606,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
2604 kunmap_atomic(kaddr, KM_USER0); 2606 kunmap_atomic(kaddr, KM_USER0);
2605 set_page_dirty(page); 2607 set_page_dirty(page);
2606 page_cache_release(page); 2608 page_cache_release(page);
2609 balance_dirty_pages_ratelimited(mapping);
2610 cond_resched();
2607 } 2611 }
2608done: 2612done:
2609 ntfs_debug("Done."); 2613 ntfs_debug("Done.");
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index ffcc504a1667..c814204d4ea0 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2006 Anton Altaparmakov 4 * Copyright (c) 2001-2007 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -26,7 +26,6 @@
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/uio.h> 27#include <linux/uio.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/sched.h>
30 29
31#include <asm/page.h> 30#include <asm/page.h>
32#include <asm/uaccess.h> 31#include <asm/uaccess.h>
@@ -362,7 +361,7 @@ static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
362 volatile char c; 361 volatile char c;
363 362
364 /* Set @end to the first byte outside the last page we care about. */ 363 /* Set @end to the first byte outside the last page we care about. */
365 end = (const char __user*)PAGE_ALIGN((ptrdiff_t __user)uaddr + bytes); 364 end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
366 365
367 while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) 366 while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
368 ; 367 ;
@@ -532,7 +531,8 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
532 blocksize_bits = vol->sb->s_blocksize_bits; 531 blocksize_bits = vol->sb->s_blocksize_bits;
533 u = 0; 532 u = 0;
534 do { 533 do {
535 struct page *page = pages[u]; 534 page = pages[u];
535 BUG_ON(!page);
536 /* 536 /*
537 * create_empty_buffers() will create uptodate/dirty buffers if 537 * create_empty_buffers() will create uptodate/dirty buffers if
538 * the page is uptodate/dirty. 538 * the page is uptodate/dirty.
@@ -1291,7 +1291,7 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
1291 size_t bytes) 1291 size_t bytes)
1292{ 1292{
1293 struct page **last_page = pages + nr_pages; 1293 struct page **last_page = pages + nr_pages;
1294 char *kaddr; 1294 char *addr;
1295 size_t total = 0; 1295 size_t total = 0;
1296 unsigned len; 1296 unsigned len;
1297 int left; 1297 int left;
@@ -1300,13 +1300,13 @@ static inline size_t ntfs_copy_from_user(struct page **pages,
1300 len = PAGE_CACHE_SIZE - ofs; 1300 len = PAGE_CACHE_SIZE - ofs;
1301 if (len > bytes) 1301 if (len > bytes)
1302 len = bytes; 1302 len = bytes;
1303 kaddr = kmap_atomic(*pages, KM_USER0); 1303 addr = kmap_atomic(*pages, KM_USER0);
1304 left = __copy_from_user_inatomic(kaddr + ofs, buf, len); 1304 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1305 kunmap_atomic(kaddr, KM_USER0); 1305 kunmap_atomic(addr, KM_USER0);
1306 if (unlikely(left)) { 1306 if (unlikely(left)) {
1307 /* Do it the slow way. */ 1307 /* Do it the slow way. */
1308 kaddr = kmap(*pages); 1308 addr = kmap(*pages);
1309 left = __copy_from_user(kaddr + ofs, buf, len); 1309 left = __copy_from_user(addr + ofs, buf, len);
1310 kunmap(*pages); 1310 kunmap(*pages);
1311 if (unlikely(left)) 1311 if (unlikely(left))
1312 goto err_out; 1312 goto err_out;
@@ -1408,26 +1408,26 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1408 size_t *iov_ofs, size_t bytes) 1408 size_t *iov_ofs, size_t bytes)
1409{ 1409{
1410 struct page **last_page = pages + nr_pages; 1410 struct page **last_page = pages + nr_pages;
1411 char *kaddr; 1411 char *addr;
1412 size_t copied, len, total = 0; 1412 size_t copied, len, total = 0;
1413 1413
1414 do { 1414 do {
1415 len = PAGE_CACHE_SIZE - ofs; 1415 len = PAGE_CACHE_SIZE - ofs;
1416 if (len > bytes) 1416 if (len > bytes)
1417 len = bytes; 1417 len = bytes;
1418 kaddr = kmap_atomic(*pages, KM_USER0); 1418 addr = kmap_atomic(*pages, KM_USER0);
1419 copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs, 1419 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1420 *iov, *iov_ofs, len); 1420 *iov, *iov_ofs, len);
1421 kunmap_atomic(kaddr, KM_USER0); 1421 kunmap_atomic(addr, KM_USER0);
1422 if (unlikely(copied != len)) { 1422 if (unlikely(copied != len)) {
1423 /* Do it the slow way. */ 1423 /* Do it the slow way. */
1424 kaddr = kmap(*pages); 1424 addr = kmap(*pages);
1425 copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs, 1425 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1426 *iov, *iov_ofs, len); 1426 *iov, *iov_ofs, len);
1427 /* 1427 /*
1428 * Zero the rest of the target like __copy_from_user(). 1428 * Zero the rest of the target like __copy_from_user().
1429 */ 1429 */
1430 memset(kaddr + ofs + copied, 0, len - copied); 1430 memset(addr + ofs + copied, 0, len - copied);
1431 kunmap(*pages); 1431 kunmap(*pages);
1432 if (unlikely(copied != len)) 1432 if (unlikely(copied != len))
1433 goto err_out; 1433 goto err_out;
@@ -1735,8 +1735,6 @@ static int ntfs_commit_pages_after_write(struct page **pages,
1735 read_unlock_irqrestore(&ni->size_lock, flags); 1735 read_unlock_irqrestore(&ni->size_lock, flags);
1736 BUG_ON(initialized_size != i_size); 1736 BUG_ON(initialized_size != i_size);
1737 if (end > initialized_size) { 1737 if (end > initialized_size) {
1738 unsigned long flags;
1739
1740 write_lock_irqsave(&ni->size_lock, flags); 1738 write_lock_irqsave(&ni->size_lock, flags);
1741 ni->initialized_size = end; 1739 ni->initialized_size = end;
1742 i_size_write(vi, end); 1740 i_size_write(vi, end);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index b532a730cec2..e9da092e2772 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -34,7 +34,6 @@
34#include "dir.h" 34#include "dir.h"
35#include "debug.h" 35#include "debug.h"
36#include "inode.h" 36#include "inode.h"
37#include "attrib.h"
38#include "lcnalloc.h" 37#include "lcnalloc.h"
39#include "malloc.h" 38#include "malloc.h"
40#include "mft.h" 39#include "mft.h"
@@ -2500,8 +2499,6 @@ retry_truncate:
2500 /* Resize the attribute record to best fit the new attribute size. */ 2499 /* Resize the attribute record to best fit the new attribute size. */
2501 if (new_size < vol->mft_record_size && 2500 if (new_size < vol->mft_record_size &&
2502 !ntfs_resident_attr_value_resize(m, a, new_size)) { 2501 !ntfs_resident_attr_value_resize(m, a, new_size)) {
2503 unsigned long flags;
2504
2505 /* The resize succeeded! */ 2502 /* The resize succeeded! */
2506 flush_dcache_mft_record_page(ctx->ntfs_ino); 2503 flush_dcache_mft_record_page(ctx->ntfs_ino);
2507 mark_mft_record_dirty(ctx->ntfs_ino); 2504 mark_mft_record_dirty(ctx->ntfs_ino);
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index acfed325f4ec..d7932e95b1fd 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. 2 * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2002-2005 Anton Altaparmakov 4 * Copyright (c) 2002-2007 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -724,24 +724,139 @@ bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
724 */ 724 */
725bool ntfs_empty_logfile(struct inode *log_vi) 725bool ntfs_empty_logfile(struct inode *log_vi)
726{ 726{
727 ntfs_volume *vol = NTFS_SB(log_vi->i_sb); 727 VCN vcn, end_vcn;
728 ntfs_inode *log_ni = NTFS_I(log_vi);
729 ntfs_volume *vol = log_ni->vol;
730 struct super_block *sb = vol->sb;
731 runlist_element *rl;
732 unsigned long flags;
733 unsigned block_size, block_size_bits;
734 int err;
735 bool should_wait = true;
728 736
729 ntfs_debug("Entering."); 737 ntfs_debug("Entering.");
730 if (!NVolLogFileEmpty(vol)) { 738 if (NVolLogFileEmpty(vol)) {
731 int err; 739 ntfs_debug("Done.");
732 740 return true;
733 err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi),
734 0xff);
735 if (unlikely(err)) {
736 ntfs_error(vol->sb, "Failed to fill $LogFile with "
737 "0xff bytes (error code %i).", err);
738 return false;
739 }
740 /* Set the flag so we do not have to do it again on remount. */
741 NVolSetLogFileEmpty(vol);
742 } 741 }
742 /*
743 * We cannot use ntfs_attr_set() because we may be still in the middle
744 * of a mount operation. Thus we do the emptying by hand by first
745 * zapping the page cache pages for the $LogFile/$DATA attribute and
746 * then emptying each of the buffers in each of the clusters specified
747 * by the runlist by hand.
748 */
749 block_size = sb->s_blocksize;
750 block_size_bits = sb->s_blocksize_bits;
751 vcn = 0;
752 read_lock_irqsave(&log_ni->size_lock, flags);
753 end_vcn = (log_ni->initialized_size + vol->cluster_size_mask) >>
754 vol->cluster_size_bits;
755 read_unlock_irqrestore(&log_ni->size_lock, flags);
756 truncate_inode_pages(log_vi->i_mapping, 0);
757 down_write(&log_ni->runlist.lock);
758 rl = log_ni->runlist.rl;
759 if (unlikely(!rl || vcn < rl->vcn || !rl->length)) {
760map_vcn:
761 err = ntfs_map_runlist_nolock(log_ni, vcn, NULL);
762 if (err) {
763 ntfs_error(sb, "Failed to map runlist fragment (error "
764 "%d).", -err);
765 goto err;
766 }
767 rl = log_ni->runlist.rl;
768 BUG_ON(!rl || vcn < rl->vcn || !rl->length);
769 }
770 /* Seek to the runlist element containing @vcn. */
771 while (rl->length && vcn >= rl[1].vcn)
772 rl++;
773 do {
774 LCN lcn;
775 sector_t block, end_block;
776 s64 len;
777
778 /*
779 * If this run is not mapped map it now and start again as the
780 * runlist will have been updated.
781 */
782 lcn = rl->lcn;
783 if (unlikely(lcn == LCN_RL_NOT_MAPPED)) {
784 vcn = rl->vcn;
785 goto map_vcn;
786 }
787 /* If this run is not valid abort with an error. */
788 if (unlikely(!rl->length || lcn < LCN_HOLE))
789 goto rl_err;
790 /* Skip holes. */
791 if (lcn == LCN_HOLE)
792 continue;
793 block = lcn << vol->cluster_size_bits >> block_size_bits;
794 len = rl->length;
795 if (rl[1].vcn > end_vcn)
796 len = end_vcn - rl->vcn;
797 end_block = (lcn + len) << vol->cluster_size_bits >>
798 block_size_bits;
799 /* Iterate over the blocks in the run and empty them. */
800 do {
801 struct buffer_head *bh;
802
803 /* Obtain the buffer, possibly not uptodate. */
804 bh = sb_getblk(sb, block);
805 BUG_ON(!bh);
806 /* Setup buffer i/o submission. */
807 lock_buffer(bh);
808 bh->b_end_io = end_buffer_write_sync;
809 get_bh(bh);
810 /* Set the entire contents of the buffer to 0xff. */
811 memset(bh->b_data, -1, block_size);
812 if (!buffer_uptodate(bh))
813 set_buffer_uptodate(bh);
814 if (buffer_dirty(bh))
815 clear_buffer_dirty(bh);
816 /*
817 * Submit the buffer and wait for i/o to complete but
818 * only for the first buffer so we do not miss really
819 * serious i/o errors. Once the first buffer has
820 * completed ignore errors afterwards as we can assume
821 * that if one buffer worked all of them will work.
822 */
823 submit_bh(WRITE, bh);
824 if (should_wait) {
825 should_wait = false;
826 wait_on_buffer(bh);
827 if (unlikely(!buffer_uptodate(bh)))
828 goto io_err;
829 }
830 brelse(bh);
831 } while (++block < end_block);
832 } while ((++rl)->vcn < end_vcn);
833 up_write(&log_ni->runlist.lock);
834 /*
835 * Zap the pages again just in case any got instantiated whilst we were
836 * emptying the blocks by hand. FIXME: We may not have completed
837 * writing to all the buffer heads yet so this may happen too early.
838 * We really should use a kernel thread to do the emptying
839 * asynchronously and then we can also set the volume dirty and output
840 * an error message if emptying should fail.
841 */
842 truncate_inode_pages(log_vi->i_mapping, 0);
843 /* Set the flag so we do not have to do it again on remount. */
844 NVolSetLogFileEmpty(vol);
743 ntfs_debug("Done."); 845 ntfs_debug("Done.");
744 return true; 846 return true;
847io_err:
848 ntfs_error(sb, "Failed to write buffer. Unmount and run chkdsk.");
849 goto dirty_err;
850rl_err:
851 ntfs_error(sb, "Runlist is corrupt. Unmount and run chkdsk.");
852dirty_err:
853 NVolSetErrors(vol);
854 err = -EIO;
855err:
856 up_write(&log_ni->runlist.lock);
857 ntfs_error(sb, "Failed to fill $LogFile with 0xff bytes (error %d).",
858 -err);
859 return false;
745} 860}
746 861
747#endif /* NTFS_RW */ 862#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 9afd72c7ad0d..56a9a6d25a2a 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. 2 * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2005 Anton Altaparmakov 4 * Copyright (c) 2001-2007 Anton Altaparmakov
5 * Copyright (c) 2002-2005 Richard Russon 5 * Copyright (c) 2002-2005 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -1714,7 +1714,7 @@ extend_hole:
1714 sizeof(*rl)); 1714 sizeof(*rl));
1715 /* Adjust the beginning of the tail if necessary. */ 1715 /* Adjust the beginning of the tail if necessary. */
1716 if (end > rl->vcn) { 1716 if (end > rl->vcn) {
1717 s64 delta = end - rl->vcn; 1717 delta = end - rl->vcn;
1718 rl->vcn = end; 1718 rl->vcn = end;
1719 rl->length -= delta; 1719 rl->length -= delta;
1720 /* Only adjust the lcn if it is real. */ 1720 /* Only adjust the lcn if it is real. */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 778a850b4634..4ba7f0bdc248 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -354,7 +354,6 @@ struct ocfs2_insert_type {
354 enum ocfs2_append_type ins_appending; 354 enum ocfs2_append_type ins_appending;
355 enum ocfs2_contig_type ins_contig; 355 enum ocfs2_contig_type ins_contig;
356 int ins_contig_index; 356 int ins_contig_index;
357 int ins_free_records;
358 int ins_tree_depth; 357 int ins_tree_depth;
359}; 358};
360 359
@@ -362,7 +361,6 @@ struct ocfs2_merge_ctxt {
362 enum ocfs2_contig_type c_contig_type; 361 enum ocfs2_contig_type c_contig_type;
363 int c_has_empty_extent; 362 int c_has_empty_extent;
364 int c_split_covers_rec; 363 int c_split_covers_rec;
365 int c_used_tail_recs;
366}; 364};
367 365
368/* 366/*
@@ -2808,36 +2806,28 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2808 struct ocfs2_merge_ctxt *ctxt) 2806 struct ocfs2_merge_ctxt *ctxt)
2809 2807
2810{ 2808{
2811 int ret = 0, delete_tail_recs = 0; 2809 int ret = 0;
2812 struct ocfs2_extent_list *el = path_leaf_el(left_path); 2810 struct ocfs2_extent_list *el = path_leaf_el(left_path);
2813 struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; 2811 struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
2814 2812
2815 BUG_ON(ctxt->c_contig_type == CONTIG_NONE); 2813 BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
2816 2814
2817 if (ctxt->c_split_covers_rec) { 2815 if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
2818 delete_tail_recs++; 2816 /*
2819 2817 * The merge code will need to create an empty
2820 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT || 2818 * extent to take the place of the newly
2821 ctxt->c_has_empty_extent) 2819 * emptied slot. Remove any pre-existing empty
2822 delete_tail_recs++; 2820 * extents - having more than one in a leaf is
2823 2821 * illegal.
2824 if (ctxt->c_has_empty_extent) { 2822 */
2825 /* 2823 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2826 * The merge code will need to create an empty 2824 dealloc);
2827 * extent to take the place of the newly 2825 if (ret) {
2828 * emptied slot. Remove any pre-existing empty 2826 mlog_errno(ret);
2829 * extents - having more than one in a leaf is 2827 goto out;
2830 * illegal.
2831 */
2832 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2833 dealloc);
2834 if (ret) {
2835 mlog_errno(ret);
2836 goto out;
2837 }
2838 split_index--;
2839 rec = &el->l_recs[split_index];
2840 } 2828 }
2829 split_index--;
2830 rec = &el->l_recs[split_index];
2841 } 2831 }
2842 2832
2843 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { 2833 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
@@ -3593,6 +3583,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3593 struct buffer_head *di_bh, 3583 struct buffer_head *di_bh,
3594 struct buffer_head **last_eb_bh, 3584 struct buffer_head **last_eb_bh,
3595 struct ocfs2_extent_rec *insert_rec, 3585 struct ocfs2_extent_rec *insert_rec,
3586 int *free_records,
3596 struct ocfs2_insert_type *insert) 3587 struct ocfs2_insert_type *insert)
3597{ 3588{
3598 int ret; 3589 int ret;
@@ -3633,7 +3624,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3633 * XXX: This test is simplistic, we can search for empty 3624 * XXX: This test is simplistic, we can search for empty
3634 * extent records too. 3625 * extent records too.
3635 */ 3626 */
3636 insert->ins_free_records = le16_to_cpu(el->l_count) - 3627 *free_records = le16_to_cpu(el->l_count) -
3637 le16_to_cpu(el->l_next_free_rec); 3628 le16_to_cpu(el->l_next_free_rec);
3638 3629
3639 if (!insert->ins_tree_depth) { 3630 if (!insert->ins_tree_depth) {
@@ -3730,10 +3721,13 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3730 struct ocfs2_alloc_context *meta_ac) 3721 struct ocfs2_alloc_context *meta_ac)
3731{ 3722{
3732 int status; 3723 int status;
3724 int uninitialized_var(free_records);
3733 struct buffer_head *last_eb_bh = NULL; 3725 struct buffer_head *last_eb_bh = NULL;
3734 struct ocfs2_insert_type insert = {0, }; 3726 struct ocfs2_insert_type insert = {0, };
3735 struct ocfs2_extent_rec rec; 3727 struct ocfs2_extent_rec rec;
3736 3728
3729 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
3730
3737 mlog(0, "add %u clusters at position %u to inode %llu\n", 3731 mlog(0, "add %u clusters at position %u to inode %llu\n",
3738 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 3732 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3739 3733
@@ -3752,7 +3746,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3752 rec.e_flags = flags; 3746 rec.e_flags = flags;
3753 3747
3754 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 3748 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
3755 &insert); 3749 &free_records, &insert);
3756 if (status < 0) { 3750 if (status < 0) {
3757 mlog_errno(status); 3751 mlog_errno(status);
3758 goto bail; 3752 goto bail;
@@ -3762,9 +3756,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3762 "Insert.contig_index: %d, Insert.free_records: %d, " 3756 "Insert.contig_index: %d, Insert.free_records: %d, "
3763 "Insert.tree_depth: %d\n", 3757 "Insert.tree_depth: %d\n",
3764 insert.ins_appending, insert.ins_contig, insert.ins_contig_index, 3758 insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
3765 insert.ins_free_records, insert.ins_tree_depth); 3759 free_records, insert.ins_tree_depth);
3766 3760
3767 if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { 3761 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
3768 status = ocfs2_grow_tree(inode, handle, fe_bh, 3762 status = ocfs2_grow_tree(inode, handle, fe_bh,
3769 &insert.ins_tree_depth, &last_eb_bh, 3763 &insert.ins_tree_depth, &last_eb_bh,
3770 meta_ac); 3764 meta_ac);
@@ -3847,26 +3841,17 @@ leftright:
3847 3841
3848 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 3842 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
3849 le16_to_cpu(rightmost_el->l_count)) { 3843 le16_to_cpu(rightmost_el->l_count)) {
3850 int old_depth = depth;
3851
3852 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, 3844 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
3853 meta_ac); 3845 meta_ac);
3854 if (ret) { 3846 if (ret) {
3855 mlog_errno(ret); 3847 mlog_errno(ret);
3856 goto out; 3848 goto out;
3857 } 3849 }
3858
3859 if (old_depth != depth) {
3860 eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
3861 rightmost_el = &eb->h_list;
3862 }
3863 } 3850 }
3864 3851
3865 memset(&insert, 0, sizeof(struct ocfs2_insert_type)); 3852 memset(&insert, 0, sizeof(struct ocfs2_insert_type));
3866 insert.ins_appending = APPEND_NONE; 3853 insert.ins_appending = APPEND_NONE;
3867 insert.ins_contig = CONTIG_NONE; 3854 insert.ins_contig = CONTIG_NONE;
3868 insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
3869 - le16_to_cpu(rightmost_el->l_next_free_rec);
3870 insert.ins_tree_depth = depth; 3855 insert.ins_tree_depth = depth;
3871 3856
3872 insert_range = le32_to_cpu(split_rec.e_cpos) + 3857 insert_range = le32_to_cpu(split_rec.e_cpos) +
@@ -4015,11 +4000,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4015 } else 4000 } else
4016 rightmost_el = path_root_el(path); 4001 rightmost_el = path_root_el(path);
4017 4002
4018 ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
4019 if (ctxt.c_used_tail_recs > 0 &&
4020 ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
4021 ctxt.c_used_tail_recs--;
4022
4023 if (rec->e_cpos == split_rec->e_cpos && 4003 if (rec->e_cpos == split_rec->e_cpos &&
4024 rec->e_leaf_clusters == split_rec->e_leaf_clusters) 4004 rec->e_leaf_clusters == split_rec->e_leaf_clusters)
4025 ctxt.c_split_covers_rec = 1; 4005 ctxt.c_split_covers_rec = 1;
@@ -4028,10 +4008,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4028 4008
4029 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); 4009 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
4030 4010
4031 mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " 4011 mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n",
4032 "has_empty: %u, split_covers: %u\n", split_index, 4012 split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent,
4033 ctxt.c_contig_type, ctxt.c_used_tail_recs, 4013 ctxt.c_split_covers_rec);
4034 ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
4035 4014
4036 if (ctxt.c_contig_type == CONTIG_NONE) { 4015 if (ctxt.c_contig_type == CONTIG_NONE) {
4037 if (ctxt.c_split_covers_rec) 4016 if (ctxt.c_split_covers_rec)
@@ -4180,27 +4159,18 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4180 4159
4181 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4160 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4182 le16_to_cpu(rightmost_el->l_count)) { 4161 le16_to_cpu(rightmost_el->l_count)) {
4183 int old_depth = depth;
4184
4185 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, 4162 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
4186 meta_ac); 4163 meta_ac);
4187 if (ret) { 4164 if (ret) {
4188 mlog_errno(ret); 4165 mlog_errno(ret);
4189 goto out; 4166 goto out;
4190 } 4167 }
4191
4192 if (old_depth != depth) {
4193 eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
4194 rightmost_el = &eb->h_list;
4195 }
4196 } 4168 }
4197 4169
4198 memset(&insert, 0, sizeof(struct ocfs2_insert_type)); 4170 memset(&insert, 0, sizeof(struct ocfs2_insert_type));
4199 insert.ins_appending = APPEND_NONE; 4171 insert.ins_appending = APPEND_NONE;
4200 insert.ins_contig = CONTIG_NONE; 4172 insert.ins_contig = CONTIG_NONE;
4201 insert.ins_split = SPLIT_RIGHT; 4173 insert.ins_split = SPLIT_RIGHT;
4202 insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
4203 - le16_to_cpu(rightmost_el->l_next_free_rec);
4204 insert.ins_tree_depth = depth; 4174 insert.ins_tree_depth = depth;
4205 4175
4206 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); 4176 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
@@ -5665,12 +5635,50 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
5665 return ocfs2_journal_dirty_data(handle, bh); 5635 return ocfs2_journal_dirty_data(handle, bh);
5666} 5636}
5667 5637
5638static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
5639 unsigned int from, unsigned int to,
5640 struct page *page, int zero, u64 *phys)
5641{
5642 int ret, partial = 0;
5643
5644 ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
5645 if (ret)
5646 mlog_errno(ret);
5647
5648 if (zero)
5649 zero_user_page(page, from, to - from, KM_USER0);
5650
5651 /*
5652 * Need to set the buffers we zero'd into uptodate
5653 * here if they aren't - ocfs2_map_page_blocks()
5654 * might've skipped some
5655 */
5656 if (ocfs2_should_order_data(inode)) {
5657 ret = walk_page_buffers(handle,
5658 page_buffers(page),
5659 from, to, &partial,
5660 ocfs2_ordered_zero_func);
5661 if (ret < 0)
5662 mlog_errno(ret);
5663 } else {
5664 ret = walk_page_buffers(handle, page_buffers(page),
5665 from, to, &partial,
5666 ocfs2_writeback_zero_func);
5667 if (ret < 0)
5668 mlog_errno(ret);
5669 }
5670
5671 if (!partial)
5672 SetPageUptodate(page);
5673
5674 flush_dcache_page(page);
5675}
5676
5668static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, 5677static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
5669 loff_t end, struct page **pages, 5678 loff_t end, struct page **pages,
5670 int numpages, u64 phys, handle_t *handle) 5679 int numpages, u64 phys, handle_t *handle)
5671{ 5680{
5672 int i, ret, partial = 0; 5681 int i;
5673 void *kaddr;
5674 struct page *page; 5682 struct page *page;
5675 unsigned int from, to = PAGE_CACHE_SIZE; 5683 unsigned int from, to = PAGE_CACHE_SIZE;
5676 struct super_block *sb = inode->i_sb; 5684 struct super_block *sb = inode->i_sb;
@@ -5691,87 +5699,31 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
5691 BUG_ON(from > PAGE_CACHE_SIZE); 5699 BUG_ON(from > PAGE_CACHE_SIZE);
5692 BUG_ON(to > PAGE_CACHE_SIZE); 5700 BUG_ON(to > PAGE_CACHE_SIZE);
5693 5701
5694 ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0); 5702 ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
5695 if (ret) 5703 &phys);
5696 mlog_errno(ret);
5697
5698 kaddr = kmap_atomic(page, KM_USER0);
5699 memset(kaddr + from, 0, to - from);
5700 kunmap_atomic(kaddr, KM_USER0);
5701
5702 /*
5703 * Need to set the buffers we zero'd into uptodate
5704 * here if they aren't - ocfs2_map_page_blocks()
5705 * might've skipped some
5706 */
5707 if (ocfs2_should_order_data(inode)) {
5708 ret = walk_page_buffers(handle,
5709 page_buffers(page),
5710 from, to, &partial,
5711 ocfs2_ordered_zero_func);
5712 if (ret < 0)
5713 mlog_errno(ret);
5714 } else {
5715 ret = walk_page_buffers(handle, page_buffers(page),
5716 from, to, &partial,
5717 ocfs2_writeback_zero_func);
5718 if (ret < 0)
5719 mlog_errno(ret);
5720 }
5721
5722 if (!partial)
5723 SetPageUptodate(page);
5724
5725 flush_dcache_page(page);
5726 5704
5727 start = (page->index + 1) << PAGE_CACHE_SHIFT; 5705 start = (page->index + 1) << PAGE_CACHE_SHIFT;
5728 } 5706 }
5729out: 5707out:
5730 if (pages) { 5708 if (pages)
5731 for (i = 0; i < numpages; i++) { 5709 ocfs2_unlock_and_free_pages(pages, numpages);
5732 page = pages[i];
5733 unlock_page(page);
5734 mark_page_accessed(page);
5735 page_cache_release(page);
5736 }
5737 }
5738} 5710}
5739 5711
5740static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, 5712static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
5741 struct page **pages, int *num, u64 *phys) 5713 struct page **pages, int *num)
5742{ 5714{
5743 int i, numpages = 0, ret = 0; 5715 int numpages, ret = 0;
5744 unsigned int ext_flags;
5745 struct super_block *sb = inode->i_sb; 5716 struct super_block *sb = inode->i_sb;
5746 struct address_space *mapping = inode->i_mapping; 5717 struct address_space *mapping = inode->i_mapping;
5747 unsigned long index; 5718 unsigned long index;
5748 loff_t last_page_bytes; 5719 loff_t last_page_bytes;
5749 5720
5750 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
5751 BUG_ON(start > end); 5721 BUG_ON(start > end);
5752 5722
5753 if (start == end)
5754 goto out;
5755
5756 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != 5723 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
5757 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); 5724 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
5758 5725
5759 ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits, 5726 numpages = 0;
5760 phys, NULL, &ext_flags);
5761 if (ret) {
5762 mlog_errno(ret);
5763 goto out;
5764 }
5765
5766 /* Tail is a hole. */
5767 if (*phys == 0)
5768 goto out;
5769
5770 /* Tail is marked as unwritten, we can count on write to zero
5771 * in that case. */
5772 if (ext_flags & OCFS2_EXT_UNWRITTEN)
5773 goto out;
5774
5775 last_page_bytes = PAGE_ALIGN(end); 5727 last_page_bytes = PAGE_ALIGN(end);
5776 index = start >> PAGE_CACHE_SHIFT; 5728 index = start >> PAGE_CACHE_SHIFT;
5777 do { 5729 do {
@@ -5788,14 +5740,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
5788 5740
5789out: 5741out:
5790 if (ret != 0) { 5742 if (ret != 0) {
5791 if (pages) { 5743 if (pages)
5792 for (i = 0; i < numpages; i++) { 5744 ocfs2_unlock_and_free_pages(pages, numpages);
5793 if (pages[i]) {
5794 unlock_page(pages[i]);
5795 page_cache_release(pages[i]);
5796 }
5797 }
5798 }
5799 numpages = 0; 5745 numpages = 0;
5800 } 5746 }
5801 5747
@@ -5816,18 +5762,20 @@ out:
5816int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, 5762int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
5817 u64 range_start, u64 range_end) 5763 u64 range_start, u64 range_end)
5818{ 5764{
5819 int ret, numpages; 5765 int ret = 0, numpages;
5820 struct page **pages = NULL; 5766 struct page **pages = NULL;
5821 u64 phys; 5767 u64 phys;
5768 unsigned int ext_flags;
5769 struct super_block *sb = inode->i_sb;
5822 5770
5823 /* 5771 /*
5824 * File systems which don't support sparse files zero on every 5772 * File systems which don't support sparse files zero on every
5825 * extend. 5773 * extend.
5826 */ 5774 */
5827 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 5775 if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
5828 return 0; 5776 return 0;
5829 5777
5830 pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb), 5778 pages = kcalloc(ocfs2_pages_per_cluster(sb),
5831 sizeof(struct page *), GFP_NOFS); 5779 sizeof(struct page *), GFP_NOFS);
5832 if (pages == NULL) { 5780 if (pages == NULL) {
5833 ret = -ENOMEM; 5781 ret = -ENOMEM;
@@ -5835,16 +5783,31 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
5835 goto out; 5783 goto out;
5836 } 5784 }
5837 5785
5838 ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, 5786 if (range_start == range_end)
5839 &numpages, &phys); 5787 goto out;
5788
5789 ret = ocfs2_extent_map_get_blocks(inode,
5790 range_start >> sb->s_blocksize_bits,
5791 &phys, NULL, &ext_flags);
5840 if (ret) { 5792 if (ret) {
5841 mlog_errno(ret); 5793 mlog_errno(ret);
5842 goto out; 5794 goto out;
5843 } 5795 }
5844 5796
5845 if (numpages == 0) 5797 /*
5798 * Tail is a hole, or is marked unwritten. In either case, we
5799 * can count on read and write to return/push zero's.
5800 */
5801 if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
5846 goto out; 5802 goto out;
5847 5803
5804 ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
5805 &numpages);
5806 if (ret) {
5807 mlog_errno(ret);
5808 goto out;
5809 }
5810
5848 ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, 5811 ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
5849 numpages, phys, handle); 5812 numpages, phys, handle);
5850 5813
@@ -5865,6 +5828,178 @@ out:
5865 return ret; 5828 return ret;
5866} 5829}
5867 5830
5831static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
5832{
5833 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
5834
5835 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
5836}
5837
5838void ocfs2_dinode_new_extent_list(struct inode *inode,
5839 struct ocfs2_dinode *di)
5840{
5841 ocfs2_zero_dinode_id2(inode, di);
5842 di->id2.i_list.l_tree_depth = 0;
5843 di->id2.i_list.l_next_free_rec = 0;
5844 di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
5845}
5846
5847void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
5848{
5849 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5850 struct ocfs2_inline_data *idata = &di->id2.i_data;
5851
5852 spin_lock(&oi->ip_lock);
5853 oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
5854 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5855 spin_unlock(&oi->ip_lock);
5856
5857 /*
5858 * We clear the entire i_data structure here so that all
5859 * fields can be properly initialized.
5860 */
5861 ocfs2_zero_dinode_id2(inode, di);
5862
5863 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
5864}
5865
5866int ocfs2_convert_inline_data_to_extents(struct inode *inode,
5867 struct buffer_head *di_bh)
5868{
5869 int ret, i, has_data, num_pages = 0;
5870 handle_t *handle;
5871 u64 uninitialized_var(block);
5872 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5873 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5874 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
5875 struct ocfs2_alloc_context *data_ac = NULL;
5876 struct page **pages = NULL;
5877 loff_t end = osb->s_clustersize;
5878
5879 has_data = i_size_read(inode) ? 1 : 0;
5880
5881 if (has_data) {
5882 pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
5883 sizeof(struct page *), GFP_NOFS);
5884 if (pages == NULL) {
5885 ret = -ENOMEM;
5886 mlog_errno(ret);
5887 goto out;
5888 }
5889
5890 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
5891 if (ret) {
5892 mlog_errno(ret);
5893 goto out;
5894 }
5895 }
5896
5897 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
5898 if (IS_ERR(handle)) {
5899 ret = PTR_ERR(handle);
5900 mlog_errno(ret);
5901 goto out_unlock;
5902 }
5903
5904 ret = ocfs2_journal_access(handle, inode, di_bh,
5905 OCFS2_JOURNAL_ACCESS_WRITE);
5906 if (ret) {
5907 mlog_errno(ret);
5908 goto out_commit;
5909 }
5910
5911 if (has_data) {
5912 u32 bit_off, num;
5913 unsigned int page_end;
5914 u64 phys;
5915
5916 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
5917 &num);
5918 if (ret) {
5919 mlog_errno(ret);
5920 goto out_commit;
5921 }
5922
5923 /*
5924 * Save two copies, one for insert, and one that can
5925 * be changed by ocfs2_map_and_dirty_page() below.
5926 */
5927 block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
5928
5929 /*
5930 * Non sparse file systems zero on extend, so no need
5931 * to do that now.
5932 */
5933 if (!ocfs2_sparse_alloc(osb) &&
5934 PAGE_CACHE_SIZE < osb->s_clustersize)
5935 end = PAGE_CACHE_SIZE;
5936
5937 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
5938 if (ret) {
5939 mlog_errno(ret);
5940 goto out_commit;
5941 }
5942
5943 /*
5944 * This should populate the 1st page for us and mark
5945 * it up to date.
5946 */
5947 ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
5948 if (ret) {
5949 mlog_errno(ret);
5950 goto out_commit;
5951 }
5952
5953 page_end = PAGE_CACHE_SIZE;
5954 if (PAGE_CACHE_SIZE > osb->s_clustersize)
5955 page_end = osb->s_clustersize;
5956
5957 for (i = 0; i < num_pages; i++)
5958 ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
5959 pages[i], i > 0, &phys);
5960 }
5961
5962 spin_lock(&oi->ip_lock);
5963 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
5964 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5965 spin_unlock(&oi->ip_lock);
5966
5967 ocfs2_dinode_new_extent_list(inode, di);
5968
5969 ocfs2_journal_dirty(handle, di_bh);
5970
5971 if (has_data) {
5972 /*
5973 * An error at this point should be extremely rare. If
5974 * this proves to be false, we could always re-build
5975 * the in-inode data from our pages.
5976 */
5977 ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
5978 0, block, 1, 0, NULL);
5979 if (ret) {
5980 mlog_errno(ret);
5981 goto out_commit;
5982 }
5983
5984 inode->i_blocks = ocfs2_inode_sector_count(inode);
5985 }
5986
5987out_commit:
5988 ocfs2_commit_trans(osb, handle);
5989
5990out_unlock:
5991 if (data_ac)
5992 ocfs2_free_alloc_context(data_ac);
5993
5994out:
5995 if (pages) {
5996 ocfs2_unlock_and_free_pages(pages, num_pages);
5997 kfree(pages);
5998 }
5999
6000 return ret;
6001}
6002
5868/* 6003/*
5869 * It is expected, that by the time you call this function, 6004 * It is expected, that by the time you call this function,
5870 * inode->i_size and fe->i_size have been adjusted. 6005 * inode->i_size and fe->i_size have been adjusted.
@@ -6090,6 +6225,81 @@ bail:
6090 return status; 6225 return status;
6091} 6226}
6092 6227
6228/*
6229 * 'start' is inclusive, 'end' is not.
6230 */
6231int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
6232 unsigned int start, unsigned int end, int trunc)
6233{
6234 int ret;
6235 unsigned int numbytes;
6236 handle_t *handle;
6237 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6238 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
6239 struct ocfs2_inline_data *idata = &di->id2.i_data;
6240
6241 if (end > i_size_read(inode))
6242 end = i_size_read(inode);
6243
6244 BUG_ON(start >= end);
6245
6246 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
6247 !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
6248 !ocfs2_supports_inline_data(osb)) {
6249 ocfs2_error(inode->i_sb,
6250 "Inline data flags for inode %llu don't agree! "
6251 "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
6252 (unsigned long long)OCFS2_I(inode)->ip_blkno,
6253 le16_to_cpu(di->i_dyn_features),
6254 OCFS2_I(inode)->ip_dyn_features,
6255 osb->s_feature_incompat);
6256 ret = -EROFS;
6257 goto out;
6258 }
6259
6260 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6261 if (IS_ERR(handle)) {
6262 ret = PTR_ERR(handle);
6263 mlog_errno(ret);
6264 goto out;
6265 }
6266
6267 ret = ocfs2_journal_access(handle, inode, di_bh,
6268 OCFS2_JOURNAL_ACCESS_WRITE);
6269 if (ret) {
6270 mlog_errno(ret);
6271 goto out_commit;
6272 }
6273
6274 numbytes = end - start;
6275 memset(idata->id_data + start, 0, numbytes);
6276
6277 /*
6278 * No need to worry about the data page here - it's been
6279 * truncated already and inline data doesn't need it for
6280 * pushing zero's to disk, so we'll let readpage pick it up
6281 * later.
6282 */
6283 if (trunc) {
6284 i_size_write(inode, start);
6285 di->i_size = cpu_to_le64(start);
6286 }
6287
6288 inode->i_blocks = ocfs2_inode_sector_count(inode);
6289 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
6290
6291 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
6292 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
6293
6294 ocfs2_journal_dirty(handle, di_bh);
6295
6296out_commit:
6297 ocfs2_commit_trans(osb, handle);
6298
6299out:
6300 return ret;
6301}
6302
6093static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 6303static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
6094{ 6304{
6095 /* 6305 /*
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 990df48ae8d3..42ff94bd8011 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -62,6 +62,11 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; 62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
63} 63}
64 64
65void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
66void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di);
67int ocfs2_convert_inline_data_to_extents(struct inode *inode,
68 struct buffer_head *di_bh);
69
65int ocfs2_truncate_log_init(struct ocfs2_super *osb); 70int ocfs2_truncate_log_init(struct ocfs2_super *osb);
66void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb); 71void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
67void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 72void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
@@ -115,6 +120,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
115 struct inode *inode, 120 struct inode *inode,
116 struct buffer_head *fe_bh, 121 struct buffer_head *fe_bh,
117 struct ocfs2_truncate_context *tc); 122 struct ocfs2_truncate_context *tc);
123int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
124 unsigned int start, unsigned int end, int trunc);
118 125
119int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 126int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
120 u32 cpos, struct buffer_head **leaf_bh); 127 u32 cpos, struct buffer_head **leaf_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f37f25c931f5..34d10452c56d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -206,9 +206,70 @@ bail:
206 return err; 206 return err;
207} 207}
208 208
209int ocfs2_read_inline_data(struct inode *inode, struct page *page,
210 struct buffer_head *di_bh)
211{
212 void *kaddr;
213 unsigned int size;
214 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
215
216 if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
217 ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",
218 (unsigned long long)OCFS2_I(inode)->ip_blkno);
219 return -EROFS;
220 }
221
222 size = i_size_read(inode);
223
224 if (size > PAGE_CACHE_SIZE ||
225 size > ocfs2_max_inline_data(inode->i_sb)) {
226 ocfs2_error(inode->i_sb,
227 "Inode %llu has with inline data has bad size: %u",
228 (unsigned long long)OCFS2_I(inode)->ip_blkno, size);
229 return -EROFS;
230 }
231
232 kaddr = kmap_atomic(page, KM_USER0);
233 if (size)
234 memcpy(kaddr, di->id2.i_data.id_data, size);
235 /* Clear the remaining part of the page */
236 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
237 flush_dcache_page(page);
238 kunmap_atomic(kaddr, KM_USER0);
239
240 SetPageUptodate(page);
241
242 return 0;
243}
244
245static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
246{
247 int ret;
248 struct buffer_head *di_bh = NULL;
249 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
250
251 BUG_ON(!PageLocked(page));
252 BUG_ON(!OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
253
254 ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
255 OCFS2_BH_CACHED, inode);
256 if (ret) {
257 mlog_errno(ret);
258 goto out;
259 }
260
261 ret = ocfs2_read_inline_data(inode, page, di_bh);
262out:
263 unlock_page(page);
264
265 brelse(di_bh);
266 return ret;
267}
268
209static int ocfs2_readpage(struct file *file, struct page *page) 269static int ocfs2_readpage(struct file *file, struct page *page)
210{ 270{
211 struct inode *inode = page->mapping->host; 271 struct inode *inode = page->mapping->host;
272 struct ocfs2_inode_info *oi = OCFS2_I(inode);
212 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; 273 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
213 int ret, unlock = 1; 274 int ret, unlock = 1;
214 275
@@ -222,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
222 goto out; 283 goto out;
223 } 284 }
224 285
225 if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) { 286 if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
226 ret = AOP_TRUNCATED_PAGE; 287 ret = AOP_TRUNCATED_PAGE;
227 goto out_meta_unlock; 288 goto out_meta_unlock;
228 } 289 }
@@ -252,7 +313,10 @@ static int ocfs2_readpage(struct file *file, struct page *page)
252 goto out_alloc; 313 goto out_alloc;
253 } 314 }
254 315
255 ret = block_read_full_page(page, ocfs2_get_block); 316 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
317 ret = ocfs2_readpage_inline(inode, page);
318 else
319 ret = block_read_full_page(page, ocfs2_get_block);
256 unlock = 0; 320 unlock = 0;
257 321
258 ocfs2_data_unlock(inode, 0); 322 ocfs2_data_unlock(inode, 0);
@@ -301,12 +365,8 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
301{ 365{
302 int ret; 366 int ret;
303 367
304 down_read(&OCFS2_I(inode)->ip_alloc_sem);
305
306 ret = block_prepare_write(page, from, to, ocfs2_get_block); 368 ret = block_prepare_write(page, from, to, ocfs2_get_block);
307 369
308 up_read(&OCFS2_I(inode)->ip_alloc_sem);
309
310 return ret; 370 return ret;
311} 371}
312 372
@@ -401,7 +461,9 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
401 down_read(&OCFS2_I(inode)->ip_alloc_sem); 461 down_read(&OCFS2_I(inode)->ip_alloc_sem);
402 } 462 }
403 463
404 err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); 464 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
465 err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
466 NULL);
405 467
406 if (!INODE_JOURNAL(inode)) { 468 if (!INODE_JOURNAL(inode)) {
407 up_read(&OCFS2_I(inode)->ip_alloc_sem); 469 up_read(&OCFS2_I(inode)->ip_alloc_sem);
@@ -415,7 +477,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
415 goto bail; 477 goto bail;
416 } 478 }
417 479
418
419bail: 480bail:
420 status = err ? 0 : p_blkno; 481 status = err ? 0 : p_blkno;
421 482
@@ -570,6 +631,13 @@ static ssize_t ocfs2_direct_IO(int rw,
570 631
571 mlog_entry_void(); 632 mlog_entry_void();
572 633
634 /*
635 * Fallback to buffered I/O if we see an inode without
636 * extents.
637 */
638 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
639 return 0;
640
573 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { 641 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
574 /* 642 /*
575 * We get PR data locks even for O_DIRECT. This 643 * We get PR data locks even for O_DIRECT. This
@@ -834,18 +902,22 @@ struct ocfs2_write_ctxt {
834 struct ocfs2_cached_dealloc_ctxt w_dealloc; 902 struct ocfs2_cached_dealloc_ctxt w_dealloc;
835}; 903};
836 904
837static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) 905void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
838{ 906{
839 int i; 907 int i;
840 908
841 for(i = 0; i < wc->w_num_pages; i++) { 909 for(i = 0; i < num_pages; i++) {
842 if (wc->w_pages[i] == NULL) 910 if (pages[i]) {
843 continue; 911 unlock_page(pages[i]);
844 912 mark_page_accessed(pages[i]);
845 unlock_page(wc->w_pages[i]); 913 page_cache_release(pages[i]);
846 mark_page_accessed(wc->w_pages[i]); 914 }
847 page_cache_release(wc->w_pages[i]);
848 } 915 }
916}
917
918static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
919{
920 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
849 921
850 brelse(wc->w_di_bh); 922 brelse(wc->w_di_bh);
851 kfree(wc); 923 kfree(wc);
@@ -1360,6 +1432,160 @@ out:
1360 return ret; 1432 return ret;
1361} 1433}
1362 1434
1435static int ocfs2_write_begin_inline(struct address_space *mapping,
1436 struct inode *inode,
1437 struct ocfs2_write_ctxt *wc)
1438{
1439 int ret;
1440 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1441 struct page *page;
1442 handle_t *handle;
1443 struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1444
1445 page = find_or_create_page(mapping, 0, GFP_NOFS);
1446 if (!page) {
1447 ret = -ENOMEM;
1448 mlog_errno(ret);
1449 goto out;
1450 }
1451 /*
1452 * If we don't set w_num_pages then this page won't get unlocked
1453 * and freed on cleanup of the write context.
1454 */
1455 wc->w_pages[0] = wc->w_target_page = page;
1456 wc->w_num_pages = 1;
1457
1458 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1459 if (IS_ERR(handle)) {
1460 ret = PTR_ERR(handle);
1461 mlog_errno(ret);
1462 goto out;
1463 }
1464
1465 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
1466 OCFS2_JOURNAL_ACCESS_WRITE);
1467 if (ret) {
1468 ocfs2_commit_trans(osb, handle);
1469
1470 mlog_errno(ret);
1471 goto out;
1472 }
1473
1474 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
1475 ocfs2_set_inode_data_inline(inode, di);
1476
1477 if (!PageUptodate(page)) {
1478 ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
1479 if (ret) {
1480 ocfs2_commit_trans(osb, handle);
1481
1482 goto out;
1483 }
1484 }
1485
1486 wc->w_handle = handle;
1487out:
1488 return ret;
1489}
1490
1491int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
1492{
1493 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1494
1495 if (new_size < le16_to_cpu(di->id2.i_data.id_count))
1496 return 1;
1497 return 0;
1498}
1499
1500static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1501 struct inode *inode, loff_t pos,
1502 unsigned len, struct page *mmap_page,
1503 struct ocfs2_write_ctxt *wc)
1504{
1505 int ret, written = 0;
1506 loff_t end = pos + len;
1507 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1508
1509 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
1510 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
1511 oi->ip_dyn_features);
1512
1513 /*
1514 * Handle inodes which already have inline data 1st.
1515 */
1516 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1517 if (mmap_page == NULL &&
1518 ocfs2_size_fits_inline_data(wc->w_di_bh, end))
1519 goto do_inline_write;
1520
1521 /*
1522 * The write won't fit - we have to give this inode an
1523 * inline extent list now.
1524 */
1525 ret = ocfs2_convert_inline_data_to_extents(inode, wc->w_di_bh);
1526 if (ret)
1527 mlog_errno(ret);
1528 goto out;
1529 }
1530
1531 /*
1532 * Check whether the inode can accept inline data.
1533 */
1534 if (oi->ip_clusters != 0 || i_size_read(inode) != 0)
1535 return 0;
1536
1537 /*
1538 * Check whether the write can fit.
1539 */
1540 if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
1541 return 0;
1542
1543do_inline_write:
1544 ret = ocfs2_write_begin_inline(mapping, inode, wc);
1545 if (ret) {
1546 mlog_errno(ret);
1547 goto out;
1548 }
1549
1550 /*
1551 * This signals to the caller that the data can be written
1552 * inline.
1553 */
1554 written = 1;
1555out:
1556 return written ? written : ret;
1557}
1558
1559/*
1560 * This function only does anything for file systems which can't
1561 * handle sparse files.
1562 *
1563 * What we want to do here is fill in any hole between the current end
1564 * of allocation and the end of our write. That way the rest of the
1565 * write path can treat it as an non-allocating write, which has no
1566 * special case code for sparse/nonsparse files.
1567 */
1568static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1569 unsigned len,
1570 struct ocfs2_write_ctxt *wc)
1571{
1572 int ret;
1573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1574 loff_t newsize = pos + len;
1575
1576 if (ocfs2_sparse_alloc(osb))
1577 return 0;
1578
1579 if (newsize <= i_size_read(inode))
1580 return 0;
1581
1582 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
1583 if (ret)
1584 mlog_errno(ret);
1585
1586 return ret;
1587}
1588
1363int ocfs2_write_begin_nolock(struct address_space *mapping, 1589int ocfs2_write_begin_nolock(struct address_space *mapping,
1364 loff_t pos, unsigned len, unsigned flags, 1590 loff_t pos, unsigned len, unsigned flags,
1365 struct page **pagep, void **fsdata, 1591 struct page **pagep, void **fsdata,
@@ -1381,6 +1607,25 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1381 return ret; 1607 return ret;
1382 } 1608 }
1383 1609
1610 if (ocfs2_supports_inline_data(osb)) {
1611 ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
1612 mmap_page, wc);
1613 if (ret == 1) {
1614 ret = 0;
1615 goto success;
1616 }
1617 if (ret < 0) {
1618 mlog_errno(ret);
1619 goto out;
1620 }
1621 }
1622
1623 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
1624 if (ret) {
1625 mlog_errno(ret);
1626 goto out;
1627 }
1628
1384 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, 1629 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
1385 &extents_to_split); 1630 &extents_to_split);
1386 if (ret) { 1631 if (ret) {
@@ -1462,6 +1707,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1462 if (meta_ac) 1707 if (meta_ac)
1463 ocfs2_free_alloc_context(meta_ac); 1708 ocfs2_free_alloc_context(meta_ac);
1464 1709
1710success:
1465 *pagep = wc->w_target_page; 1711 *pagep = wc->w_target_page;
1466 *fsdata = wc; 1712 *fsdata = wc;
1467 return 0; 1713 return 0;
@@ -1529,6 +1775,31 @@ out_fail:
1529 return ret; 1775 return ret;
1530} 1776}
1531 1777
1778static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1779 unsigned len, unsigned *copied,
1780 struct ocfs2_dinode *di,
1781 struct ocfs2_write_ctxt *wc)
1782{
1783 void *kaddr;
1784
1785 if (unlikely(*copied < len)) {
1786 if (!PageUptodate(wc->w_target_page)) {
1787 *copied = 0;
1788 return;
1789 }
1790 }
1791
1792 kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
1793 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1794 kunmap_atomic(kaddr, KM_USER0);
1795
1796 mlog(0, "Data written to inode at offset %llu. "
1797 "id_count = %u, copied = %u, i_dyn_features = 0x%x\n",
1798 (unsigned long long)pos, *copied,
1799 le16_to_cpu(di->id2.i_data.id_count),
1800 le16_to_cpu(di->i_dyn_features));
1801}
1802
1532int ocfs2_write_end_nolock(struct address_space *mapping, 1803int ocfs2_write_end_nolock(struct address_space *mapping,
1533 loff_t pos, unsigned len, unsigned copied, 1804 loff_t pos, unsigned len, unsigned copied,
1534 struct page *page, void *fsdata) 1805 struct page *page, void *fsdata)
@@ -1542,6 +1813,11 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1542 handle_t *handle = wc->w_handle; 1813 handle_t *handle = wc->w_handle;
1543 struct page *tmppage; 1814 struct page *tmppage;
1544 1815
1816 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1817 ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
1818 goto out_write_size;
1819 }
1820
1545 if (unlikely(copied < len)) { 1821 if (unlikely(copied < len)) {
1546 if (!PageUptodate(wc->w_target_page)) 1822 if (!PageUptodate(wc->w_target_page))
1547 copied = 0; 1823 copied = 0;
@@ -1579,6 +1855,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1579 block_commit_write(tmppage, from, to); 1855 block_commit_write(tmppage, from, to);
1580 } 1856 }
1581 1857
1858out_write_size:
1582 pos += copied; 1859 pos += copied;
1583 if (pos > inode->i_size) { 1860 if (pos > inode->i_size) {
1584 i_size_write(inode, pos); 1861 i_size_write(inode, pos);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 389579bd64e3..113560877dbb 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -34,6 +34,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
34 struct inode *inode, unsigned int from, 34 struct inode *inode, unsigned int from,
35 unsigned int to, int new); 35 unsigned int to, int new);
36 36
37void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);
38
37int walk_page_buffers( handle_t *handle, 39int walk_page_buffers( handle_t *handle,
38 struct buffer_head *head, 40 struct buffer_head *head,
39 unsigned from, 41 unsigned from,
@@ -59,6 +61,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
59 struct page **pagep, void **fsdata, 61 struct page **pagep, void **fsdata,
60 struct buffer_head *di_bh, struct page *mmap_page); 62 struct buffer_head *di_bh, struct page *mmap_page);
61 63
64int ocfs2_read_inline_data(struct inode *inode, struct page *page,
65 struct buffer_head *di_bh);
66int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
67
62/* all ocfs2_dio_end_io()'s fault */ 68/* all ocfs2_dio_end_io()'s fault */
63#define ocfs2_iocb_is_rw_locked(iocb) \ 69#define ocfs2_iocb_is_rw_locked(iocb) \
64 test_bit(0, (unsigned long *)&iocb->private) 70 test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 2bd7f788cf34..f14b541fab95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -216,8 +216,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg,
216 wait_for_completion(&wc->wc_io_complete); 216 wait_for_completion(&wc->wc_io_complete);
217} 217}
218 218
219static int o2hb_bio_end_io(struct bio *bio, 219static void o2hb_bio_end_io(struct bio *bio,
220 unsigned int bytes_done,
221 int error) 220 int error)
222{ 221{
223 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 222 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
@@ -227,12 +226,8 @@ static int o2hb_bio_end_io(struct bio *bio,
227 wc->wc_error = error; 226 wc->wc_error = error;
228 } 227 }
229 228
230 if (bio->bi_size)
231 return 1;
232
233 o2hb_bio_wait_dec(wc, 1); 229 o2hb_bio_wait_dec(wc, 1);
234 bio_put(bio); 230 bio_put(bio);
235 return 0;
236} 231}
237 232
238/* Setup a Bio to cover I/O against num_slots slots starting at 233/* Setup a Bio to cover I/O against num_slots slots starting at
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index e9e042b93dbf..a4882c8df945 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -143,7 +143,7 @@ static struct kobj_type mlog_ktype = {
143}; 143};
144 144
145static struct kset mlog_kset = { 145static struct kset mlog_kset = {
146 .kobj = {.name = "logmask", .ktype = &mlog_ktype}, 146 .kobj = {.ktype = &mlog_ktype},
147}; 147};
148 148
149int mlog_sys_init(struct kset *o2cb_subsys) 149int mlog_sys_init(struct kset *o2cb_subsys)
@@ -156,6 +156,7 @@ int mlog_sys_init(struct kset *o2cb_subsys)
156 } 156 }
157 mlog_attr_ptrs[i] = NULL; 157 mlog_attr_ptrs[i] = NULL;
158 158
159 kobject_set_name(&mlog_kset.kobj, "logmask");
159 kobj_set_kset_s(&mlog_kset, *o2cb_subsys); 160 kobj_set_kset_s(&mlog_kset, *o2cb_subsys);
160 return kset_register(&mlog_kset); 161 return kset_register(&mlog_kset);
161} 162}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 0d5fdde959c8..7453b70c1a19 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -55,10 +55,16 @@
55#include "journal.h" 55#include "journal.h"
56#include "namei.h" 56#include "namei.h"
57#include "suballoc.h" 57#include "suballoc.h"
58#include "super.h"
58#include "uptodate.h" 59#include "uptodate.h"
59 60
60#include "buffer_head_io.h" 61#include "buffer_head_io.h"
61 62
/*
 * Sizing of the read-ahead buffer used by directory lookups.
 * NAMEI_RA_SIZE is the number of buffer_head slots in the bh_use[]
 * array of ocfs2_find_entry_el().  NAMEI_RA_INDEX() flattens a
 * (chunk, block) pair into a single index, with NAMEI_RA_BLOCKS
 * entries per chunk.
 */
#define NAMEI_RA_CHUNKS 2
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
67
62static unsigned char ocfs2_filetype_table[] = { 68static unsigned char ocfs2_filetype_table[] = {
63 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 69 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
64}; 70};
@@ -66,12 +72,614 @@ static unsigned char ocfs2_filetype_table[] = {
66static int ocfs2_extend_dir(struct ocfs2_super *osb, 72static int ocfs2_extend_dir(struct ocfs2_super *osb,
67 struct inode *dir, 73 struct inode *dir,
68 struct buffer_head *parent_fe_bh, 74 struct buffer_head *parent_fe_bh,
75 unsigned int blocks_wanted,
69 struct buffer_head **new_de_bh); 76 struct buffer_head **new_de_bh);
77static int ocfs2_do_extend_dir(struct super_block *sb,
78 handle_t *handle,
79 struct inode *dir,
80 struct buffer_head *parent_fe_bh,
81 struct ocfs2_alloc_context *data_ac,
82 struct ocfs2_alloc_context *meta_ac,
83 struct buffer_head **new_bh);
84
70/* 85/*
71 * ocfs2_readdir() 86 * bh passed here can be an inode block or a dir data block, depending
87 * on the inode inline data flag.
88 */
89static int ocfs2_check_dir_entry(struct inode * dir,
90 struct ocfs2_dir_entry * de,
91 struct buffer_head * bh,
92 unsigned long offset)
93{
94 const char *error_msg = NULL;
95 const int rlen = le16_to_cpu(de->rec_len);
96
97 if (rlen < OCFS2_DIR_REC_LEN(1))
98 error_msg = "rec_len is smaller than minimal";
99 else if (rlen % 4 != 0)
100 error_msg = "rec_len % 4 != 0";
101 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
102 error_msg = "rec_len is too small for name_len";
103 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
104 error_msg = "directory entry across blocks";
105
106 if (error_msg != NULL)
107 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
108 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
109 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
110 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
111 de->name_len);
112 return error_msg == NULL ? 1 : 0;
113}
114
115static inline int ocfs2_match(int len,
116 const char * const name,
117 struct ocfs2_dir_entry *de)
118{
119 if (len != de->name_len)
120 return 0;
121 if (!de->inode)
122 return 0;
123 return !memcmp(name, de->name, len);
124}
125
126/*
127 * Returns 0 if not found, -1 on failure, and 1 on success
128 */
129static int inline ocfs2_search_dirblock(struct buffer_head *bh,
130 struct inode *dir,
131 const char *name, int namelen,
132 unsigned long offset,
133 char *first_de,
134 unsigned int bytes,
135 struct ocfs2_dir_entry **res_dir)
136{
137 struct ocfs2_dir_entry *de;
138 char *dlimit, *de_buf;
139 int de_len;
140 int ret = 0;
141
142 mlog_entry_void();
143
144 de_buf = first_de;
145 dlimit = de_buf + bytes;
146
147 while (de_buf < dlimit) {
148 /* this code is executed quadratically often */
149 /* do minimal checking `by hand' */
150
151 de = (struct ocfs2_dir_entry *) de_buf;
152
153 if (de_buf + namelen <= dlimit &&
154 ocfs2_match(namelen, name, de)) {
155 /* found a match - just to be sure, do a full check */
156 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
157 ret = -1;
158 goto bail;
159 }
160 *res_dir = de;
161 ret = 1;
162 goto bail;
163 }
164
165 /* prevent looping on a bad block */
166 de_len = le16_to_cpu(de->rec_len);
167 if (de_len <= 0) {
168 ret = -1;
169 goto bail;
170 }
171
172 de_buf += de_len;
173 offset += de_len;
174 }
175
176bail:
177 mlog_exit(ret);
178 return ret;
179}
180
181static struct buffer_head *ocfs2_find_entry_id(const char *name,
182 int namelen,
183 struct inode *dir,
184 struct ocfs2_dir_entry **res_dir)
185{
186 int ret, found;
187 struct buffer_head *di_bh = NULL;
188 struct ocfs2_dinode *di;
189 struct ocfs2_inline_data *data;
190
191 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
192 &di_bh, OCFS2_BH_CACHED, dir);
193 if (ret) {
194 mlog_errno(ret);
195 goto out;
196 }
197
198 di = (struct ocfs2_dinode *)di_bh->b_data;
199 data = &di->id2.i_data;
200
201 found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
202 data->id_data, i_size_read(dir), res_dir);
203 if (found == 1)
204 return di_bh;
205
206 brelse(di_bh);
207out:
208 return NULL;
209}
210
/*
 * Look 'name' up in an extent based directory, one block at a time.
 *
 * The scan starts at the block recorded in ip_dir_start_lookup (or at
 * block 0 if that is beyond the directory) and wraps around once it
 * reaches the last block.  Blocks are fetched in batches of up to
 * NAMEI_RA_SIZE through the bh_use[] read-ahead array.
 *
 * On a hit, *res_dir points at the entry, the block number is stored
 * back in ip_dir_start_lookup and the buffer_head of that block is
 * returned without dropping its reference.  NULL is returned when
 * the name is not found or when ocfs2_search_dirblock() reports a
 * failure.
 */
struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
					struct inode *dir,
					struct ocfs2_dir_entry **res_dir)
{
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
	unsigned long start, block, b;
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;
	int nblocks, i, err;

	mlog_entry_void();

	sb = dir->i_sb;

	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
	start = OCFS2_I(dir)->ip_dir_start_lookup;
	if (start >= nblocks)
		start = 0;
	block = start;

restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;

				bh = ocfs2_bread(dir, b++, &err, 1);
				bh_use[ra_max] = bh;
			}
		}
		/* A NULL slot means there is nothing to search here. */
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
			ocfs2_error(dir->i_sb, "reading directory %llu, "
				    "offset %lu\n",
				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
				    block);
			brelse(bh);
			goto next;
		}
		i = ocfs2_search_dirblock(bh, dir, name, namelen,
					  block << sb->s_blocksize_bits,
					  bh->b_data, sb->s_blocksize,
					  res_dir);
		if (i == 1) {
			/* Remember where the hit was for the next lookup. */
			OCFS2_I(dir)->ip_dir_start_lookup = block;
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			/* A negative result aborts the whole search. */
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
		brelse(bh_use[ra_ptr]);

	mlog_exit_ptr(ret);
	return ret;
}
310
311/*
312 * Try to find an entry of the provided name within 'dir'.
72 * 313 *
314 * If nothing was found, NULL is returned. Otherwise, a buffer_head
315 * and pointer to the dir entry are passed back.
316 *
317 * Caller can NOT assume anything about the contents of the
318 * buffer_head - it is passed back only so that it can be passed into
319 * any one of the manipulation functions (add entry, delete entry,
320 * etc). As an example, bh in the extent directory case is a data
321 * block, in the inline-data case it actually points to an inode.
73 */ 322 */
74int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 323struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
324 struct inode *dir,
325 struct ocfs2_dir_entry **res_dir)
326{
327 *res_dir = NULL;
328
329 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
330 return ocfs2_find_entry_id(name, namelen, dir, res_dir);
331
332 return ocfs2_find_entry_el(name, namelen, dir, res_dir);
333}
334
335/*
336 * Update inode number and type of a previously found directory entry.
337 */
338int ocfs2_update_entry(struct inode *dir, handle_t *handle,
339 struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
340 struct inode *new_entry_inode)
341{
342 int ret;
343
344 /*
345 * The same code works fine for both inline-data and extent
346 * based directories, so no need to split this up.
347 */
348
349 ret = ocfs2_journal_access(handle, dir, de_bh,
350 OCFS2_JOURNAL_ACCESS_WRITE);
351 if (ret) {
352 mlog_errno(ret);
353 goto out;
354 }
355
356 de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
357 ocfs2_set_de_type(de, new_entry_inode->i_mode);
358
359 ocfs2_journal_dirty(handle, de_bh);
360
361out:
362 return ret;
363}
364
365static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
366 struct ocfs2_dir_entry *de_del,
367 struct buffer_head *bh, char *first_de,
368 unsigned int bytes)
369{
370 struct ocfs2_dir_entry *de, *pde;
371 int i, status = -ENOENT;
372
373 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
374
375 i = 0;
376 pde = NULL;
377 de = (struct ocfs2_dir_entry *) first_de;
378 while (i < bytes) {
379 if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
380 status = -EIO;
381 mlog_errno(status);
382 goto bail;
383 }
384 if (de == de_del) {
385 status = ocfs2_journal_access(handle, dir, bh,
386 OCFS2_JOURNAL_ACCESS_WRITE);
387 if (status < 0) {
388 status = -EIO;
389 mlog_errno(status);
390 goto bail;
391 }
392 if (pde)
393 pde->rec_len =
394 cpu_to_le16(le16_to_cpu(pde->rec_len) +
395 le16_to_cpu(de->rec_len));
396 else
397 de->inode = 0;
398 dir->i_version++;
399 status = ocfs2_journal_dirty(handle, bh);
400 goto bail;
401 }
402 i += le16_to_cpu(de->rec_len);
403 pde = de;
404 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
405 }
406bail:
407 mlog_exit(status);
408 return status;
409}
410
411static inline int ocfs2_delete_entry_id(handle_t *handle,
412 struct inode *dir,
413 struct ocfs2_dir_entry *de_del,
414 struct buffer_head *bh)
415{
416 int ret;
417 struct buffer_head *di_bh = NULL;
418 struct ocfs2_dinode *di;
419 struct ocfs2_inline_data *data;
420
421 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
422 &di_bh, OCFS2_BH_CACHED, dir);
423 if (ret) {
424 mlog_errno(ret);
425 goto out;
426 }
427
428 di = (struct ocfs2_dinode *)di_bh->b_data;
429 data = &di->id2.i_data;
430
431 ret = __ocfs2_delete_entry(handle, dir, de_del, bh, data->id_data,
432 i_size_read(dir));
433
434 brelse(di_bh);
435out:
436 return ret;
437}
438
439static inline int ocfs2_delete_entry_el(handle_t *handle,
440 struct inode *dir,
441 struct ocfs2_dir_entry *de_del,
442 struct buffer_head *bh)
443{
444 return __ocfs2_delete_entry(handle, dir, de_del, bh, bh->b_data,
445 bh->b_size);
446}
447
448/*
449 * ocfs2_delete_entry deletes a directory entry by merging it with the
450 * previous entry
451 */
452int ocfs2_delete_entry(handle_t *handle,
453 struct inode *dir,
454 struct ocfs2_dir_entry *de_del,
455 struct buffer_head *bh)
456{
457 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
458 return ocfs2_delete_entry_id(handle, dir, de_del, bh);
459
460 return ocfs2_delete_entry_el(handle, dir, de_del, bh);
461}
462
463/*
464 * Check whether 'de' has enough room to hold an entry of
465 * 'new_rec_len' bytes.
466 */
467static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
468 unsigned int new_rec_len)
469{
470 unsigned int de_really_used;
471
472 /* Check whether this is an empty record with enough space */
473 if (le64_to_cpu(de->inode) == 0 &&
474 le16_to_cpu(de->rec_len) >= new_rec_len)
475 return 1;
476
477 /*
478 * Record might have free space at the end which we can
479 * use.
480 */
481 de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
482 if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
483 return 1;
484
485 return 0;
486}
487
488/* we don't always have a dentry for what we want to add, so people
489 * like orphan dir can call this instead.
490 *
491 * If you pass me insert_bh, I'll skip the search of the other dir
492 * blocks and put the record in there.
493 */
494int __ocfs2_add_entry(handle_t *handle,
495 struct inode *dir,
496 const char *name, int namelen,
497 struct inode *inode, u64 blkno,
498 struct buffer_head *parent_fe_bh,
499 struct buffer_head *insert_bh)
500{
501 unsigned long offset;
502 unsigned short rec_len;
503 struct ocfs2_dir_entry *de, *de1;
504 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
505 struct super_block *sb = dir->i_sb;
506 int retval, status;
507 unsigned int size = sb->s_blocksize;
508 char *data_start = insert_bh->b_data;
509
510 mlog_entry_void();
511
512 if (!namelen)
513 return -EINVAL;
514
515 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
516 data_start = di->id2.i_data.id_data;
517 size = i_size_read(dir);
518
519 BUG_ON(insert_bh != parent_fe_bh);
520 }
521
522 rec_len = OCFS2_DIR_REC_LEN(namelen);
523 offset = 0;
524 de = (struct ocfs2_dir_entry *) data_start;
525 while (1) {
526 BUG_ON((char *)de >= (size + data_start));
527
528 /* These checks should've already been passed by the
529 * prepare function, but I guess we can leave them
530 * here anyway. */
531 if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
532 retval = -ENOENT;
533 goto bail;
534 }
535 if (ocfs2_match(namelen, name, de)) {
536 retval = -EEXIST;
537 goto bail;
538 }
539
540 if (ocfs2_dirent_would_fit(de, rec_len)) {
541 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
542 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
543 if (retval < 0) {
544 mlog_errno(retval);
545 goto bail;
546 }
547
548 status = ocfs2_journal_access(handle, dir, insert_bh,
549 OCFS2_JOURNAL_ACCESS_WRITE);
550 /* By now the buffer is marked for journaling */
551 offset += le16_to_cpu(de->rec_len);
552 if (le64_to_cpu(de->inode)) {
553 de1 = (struct ocfs2_dir_entry *)((char *) de +
554 OCFS2_DIR_REC_LEN(de->name_len));
555 de1->rec_len =
556 cpu_to_le16(le16_to_cpu(de->rec_len) -
557 OCFS2_DIR_REC_LEN(de->name_len));
558 de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
559 de = de1;
560 }
561 de->file_type = OCFS2_FT_UNKNOWN;
562 if (blkno) {
563 de->inode = cpu_to_le64(blkno);
564 ocfs2_set_de_type(de, inode->i_mode);
565 } else
566 de->inode = 0;
567 de->name_len = namelen;
568 memcpy(de->name, name, namelen);
569
570 dir->i_version++;
571 status = ocfs2_journal_dirty(handle, insert_bh);
572 retval = 0;
573 goto bail;
574 }
575 offset += le16_to_cpu(de->rec_len);
576 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
577 }
578
579 /* when you think about it, the assert above should prevent us
580 * from ever getting here. */
581 retval = -ENOSPC;
582bail:
583
584 mlog_exit(retval);
585 return retval;
586}
587
/*
 * Feed the entries of an inline-data directory to 'filldir', starting
 * at *f_pos.  The entries are read from the inline data area of the
 * inode block, and *f_pos is advanced past each entry handled.
 *
 * @f_version:   caller's version stamp; when it differs from
 *               inode->i_version the position is re-derived by walking
 *               the entries from the start
 * @priv:        passed through to 'filldir' untouched
 * @filldir_err: if non-NULL, receives a non-zero 'filldir' result
 *
 * Always returns 0: a failure to read the inode block is only logged,
 * and a bad entry just moves *f_pos to the end of the directory.
 */
static int ocfs2_dir_foreach_blk_id(struct inode *inode,
				    unsigned long *f_version,
				    loff_t *f_pos, void *priv,
				    filldir_t filldir, int *filldir_err)
{
	int ret, i, filldir_ret;
	unsigned long offset = *f_pos;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_inline_data *data;
	struct ocfs2_dir_entry *de;

	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
			       &di_bh, OCFS2_BH_CACHED, inode);
	if (ret) {
		mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		goto out;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;
	data = &di->id2.i_data;

	while (*f_pos < i_size_read(inode)) {
revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now.  Scan from the start of the block
		 * to make sure. */
		if (*f_version != inode->i_version) {
			for (i = 0; i < i_size_read(inode) && i < offset; ) {
				de = (struct ocfs2_dir_entry *)
					(data->id_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero.  A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			/* Resume at the entry boundary the walk stopped on. */
			*f_pos = offset = i;
			*f_version = inode->i_version;
		}

		de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos);
		if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) {
			/* On error, skip the f_pos to the end. */
			*f_pos = i_size_read(inode);
			goto out;
		}
		offset += le16_to_cpu(de->rec_len);
		/* Entries with a zero inode number are not reported. */
		if (le64_to_cpu(de->inode)) {
			/* We might block in the next section
			 * if the data destination is
			 * currently swapped out.  So, use a
			 * version stamp to detect whether or
			 * not the directory has been modified
			 * during the copy operation.
			 */
			unsigned long version = *f_version;
			unsigned char d_type = DT_UNKNOWN;

			if (de->file_type < OCFS2_FT_MAX)
				d_type = ocfs2_filetype_table[de->file_type];

			filldir_ret = filldir(priv, de->name,
					      de->name_len,
					      *f_pos,
					      le64_to_cpu(de->inode),
					      d_type);
			if (filldir_ret) {
				/* Stop here; *f_pos still names this entry. */
				if (filldir_err)
					*filldir_err = filldir_ret;
				break;
			}
			if (version != *f_version)
				goto revalidate;
		}
		*f_pos += le16_to_cpu(de->rec_len);
	}

out:
	brelse(di_bh);

	return 0;
}
678
679static int ocfs2_dir_foreach_blk_el(struct inode *inode,
680 unsigned long *f_version,
681 loff_t *f_pos, void *priv,
682 filldir_t filldir, int *filldir_err)
75{ 683{
76 int error = 0; 684 int error = 0;
77 unsigned long offset, blk, last_ra_blk = 0; 685 unsigned long offset, blk, last_ra_blk = 0;
@@ -79,45 +687,23 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
79 struct buffer_head * bh, * tmp; 687 struct buffer_head * bh, * tmp;
80 struct ocfs2_dir_entry * de; 688 struct ocfs2_dir_entry * de;
81 int err; 689 int err;
82 struct inode *inode = filp->f_path.dentry->d_inode;
83 struct super_block * sb = inode->i_sb; 690 struct super_block * sb = inode->i_sb;
84 unsigned int ra_sectors = 16; 691 unsigned int ra_sectors = 16;
85 int lock_level = 0;
86
87 mlog_entry("dirino=%llu\n",
88 (unsigned long long)OCFS2_I(inode)->ip_blkno);
89 692
90 stored = 0; 693 stored = 0;
91 bh = NULL; 694 bh = NULL;
92 695
93 error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); 696 offset = (*f_pos) & (sb->s_blocksize - 1);
94 if (lock_level && error >= 0) {
95 /* We release EX lock which used to update atime
96 * and get PR lock again to reduce contention
97 * on commonly accessed directories. */
98 ocfs2_meta_unlock(inode, 1);
99 lock_level = 0;
100 error = ocfs2_meta_lock(inode, NULL, 0);
101 }
102 if (error < 0) {
103 if (error != -ENOENT)
104 mlog_errno(error);
105 /* we haven't got any yet, so propagate the error. */
106 stored = error;
107 goto bail_nolock;
108 }
109 697
110 offset = filp->f_pos & (sb->s_blocksize - 1); 698 while (!error && !stored && *f_pos < i_size_read(inode)) {
111 699 blk = (*f_pos) >> sb->s_blocksize_bits;
112 while (!error && !stored && filp->f_pos < i_size_read(inode)) {
113 blk = (filp->f_pos) >> sb->s_blocksize_bits;
114 bh = ocfs2_bread(inode, blk, &err, 0); 700 bh = ocfs2_bread(inode, blk, &err, 0);
115 if (!bh) { 701 if (!bh) {
116 mlog(ML_ERROR, 702 mlog(ML_ERROR,
117 "directory #%llu contains a hole at offset %lld\n", 703 "directory #%llu contains a hole at offset %lld\n",
118 (unsigned long long)OCFS2_I(inode)->ip_blkno, 704 (unsigned long long)OCFS2_I(inode)->ip_blkno,
119 filp->f_pos); 705 *f_pos);
120 filp->f_pos += sb->s_blocksize - offset; 706 *f_pos += sb->s_blocksize - offset;
121 continue; 707 continue;
122 } 708 }
123 709
@@ -143,7 +729,7 @@ revalidate:
143 * readdir(2), then we might be pointing to an invalid 729 * readdir(2), then we might be pointing to an invalid
144 * dirent right now. Scan from the start of the block 730 * dirent right now. Scan from the start of the block
145 * to make sure. */ 731 * to make sure. */
146 if (filp->f_version != inode->i_version) { 732 if (*f_version != inode->i_version) {
147 for (i = 0; i < sb->s_blocksize && i < offset; ) { 733 for (i = 0; i < sb->s_blocksize && i < offset; ) {
148 de = (struct ocfs2_dir_entry *) (bh->b_data + i); 734 de = (struct ocfs2_dir_entry *) (bh->b_data + i);
149 /* It's too expensive to do a full 735 /* It's too expensive to do a full
@@ -158,21 +744,20 @@ revalidate:
158 i += le16_to_cpu(de->rec_len); 744 i += le16_to_cpu(de->rec_len);
159 } 745 }
160 offset = i; 746 offset = i;
161 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) 747 *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1))
162 | offset; 748 | offset;
163 filp->f_version = inode->i_version; 749 *f_version = inode->i_version;
164 } 750 }
165 751
166 while (!error && filp->f_pos < i_size_read(inode) 752 while (!error && *f_pos < i_size_read(inode)
167 && offset < sb->s_blocksize) { 753 && offset < sb->s_blocksize) {
168 de = (struct ocfs2_dir_entry *) (bh->b_data + offset); 754 de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
169 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { 755 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
170 /* On error, skip the f_pos to the 756 /* On error, skip the f_pos to the
171 next block. */ 757 next block. */
172 filp->f_pos = (filp->f_pos | 758 *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1;
173 (sb->s_blocksize - 1)) + 1;
174 brelse(bh); 759 brelse(bh);
175 goto bail; 760 goto out;
176 } 761 }
177 offset += le16_to_cpu(de->rec_len); 762 offset += le16_to_cpu(de->rec_len);
178 if (le64_to_cpu(de->inode)) { 763 if (le64_to_cpu(de->inode)) {
@@ -183,36 +768,109 @@ revalidate:
183 * not the directory has been modified 768 * not the directory has been modified
184 * during the copy operation. 769 * during the copy operation.
185 */ 770 */
186 unsigned long version = filp->f_version; 771 unsigned long version = *f_version;
187 unsigned char d_type = DT_UNKNOWN; 772 unsigned char d_type = DT_UNKNOWN;
188 773
189 if (de->file_type < OCFS2_FT_MAX) 774 if (de->file_type < OCFS2_FT_MAX)
190 d_type = ocfs2_filetype_table[de->file_type]; 775 d_type = ocfs2_filetype_table[de->file_type];
191 error = filldir(dirent, de->name, 776 error = filldir(priv, de->name,
192 de->name_len, 777 de->name_len,
193 filp->f_pos, 778 *f_pos,
194 ino_from_blkno(sb, le64_to_cpu(de->inode)), 779 le64_to_cpu(de->inode),
195 d_type); 780 d_type);
196 if (error) 781 if (error) {
782 if (filldir_err)
783 *filldir_err = error;
197 break; 784 break;
198 if (version != filp->f_version) 785 }
786 if (version != *f_version)
199 goto revalidate; 787 goto revalidate;
200 stored ++; 788 stored ++;
201 } 789 }
202 filp->f_pos += le16_to_cpu(de->rec_len); 790 *f_pos += le16_to_cpu(de->rec_len);
203 } 791 }
204 offset = 0; 792 offset = 0;
205 brelse(bh); 793 brelse(bh);
206 } 794 }
207 795
208 stored = 0; 796 stored = 0;
209bail: 797out:
798 return stored;
799}
800
801static int ocfs2_dir_foreach_blk(struct inode *inode, unsigned long *f_version,
802 loff_t *f_pos, void *priv, filldir_t filldir,
803 int *filldir_err)
804{
805 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
806 return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv,
807 filldir, filldir_err);
808
809 return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir,
810 filldir_err);
811}
812
813/*
814 * This is intended to be called from inside other kernel functions,
815 * so we fake some arguments.
816 */
817int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
818 filldir_t filldir)
819{
820 int ret = 0, filldir_err = 0;
821 unsigned long version = inode->i_version;
822
823 while (*f_pos < i_size_read(inode)) {
824 ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
825 filldir, &filldir_err);
826 if (ret || filldir_err)
827 break;
828 }
829
830 if (ret > 0)
831 ret = -EIO;
832
833 return 0;
834}
835
836/*
837 * ocfs2_readdir()
838 *
839 */
840int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
841{
842 int error = 0;
843 struct inode *inode = filp->f_path.dentry->d_inode;
844 int lock_level = 0;
845
846 mlog_entry("dirino=%llu\n",
847 (unsigned long long)OCFS2_I(inode)->ip_blkno);
848
849 error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
850 if (lock_level && error >= 0) {
851 /* We release EX lock which used to update atime
852 * and get PR lock again to reduce contention
853 * on commonly accessed directories. */
854 ocfs2_meta_unlock(inode, 1);
855 lock_level = 0;
856 error = ocfs2_meta_lock(inode, NULL, 0);
857 }
858 if (error < 0) {
859 if (error != -ENOENT)
860 mlog_errno(error);
861 /* we haven't got any yet, so propagate the error. */
862 goto bail_nolock;
863 }
864
865 error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
866 dirent, filldir, NULL);
867
210 ocfs2_meta_unlock(inode, lock_level); 868 ocfs2_meta_unlock(inode, lock_level);
211 869
212bail_nolock: 870bail_nolock:
213 mlog_exit(stored); 871 mlog_exit(error);
214 872
215 return stored; 873 return error;
216} 874}
217 875
218/* 876/*
@@ -252,6 +910,23 @@ leave:
252 return status; 910 return status;
253} 911}
254 912
913/*
914 * Convenience function for callers which just want the block number
915 * mapped to a name and don't require the full dirent info, etc.
916 */
917int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
918 int namelen, u64 *blkno)
919{
920 int ret;
921 struct buffer_head *bh = NULL;
922 struct ocfs2_dir_entry *dirent = NULL;
923
924 ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &bh, &dirent);
925 brelse(bh);
926
927 return ret;
928}
929
255/* Check for a name within a directory. 930/* Check for a name within a directory.
256 * 931 *
257 * Return 0 if the name does not exist 932 * Return 0 if the name does not exist
@@ -284,77 +959,414 @@ bail:
284 return ret; 959 return ret;
285} 960}
286 961
962struct ocfs2_empty_dir_priv {
963 unsigned seen_dot;
964 unsigned seen_dot_dot;
965 unsigned seen_other;
966};
967static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
968 loff_t pos, u64 ino, unsigned type)
969{
970 struct ocfs2_empty_dir_priv *p = priv;
971
972 /*
973 * Check the positions of "." and ".." records to be sure
974 * they're in the correct place.
975 */
976 if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
977 p->seen_dot = 1;
978 return 0;
979 }
980
981 if (name_len == 2 && !strncmp("..", name, 2) &&
982 pos == OCFS2_DIR_REC_LEN(1)) {
983 p->seen_dot_dot = 1;
984 return 0;
985 }
986
987 p->seen_other = 1;
988 return 1;
989}
287/* 990/*
288 * routine to check that the specified directory is empty (for rmdir) 991 * routine to check that the specified directory is empty (for rmdir)
992 *
993 * Returns 1 if dir is empty, zero otherwise.
289 */ 994 */
290int ocfs2_empty_dir(struct inode *inode) 995int ocfs2_empty_dir(struct inode *inode)
291{ 996{
292 unsigned long offset; 997 int ret;
293 struct buffer_head * bh; 998 loff_t start = 0;
294 struct ocfs2_dir_entry * de, * de1; 999 struct ocfs2_empty_dir_priv priv;
295 struct super_block * sb; 1000
296 int err; 1001 memset(&priv, 0, sizeof(priv));
1002
1003 ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
1004 if (ret)
1005 mlog_errno(ret);
297 1006
298 sb = inode->i_sb; 1007 if (!priv.seen_dot || !priv.seen_dot_dot) {
299 if ((i_size_read(inode) < 1008 mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
300 (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
301 !(bh = ocfs2_bread(inode, 0, &err, 0))) {
302 mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
303 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1009 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1010 /*
1011 * XXX: Is it really safe to allow an unlink to continue?
1012 */
304 return 1; 1013 return 1;
305 } 1014 }
306 1015
307 de = (struct ocfs2_dir_entry *) bh->b_data; 1016 return !priv.seen_other;
308 de1 = (struct ocfs2_dir_entry *) 1017}
309 ((char *)de + le16_to_cpu(de->rec_len)); 1018
310 if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) || 1019static void ocfs2_fill_initial_dirents(struct inode *inode,
311 !le64_to_cpu(de1->inode) || 1020 struct inode *parent,
312 strcmp(".", de->name) || 1021 char *start, unsigned int size)
313 strcmp("..", de1->name)) { 1022{
314 mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n", 1023 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
315 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1024
316 brelse(bh); 1025 de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
317 return 1; 1026 de->name_len = 1;
1027 de->rec_len =
1028 cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1029 strcpy(de->name, ".");
1030 ocfs2_set_de_type(de, S_IFDIR);
1031
1032 de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
1033 de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
1034 de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
1035 de->name_len = 2;
1036 strcpy(de->name, "..");
1037 ocfs2_set_de_type(de, S_IFDIR);
1038}
1039
1040/*
1041 * This works together with code in ocfs2_mknod_locked() which sets
1042 * the inline-data flag and initializes the inline-data section.
1043 */
1044static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
1045 handle_t *handle,
1046 struct inode *parent,
1047 struct inode *inode,
1048 struct buffer_head *di_bh)
1049{
1050 int ret;
1051 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1052 struct ocfs2_inline_data *data = &di->id2.i_data;
1053 unsigned int size = le16_to_cpu(data->id_count);
1054
1055 ret = ocfs2_journal_access(handle, inode, di_bh,
1056 OCFS2_JOURNAL_ACCESS_WRITE);
1057 if (ret) {
1058 mlog_errno(ret);
1059 goto out;
318 } 1060 }
319 offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); 1061
320 de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len)); 1062 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
321 while (offset < i_size_read(inode) ) { 1063
322 if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) { 1064 ocfs2_journal_dirty(handle, di_bh);
323 brelse(bh); 1065 if (ret) {
324 bh = ocfs2_bread(inode, 1066 mlog_errno(ret);
325 offset >> sb->s_blocksize_bits, &err, 0); 1067 goto out;
326 if (!bh) { 1068 }
327 mlog(ML_ERROR, "dir %llu has a hole at %lu\n", 1069
328 (unsigned long long)OCFS2_I(inode)->ip_blkno, offset); 1070 i_size_write(inode, size);
329 offset += sb->s_blocksize; 1071 inode->i_nlink = 2;
330 continue; 1072 inode->i_blocks = ocfs2_inode_sector_count(inode);
331 } 1073
332 de = (struct ocfs2_dir_entry *) bh->b_data; 1074 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
333 } 1075 if (ret < 0)
334 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { 1076 mlog_errno(ret);
335 brelse(bh); 1077
336 return 1; 1078out:
1079 return ret;
1080}
1081
1082static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1083 handle_t *handle,
1084 struct inode *parent,
1085 struct inode *inode,
1086 struct buffer_head *fe_bh,
1087 struct ocfs2_alloc_context *data_ac)
1088{
1089 int status;
1090 struct buffer_head *new_bh = NULL;
1091
1092 mlog_entry_void();
1093
1094 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
1095 data_ac, NULL, &new_bh);
1096 if (status < 0) {
1097 mlog_errno(status);
1098 goto bail;
1099 }
1100
1101 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1102
1103 status = ocfs2_journal_access(handle, inode, new_bh,
1104 OCFS2_JOURNAL_ACCESS_CREATE);
1105 if (status < 0) {
1106 mlog_errno(status);
1107 goto bail;
1108 }
1109 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
1110
1111 ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data,
1112 osb->sb->s_blocksize);
1113
1114 status = ocfs2_journal_dirty(handle, new_bh);
1115 if (status < 0) {
1116 mlog_errno(status);
1117 goto bail;
1118 }
1119
1120 i_size_write(inode, inode->i_sb->s_blocksize);
1121 inode->i_nlink = 2;
1122 inode->i_blocks = ocfs2_inode_sector_count(inode);
1123 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto bail;
1127 }
1128
1129 status = 0;
1130bail:
1131 if (new_bh)
1132 brelse(new_bh);
1133
1134 mlog_exit(status);
1135 return status;
1136}
1137
1138int ocfs2_fill_new_dir(struct ocfs2_super *osb,
1139 handle_t *handle,
1140 struct inode *parent,
1141 struct inode *inode,
1142 struct buffer_head *fe_bh,
1143 struct ocfs2_alloc_context *data_ac)
1144{
1145 BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
1146
1147 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1148 return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
1149
1150 return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
1151 data_ac);
1152}
1153
1154static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
1155 unsigned int new_size)
1156{
1157 struct ocfs2_dir_entry *de;
1158 struct ocfs2_dir_entry *prev_de;
1159 char *de_buf, *limit;
1160 unsigned int bytes = new_size - old_size;
1161
1162 limit = start + old_size;
1163 de_buf = start;
1164 de = (struct ocfs2_dir_entry *)de_buf;
1165 do {
1166 prev_de = de;
1167 de_buf += le16_to_cpu(de->rec_len);
1168 de = (struct ocfs2_dir_entry *)de_buf;
1169 } while (de_buf < limit);
1170
1171 le16_add_cpu(&prev_de->rec_len, bytes);
1172}
1173
1174/*
1175 * We allocate enough clusters to fulfill "blocks_wanted", but set
1176 * i_size to exactly one block. Ocfs2_extend_dir() will handle the
1177 * rest automatically for us.
1178 *
1179 * *first_block_bh is a pointer to the 1st data block allocated to the
1180 * directory.
1181 */
1182static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1183 unsigned int blocks_wanted,
1184 struct buffer_head **first_block_bh)
1185{
1186 int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
1187 u32 alloc, bit_off, len;
1188 struct super_block *sb = dir->i_sb;
1189 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
1190 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
1191 struct ocfs2_inode_info *oi = OCFS2_I(dir);
1192 struct ocfs2_alloc_context *data_ac;
1193 struct buffer_head *dirdata_bh = NULL;
1194 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1195 handle_t *handle;
1196
1197 alloc = ocfs2_clusters_for_bytes(sb, bytes);
1198
1199 /*
1200 * We should never need more than 2 clusters for this -
1201 * maximum dirent size is far less than one block. In fact,
1202 * the only time we'd need more than one cluster is if
1203 * blocksize == clustersize and the dirent won't fit in the
1204 * extra space that the expansion to a single block gives. As
1205 * of today, that only happens on 4k/4k file systems.
1206 */
1207 BUG_ON(alloc > 2);
1208
1209 ret = ocfs2_reserve_clusters(osb, alloc, &data_ac);
1210 if (ret) {
1211 mlog_errno(ret);
1212 goto out;
1213 }
1214
1215 down_write(&oi->ip_alloc_sem);
1216
1217 /*
1218 * Prepare for worst case allocation scenario of two seperate
1219 * extents.
1220 */
1221 if (alloc == 2)
1222 credits += OCFS2_SUBALLOC_ALLOC;
1223
1224 handle = ocfs2_start_trans(osb, credits);
1225 if (IS_ERR(handle)) {
1226 ret = PTR_ERR(handle);
1227 mlog_errno(ret);
1228 goto out_sem;
1229 }
1230
1231 /*
1232 * Try to claim as many clusters as the bitmap can give though
1233 * if we only get one now, that's enough to continue. The rest
1234 * will be claimed after the conversion to extents.
1235 */
1236 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
1237 if (ret) {
1238 mlog_errno(ret);
1239 goto out_commit;
1240 }
1241
1242 /*
1243 * Operations are carefully ordered so that we set up the new
1244 * data block first. The conversion from inline data to
1245 * extents follows.
1246 */
1247 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
1248 dirdata_bh = sb_getblk(sb, blkno);
1249 if (!dirdata_bh) {
1250 ret = -EIO;
1251 mlog_errno(ret);
1252 goto out_commit;
1253 }
1254
1255 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
1256
1257 ret = ocfs2_journal_access(handle, dir, dirdata_bh,
1258 OCFS2_JOURNAL_ACCESS_CREATE);
1259 if (ret) {
1260 mlog_errno(ret);
1261 goto out_commit;
1262 }
1263
1264 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
1265 memset(dirdata_bh->b_data + i_size_read(dir), 0,
1266 sb->s_blocksize - i_size_read(dir));
1267 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir),
1268 sb->s_blocksize);
1269
1270 ret = ocfs2_journal_dirty(handle, dirdata_bh);
1271 if (ret) {
1272 mlog_errno(ret);
1273 goto out_commit;
1274 }
1275
1276 /*
1277 * Set extent, i_size, etc on the directory. After this, the
1278 * inode should contain the same exact dirents as before and
1279 * be fully accessible from system calls.
1280 *
1281 * We let the later dirent insert modify c/mtime - to the user
1282 * the data hasn't changed.
1283 */
1284 ret = ocfs2_journal_access(handle, dir, di_bh,
1285 OCFS2_JOURNAL_ACCESS_CREATE);
1286 if (ret) {
1287 mlog_errno(ret);
1288 goto out_commit;
1289 }
1290
1291 spin_lock(&oi->ip_lock);
1292 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
1293 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1294 spin_unlock(&oi->ip_lock);
1295
1296 ocfs2_dinode_new_extent_list(dir, di);
1297
1298 i_size_write(dir, sb->s_blocksize);
1299 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1300
1301 di->i_size = cpu_to_le64(sb->s_blocksize);
1302 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
1303 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
1304 dir->i_blocks = ocfs2_inode_sector_count(dir);
1305
1306 /*
1307 * This should never fail as our extent list is empty and all
1308 * related blocks have been journaled already.
1309 */
1310 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
1311 NULL);
1312 if (ret) {
1313 mlog_errno(ret);
1314 goto out;
1315 }
1316
1317 ret = ocfs2_journal_dirty(handle, di_bh);
1318 if (ret) {
1319 mlog_errno(ret);
1320 goto out_commit;
1321 }
1322
1323 /*
1324 * We asked for two clusters, but only got one in the 1st
1325 * pass. Claim the 2nd cluster as a separate extent.
1326 */
1327 if (alloc > len) {
1328 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
1329 &len);
1330 if (ret) {
1331 mlog_errno(ret);
1332 goto out_commit;
337 } 1333 }
338 if (le64_to_cpu(de->inode)) { 1334 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
339 brelse(bh); 1335
340 return 0; 1336 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
1337 len, 0, NULL);
1338 if (ret) {
1339 mlog_errno(ret);
1340 goto out;
341 } 1341 }
342 offset += le16_to_cpu(de->rec_len);
343 de = (struct ocfs2_dir_entry *)
344 ((char *)de + le16_to_cpu(de->rec_len));
345 } 1342 }
346 brelse(bh); 1343
347 return 1; 1344 *first_block_bh = dirdata_bh;
1345 dirdata_bh = NULL;
1346
1347out_commit:
1348 ocfs2_commit_trans(osb, handle);
1349
1350out_sem:
1351 up_write(&oi->ip_alloc_sem);
1352
1353out:
1354 if (data_ac)
1355 ocfs2_free_alloc_context(data_ac);
1356
1357 brelse(dirdata_bh);
1358
1359 return ret;
348} 1360}
349 1361
350/* returns a bh of the 1st new block in the allocation. */ 1362/* returns a bh of the 1st new block in the allocation. */
351int ocfs2_do_extend_dir(struct super_block *sb, 1363static int ocfs2_do_extend_dir(struct super_block *sb,
352 handle_t *handle, 1364 handle_t *handle,
353 struct inode *dir, 1365 struct inode *dir,
354 struct buffer_head *parent_fe_bh, 1366 struct buffer_head *parent_fe_bh,
355 struct ocfs2_alloc_context *data_ac, 1367 struct ocfs2_alloc_context *data_ac,
356 struct ocfs2_alloc_context *meta_ac, 1368 struct ocfs2_alloc_context *meta_ac,
357 struct buffer_head **new_bh) 1369 struct buffer_head **new_bh)
358{ 1370{
359 int status; 1371 int status;
360 int extend; 1372 int extend;
@@ -396,10 +1408,18 @@ bail:
396 return status; 1408 return status;
397} 1409}
398 1410
399/* assumes you already have a cluster lock on the directory. */ 1411/*
1412 * Assumes you already have a cluster lock on the directory.
1413 *
1414 * 'blocks_wanted' is only used if we have an inline directory which
1415 * is to be turned into an extent based one. The size of the dirent to
1416 * insert might be larger than the space gained by growing to just one
1417 * block, so we may have to grow the inode by two blocks in that case.
1418 */
400static int ocfs2_extend_dir(struct ocfs2_super *osb, 1419static int ocfs2_extend_dir(struct ocfs2_super *osb,
401 struct inode *dir, 1420 struct inode *dir,
402 struct buffer_head *parent_fe_bh, 1421 struct buffer_head *parent_fe_bh,
1422 unsigned int blocks_wanted,
403 struct buffer_head **new_de_bh) 1423 struct buffer_head **new_de_bh)
404{ 1424{
405 int status = 0; 1425 int status = 0;
@@ -415,6 +1435,38 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
415 1435
416 mlog_entry_void(); 1436 mlog_entry_void();
417 1437
1438 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1439 status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
1440 blocks_wanted, &new_bh);
1441 if (status) {
1442 mlog_errno(status);
1443 goto bail;
1444 }
1445
1446 if (blocks_wanted == 1) {
1447 /*
1448 * If the new dirent will fit inside the space
1449 * created by pushing out to one block, then
1450 * we can complete the operation
1451 * here. Otherwise we have to expand i_size
1452 * and format the 2nd block below.
1453 */
1454 BUG_ON(new_bh == NULL);
1455 goto bail_bh;
1456 }
1457
1458 /*
1459 * Get rid of 'new_bh' - we want to format the 2nd
1460 * data block and return that instead.
1461 */
1462 brelse(new_bh);
1463 new_bh = NULL;
1464
1465 dir_i_size = i_size_read(dir);
1466 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
1467 goto do_extend;
1468 }
1469
418 dir_i_size = i_size_read(dir); 1470 dir_i_size = i_size_read(dir);
419 mlog(0, "extending dir %llu (i_size = %lld)\n", 1471 mlog(0, "extending dir %llu (i_size = %lld)\n",
420 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 1472 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
@@ -452,6 +1504,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
452 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 1504 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
453 } 1505 }
454 1506
1507do_extend:
455 down_write(&OCFS2_I(dir)->ip_alloc_sem); 1508 down_write(&OCFS2_I(dir)->ip_alloc_sem);
456 drop_alloc_sem = 1; 1509 drop_alloc_sem = 1;
457 1510
@@ -497,6 +1550,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
497 goto bail; 1550 goto bail;
498 } 1551 }
499 1552
1553bail_bh:
500 *new_de_bh = new_bh; 1554 *new_de_bh = new_bh;
501 get_bh(*new_de_bh); 1555 get_bh(*new_de_bh);
502bail: 1556bail:
@@ -517,41 +1571,71 @@ bail:
517 return status; 1571 return status;
518} 1572}
519 1573
520/* 1574static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
521 * Search the dir for a good spot, extending it if necessary. The 1575 const char *name, int namelen,
522 * block containing an appropriate record is returned in ret_de_bh. 1576 struct buffer_head **ret_de_bh,
523 */ 1577 unsigned int *blocks_wanted)
524int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
525 struct inode *dir,
526 struct buffer_head *parent_fe_bh,
527 const char *name,
528 int namelen,
529 struct buffer_head **ret_de_bh)
530{ 1578{
531 unsigned long offset; 1579 int ret;
532 struct buffer_head * bh = NULL; 1580 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
533 unsigned short rec_len; 1581 struct ocfs2_dir_entry *de, *last_de = NULL;
534 struct ocfs2_dinode *fe; 1582 char *de_buf, *limit;
535 struct ocfs2_dir_entry *de; 1583 unsigned long offset = 0;
536 struct super_block *sb; 1584 unsigned int rec_len, new_rec_len;
537 int status; 1585
1586 de_buf = di->id2.i_data.id_data;
1587 limit = de_buf + i_size_read(dir);
1588 rec_len = OCFS2_DIR_REC_LEN(namelen);
538 1589
539 mlog_entry_void(); 1590 while (de_buf < limit) {
1591 de = (struct ocfs2_dir_entry *)de_buf;
540 1592
541 mlog(0, "getting ready to insert namelen %d into dir %llu\n", 1593 if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
542 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); 1594 ret = -ENOENT;
1595 goto out;
1596 }
1597 if (ocfs2_match(namelen, name, de)) {
1598 ret = -EEXIST;
1599 goto out;
1600 }
1601 if (ocfs2_dirent_would_fit(de, rec_len)) {
1602 /* Ok, we found a spot. Return this bh and let
1603 * the caller actually fill it in. */
1604 *ret_de_bh = di_bh;
1605 get_bh(*ret_de_bh);
1606 ret = 0;
1607 goto out;
1608 }
543 1609
544 BUG_ON(!S_ISDIR(dir->i_mode)); 1610 last_de = de;
545 fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 1611 de_buf += le16_to_cpu(de->rec_len);
546 BUG_ON(le64_to_cpu(fe->i_size) != i_size_read(dir)); 1612 offset += le16_to_cpu(de->rec_len);
1613 }
547 1614
548 sb = dir->i_sb; 1615 /*
1616 * We're going to require expansion of the directory - figure
1617 * out how many blocks we'll need so that a place for the
1618 * dirent can be found.
1619 */
1620 *blocks_wanted = 1;
1621 new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir));
1622 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
1623 *blocks_wanted = 2;
1624
1625 ret = -ENOSPC;
1626out:
1627 return ret;
1628}
549 1629
550 if (!namelen) { 1630static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
551 status = -EINVAL; 1631 int namelen, struct buffer_head **ret_de_bh)
552 mlog_errno(status); 1632{
553 goto bail; 1633 unsigned long offset;
554 } 1634 struct buffer_head *bh = NULL;
1635 unsigned short rec_len;
1636 struct ocfs2_dir_entry *de;
1637 struct super_block *sb = dir->i_sb;
1638 int status;
555 1639
556 bh = ocfs2_bread(dir, 0, &status, 0); 1640 bh = ocfs2_bread(dir, 0, &status, 0);
557 if (!bh) { 1641 if (!bh) {
@@ -568,17 +1652,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
568 bh = NULL; 1652 bh = NULL;
569 1653
570 if (i_size_read(dir) <= offset) { 1654 if (i_size_read(dir) <= offset) {
571 status = ocfs2_extend_dir(osb, 1655 /*
572 dir, 1656 * Caller will have to expand this
573 parent_fe_bh, 1657 * directory.
574 &bh); 1658 */
575 if (status < 0) { 1659 status = -ENOSPC;
576 mlog_errno(status);
577 goto bail;
578 }
579 BUG_ON(!bh);
580 *ret_de_bh = bh;
581 get_bh(*ret_de_bh);
582 goto bail; 1660 goto bail;
583 } 1661 }
584 bh = ocfs2_bread(dir, 1662 bh = ocfs2_bread(dir,
@@ -600,10 +1678,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
600 status = -EEXIST; 1678 status = -EEXIST;
601 goto bail; 1679 goto bail;
602 } 1680 }
603 if (((le64_to_cpu(de->inode) == 0) && 1681 if (ocfs2_dirent_would_fit(de, rec_len)) {
604 (le16_to_cpu(de->rec_len) >= rec_len)) ||
605 (le16_to_cpu(de->rec_len) >=
606 (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
607 /* Ok, we found a spot. Return this bh and let 1682 /* Ok, we found a spot. Return this bh and let
608 * the caller actually fill it in. */ 1683 * the caller actually fill it in. */
609 *ret_de_bh = bh; 1684 *ret_de_bh = bh;
@@ -623,3 +1698,61 @@ bail:
623 mlog_exit(status); 1698 mlog_exit(status);
624 return status; 1699 return status;
625} 1700}
1701
1702int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
1703 struct inode *dir,
1704 struct buffer_head *parent_fe_bh,
1705 const char *name,
1706 int namelen,
1707 struct buffer_head **ret_de_bh)
1708{
1709 int ret;
1710 unsigned int blocks_wanted = 1;
1711 struct buffer_head *bh = NULL;
1712
1713 mlog(0, "getting ready to insert namelen %d into dir %llu\n",
1714 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);
1715
1716 *ret_de_bh = NULL;
1717
1718 if (!namelen) {
1719 ret = -EINVAL;
1720 mlog_errno(ret);
1721 goto out;
1722 }
1723
1724 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1725 ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
1726 namelen, &bh, &blocks_wanted);
1727 } else
1728 ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);
1729
1730 if (ret && ret != -ENOSPC) {
1731 mlog_errno(ret);
1732 goto out;
1733 }
1734
1735 if (ret == -ENOSPC) {
1736 /*
1737 * We have to expand the directory to add this name.
1738 */
1739 BUG_ON(bh);
1740
1741 ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
1742 &bh);
1743 if (ret) {
1744 if (ret != -ENOSPC)
1745 mlog_errno(ret);
1746 goto out;
1747 }
1748
1749 BUG_ON(!bh);
1750 }
1751
1752 *ret_de_bh = bh;
1753 bh = NULL;
1754out:
1755 if (bh)
1756 brelse(bh);
1757 return ret;
1758}
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index 3f67e146864a..ce48b9080d87 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -26,17 +26,49 @@
26#ifndef OCFS2_DIR_H 26#ifndef OCFS2_DIR_H
27#define OCFS2_DIR_H 27#define OCFS2_DIR_H
28 28
29struct buffer_head *ocfs2_find_entry(const char *name,
30 int namelen,
31 struct inode *dir,
32 struct ocfs2_dir_entry **res_dir);
33int ocfs2_delete_entry(handle_t *handle,
34 struct inode *dir,
35 struct ocfs2_dir_entry *de_del,
36 struct buffer_head *bh);
37int __ocfs2_add_entry(handle_t *handle,
38 struct inode *dir,
39 const char *name, int namelen,
40 struct inode *inode, u64 blkno,
41 struct buffer_head *parent_fe_bh,
42 struct buffer_head *insert_bh);
43static inline int ocfs2_add_entry(handle_t *handle,
44 struct dentry *dentry,
45 struct inode *inode, u64 blkno,
46 struct buffer_head *parent_fe_bh,
47 struct buffer_head *insert_bh)
48{
49 return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
50 dentry->d_name.name, dentry->d_name.len,
51 inode, blkno, parent_fe_bh, insert_bh);
52}
53int ocfs2_update_entry(struct inode *dir, handle_t *handle,
54 struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
55 struct inode *new_entry_inode);
56
29int ocfs2_check_dir_for_entry(struct inode *dir, 57int ocfs2_check_dir_for_entry(struct inode *dir,
30 const char *name, 58 const char *name,
31 int namelen); 59 int namelen);
32int ocfs2_empty_dir(struct inode *inode); /* FIXME: to namei.c */ 60int ocfs2_empty_dir(struct inode *inode);
33int ocfs2_find_files_on_disk(const char *name, 61int ocfs2_find_files_on_disk(const char *name,
34 int namelen, 62 int namelen,
35 u64 *blkno, 63 u64 *blkno,
36 struct inode *inode, 64 struct inode *inode,
37 struct buffer_head **dirent_bh, 65 struct buffer_head **dirent_bh,
38 struct ocfs2_dir_entry **dirent); 66 struct ocfs2_dir_entry **dirent);
67int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
68 int namelen, u64 *blkno);
39int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); 69int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
70int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
71 filldir_t filldir);
40int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, 72int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
41 struct inode *dir, 73 struct inode *dir,
42 struct buffer_head *parent_fe_bh, 74 struct buffer_head *parent_fe_bh,
@@ -44,11 +76,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
44 int namelen, 76 int namelen,
45 struct buffer_head **ret_de_bh); 77 struct buffer_head **ret_de_bh);
46struct ocfs2_alloc_context; 78struct ocfs2_alloc_context;
47int ocfs2_do_extend_dir(struct super_block *sb, 79int ocfs2_fill_new_dir(struct ocfs2_super *osb,
48 handle_t *handle, 80 handle_t *handle,
49 struct inode *dir, 81 struct inode *parent,
50 struct buffer_head *parent_fe_bh, 82 struct inode *inode,
51 struct ocfs2_alloc_context *data_ac, 83 struct buffer_head *fe_bh,
52 struct ocfs2_alloc_context *meta_ac, 84 struct ocfs2_alloc_context *data_ac);
53 struct buffer_head **new_bh); 85
54#endif /* OCFS2_DIR_H */ 86#endif /* OCFS2_DIR_H */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f71250ed166f..41c76ff2fcfb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1482,6 +1482,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1482 lvb->lvb_imtime_packed = 1482 lvb->lvb_imtime_packed =
1483 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1483 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1484 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1484 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1485 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
1485 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1486 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1486 1487
1487out: 1488out:
@@ -1515,6 +1516,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1515 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1516 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1516 1517
1517 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 1518 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1519 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
1518 ocfs2_set_inode_flags(inode); 1520 ocfs2_set_inode_flags(inode);
1519 1521
1520 /* fast-symlinks are a special case */ 1522 /* fast-symlinks are a special case */
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 492bad32a8c0..87a785e41205 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -29,12 +29,12 @@
29 29
30#include "dcache.h" 30#include "dcache.h"
31 31
32#define OCFS2_LVB_VERSION 4 32#define OCFS2_LVB_VERSION 5
33 33
34struct ocfs2_meta_lvb { 34struct ocfs2_meta_lvb {
35 __u8 lvb_version; 35 __u8 lvb_version;
36 __u8 lvb_reserved0; 36 __u8 lvb_reserved0;
37 __be16 lvb_reserved1; 37 __be16 lvb_idynfeatures;
38 __be32 lvb_iclusters; 38 __be32 lvb_iclusters;
39 __be32 lvb_iuid; 39 __be32 lvb_iuid;
40 __be32 lvb_igid; 40 __be32 lvb_igid;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index bc48177bd183..c3bbc198f9ce 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -88,8 +88,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
88 struct dentry *parent; 88 struct dentry *parent;
89 struct inode *inode; 89 struct inode *inode;
90 struct inode *dir = child->d_inode; 90 struct inode *dir = child->d_inode;
91 struct buffer_head *dirent_bh = NULL;
92 struct ocfs2_dir_entry *dirent;
93 91
94 mlog_entry("(0x%p, '%.*s')\n", child, 92 mlog_entry("(0x%p, '%.*s')\n", child,
95 child->d_name.len, child->d_name.name); 93 child->d_name.len, child->d_name.name);
@@ -105,8 +103,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
105 goto bail; 103 goto bail;
106 } 104 }
107 105
108 status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh, 106 status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
109 &dirent);
110 if (status < 0) { 107 if (status < 0) {
111 parent = ERR_PTR(-ENOENT); 108 parent = ERR_PTR(-ENOENT);
112 goto bail_unlock; 109 goto bail_unlock;
@@ -131,9 +128,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
131bail_unlock: 128bail_unlock:
132 ocfs2_meta_unlock(dir, 0); 129 ocfs2_meta_unlock(dir, 0);
133 130
134 if (dirent_bh)
135 brelse(dirent_bh);
136
137bail: 131bail:
138 mlog_exit_ptr(parent); 132 mlog_exit_ptr(parent);
139 133
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 03c1d365c78b..c58668a326fe 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -387,6 +387,12 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
387 struct ocfs2_extent_rec *rec; 387 struct ocfs2_extent_rec *rec;
388 u32 coff; 388 u32 coff;
389 389
390 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
391 ret = -ERANGE;
392 mlog_errno(ret);
393 goto out;
394 }
395
390 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, 396 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
391 num_clusters, extent_flags); 397 num_clusters, extent_flags);
392 if (ret == 0) 398 if (ret == 0)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f3bc3658e7a5..a62b14eb4065 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -397,6 +397,15 @@ static int ocfs2_truncate_file(struct inode *inode,
397 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); 397 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
398 truncate_inode_pages(inode->i_mapping, new_i_size); 398 truncate_inode_pages(inode->i_mapping, new_i_size);
399 399
400 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
401 status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
402 i_size_read(inode), 0);
403 if (status)
404 mlog_errno(status);
405
406 goto bail_unlock_data;
407 }
408
400 /* alright, we're going to need to do a full blown alloc size 409 /* alright, we're going to need to do a full blown alloc size
401 * change. Orphan the inode so that recovery can complete the 410 * change. Orphan the inode so that recovery can complete the
402 * truncate if necessary. This does the task of marking 411 * truncate if necessary. This does the task of marking
@@ -779,25 +788,6 @@ leave:
779 return status; 788 return status;
780} 789}
781 790
782static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
783 u32 clusters_to_add, int mark_unwritten)
784{
785 int ret;
786
787 /*
788 * The alloc sem blocks peope in read/write from reading our
789 * allocation until we're done changing it. We depend on
790 * i_mutex to block other extend/truncate calls while we're
791 * here.
792 */
793 down_write(&OCFS2_I(inode)->ip_alloc_sem);
794 ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
795 mark_unwritten);
796 up_write(&OCFS2_I(inode)->ip_alloc_sem);
797
798 return ret;
799}
800
801/* Some parts of this taken from generic_cont_expand, which turned out 791/* Some parts of this taken from generic_cont_expand, which turned out
802 * to be too fragile to do exactly what we need without us having to 792 * to be too fragile to do exactly what we need without us having to
803 * worry about recursive locking in ->prepare_write() and 793 * worry about recursive locking in ->prepare_write() and
@@ -889,25 +879,48 @@ out:
889 return ret; 879 return ret;
890} 880}
891 881
892/* 882int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
893 * A tail_to_skip value > 0 indicates that we're being called from 883{
894 * ocfs2_file_aio_write(). This has the following implications: 884 int ret;
895 * 885 u32 clusters_to_add;
896 * - we don't want to update i_size 886 struct ocfs2_inode_info *oi = OCFS2_I(inode);
897 * - di_bh will be NULL, which is fine because it's only used in the 887
898 * case where we want to update i_size. 888 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
899 * - ocfs2_zero_extend() will then only be filling the hole created 889 if (clusters_to_add < oi->ip_clusters)
900 * between i_size and the start of the write. 890 clusters_to_add = 0;
901 */ 891 else
892 clusters_to_add -= oi->ip_clusters;
893
894 if (clusters_to_add) {
895 ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
896 clusters_to_add, 0);
897 if (ret) {
898 mlog_errno(ret);
899 goto out;
900 }
901 }
902
903 /*
904 * Call this even if we don't add any clusters to the tree. We
905 * still need to zero the area between the old i_size and the
906 * new i_size.
907 */
908 ret = ocfs2_zero_extend(inode, zero_to);
909 if (ret < 0)
910 mlog_errno(ret);
911
912out:
913 return ret;
914}
915
902static int ocfs2_extend_file(struct inode *inode, 916static int ocfs2_extend_file(struct inode *inode,
903 struct buffer_head *di_bh, 917 struct buffer_head *di_bh,
904 u64 new_i_size, 918 u64 new_i_size)
905 size_t tail_to_skip)
906{ 919{
907 int ret = 0; 920 int ret = 0, data_locked = 0;
908 u32 clusters_to_add = 0; 921 struct ocfs2_inode_info *oi = OCFS2_I(inode);
909 922
910 BUG_ON(!tail_to_skip && !di_bh); 923 BUG_ON(!di_bh);
911 924
912 /* setattr sometimes calls us like this. */ 925 /* setattr sometimes calls us like this. */
913 if (new_i_size == 0) 926 if (new_i_size == 0)
@@ -917,13 +930,18 @@ static int ocfs2_extend_file(struct inode *inode,
917 goto out; 930 goto out;
918 BUG_ON(new_i_size < i_size_read(inode)); 931 BUG_ON(new_i_size < i_size_read(inode));
919 932
920 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { 933 /*
921 BUG_ON(tail_to_skip != 0); 934 * Fall through for converting inline data, even if the fs
935 * supports sparse files.
936 *
937 * The check for inline data here is legal - nobody can add
938 * the feature since we have i_mutex. We must check it again
939 * after acquiring ip_alloc_sem though, as paths like mmap
940 * might have raced us to converting the inode to extents.
941 */
942 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
943 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
922 goto out_update_size; 944 goto out_update_size;
923 }
924
925 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) -
926 OCFS2_I(inode)->ip_clusters;
927 945
928 /* 946 /*
929 * protect the pages that ocfs2_zero_extend is going to be 947 * protect the pages that ocfs2_zero_extend is going to be
@@ -937,39 +955,52 @@ static int ocfs2_extend_file(struct inode *inode,
937 mlog_errno(ret); 955 mlog_errno(ret);
938 goto out; 956 goto out;
939 } 957 }
958 data_locked = 1;
959
960 /*
961 * The alloc sem blocks people in read/write from reading our
962 * allocation until we're done changing it. We depend on
963 * i_mutex to block other extend/truncate calls while we're
964 * here.
965 */
966 down_write(&oi->ip_alloc_sem);
967
968 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
969 /*
970 * We can optimize small extends by keeping the inodes
971 * inline data.
972 */
973 if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
974 up_write(&oi->ip_alloc_sem);
975 goto out_update_size;
976 }
977
978 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
979 if (ret) {
980 up_write(&oi->ip_alloc_sem);
940 981
941 if (clusters_to_add) {
942 ret = ocfs2_extend_allocation(inode,
943 OCFS2_I(inode)->ip_clusters,
944 clusters_to_add, 0);
945 if (ret < 0) {
946 mlog_errno(ret); 982 mlog_errno(ret);
947 goto out_unlock; 983 goto out_unlock;
948 } 984 }
949 } 985 }
950 986
951 /* 987 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
952 * Call this even if we don't add any clusters to the tree. We 988 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
953 * still need to zero the area between the old i_size and the 989
954 * new i_size. 990 up_write(&oi->ip_alloc_sem);
955 */ 991
956 ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
957 if (ret < 0) { 992 if (ret < 0) {
958 mlog_errno(ret); 993 mlog_errno(ret);
959 goto out_unlock; 994 goto out_unlock;
960 } 995 }
961 996
962out_update_size: 997out_update_size:
963 if (!tail_to_skip) { 998 ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
964 /* We're being called from ocfs2_setattr() which wants 999 if (ret < 0)
965 * us to update i_size */ 1000 mlog_errno(ret);
966 ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
967 if (ret < 0)
968 mlog_errno(ret);
969 }
970 1001
971out_unlock: 1002out_unlock:
972 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1003 if (data_locked)
973 ocfs2_data_unlock(inode, 1); 1004 ocfs2_data_unlock(inode, 1);
974 1005
975out: 1006out:
@@ -1035,7 +1066,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1035 if (i_size_read(inode) > attr->ia_size) 1066 if (i_size_read(inode) > attr->ia_size)
1036 status = ocfs2_truncate_file(inode, bh, attr->ia_size); 1067 status = ocfs2_truncate_file(inode, bh, attr->ia_size);
1037 else 1068 else
1038 status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); 1069 status = ocfs2_extend_file(inode, bh, attr->ia_size);
1039 if (status < 0) { 1070 if (status < 0) {
1040 if (status != -ENOSPC) 1071 if (status != -ENOSPC)
1041 mlog_errno(status); 1072 mlog_errno(status);
@@ -1243,6 +1274,31 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1243{ 1274{
1244 int ret; 1275 int ret;
1245 u32 cpos, phys_cpos, clusters, alloc_size; 1276 u32 cpos, phys_cpos, clusters, alloc_size;
1277 u64 end = start + len;
1278 struct buffer_head *di_bh = NULL;
1279
1280 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1281 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1282 OCFS2_I(inode)->ip_blkno, &di_bh,
1283 OCFS2_BH_CACHED, inode);
1284 if (ret) {
1285 mlog_errno(ret);
1286 goto out;
1287 }
1288
1289 /*
1290 * Nothing to do if the requested reservation range
1291 * fits within the inode.
1292 */
1293 if (ocfs2_size_fits_inline_data(di_bh, end))
1294 goto out;
1295
1296 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
1297 if (ret) {
1298 mlog_errno(ret);
1299 goto out;
1300 }
1301 }
1246 1302
1247 /* 1303 /*
1248 * We consider both start and len to be inclusive. 1304 * We consider both start and len to be inclusive.
@@ -1288,6 +1344,8 @@ next:
1288 1344
1289 ret = 0; 1345 ret = 0;
1290out: 1346out:
1347
1348 brelse(di_bh);
1291 return ret; 1349 return ret;
1292} 1350}
1293 1351
@@ -1469,6 +1527,14 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1469 if (byte_len == 0) 1527 if (byte_len == 0)
1470 return 0; 1528 return 0;
1471 1529
1530 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1531 ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
1532 byte_start + byte_len, 1);
1533 if (ret)
1534 mlog_errno(ret);
1535 return ret;
1536 }
1537
1472 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1538 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1473 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1539 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
1474 if (trunc_len >= trunc_start) 1540 if (trunc_len >= trunc_start)
@@ -1713,15 +1779,13 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1713 int appending, 1779 int appending,
1714 int *direct_io) 1780 int *direct_io)
1715{ 1781{
1716 int ret = 0, meta_level = appending; 1782 int ret = 0, meta_level = 0;
1717 struct inode *inode = dentry->d_inode; 1783 struct inode *inode = dentry->d_inode;
1718 u32 clusters; 1784 loff_t saved_pos, end;
1719 loff_t newsize, saved_pos;
1720 1785
1721 /* 1786 /*
1722 * We sample i_size under a read level meta lock to see if our write 1787 * We start with a read level meta lock and only jump to an ex
1723 * is extending the file, if it is we back off and get a write level 1788 * if we need to make modifications here.
1724 * meta lock.
1725 */ 1789 */
1726 for(;;) { 1790 for(;;) {
1727 ret = ocfs2_meta_lock(inode, NULL, meta_level); 1791 ret = ocfs2_meta_lock(inode, NULL, meta_level);
@@ -1763,87 +1827,47 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1763 saved_pos = *ppos; 1827 saved_pos = *ppos;
1764 } 1828 }
1765 1829
1766 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { 1830 end = saved_pos + count;
1767 loff_t end = saved_pos + count;
1768 1831
1769 /* 1832 /*
1770 * Skip the O_DIRECT checks if we don't need 1833 * Skip the O_DIRECT checks if we don't need
1771 * them. 1834 * them.
1772 */ 1835 */
1773 if (!direct_io || !(*direct_io)) 1836 if (!direct_io || !(*direct_io))
1774 break;
1775
1776 /*
1777 * Allowing concurrent direct writes means
1778 * i_size changes wouldn't be synchronized, so
1779 * one node could wind up truncating another
1780 * nodes writes.
1781 */
1782 if (end > i_size_read(inode)) {
1783 *direct_io = 0;
1784 break;
1785 }
1786
1787 /*
1788 * We don't fill holes during direct io, so
1789 * check for them here. If any are found, the
1790 * caller will have to retake some cluster
1791 * locks and initiate the io as buffered.
1792 */
1793 ret = ocfs2_check_range_for_holes(inode, saved_pos,
1794 count);
1795 if (ret == 1) {
1796 *direct_io = 0;
1797 ret = 0;
1798 } else if (ret < 0)
1799 mlog_errno(ret);
1800 break; 1837 break;
1801 }
1802 1838
1803 /* 1839 /*
1804 * The rest of this loop is concerned with legacy file 1840 * There's no sane way to do direct writes to an inode
1805 * systems which don't support sparse files. 1841 * with inline data.
1806 */ 1842 */
1807 1843 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1808 newsize = count + saved_pos; 1844 *direct_io = 0;
1809
1810 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
1811 (long long) saved_pos, (long long) newsize,
1812 (long long) i_size_read(inode));
1813
1814 /* No need for a higher level metadata lock if we're
1815 * never going past i_size. */
1816 if (newsize <= i_size_read(inode))
1817 break; 1845 break;
1818
1819 if (meta_level == 0) {
1820 ocfs2_meta_unlock(inode, meta_level);
1821 meta_level = 1;
1822 continue;
1823 } 1846 }
1824 1847
1825 spin_lock(&OCFS2_I(inode)->ip_lock); 1848 /*
1826 clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - 1849 * Allowing concurrent direct writes means
1827 OCFS2_I(inode)->ip_clusters; 1850 * i_size changes wouldn't be synchronized, so
1828 spin_unlock(&OCFS2_I(inode)->ip_lock); 1851 * one node could wind up truncating another
1829 1852 * nodes writes.
1830 mlog(0, "Writing at EOF, may need more allocation: " 1853 */
1831 "i_size = %lld, newsize = %lld, need %u clusters\n", 1854 if (end > i_size_read(inode)) {
1832 (long long) i_size_read(inode), (long long) newsize, 1855 *direct_io = 0;
1833 clusters);
1834
1835 /* We only want to continue the rest of this loop if
1836 * our extend will actually require more
1837 * allocation. */
1838 if (!clusters)
1839 break; 1856 break;
1840
1841 ret = ocfs2_extend_file(inode, NULL, newsize, count);
1842 if (ret < 0) {
1843 if (ret != -ENOSPC)
1844 mlog_errno(ret);
1845 goto out_unlock;
1846 } 1857 }
1858
1859 /*
1860 * We don't fill holes during direct io, so
1861 * check for them here. If any are found, the
1862 * caller will have to retake some cluster
1863 * locks and initiate the io as buffered.
1864 */
1865 ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
1866 if (ret == 1) {
1867 *direct_io = 0;
1868 ret = 0;
1869 } else if (ret < 0)
1870 mlog_errno(ret);
1847 break; 1871 break;
1848 } 1872 }
1849 1873
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 36fe27f268ee..066f14add3a8 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -47,6 +47,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
47 struct ocfs2_alloc_context *data_ac, 47 struct ocfs2_alloc_context *data_ac,
48 struct ocfs2_alloc_context *meta_ac, 48 struct ocfs2_alloc_context *meta_ac,
49 enum ocfs2_alloc_restarted *reason_ret); 49 enum ocfs2_alloc_restarted *reason_ret);
50int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
51 u64 zero_to);
50int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, 52int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
51 u32 clusters_to_add, u32 extents_to_split, 53 u32 clusters_to_add, u32 extents_to_split,
52 struct ocfs2_alloc_context **data_ac, 54 struct ocfs2_alloc_context **data_ac,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c53a6763bbbe..1d5e0cb0fda1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -241,6 +241,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
241 241
242 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 242 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
243 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 243 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
244 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
244 245
245 inode->i_version = 1; 246 inode->i_version = 1;
246 inode->i_generation = le32_to_cpu(fe->i_generation); 247 inode->i_generation = le32_to_cpu(fe->i_generation);
@@ -513,6 +514,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
513 514
514 fe = (struct ocfs2_dinode *) fe_bh->b_data; 515 fe = (struct ocfs2_dinode *) fe_bh->b_data;
515 516
517 /*
518 * This check will also skip truncate of inodes with inline
519 * data and fast symlinks.
520 */
516 if (fe->i_clusters) { 521 if (fe->i_clusters) {
517 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 522 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
518 if (IS_ERR(handle)) { 523 if (IS_ERR(handle)) {
@@ -1220,6 +1225,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1220 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1225 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1221 ocfs2_get_inode_flags(OCFS2_I(inode)); 1226 ocfs2_get_inode_flags(OCFS2_I(inode));
1222 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); 1227 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1228 fe->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
1223 spin_unlock(&OCFS2_I(inode)->ip_lock); 1229 spin_unlock(&OCFS2_I(inode)->ip_lock);
1224 1230
1225 fe->i_size = cpu_to_le64(i_size_read(inode)); 1231 fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1257,6 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode,
1257 1263
1258 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1264 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1259 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 1265 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
1266 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
1260 ocfs2_set_inode_flags(inode); 1267 ocfs2_set_inode_flags(inode);
1261 i_size_write(inode, le64_to_cpu(fe->i_size)); 1268 i_size_write(inode, le64_to_cpu(fe->i_size));
1262 inode->i_nlink = le16_to_cpu(fe->i_links_count); 1269 inode->i_nlink = le16_to_cpu(fe->i_links_count);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index a41d0817121b..70e881c55536 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -51,6 +51,7 @@ struct ocfs2_inode_info
51 51
52 u32 ip_flags; /* see below */ 52 u32 ip_flags; /* see below */
53 u32 ip_attr; /* inode attributes */ 53 u32 ip_attr; /* inode attributes */
54 u16 ip_dyn_features;
54 55
55 /* protected by recovery_lock. */ 56 /* protected by recovery_lock. */
56 struct inode *ip_next_orphan; 57 struct inode *ip_next_orphan;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dbfb20bb27ea..f9d01e25298d 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -35,13 +35,13 @@
35#include "ocfs2.h" 35#include "ocfs2.h"
36 36
37#include "alloc.h" 37#include "alloc.h"
38#include "dir.h"
38#include "dlmglue.h" 39#include "dlmglue.h"
39#include "extent_map.h" 40#include "extent_map.h"
40#include "heartbeat.h" 41#include "heartbeat.h"
41#include "inode.h" 42#include "inode.h"
42#include "journal.h" 43#include "journal.h"
43#include "localalloc.h" 44#include "localalloc.h"
44#include "namei.h"
45#include "slot_map.h" 45#include "slot_map.h"
46#include "super.h" 46#include "super.h"
47#include "vote.h" 47#include "vote.h"
@@ -1213,17 +1213,49 @@ bail:
1213 return status; 1213 return status;
1214} 1214}
1215 1215
1216struct ocfs2_orphan_filldir_priv {
1217 struct inode *head;
1218 struct ocfs2_super *osb;
1219};
1220
1221static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
1222 loff_t pos, u64 ino, unsigned type)
1223{
1224 struct ocfs2_orphan_filldir_priv *p = priv;
1225 struct inode *iter;
1226
1227 if (name_len == 1 && !strncmp(".", name, 1))
1228 return 0;
1229 if (name_len == 2 && !strncmp("..", name, 2))
1230 return 0;
1231
1232 /* Skip bad inodes so that recovery can continue */
1233 iter = ocfs2_iget(p->osb, ino,
1234 OCFS2_FI_FLAG_ORPHAN_RECOVERY);
1235 if (IS_ERR(iter))
1236 return 0;
1237
1238 mlog(0, "queue orphan %llu\n",
1239 (unsigned long long)OCFS2_I(iter)->ip_blkno);
1240 /* No locking is required for the next_orphan queue as there
1241 * is only ever a single process doing orphan recovery. */
1242 OCFS2_I(iter)->ip_next_orphan = p->head;
1243 p->head = iter;
1244
1245 return 0;
1246}
1247
1216static int ocfs2_queue_orphans(struct ocfs2_super *osb, 1248static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1217 int slot, 1249 int slot,
1218 struct inode **head) 1250 struct inode **head)
1219{ 1251{
1220 int status; 1252 int status;
1221 struct inode *orphan_dir_inode = NULL; 1253 struct inode *orphan_dir_inode = NULL;
1222 struct inode *iter; 1254 struct ocfs2_orphan_filldir_priv priv;
1223 unsigned long offset, blk, local; 1255 loff_t pos = 0;
1224 struct buffer_head *bh = NULL; 1256
1225 struct ocfs2_dir_entry *de; 1257 priv.osb = osb;
1226 struct super_block *sb = osb->sb; 1258 priv.head = *head;
1227 1259
1228 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 1260 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
1229 ORPHAN_DIR_SYSTEM_INODE, 1261 ORPHAN_DIR_SYSTEM_INODE,
@@ -1241,77 +1273,15 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1241 goto out; 1273 goto out;
1242 } 1274 }
1243 1275
1244 offset = 0; 1276 status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
1245 iter = NULL; 1277 ocfs2_orphan_filldir);
1246 while(offset < i_size_read(orphan_dir_inode)) { 1278 if (status) {
1247 blk = offset >> sb->s_blocksize_bits; 1279 mlog_errno(status);
1248 1280 goto out;
1249 bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0);
1250 if (!bh)
1251 status = -EINVAL;
1252 if (status < 0) {
1253 if (bh)
1254 brelse(bh);
1255 mlog_errno(status);
1256 goto out_unlock;
1257 }
1258
1259 local = 0;
1260 while(offset < i_size_read(orphan_dir_inode)
1261 && local < sb->s_blocksize) {
1262 de = (struct ocfs2_dir_entry *) (bh->b_data + local);
1263
1264 if (!ocfs2_check_dir_entry(orphan_dir_inode,
1265 de, bh, local)) {
1266 status = -EINVAL;
1267 mlog_errno(status);
1268 brelse(bh);
1269 goto out_unlock;
1270 }
1271
1272 local += le16_to_cpu(de->rec_len);
1273 offset += le16_to_cpu(de->rec_len);
1274
1275 /* I guess we silently fail on no inode? */
1276 if (!le64_to_cpu(de->inode))
1277 continue;
1278 if (de->file_type > OCFS2_FT_MAX) {
1279 mlog(ML_ERROR,
1280 "block %llu contains invalid de: "
1281 "inode = %llu, rec_len = %u, "
1282 "name_len = %u, file_type = %u, "
1283 "name='%.*s'\n",
1284 (unsigned long long)bh->b_blocknr,
1285 (unsigned long long)le64_to_cpu(de->inode),
1286 le16_to_cpu(de->rec_len),
1287 de->name_len,
1288 de->file_type,
1289 de->name_len,
1290 de->name);
1291 continue;
1292 }
1293 if (de->name_len == 1 && !strncmp(".", de->name, 1))
1294 continue;
1295 if (de->name_len == 2 && !strncmp("..", de->name, 2))
1296 continue;
1297
1298 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1299 OCFS2_FI_FLAG_ORPHAN_RECOVERY);
1300 if (IS_ERR(iter))
1301 continue;
1302
1303 mlog(0, "queue orphan %llu\n",
1304 (unsigned long long)OCFS2_I(iter)->ip_blkno);
1305 /* No locking is required for the next_orphan
1306 * queue as there is only ever a single
1307 * process doing orphan recovery. */
1308 OCFS2_I(iter)->ip_next_orphan = *head;
1309 *head = iter;
1310 }
1311 brelse(bh);
1312 } 1281 }
1313 1282
1314out_unlock: 1283 *head = priv.head;
1284
1315 ocfs2_meta_unlock(orphan_dir_inode, 0); 1285 ocfs2_meta_unlock(orphan_dir_inode, 0);
1316out: 1286out:
1317 mutex_unlock(&orphan_dir_inode->i_mutex); 1287 mutex_unlock(&orphan_dir_inode->i_mutex);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index ce60aab013aa..4b32e0961568 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -282,6 +282,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
282 * prev. group desc. if we relink. */ 282 * prev. group desc. if we relink. */
283#define OCFS2_SUBALLOC_ALLOC (3) 283#define OCFS2_SUBALLOC_ALLOC (3)
284 284
285#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
286 + OCFS2_INODE_UPDATE_CREDITS)
287
285/* dinode + group descriptor update. We don't relink on free yet. */ 288/* dinode + group descriptor update. We don't relink on free yet. */
286#define OCFS2_SUBALLOC_FREE (2) 289#define OCFS2_SUBALLOC_FREE (2)
287 290
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 701e6d04ed5d..729259016c18 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -64,29 +64,6 @@
64 64
65#include "buffer_head_io.h" 65#include "buffer_head_io.h"
66 66
67#define NAMEI_RA_CHUNKS 2
68#define NAMEI_RA_BLOCKS 4
69#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
70#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
71
72static int inline ocfs2_search_dirblock(struct buffer_head *bh,
73 struct inode *dir,
74 const char *name, int namelen,
75 unsigned long offset,
76 struct ocfs2_dir_entry **res_dir);
77
78static int ocfs2_delete_entry(handle_t *handle,
79 struct inode *dir,
80 struct ocfs2_dir_entry *de_del,
81 struct buffer_head *bh);
82
83static int __ocfs2_add_entry(handle_t *handle,
84 struct inode *dir,
85 const char *name, int namelen,
86 struct inode *inode, u64 blkno,
87 struct buffer_head *parent_fe_bh,
88 struct buffer_head *insert_bh);
89
90static int ocfs2_mknod_locked(struct ocfs2_super *osb, 67static int ocfs2_mknod_locked(struct ocfs2_super *osb,
91 struct inode *dir, 68 struct inode *dir,
92 struct dentry *dentry, int mode, 69 struct dentry *dentry, int mode,
@@ -97,13 +74,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
97 struct inode **ret_inode, 74 struct inode **ret_inode,
98 struct ocfs2_alloc_context *inode_ac); 75 struct ocfs2_alloc_context *inode_ac);
99 76
100static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
101 handle_t *handle,
102 struct inode *parent,
103 struct inode *inode,
104 struct buffer_head *fe_bh,
105 struct ocfs2_alloc_context *data_ac);
106
107static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 77static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
108 struct inode **ret_orphan_dir, 78 struct inode **ret_orphan_dir,
109 struct inode *inode, 79 struct inode *inode,
@@ -123,17 +93,6 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
123 struct inode *inode, 93 struct inode *inode,
124 const char *symname); 94 const char *symname);
125 95
126static inline int ocfs2_add_entry(handle_t *handle,
127 struct dentry *dentry,
128 struct inode *inode, u64 blkno,
129 struct buffer_head *parent_fe_bh,
130 struct buffer_head *insert_bh)
131{
132 return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
133 dentry->d_name.name, dentry->d_name.len,
134 inode, blkno, parent_fe_bh, insert_bh);
135}
136
137/* An orphan dir name is an 8 byte value, printed as a hex string */ 96/* An orphan dir name is an 8 byte value, printed as a hex string */
138#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) 97#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
139 98
@@ -142,10 +101,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
142{ 101{
143 int status; 102 int status;
144 u64 blkno; 103 u64 blkno;
145 struct buffer_head *dirent_bh = NULL;
146 struct inode *inode = NULL; 104 struct inode *inode = NULL;
147 struct dentry *ret; 105 struct dentry *ret;
148 struct ocfs2_dir_entry *dirent;
149 struct ocfs2_inode_info *oi; 106 struct ocfs2_inode_info *oi;
150 107
151 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 108 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
@@ -167,9 +124,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
167 goto bail; 124 goto bail;
168 } 125 }
169 126
170 status = ocfs2_find_files_on_disk(dentry->d_name.name, 127 status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
171 dentry->d_name.len, &blkno, 128 dentry->d_name.len, &blkno);
172 dir, &dirent_bh, &dirent);
173 if (status < 0) 129 if (status < 0)
174 goto bail_add; 130 goto bail_add;
175 131
@@ -224,83 +180,12 @@ bail_unlock:
224 ocfs2_meta_unlock(dir, 0); 180 ocfs2_meta_unlock(dir, 0);
225 181
226bail: 182bail:
227 if (dirent_bh)
228 brelse(dirent_bh);
229 183
230 mlog_exit_ptr(ret); 184 mlog_exit_ptr(ret);
231 185
232 return ret; 186 return ret;
233} 187}
234 188
235static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
236 handle_t *handle,
237 struct inode *parent,
238 struct inode *inode,
239 struct buffer_head *fe_bh,
240 struct ocfs2_alloc_context *data_ac)
241{
242 int status;
243 struct buffer_head *new_bh = NULL;
244 struct ocfs2_dir_entry *de = NULL;
245
246 mlog_entry_void();
247
248 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
249 data_ac, NULL, &new_bh);
250 if (status < 0) {
251 mlog_errno(status);
252 goto bail;
253 }
254
255 ocfs2_set_new_buffer_uptodate(inode, new_bh);
256
257 status = ocfs2_journal_access(handle, inode, new_bh,
258 OCFS2_JOURNAL_ACCESS_CREATE);
259 if (status < 0) {
260 mlog_errno(status);
261 goto bail;
262 }
263 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
264
265 de = (struct ocfs2_dir_entry *) new_bh->b_data;
266 de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
267 de->name_len = 1;
268 de->rec_len =
269 cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
270 strcpy(de->name, ".");
271 ocfs2_set_de_type(de, S_IFDIR);
272 de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
273 de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
274 de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
275 OCFS2_DIR_REC_LEN(1));
276 de->name_len = 2;
277 strcpy(de->name, "..");
278 ocfs2_set_de_type(de, S_IFDIR);
279
280 status = ocfs2_journal_dirty(handle, new_bh);
281 if (status < 0) {
282 mlog_errno(status);
283 goto bail;
284 }
285
286 i_size_write(inode, inode->i_sb->s_blocksize);
287 inode->i_nlink = 2;
288 inode->i_blocks = ocfs2_inode_sector_count(inode);
289 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
290 if (status < 0) {
291 mlog_errno(status);
292 goto bail;
293 }
294
295 status = 0;
296bail:
297 if (new_bh)
298 brelse(new_bh);
299
300 mlog_exit(status);
301 return status;
302}
303
304static int ocfs2_mknod(struct inode *dir, 189static int ocfs2_mknod(struct inode *dir,
305 struct dentry *dentry, 190 struct dentry *dentry,
306 int mode, 191 int mode,
@@ -365,9 +250,8 @@ static int ocfs2_mknod(struct inode *dir,
365 goto leave; 250 goto leave;
366 } 251 }
367 252
368 /* are we making a directory? If so, reserve a cluster for his 253 /* Reserve a cluster if creating an extent based directory. */
369 * 1st extent. */ 254 if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
370 if (S_ISDIR(mode)) {
371 status = ocfs2_reserve_clusters(osb, 1, &data_ac); 255 status = ocfs2_reserve_clusters(osb, 1, &data_ac);
372 if (status < 0) { 256 if (status < 0) {
373 if (status != -ENOSPC) 257 if (status != -ENOSPC)
@@ -564,10 +448,21 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
564 cpu_to_le32(CURRENT_TIME.tv_nsec); 448 cpu_to_le32(CURRENT_TIME.tv_nsec);
565 fe->i_dtime = 0; 449 fe->i_dtime = 0;
566 450
567 fel = &fe->id2.i_list; 451 /*
568 fel->l_tree_depth = 0; 452 * If supported, directories start with inline data.
569 fel->l_next_free_rec = 0; 453 */
570 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); 454 if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
455 u16 feat = le16_to_cpu(fe->i_dyn_features);
456
457 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
458
459 fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
460 } else {
461 fel = &fe->id2.i_list;
462 fel->l_tree_depth = 0;
463 fel->l_next_free_rec = 0;
464 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
465 }
571 466
572 status = ocfs2_journal_dirty(handle, *new_fe_bh); 467 status = ocfs2_journal_dirty(handle, *new_fe_bh);
573 if (status < 0) { 468 if (status < 0) {
@@ -1048,11 +943,6 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
1048 ocfs2_meta_unlock(inode2, 1); 943 ocfs2_meta_unlock(inode2, 1);
1049} 944}
1050 945
1051#define PARENT_INO(buffer) \
1052 ((struct ocfs2_dir_entry *) \
1053 ((char *)buffer + \
1054 le16_to_cpu(((struct ocfs2_dir_entry *)buffer)->rec_len)))->inode
1055
1056static int ocfs2_rename(struct inode *old_dir, 946static int ocfs2_rename(struct inode *old_dir,
1057 struct dentry *old_dentry, 947 struct dentry *old_dentry,
1058 struct inode *new_dir, 948 struct inode *new_dir,
@@ -1070,12 +960,12 @@ static int ocfs2_rename(struct inode *old_dir,
1070 struct buffer_head *old_inode_bh = NULL; 960 struct buffer_head *old_inode_bh = NULL;
1071 struct buffer_head *insert_entry_bh = NULL; 961 struct buffer_head *insert_entry_bh = NULL;
1072 struct ocfs2_super *osb = NULL; 962 struct ocfs2_super *osb = NULL;
1073 u64 newfe_blkno; 963 u64 newfe_blkno, old_de_ino;
1074 handle_t *handle = NULL; 964 handle_t *handle = NULL;
1075 struct buffer_head *old_dir_bh = NULL; 965 struct buffer_head *old_dir_bh = NULL;
1076 struct buffer_head *new_dir_bh = NULL; 966 struct buffer_head *new_dir_bh = NULL;
1077 struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry 967 struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
1078 // and new_dentry 968 *new_de = NULL;
1079 struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above 969 struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
1080 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, 970 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1081 // this is the 1st dirent bh 971 // this is the 1st dirent bh
@@ -1159,27 +1049,35 @@ static int ocfs2_rename(struct inode *old_dir,
1159 } 1049 }
1160 1050
1161 if (S_ISDIR(old_inode->i_mode)) { 1051 if (S_ISDIR(old_inode->i_mode)) {
1162 status = -EIO; 1052 u64 old_inode_parent;
1163 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); 1053
1164 if (!old_inode_de_bh) 1054 status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
1055 old_inode, &old_inode_de_bh,
1056 &old_inode_dot_dot_de);
1057 if (status) {
1058 status = -EIO;
1165 goto bail; 1059 goto bail;
1060 }
1166 1061
1167 status = -EIO; 1062 if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
1168 if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) != 1063 status = -EIO;
1169 OCFS2_I(old_dir)->ip_blkno)
1170 goto bail; 1064 goto bail;
1171 status = -EMLINK; 1065 }
1172 if (!new_inode && new_dir!=old_dir && 1066
1173 new_dir->i_nlink >= OCFS2_LINK_MAX) 1067 if (!new_inode && new_dir != old_dir &&
1068 new_dir->i_nlink >= OCFS2_LINK_MAX) {
1069 status = -EMLINK;
1174 goto bail; 1070 goto bail;
1071 }
1175 } 1072 }
1176 1073
1177 status = -ENOENT; 1074 status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
1178 old_de_bh = ocfs2_find_entry(old_dentry->d_name.name, 1075 old_dentry->d_name.len,
1179 old_dentry->d_name.len, 1076 &old_de_ino);
1180 old_dir, &old_de); 1077 if (status) {
1181 if (!old_de_bh) 1078 status = -ENOENT;
1182 goto bail; 1079 goto bail;
1080 }
1183 1081
1184 /* 1082 /*
1185 * Check for inode number is _not_ due to possible IO errors. 1083 * Check for inode number is _not_ due to possible IO errors.
@@ -1187,8 +1085,10 @@ static int ocfs2_rename(struct inode *old_dir,
1187 * and merrily kill the link to whatever was created under the 1085 * and merrily kill the link to whatever was created under the
1188 * same name. Goodbye sticky bit ;-< 1086 * same name. Goodbye sticky bit ;-<
1189 */ 1087 */
1190 if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno) 1088 if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
1089 status = -ENOENT;
1191 goto bail; 1090 goto bail;
1091 }
1192 1092
1193 /* check if the target already exists (in which case we need 1093 /* check if the target already exists (in which case we need
1194 * to delete it */ 1094 * to delete it */
@@ -1321,20 +1221,13 @@ static int ocfs2_rename(struct inode *old_dir,
1321 } 1221 }
1322 1222
1323 /* change the dirent to point to the correct inode */ 1223 /* change the dirent to point to the correct inode */
1324 status = ocfs2_journal_access(handle, new_dir, new_de_bh, 1224 status = ocfs2_update_entry(new_dir, handle, new_de_bh,
1325 OCFS2_JOURNAL_ACCESS_WRITE); 1225 new_de, old_inode);
1326 if (status < 0) { 1226 if (status < 0) {
1327 mlog_errno(status); 1227 mlog_errno(status);
1328 goto bail; 1228 goto bail;
1329 } 1229 }
1330 new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
1331 new_de->file_type = old_de->file_type;
1332 new_dir->i_version++; 1230 new_dir->i_version++;
1333 status = ocfs2_journal_dirty(handle, new_de_bh);
1334 if (status < 0) {
1335 mlog_errno(status);
1336 goto bail;
1337 }
1338 1231
1339 if (S_ISDIR(new_inode->i_mode)) 1232 if (S_ISDIR(new_inode->i_mode))
1340 newfe->i_links_count = 0; 1233 newfe->i_links_count = 0;
@@ -1370,7 +1263,21 @@ static int ocfs2_rename(struct inode *old_dir,
1370 } else 1263 } else
1371 mlog_errno(status); 1264 mlog_errno(status);
1372 1265
1373 /* now that the name has been added to new_dir, remove the old name */ 1266 /*
1267 * Now that the name has been added to new_dir, remove the old name.
1268 *
1269 * We don't keep any directory entry context around until now
1270 * because the insert might have changed the type of directory
1271 * we're dealing with.
1272 */
1273 old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1274 old_dentry->d_name.len,
1275 old_dir, &old_de);
1276 if (!old_de_bh) {
1277 status = -EIO;
1278 goto bail;
1279 }
1280
1374 status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); 1281 status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1375 if (status < 0) { 1282 if (status < 0) {
1376 mlog_errno(status); 1283 mlog_errno(status);
@@ -1383,12 +1290,8 @@ static int ocfs2_rename(struct inode *old_dir,
1383 } 1290 }
1384 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; 1291 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1385 if (old_inode_de_bh) { 1292 if (old_inode_de_bh) {
1386 status = ocfs2_journal_access(handle, old_inode, 1293 status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
1387 old_inode_de_bh, 1294 old_inode_dot_dot_de, new_dir);
1388 OCFS2_JOURNAL_ACCESS_WRITE);
1389 PARENT_INO(old_inode_de_bh->b_data) =
1390 cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
1391 status = ocfs2_journal_dirty(handle, old_inode_de_bh);
1392 old_dir->i_nlink--; 1295 old_dir->i_nlink--;
1393 if (new_inode) { 1296 if (new_inode) {
1394 new_inode->i_nlink--; 1297 new_inode->i_nlink--;
@@ -1767,329 +1670,6 @@ bail:
1767 return status; 1670 return status;
1768} 1671}
1769 1672
1770int ocfs2_check_dir_entry(struct inode * dir,
1771 struct ocfs2_dir_entry * de,
1772 struct buffer_head * bh,
1773 unsigned long offset)
1774{
1775 const char *error_msg = NULL;
1776 const int rlen = le16_to_cpu(de->rec_len);
1777
1778 if (rlen < OCFS2_DIR_REC_LEN(1))
1779 error_msg = "rec_len is smaller than minimal";
1780 else if (rlen % 4 != 0)
1781 error_msg = "rec_len % 4 != 0";
1782 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
1783 error_msg = "rec_len is too small for name_len";
1784 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
1785 error_msg = "directory entry across blocks";
1786
1787 if (error_msg != NULL)
1788 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
1789 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
1790 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
1791 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
1792 de->name_len);
1793 return error_msg == NULL ? 1 : 0;
1794}
1795
1796/* we don't always have a dentry for what we want to add, so people
1797 * like orphan dir can call this instead.
1798 *
1799 * If you pass me insert_bh, I'll skip the search of the other dir
1800 * blocks and put the record in there.
1801 */
1802static int __ocfs2_add_entry(handle_t *handle,
1803 struct inode *dir,
1804 const char *name, int namelen,
1805 struct inode *inode, u64 blkno,
1806 struct buffer_head *parent_fe_bh,
1807 struct buffer_head *insert_bh)
1808{
1809 unsigned long offset;
1810 unsigned short rec_len;
1811 struct ocfs2_dir_entry *de, *de1;
1812 struct super_block *sb;
1813 int retval, status;
1814
1815 mlog_entry_void();
1816
1817 sb = dir->i_sb;
1818
1819 if (!namelen)
1820 return -EINVAL;
1821
1822 rec_len = OCFS2_DIR_REC_LEN(namelen);
1823 offset = 0;
1824 de = (struct ocfs2_dir_entry *) insert_bh->b_data;
1825 while (1) {
1826 BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
1827 /* These checks should've already been passed by the
1828 * prepare function, but I guess we can leave them
1829 * here anyway. */
1830 if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1831 retval = -ENOENT;
1832 goto bail;
1833 }
1834 if (ocfs2_match(namelen, name, de)) {
1835 retval = -EEXIST;
1836 goto bail;
1837 }
1838 if (((le64_to_cpu(de->inode) == 0) &&
1839 (le16_to_cpu(de->rec_len) >= rec_len)) ||
1840 (le16_to_cpu(de->rec_len) >=
1841 (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
1842 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1843 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
1844 if (retval < 0) {
1845 mlog_errno(retval);
1846 goto bail;
1847 }
1848
1849 status = ocfs2_journal_access(handle, dir, insert_bh,
1850 OCFS2_JOURNAL_ACCESS_WRITE);
1851 /* By now the buffer is marked for journaling */
1852 offset += le16_to_cpu(de->rec_len);
1853 if (le64_to_cpu(de->inode)) {
1854 de1 = (struct ocfs2_dir_entry *)((char *) de +
1855 OCFS2_DIR_REC_LEN(de->name_len));
1856 de1->rec_len =
1857 cpu_to_le16(le16_to_cpu(de->rec_len) -
1858 OCFS2_DIR_REC_LEN(de->name_len));
1859 de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1860 de = de1;
1861 }
1862 de->file_type = OCFS2_FT_UNKNOWN;
1863 if (blkno) {
1864 de->inode = cpu_to_le64(blkno);
1865 ocfs2_set_de_type(de, inode->i_mode);
1866 } else
1867 de->inode = 0;
1868 de->name_len = namelen;
1869 memcpy(de->name, name, namelen);
1870
1871 dir->i_version++;
1872 status = ocfs2_journal_dirty(handle, insert_bh);
1873 retval = 0;
1874 goto bail;
1875 }
1876 offset += le16_to_cpu(de->rec_len);
1877 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1878 }
1879
1880 /* when you think about it, the assert above should prevent us
1881 * from ever getting here. */
1882 retval = -ENOSPC;
1883bail:
1884
1885 mlog_exit(retval);
1886 return retval;
1887}
1888
1889
1890/*
1891 * ocfs2_delete_entry deletes a directory entry by merging it with the
1892 * previous entry
1893 */
1894static int ocfs2_delete_entry(handle_t *handle,
1895 struct inode *dir,
1896 struct ocfs2_dir_entry *de_del,
1897 struct buffer_head *bh)
1898{
1899 struct ocfs2_dir_entry *de, *pde;
1900 int i, status = -ENOENT;
1901
1902 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1903
1904 i = 0;
1905 pde = NULL;
1906 de = (struct ocfs2_dir_entry *) bh->b_data;
1907 while (i < bh->b_size) {
1908 if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1909 status = -EIO;
1910 mlog_errno(status);
1911 goto bail;
1912 }
1913 if (de == de_del) {
1914 status = ocfs2_journal_access(handle, dir, bh,
1915 OCFS2_JOURNAL_ACCESS_WRITE);
1916 if (status < 0) {
1917 status = -EIO;
1918 mlog_errno(status);
1919 goto bail;
1920 }
1921 if (pde)
1922 pde->rec_len =
1923 cpu_to_le16(le16_to_cpu(pde->rec_len) +
1924 le16_to_cpu(de->rec_len));
1925 else
1926 de->inode = 0;
1927 dir->i_version++;
1928 status = ocfs2_journal_dirty(handle, bh);
1929 goto bail;
1930 }
1931 i += le16_to_cpu(de->rec_len);
1932 pde = de;
1933 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1934 }
1935bail:
1936 mlog_exit(status);
1937 return status;
1938}
1939
1940/*
1941 * Returns 0 if not found, -1 on failure, and 1 on success
1942 */
1943static int inline ocfs2_search_dirblock(struct buffer_head *bh,
1944 struct inode *dir,
1945 const char *name, int namelen,
1946 unsigned long offset,
1947 struct ocfs2_dir_entry **res_dir)
1948{
1949 struct ocfs2_dir_entry *de;
1950 char *dlimit, *de_buf;
1951 int de_len;
1952 int ret = 0;
1953
1954 mlog_entry_void();
1955
1956 de_buf = bh->b_data;
1957 dlimit = de_buf + dir->i_sb->s_blocksize;
1958
1959 while (de_buf < dlimit) {
1960 /* this code is executed quadratically often */
1961 /* do minimal checking `by hand' */
1962
1963 de = (struct ocfs2_dir_entry *) de_buf;
1964
1965 if (de_buf + namelen <= dlimit &&
1966 ocfs2_match(namelen, name, de)) {
1967 /* found a match - just to be sure, do a full check */
1968 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
1969 ret = -1;
1970 goto bail;
1971 }
1972 *res_dir = de;
1973 ret = 1;
1974 goto bail;
1975 }
1976
1977 /* prevent looping on a bad block */
1978 de_len = le16_to_cpu(de->rec_len);
1979 if (de_len <= 0) {
1980 ret = -1;
1981 goto bail;
1982 }
1983
1984 de_buf += de_len;
1985 offset += de_len;
1986 }
1987
1988bail:
1989 mlog_exit(ret);
1990 return ret;
1991}
1992
1993struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
1994 struct inode *dir,
1995 struct ocfs2_dir_entry **res_dir)
1996{
1997 struct super_block *sb;
1998 struct buffer_head *bh_use[NAMEI_RA_SIZE];
1999 struct buffer_head *bh, *ret = NULL;
2000 unsigned long start, block, b;
2001 int ra_max = 0; /* Number of bh's in the readahead
2002 buffer, bh_use[] */
2003 int ra_ptr = 0; /* Current index into readahead
2004 buffer */
2005 int num = 0;
2006 int nblocks, i, err;
2007
2008 mlog_entry_void();
2009
2010 *res_dir = NULL;
2011 sb = dir->i_sb;
2012
2013 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2014 start = OCFS2_I(dir)->ip_dir_start_lookup;
2015 if (start >= nblocks)
2016 start = 0;
2017 block = start;
2018
2019restart:
2020 do {
2021 /*
2022 * We deal with the read-ahead logic here.
2023 */
2024 if (ra_ptr >= ra_max) {
2025 /* Refill the readahead buffer */
2026 ra_ptr = 0;
2027 b = block;
2028 for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
2029 /*
2030 * Terminate if we reach the end of the
2031 * directory and must wrap, or if our
2032 * search has finished at this block.
2033 */
2034 if (b >= nblocks || (num && block == start)) {
2035 bh_use[ra_max] = NULL;
2036 break;
2037 }
2038 num++;
2039
2040 bh = ocfs2_bread(dir, b++, &err, 1);
2041 bh_use[ra_max] = bh;
2042 }
2043 }
2044 if ((bh = bh_use[ra_ptr++]) == NULL)
2045 goto next;
2046 wait_on_buffer(bh);
2047 if (!buffer_uptodate(bh)) {
2048 /* read error, skip block & hope for the best */
2049 ocfs2_error(dir->i_sb, "reading directory %llu, "
2050 "offset %lu\n",
2051 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2052 block);
2053 brelse(bh);
2054 goto next;
2055 }
2056 i = ocfs2_search_dirblock(bh, dir, name, namelen,
2057 block << sb->s_blocksize_bits,
2058 res_dir);
2059 if (i == 1) {
2060 OCFS2_I(dir)->ip_dir_start_lookup = block;
2061 ret = bh;
2062 goto cleanup_and_exit;
2063 } else {
2064 brelse(bh);
2065 if (i < 0)
2066 goto cleanup_and_exit;
2067 }
2068 next:
2069 if (++block >= nblocks)
2070 block = 0;
2071 } while (block != start);
2072
2073 /*
2074 * If the directory has grown while we were searching, then
2075 * search the last part of the directory before giving up.
2076 */
2077 block = nblocks;
2078 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2079 if (block < nblocks) {
2080 start = 0;
2081 goto restart;
2082 }
2083
2084cleanup_and_exit:
2085 /* Clean up the read-ahead blocks */
2086 for (; ra_ptr < ra_max; ra_ptr++)
2087 brelse(bh_use[ra_ptr]);
2088
2089 mlog_exit_ptr(ret);
2090 return ret;
2091}
2092
2093static int ocfs2_blkno_stringify(u64 blkno, char *name) 1673static int ocfs2_blkno_stringify(u64 blkno, char *name)
2094{ 1674{
2095 int status, namelen; 1675 int status, namelen;
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 0975c7b7212b..688aef64c879 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -30,29 +30,10 @@ extern const struct inode_operations ocfs2_dir_iops;
30 30
31struct dentry *ocfs2_get_parent(struct dentry *child); 31struct dentry *ocfs2_get_parent(struct dentry *child);
32 32
33int ocfs2_check_dir_entry (struct inode *dir,
34 struct ocfs2_dir_entry *de,
35 struct buffer_head *bh,
36 unsigned long offset);
37struct buffer_head *ocfs2_find_entry(const char *name,
38 int namelen,
39 struct inode *dir,
40 struct ocfs2_dir_entry **res_dir);
41int ocfs2_orphan_del(struct ocfs2_super *osb, 33int ocfs2_orphan_del(struct ocfs2_super *osb,
42 handle_t *handle, 34 handle_t *handle,
43 struct inode *orphan_dir_inode, 35 struct inode *orphan_dir_inode,
44 struct inode *inode, 36 struct inode *inode,
45 struct buffer_head *orphan_dir_bh); 37 struct buffer_head *orphan_dir_bh);
46 38
47static inline int ocfs2_match(int len,
48 const char * const name,
49 struct ocfs2_dir_entry *de)
50{
51 if (len != de->name_len)
52 return 0;
53 if (!de->inode)
54 return 0;
55 return !memcmp(name, de->name, len);
56}
57
58#endif /* OCFS2_NAMEI_H */ 39#endif /* OCFS2_NAMEI_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 58307853fb4a..60a23e1906b0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -319,6 +319,13 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
319 return 0; 319 return 0;
320} 320}
321 321
322static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
323{
324 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
325 return 1;
326 return 0;
327}
328
322/* set / clear functions because cluster events can make these happen 329/* set / clear functions because cluster events can make these happen
323 * in parallel so we want the transitions to be atomic. this also 330 * in parallel so we want the transitions to be atomic. this also
324 * means that any future flags osb_flags must be protected by spinlock 331 * means that any future flags osb_flags must be protected by spinlock
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 82f8a75b207e..6ef876759a73 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -87,7 +87,8 @@
87 87
88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB 88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ 89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) 90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
91 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
91#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 92#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
92 93
93/* 94/*
@@ -111,6 +112,20 @@
111#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010 112#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010
112 113
113/* 114/*
115 * Tunefs sets this incompat flag before starting an operation which
116 * would require cleanup on abort. This is done to protect users from
117 * inadvertently mounting the fs after an aborted run without
118 * fsck-ing.
119 *
120 * s_tunefs_flags on the super block describes precisely which
121 * operations were in progress.
122 */
123#define OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG 0x0020
124
125/* Support for data packed into inode blocks */
126#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
127
128/*
114 * backup superblock flag is used to indicate that this volume 129 * backup superblock flag is used to indicate that this volume
115 * has backup superblocks. 130 * has backup superblocks.
116 */ 131 */
@@ -130,6 +145,11 @@
130#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6 145#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6
131 146
132/* 147/*
148 * Flags on ocfs2_super_block.s_tunefs_flags
149 */
150#define OCFS2_TUNEFS_INPROG_REMOVE_SLOT 0x0001 /* Removing slots */
151
152/*
133 * Flags on ocfs2_dinode.i_flags 153 * Flags on ocfs2_dinode.i_flags
134 */ 154 */
135#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */ 155#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */
@@ -146,6 +166,17 @@
146#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ 166#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
147#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ 167#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
148 168
169/*
170 * Flags on ocfs2_dinode.i_dyn_features
171 *
172 * These can change much more often than i_flags. When adding flags,
173 * keep in mind that i_dyn_features is only 16 bits wide.
174 */
175#define OCFS2_INLINE_DATA_FL (0x0001) /* Data stored in inode block */
176#define OCFS2_HAS_XATTR_FL (0x0002)
177#define OCFS2_INLINE_XATTR_FL (0x0004)
178#define OCFS2_INDEXED_DIR_FL (0x0008)
179
149/* Inode attributes, keep in sync with EXT2 */ 180/* Inode attributes, keep in sync with EXT2 */
150#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ 181#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
151#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ 182#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
@@ -447,8 +478,8 @@ struct ocfs2_super_block {
447 __le32 s_clustersize_bits; /* Clustersize for this fs */ 478 __le32 s_clustersize_bits; /* Clustersize for this fs */
448/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts 479/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
449 before tunefs required */ 480 before tunefs required */
450 __le16 s_reserved1; 481 __le16 s_tunefs_flag;
451 __le32 s_reserved2; 482 __le32 s_reserved1;
452 __le64 s_first_cluster_group; /* Block offset of 1st cluster 483 __le64 s_first_cluster_group; /* Block offset of 1st cluster
453 * group header */ 484 * group header */
454/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ 485/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
@@ -471,6 +502,19 @@ struct ocfs2_local_alloc
471}; 502};
472 503
473/* 504/*
505 * Data-in-inode header. This is only used if i_dyn_features has
506 * OCFS2_INLINE_DATA_FL set.
507 */
508struct ocfs2_inline_data
509{
510/*00*/ __le16 id_count; /* Number of bytes that can be used
511 * for data, starting at id_data */
512 __le16 id_reserved0;
513 __le32 id_reserved1;
514 __u8 id_data[0]; /* Start of user data */
515};
516
517/*
474 * On disk inode for OCFS2 518 * On disk inode for OCFS2
475 */ 519 */
476struct ocfs2_dinode { 520struct ocfs2_dinode {
@@ -502,7 +546,7 @@ struct ocfs2_dinode {
502 __le32 i_attr; 546 __le32 i_attr;
503 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL 547 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
504 was set in i_flags */ 548 was set in i_flags */
505 __le16 i_reserved1; 549 __le16 i_dyn_features;
506/*70*/ __le64 i_reserved2[8]; 550/*70*/ __le64 i_reserved2[8];
507/*B8*/ union { 551/*B8*/ union {
508 __le64 i_pad1; /* Generic way to refer to this 552 __le64 i_pad1; /* Generic way to refer to this
@@ -528,6 +572,7 @@ struct ocfs2_dinode {
528 struct ocfs2_chain_list i_chain; 572 struct ocfs2_chain_list i_chain;
529 struct ocfs2_extent_list i_list; 573 struct ocfs2_extent_list i_list;
530 struct ocfs2_truncate_log i_dealloc; 574 struct ocfs2_truncate_log i_dealloc;
575 struct ocfs2_inline_data i_data;
531 __u8 i_symlink[0]; 576 __u8 i_symlink[0];
532 } id2; 577 } id2;
533/* Actual on-disk size is one block */ 578/* Actual on-disk size is one block */
@@ -577,6 +622,12 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
577 offsetof(struct ocfs2_dinode, id2.i_symlink); 622 offsetof(struct ocfs2_dinode, id2.i_symlink);
578} 623}
579 624
625static inline int ocfs2_max_inline_data(struct super_block *sb)
626{
627 return sb->s_blocksize -
628 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
629}
630
580static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) 631static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
581{ 632{
582 int size; 633 int size;
@@ -656,6 +707,11 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
656 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); 707 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
657} 708}
658 709
710static inline int ocfs2_max_inline_data(int blocksize)
711{
712 return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data);
713}
714
659static inline int ocfs2_extent_recs_per_inode(int blocksize) 715static inline int ocfs2_extent_recs_per_inode(int blocksize)
660{ 716{
661 int size; 717 int size;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c034b5129c1e..0e2a1b45bf92 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -39,6 +39,7 @@
39#include <linux/parser.h> 39#include <linux/parser.h>
40#include <linux/crc32.h> 40#include <linux/crc32.h>
41#include <linux/debugfs.h> 41#include <linux/debugfs.h>
42#include <linux/mount.h>
42 43
43#include <cluster/nodemanager.h> 44#include <cluster/nodemanager.h>
44 45
@@ -91,6 +92,7 @@ struct mount_options
91static int ocfs2_parse_options(struct super_block *sb, char *options, 92static int ocfs2_parse_options(struct super_block *sb, char *options,
92 struct mount_options *mopt, 93 struct mount_options *mopt,
93 int is_remount); 94 int is_remount);
95static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
94static void ocfs2_put_super(struct super_block *sb); 96static void ocfs2_put_super(struct super_block *sb);
95static int ocfs2_mount_volume(struct super_block *sb); 97static int ocfs2_mount_volume(struct super_block *sb);
96static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 98static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -105,7 +107,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait);
105 107
106static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); 108static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
107static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); 109static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
108static int ocfs2_release_system_inodes(struct ocfs2_super *osb); 110static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
109static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); 111static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
110static int ocfs2_check_volume(struct ocfs2_super *osb); 112static int ocfs2_check_volume(struct ocfs2_super *osb);
111static int ocfs2_verify_volume(struct ocfs2_dinode *di, 113static int ocfs2_verify_volume(struct ocfs2_dinode *di,
@@ -133,6 +135,7 @@ static const struct super_operations ocfs2_sops = {
133 .write_super = ocfs2_write_super, 135 .write_super = ocfs2_write_super,
134 .put_super = ocfs2_put_super, 136 .put_super = ocfs2_put_super,
135 .remount_fs = ocfs2_remount, 137 .remount_fs = ocfs2_remount,
138 .show_options = ocfs2_show_options,
136}; 139};
137 140
138enum { 141enum {
@@ -177,7 +180,7 @@ static void ocfs2_write_super(struct super_block *sb)
177 180
178static int ocfs2_sync_fs(struct super_block *sb, int wait) 181static int ocfs2_sync_fs(struct super_block *sb, int wait)
179{ 182{
180 int status = 0; 183 int status;
181 tid_t target; 184 tid_t target;
182 struct ocfs2_super *osb = OCFS2_SB(sb); 185 struct ocfs2_super *osb = OCFS2_SB(sb);
183 186
@@ -275,9 +278,9 @@ bail:
275 return status; 278 return status;
276} 279}
277 280
278static int ocfs2_release_system_inodes(struct ocfs2_super *osb) 281static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
279{ 282{
280 int status = 0, i; 283 int i;
281 struct inode *inode; 284 struct inode *inode;
282 285
283 mlog_entry_void(); 286 mlog_entry_void();
@@ -302,8 +305,7 @@ static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
302 osb->root_inode = NULL; 305 osb->root_inode = NULL;
303 } 306 }
304 307
305 mlog_exit(status); 308 mlog_exit(0);
306 return status;
307} 309}
308 310
309/* We're allocating fs objects, use GFP_NOFS */ 311/* We're allocating fs objects, use GFP_NOFS */
@@ -453,7 +455,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
453 struct buffer_head **bh, 455 struct buffer_head **bh,
454 int *sector_size) 456 int *sector_size)
455{ 457{
456 int status = 0, tmpstat; 458 int status, tmpstat;
457 struct ocfs1_vol_disk_hdr *hdr; 459 struct ocfs1_vol_disk_hdr *hdr;
458 struct ocfs2_dinode *di; 460 struct ocfs2_dinode *di;
459 int blksize; 461 int blksize;
@@ -830,6 +832,41 @@ bail:
830 return status; 832 return status;
831} 833}
832 834
835static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
836{
837 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
838 unsigned long opts = osb->s_mount_opt;
839
840 if (opts & OCFS2_MOUNT_HB_LOCAL)
841 seq_printf(s, ",_netdev,heartbeat=local");
842 else
843 seq_printf(s, ",heartbeat=none");
844
845 if (opts & OCFS2_MOUNT_NOINTR)
846 seq_printf(s, ",nointr");
847
848 if (opts & OCFS2_MOUNT_DATA_WRITEBACK)
849 seq_printf(s, ",data=writeback");
850 else
851 seq_printf(s, ",data=ordered");
852
853 if (opts & OCFS2_MOUNT_BARRIER)
854 seq_printf(s, ",barrier=1");
855
856 if (opts & OCFS2_MOUNT_ERRORS_PANIC)
857 seq_printf(s, ",errors=panic");
858 else
859 seq_printf(s, ",errors=remount-ro");
860
861 if (osb->preferred_slot != OCFS2_INVALID_SLOT)
862 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
863
864 if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
865 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
866
867 return 0;
868}
869
833static int __init ocfs2_init(void) 870static int __init ocfs2_init(void)
834{ 871{
835 int status; 872 int status;
@@ -1209,12 +1246,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1209 tmp = ocfs2_request_umount_vote(osb); 1246 tmp = ocfs2_request_umount_vote(osb);
1210 if (tmp < 0) 1247 if (tmp < 0)
1211 mlog_errno(tmp); 1248 mlog_errno(tmp);
1249 }
1212 1250
1213 if (osb->slot_num != OCFS2_INVALID_SLOT) 1251 if (osb->slot_num != OCFS2_INVALID_SLOT)
1214 ocfs2_put_slot(osb); 1252 ocfs2_put_slot(osb);
1215 1253
1254 if (osb->dlm)
1216 ocfs2_super_unlock(osb, 1); 1255 ocfs2_super_unlock(osb, 1);
1217 }
1218 1256
1219 ocfs2_release_system_inodes(osb); 1257 ocfs2_release_system_inodes(osb);
1220 1258
@@ -1275,7 +1313,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1275 struct buffer_head *bh, 1313 struct buffer_head *bh,
1276 int sector_size) 1314 int sector_size)
1277{ 1315{
1278 int status = 0; 1316 int status;
1279 int i, cbits, bbits; 1317 int i, cbits, bbits;
1280 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1318 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1281 struct inode *inode = NULL; 1319 struct inode *inode = NULL;
@@ -1596,7 +1634,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1596 1634
1597static int ocfs2_check_volume(struct ocfs2_super *osb) 1635static int ocfs2_check_volume(struct ocfs2_super *osb)
1598{ 1636{
1599 int status = 0; 1637 int status;
1600 int dirty; 1638 int dirty;
1601 int local; 1639 int local;
1602 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 1640 struct ocfs2_dinode *local_alloc = NULL; /* only used if we
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 5df6e35d09b1..fd2e846e3e6f 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -100,17 +100,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
100 char namebuf[40]; 100 char namebuf[40];
101 struct inode *inode = NULL; 101 struct inode *inode = NULL;
102 u64 blkno; 102 u64 blkno;
103 struct buffer_head *dirent_bh = NULL;
104 struct ocfs2_dir_entry *de = NULL;
105 int status = 0; 103 int status = 0;
106 104
107 ocfs2_sprintf_system_inode_name(namebuf, 105 ocfs2_sprintf_system_inode_name(namebuf,
108 sizeof(namebuf), 106 sizeof(namebuf),
109 type, slot); 107 type, slot);
110 108
111 status = ocfs2_find_files_on_disk(namebuf, strlen(namebuf), 109 status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
112 &blkno, osb->sys_root_inode, 110 strlen(namebuf), &blkno);
113 &dirent_bh, &de);
114 if (status < 0) { 111 if (status < 0) {
115 goto bail; 112 goto bail;
116 } 113 }
@@ -122,8 +119,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
122 goto bail; 119 goto bail;
123 } 120 }
124bail: 121bail:
125 if (dirent_bh) 122
126 brelse(dirent_bh);
127 return inode; 123 return inode;
128} 124}
129 125
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 783c57ec07d3..722e12e5acc7 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -381,10 +381,12 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
381 p->partno = part; 381 p->partno = part;
382 p->policy = disk->policy; 382 p->policy = disk->policy;
383 383
384 if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1])) 384 if (isdigit(disk->kobj.k_name[strlen(disk->kobj.k_name)-1]))
385 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part); 385 kobject_set_name(&p->kobj, "%sp%d",
386 kobject_name(&disk->kobj), part);
386 else 387 else
387 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part); 388 kobject_set_name(&p->kobj, "%s%d",
389 kobject_name(&disk->kobj),part);
388 p->kobj.parent = &disk->kobj; 390 p->kobj.parent = &disk->kobj;
389 p->kobj.ktype = &ktype_part; 391 p->kobj.ktype = &ktype_part;
390 kobject_init(&p->kobj); 392 kobject_init(&p->kobj);
@@ -477,9 +479,9 @@ void register_disk(struct gendisk *disk)
477 struct hd_struct *p; 479 struct hd_struct *p;
478 int err; 480 int err;
479 481
480 strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN); 482 kobject_set_name(&disk->kobj, "%s", disk->disk_name);
481 /* ewww... some of these buggers have / in name... */ 483 /* ewww... some of these buggers have / in name... */
482 s = strchr(disk->kobj.name, '/'); 484 s = strchr(disk->kobj.k_name, '/');
483 if (s) 485 if (s)
484 *s = '!'; 486 *s = '!';
485 if ((err = kobject_add(&disk->kobj))) 487 if ((err = kobject_add(&disk->kobj)))
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index 794118da4ef3..c95e6a62c01d 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -95,8 +95,8 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
95 * So that old Linux-Sun partitions continue to work, 95 * So that old Linux-Sun partitions continue to work,
96 * allow the VTOC to be used under the additional condition ... 96 * allow the VTOC to be used under the additional condition ...
97 */ 97 */
98 use_vtoc = use_vtoc || !(label->vtoc.sanity | 98 use_vtoc = use_vtoc || !(label->vtoc.sanity ||
99 label->vtoc.version | label->vtoc.nparts); 99 label->vtoc.version || label->vtoc.nparts);
100 spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); 100 spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
101 for (i = 0; i < nparts; i++, p++) { 101 for (i = 0; i < nparts; i++, p++) {
102 unsigned long st_sector; 102 unsigned long st_sector;
diff --git a/fs/pipe.c b/fs/pipe.c
index 6b3d91a691bf..e66ec48e95d8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -45,8 +45,7 @@ void pipe_wait(struct pipe_inode_info *pipe)
45 * Pipes are system-local resources, so sleeping on them 45 * Pipes are system-local resources, so sleeping on them
46 * is considered a noninteractive wait: 46 * is considered a noninteractive wait:
47 */ 47 */
48 prepare_to_wait(&pipe->wait, &wait, 48 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
49 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
50 if (pipe->inode) 49 if (pipe->inode)
51 mutex_unlock(&pipe->inode->i_mutex); 50 mutex_unlock(&pipe->inode->i_mutex);
52 schedule(); 51 schedule();
@@ -383,7 +382,7 @@ redo:
383 382
384 /* Signal writers asynchronously that there is more room. */ 383 /* Signal writers asynchronously that there is more room. */
385 if (do_wakeup) { 384 if (do_wakeup) {
386 wake_up_interruptible(&pipe->wait); 385 wake_up_interruptible_sync(&pipe->wait);
387 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 386 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
388 } 387 }
389 if (ret > 0) 388 if (ret > 0)
@@ -556,7 +555,7 @@ redo2:
556out: 555out:
557 mutex_unlock(&inode->i_mutex); 556 mutex_unlock(&inode->i_mutex);
558 if (do_wakeup) { 557 if (do_wakeup) {
559 wake_up_interruptible(&pipe->wait); 558 wake_up_interruptible_sync(&pipe->wait);
560 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 559 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
561 } 560 }
562 if (ret > 0) 561 if (ret > 0)
@@ -650,7 +649,7 @@ pipe_release(struct inode *inode, int decr, int decw)
650 if (!pipe->readers && !pipe->writers) { 649 if (!pipe->readers && !pipe->writers) {
651 free_pipe_info(inode); 650 free_pipe_info(inode);
652 } else { 651 } else {
653 wake_up_interruptible(&pipe->wait); 652 wake_up_interruptible_sync(&pipe->wait);
654 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 653 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
655 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 654 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
656 } 655 }
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bce38e3f06cb..ebaba0213546 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -11,6 +11,7 @@ proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o proc_misc.o 11 proc_tty.o proc_misc.o
12 12
13proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 13proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
14proc-$(CONFIG_NET) += proc_net.o
14proc-$(CONFIG_PROC_KCORE) += kcore.o 15proc-$(CONFIG_PROC_KCORE) += kcore.o
15proc-$(CONFIG_PROC_VMCORE) += vmcore.o 16proc-$(CONFIG_PROC_VMCORE) += vmcore.o
16proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o 17proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ee4814dd98f9..27b59f5f3bd1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -370,6 +370,11 @@ static cputime_t task_stime(struct task_struct *p)
370} 370}
371#endif 371#endif
372 372
373static cputime_t task_gtime(struct task_struct *p)
374{
375 return p->gtime;
376}
377
373static int do_task_stat(struct task_struct *task, char *buffer, int whole) 378static int do_task_stat(struct task_struct *task, char *buffer, int whole)
374{ 379{
375 unsigned long vsize, eip, esp, wchan = ~0UL; 380 unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -385,6 +390,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
385 unsigned long cmin_flt = 0, cmaj_flt = 0; 390 unsigned long cmin_flt = 0, cmaj_flt = 0;
386 unsigned long min_flt = 0, maj_flt = 0; 391 unsigned long min_flt = 0, maj_flt = 0;
387 cputime_t cutime, cstime, utime, stime; 392 cputime_t cutime, cstime, utime, stime;
393 cputime_t cgtime, gtime;
388 unsigned long rsslim = 0; 394 unsigned long rsslim = 0;
389 char tcomm[sizeof(task->comm)]; 395 char tcomm[sizeof(task->comm)];
390 unsigned long flags; 396 unsigned long flags;
@@ -403,6 +409,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
403 sigemptyset(&sigign); 409 sigemptyset(&sigign);
404 sigemptyset(&sigcatch); 410 sigemptyset(&sigcatch);
405 cutime = cstime = utime = stime = cputime_zero; 411 cutime = cstime = utime = stime = cputime_zero;
412 cgtime = gtime = cputime_zero;
406 413
407 rcu_read_lock(); 414 rcu_read_lock();
408 if (lock_task_sighand(task, &flags)) { 415 if (lock_task_sighand(task, &flags)) {
@@ -420,6 +427,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
420 cmaj_flt = sig->cmaj_flt; 427 cmaj_flt = sig->cmaj_flt;
421 cutime = sig->cutime; 428 cutime = sig->cutime;
422 cstime = sig->cstime; 429 cstime = sig->cstime;
430 cgtime = sig->cgtime;
423 rsslim = sig->rlim[RLIMIT_RSS].rlim_cur; 431 rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
424 432
425 /* add up live thread stats at the group level */ 433 /* add up live thread stats at the group level */
@@ -430,6 +438,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
430 maj_flt += t->maj_flt; 438 maj_flt += t->maj_flt;
431 utime = cputime_add(utime, task_utime(t)); 439 utime = cputime_add(utime, task_utime(t));
432 stime = cputime_add(stime, task_stime(t)); 440 stime = cputime_add(stime, task_stime(t));
441 gtime = cputime_add(gtime, task_gtime(t));
433 t = next_thread(t); 442 t = next_thread(t);
434 } while (t != task); 443 } while (t != task);
435 444
@@ -437,6 +446,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
437 maj_flt += sig->maj_flt; 446 maj_flt += sig->maj_flt;
438 utime = cputime_add(utime, sig->utime); 447 utime = cputime_add(utime, sig->utime);
439 stime = cputime_add(stime, sig->stime); 448 stime = cputime_add(stime, sig->stime);
449 gtime = cputime_add(gtime, sig->gtime); /* plain assignment: "+=" would add gtime in twice */
440 } 450 }
441 451
442 sid = signal_session(sig); 452 sid = signal_session(sig);
@@ -454,6 +464,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
454 maj_flt = task->maj_flt; 464 maj_flt = task->maj_flt;
455 utime = task_utime(task); 465 utime = task_utime(task);
456 stime = task_stime(task); 466 stime = task_stime(task);
467 gtime = task_gtime(task);
457 } 468 }
458 469
459 /* scale priority and nice values from timeslices to -20..20 */ 470 /* scale priority and nice values from timeslices to -20..20 */
@@ -471,7 +482,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
471 482
472 res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ 483 res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
473%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 484%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
474%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu\n", 485%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
475 task->pid, 486 task->pid,
476 tcomm, 487 tcomm,
477 state, 488 state,
@@ -516,7 +527,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
516 task_cpu(task), 527 task_cpu(task),
517 task->rt_priority, 528 task->rt_priority,
518 task->policy, 529 task->policy,
519 (unsigned long long)delayacct_blkio_ticks(task)); 530 (unsigned long long)delayacct_blkio_ticks(task),
531 cputime_to_clock_t(gtime),
532 cputime_to_clock_t(cgtime));
520 if (mm) 533 if (mm)
521 mmput(mm); 534 mmput(mm);
522 return res; 535 return res;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 19489b0d5554..e5d0953d4db1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -304,7 +304,7 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer)
304 return sprintf(buffer, "%llu %llu %lu\n", 304 return sprintf(buffer, "%llu %llu %lu\n",
305 task->sched_info.cpu_time, 305 task->sched_info.cpu_time,
306 task->sched_info.run_delay, 306 task->sched_info.run_delay,
307 task->sched_info.pcnt); 307 task->sched_info.pcount);
308} 308}
309#endif 309#endif
310 310
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index b215c3524fa6..1820eb2ef762 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -16,6 +16,11 @@ extern int proc_sys_init(void);
16#else 16#else
17static inline void proc_sys_init(void) { } 17static inline void proc_sys_init(void) { }
18#endif 18#endif
19#ifdef CONFIG_NET
20extern int proc_net_init(void);
21#else
22static inline int proc_net_init(void) { return 0; }
23#endif
19 24
20struct vmalloc_info { 25struct vmalloc_info {
21 unsigned long used; 26 unsigned long used;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c9d6d5f400ad..0071939c0095 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -442,6 +442,7 @@ static int show_stat(struct seq_file *p, void *v)
442 int i; 442 int i;
443 unsigned long jif; 443 unsigned long jif;
444 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 444 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
445 cputime64_t guest;
445 u64 sum = 0; 446 u64 sum = 0;
446 struct timespec boottime; 447 struct timespec boottime;
447 unsigned int *per_irq_sum; 448 unsigned int *per_irq_sum;
@@ -452,6 +453,7 @@ static int show_stat(struct seq_file *p, void *v)
452 453
453 user = nice = system = idle = iowait = 454 user = nice = system = idle = iowait =
454 irq = softirq = steal = cputime64_zero; 455 irq = softirq = steal = cputime64_zero;
456 guest = cputime64_zero;
455 getboottime(&boottime); 457 getboottime(&boottime);
456 jif = boottime.tv_sec; 458 jif = boottime.tv_sec;
457 459
@@ -466,6 +468,7 @@ static int show_stat(struct seq_file *p, void *v)
466 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 468 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
467 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 469 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
468 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 470 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
471 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
469 for (j = 0; j < NR_IRQS; j++) { 472 for (j = 0; j < NR_IRQS; j++) {
470 unsigned int temp = kstat_cpu(i).irqs[j]; 473 unsigned int temp = kstat_cpu(i).irqs[j];
471 sum += temp; 474 sum += temp;
@@ -473,7 +476,7 @@ static int show_stat(struct seq_file *p, void *v)
473 } 476 }
474 } 477 }
475 478
476 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu\n", 479 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
477 (unsigned long long)cputime64_to_clock_t(user), 480 (unsigned long long)cputime64_to_clock_t(user),
478 (unsigned long long)cputime64_to_clock_t(nice), 481 (unsigned long long)cputime64_to_clock_t(nice),
479 (unsigned long long)cputime64_to_clock_t(system), 482 (unsigned long long)cputime64_to_clock_t(system),
@@ -481,7 +484,8 @@ static int show_stat(struct seq_file *p, void *v)
481 (unsigned long long)cputime64_to_clock_t(iowait), 484 (unsigned long long)cputime64_to_clock_t(iowait),
482 (unsigned long long)cputime64_to_clock_t(irq), 485 (unsigned long long)cputime64_to_clock_t(irq),
483 (unsigned long long)cputime64_to_clock_t(softirq), 486 (unsigned long long)cputime64_to_clock_t(softirq),
484 (unsigned long long)cputime64_to_clock_t(steal)); 487 (unsigned long long)cputime64_to_clock_t(steal),
488 (unsigned long long)cputime64_to_clock_t(guest));
485 for_each_online_cpu(i) { 489 for_each_online_cpu(i) {
486 490
487 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 491 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -493,7 +497,9 @@ static int show_stat(struct seq_file *p, void *v)
493 irq = kstat_cpu(i).cpustat.irq; 497 irq = kstat_cpu(i).cpustat.irq;
494 softirq = kstat_cpu(i).cpustat.softirq; 498 softirq = kstat_cpu(i).cpustat.softirq;
495 steal = kstat_cpu(i).cpustat.steal; 499 steal = kstat_cpu(i).cpustat.steal;
496 seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n", 500 guest = kstat_cpu(i).cpustat.guest;
501 seq_printf(p,
502 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
497 i, 503 i,
498 (unsigned long long)cputime64_to_clock_t(user), 504 (unsigned long long)cputime64_to_clock_t(user),
499 (unsigned long long)cputime64_to_clock_t(nice), 505 (unsigned long long)cputime64_to_clock_t(nice),
@@ -502,7 +508,8 @@ static int show_stat(struct seq_file *p, void *v)
502 (unsigned long long)cputime64_to_clock_t(iowait), 508 (unsigned long long)cputime64_to_clock_t(iowait),
503 (unsigned long long)cputime64_to_clock_t(irq), 509 (unsigned long long)cputime64_to_clock_t(irq),
504 (unsigned long long)cputime64_to_clock_t(softirq), 510 (unsigned long long)cputime64_to_clock_t(softirq),
505 (unsigned long long)cputime64_to_clock_t(steal)); 511 (unsigned long long)cputime64_to_clock_t(steal),
512 (unsigned long long)cputime64_to_clock_t(guest));
506 } 513 }
507 seq_printf(p, "intr %llu", (unsigned long long)sum); 514 seq_printf(p, "intr %llu", (unsigned long long)sum);
508 515
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
new file mode 100644
index 000000000000..2e91fb756e9a
--- /dev/null
+++ b/fs/proc/proc_net.c
@@ -0,0 +1,200 @@
1/*
2 * linux/fs/proc/proc_net.c
3 *
4 * Copyright (C) 2007
5 *
6 * Author: Eric Biederman <ebiederm@xmission.com>
7 *
8 * proc net directory handling functions
9 */
10
11#include <asm/uaccess.h>
12
13#include <linux/errno.h>
14#include <linux/time.h>
15#include <linux/proc_fs.h>
16#include <linux/stat.h>
17#include <linux/init.h>
18#include <linux/sched.h>
19#include <linux/module.h>
20#include <linux/bitops.h>
21#include <linux/smp_lock.h>
22#include <linux/mount.h>
23#include <linux/nsproxy.h>
24#include <net/net_namespace.h>
25
26#include "internal.h"
27
28
29struct proc_dir_entry *proc_net_create(struct net *net,
30 const char *name, mode_t mode, get_info_t *get_info)
31{
32 return create_proc_info_entry(name,mode, net->proc_net, get_info);
33}
34EXPORT_SYMBOL_GPL(proc_net_create);
35
36struct proc_dir_entry *proc_net_fops_create(struct net *net,
37 const char *name, mode_t mode, const struct file_operations *fops)
38{
39 struct proc_dir_entry *res;
40
41 res = create_proc_entry(name, mode, net->proc_net);
42 if (res)
43 res->proc_fops = fops;
44 return res;
45}
46EXPORT_SYMBOL_GPL(proc_net_fops_create);
47
48void proc_net_remove(struct net *net, const char *name)
49{
50 remove_proc_entry(name, net->proc_net);
51}
52EXPORT_SYMBOL_GPL(proc_net_remove);
53
/*
 * get_proc_net - look up the net attached to @inode's proc entry.
 *
 * Returns whatever maybe_get_net() yields for PDE_NET() of that entry.
 */
struct net *get_proc_net(const struct inode *inode)
{
	struct proc_dir_entry *pde = PDE(inode);

	return maybe_get_net(PDE_NET(pde));
}
EXPORT_SYMBOL_GPL(get_proc_net);
59
/* The "net" entry made by proc_mkdir("net", NULL) in proc_net_init(). */
60static struct proc_dir_entry *proc_net_shadow;
61
62static struct dentry *proc_net_shadow_dentry(struct dentry *parent,
63 struct proc_dir_entry *de)
64{
65 struct dentry *shadow = NULL;
66 struct inode *inode;
67 if (!de)
68 goto out;
69 de_get(de);
70 inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de);
71 if (!inode)
72 goto out_de_put;
73 shadow = d_alloc_name(parent, de->name);
74 if (!shadow)
75 goto out_iput;
76 shadow->d_op = parent->d_op; /* proc_dentry_operations */
77 d_instantiate(shadow, inode);
78out:
79 return shadow;
80out_iput:
81 iput(inode);
82out_de_put:
83 de_put(de);
84 goto out;
85}
86
87static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd)
88{
89 struct net *net = current->nsproxy->net_ns;
90 struct dentry *shadow;
91 shadow = proc_net_shadow_dentry(parent, net->proc_net);
92 if (!shadow)
93 return ERR_PTR(-ENOENT);
94
95 dput(nd->dentry);
96 /* My dentry count is 1 and that should be enough as the
97 * shadow dentry is thrown away immediately.
98 */
99 nd->dentry = shadow;
100 return NULL;
101}
102
103static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry,
104 struct nameidata *nd)
105{
106 struct net *net = current->nsproxy->net_ns;
107 struct dentry *shadow;
108
109 shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net);
110 if (!shadow)
111 return ERR_PTR(-ENOENT);
112
113 dput(nd->dentry);
114 nd->dentry = shadow;
115
116 return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd);
117}
118
119static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr)
120{
121 struct net *net = current->nsproxy->net_ns;
122 struct dentry *shadow;
123 int ret;
124
125 shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net);
126 if (!shadow)
127 return -ENOENT;
128 ret = shadow->d_inode->i_op->setattr(shadow, iattr);
129 dput(shadow);
130 return ret;
131}
132
/* File operations that proc_net_init() installs as proc_net_shadow->proc_fops. */
133static const struct file_operations proc_net_dir_operations = {
134 .read = generic_read_dir,
135};
136
/*
 * Inode operations that proc_net_init() installs as
 * proc_net_shadow->proc_iops; each handler works on a shadow dentry built
 * from current->nsproxy->net_ns->proc_net.
 */
137static struct inode_operations proc_net_dir_inode_operations = {
138 .follow_link = proc_net_follow_link,
139 .lookup = proc_net_lookup,
140 .setattr = proc_net_setattr,
141};
142
143static __net_init int proc_net_ns_init(struct net *net)
144{
145 struct proc_dir_entry *root, *netd, *net_statd;
146 int err;
147
148 err = -ENOMEM;
149 root = kzalloc(sizeof(*root), GFP_KERNEL);
150 if (!root)
151 goto out;
152
153 err = -EEXIST;
154 netd = proc_mkdir("net", root);
155 if (!netd)
156 goto free_root;
157
158 err = -EEXIST;
159 net_statd = proc_mkdir("stat", netd);
160 if (!net_statd)
161 goto free_net;
162
163 root->data = net;
164 netd->data = net;
165 net_statd->data = net;
166
167 net->proc_net_root = root;
168 net->proc_net = netd;
169 net->proc_net_stat = net_statd;
170 err = 0;
171
172out:
173 return err;
174free_net:
175 remove_proc_entry("net", root);
176free_root:
177 kfree(root);
178 goto out;
179}
180
181static __net_exit void proc_net_ns_exit(struct net *net)
182{
183 remove_proc_entry("stat", net->proc_net);
184 remove_proc_entry("net", net->proc_net_root);
185 kfree(net->proc_net_root);
186}
187
/* Init/exit hooks that proc_net_init() passes to register_pernet_subsys(). */
188struct pernet_operations __net_initdata proc_net_ns_ops = {
189 .init = proc_net_ns_init,
190 .exit = proc_net_ns_exit,
191};
192
193int __init proc_net_init(void)
194{
195 proc_net_shadow = proc_mkdir("net", NULL);
196 proc_net_shadow->proc_iops = &proc_net_dir_inode_operations;
197 proc_net_shadow->proc_fops = &proc_net_dir_operations;
198
199 return register_pernet_subsys(&proc_net_ns_ops);
200}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 41f17037f738..cf3046638b09 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -21,7 +21,7 @@
21 21
22#include "internal.h" 22#include "internal.h"
23 23
24struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; 24struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
25 25
26static int proc_get_sb(struct file_system_type *fs_type, 26static int proc_get_sb(struct file_system_type *fs_type,
27 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 27 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
@@ -61,8 +61,8 @@ void __init proc_root_init(void)
61 return; 61 return;
62 } 62 }
63 proc_misc_init(); 63 proc_misc_init();
64 proc_net = proc_mkdir("net", NULL); 64
65 proc_net_stat = proc_mkdir("net/stat", NULL); 65 proc_net_init();
66 66
67#ifdef CONFIG_SYSVIPC 67#ifdef CONFIG_SYSVIPC
68 proc_mkdir("sysvipc", NULL); 68 proc_mkdir("sysvipc", NULL);
@@ -159,7 +159,5 @@ EXPORT_SYMBOL(create_proc_entry);
159EXPORT_SYMBOL(remove_proc_entry); 159EXPORT_SYMBOL(remove_proc_entry);
160EXPORT_SYMBOL(proc_root); 160EXPORT_SYMBOL(proc_root);
161EXPORT_SYMBOL(proc_root_fs); 161EXPORT_SYMBOL(proc_root_fs);
162EXPORT_SYMBOL(proc_net);
163EXPORT_SYMBOL(proc_net_stat);
164EXPORT_SYMBOL(proc_bus); 162EXPORT_SYMBOL(proc_bus);
165EXPORT_SYMBOL(proc_root_driver); 163EXPORT_SYMBOL(proc_root_driver);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index bbb19be260ce..ca71c115bdaa 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -429,6 +429,39 @@ int seq_release_private(struct inode *inode, struct file *file)
429} 429}
430EXPORT_SYMBOL(seq_release_private); 430EXPORT_SYMBOL(seq_release_private);
431 431
432void *__seq_open_private(struct file *f, const struct seq_operations *ops,
433 int psize)
434{
435 int rc;
436 void *private;
437 struct seq_file *seq;
438
439 private = kzalloc(psize, GFP_KERNEL);
440 if (private == NULL)
441 goto out;
442
443 rc = seq_open(f, ops);
444 if (rc < 0)
445 goto out_free;
446
447 seq = f->private_data;
448 seq->private = private;
449 return private;
450
451out_free:
452 kfree(private);
453out:
454 return NULL;
455}
456EXPORT_SYMBOL(__seq_open_private);
457
458int seq_open_private(struct file *filp, const struct seq_operations *ops,
459 int psize)
460{
461 return __seq_open_private(filp, ops, psize) ? 0 : -ENOMEM;
462}
463EXPORT_SYMBOL(seq_open_private);
464
432int seq_putc(struct seq_file *m, char c) 465int seq_putc(struct seq_file *m, char c)
433{ 466{
434 if (m->count < m->size) { 467 if (m->count < m->size) {
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 67176af8515f..283c5720c9de 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -45,7 +45,7 @@ static LIST_HEAD(smb_servers);
45static DEFINE_SPINLOCK(servers_lock); 45static DEFINE_SPINLOCK(servers_lock);
46 46
47#define SMBIOD_DATA_READY (1<<0) 47#define SMBIOD_DATA_READY (1<<0)
48static long smbiod_flags; 48static unsigned long smbiod_flags;
49 49
50static int smbiod(void *); 50static int smbiod(void *);
51static int smbiod_start(void); 51static int smbiod_start(void);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 5afe2a26f5d8..006fc64227dd 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -1,9 +1,15 @@
1/* 1/*
2 * bin.c - binary file operations for sysfs. 2 * fs/sysfs/bin.c - sysfs binary file implementation
3 * 3 *
4 * Copyright (c) 2003 Patrick Mochel 4 * Copyright (c) 2003 Patrick Mochel
5 * Copyright (c) 2003 Matthew Wilcox 5 * Copyright (c) 2003 Matthew Wilcox
6 * Copyright (c) 2004 Silicon Graphics, Inc. 6 * Copyright (c) 2004 Silicon Graphics, Inc.
7 * Copyright (c) 2007 SUSE Linux Products GmbH
8 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
9 *
10 * This file is released under the GPLv2.
11 *
12 * Please see Documentation/filesystems/sysfs.txt for more information.
7 */ 13 */
8 14
9#undef DEBUG 15#undef DEBUG
@@ -14,9 +20,9 @@
14#include <linux/kobject.h> 20#include <linux/kobject.h>
15#include <linux/module.h> 21#include <linux/module.h>
16#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mutex.h>
17 24
18#include <asm/uaccess.h> 25#include <asm/uaccess.h>
19#include <asm/semaphore.h>
20 26
21#include "sysfs.h" 27#include "sysfs.h"
22 28
@@ -30,8 +36,8 @@ static int
30fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) 36fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
31{ 37{
32 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 38 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
33 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 39 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
34 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 40 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
35 int rc; 41 int rc;
36 42
37 /* need attr_sd for attr, its parent for kobj */ 43 /* need attr_sd for attr, its parent for kobj */
@@ -87,8 +93,8 @@ static int
87flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) 93flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
88{ 94{
89 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 95 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
90 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 96 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
91 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 97 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
92 int rc; 98 int rc;
93 99
94 /* need attr_sd for attr, its parent for kobj */ 100 /* need attr_sd for attr, its parent for kobj */
@@ -140,8 +146,8 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
140{ 146{
141 struct bin_buffer *bb = file->private_data; 147 struct bin_buffer *bb = file->private_data;
142 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 148 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
143 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 149 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
144 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 150 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
145 int rc; 151 int rc;
146 152
147 mutex_lock(&bb->mutex); 153 mutex_lock(&bb->mutex);
@@ -167,12 +173,12 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
167static int open(struct inode * inode, struct file * file) 173static int open(struct inode * inode, struct file * file)
168{ 174{
169 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 175 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
170 struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; 176 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
171 struct bin_buffer *bb = NULL; 177 struct bin_buffer *bb = NULL;
172 int error; 178 int error;
173 179
174 /* need attr_sd for attr */ 180 /* binary file operations requires both @sd and its parent */
175 if (!sysfs_get_active(attr_sd)) 181 if (!sysfs_get_active_two(attr_sd))
176 return -ENODEV; 182 return -ENODEV;
177 183
178 error = -EACCES; 184 error = -EACCES;
@@ -193,13 +199,12 @@ static int open(struct inode * inode, struct file * file)
193 mutex_init(&bb->mutex); 199 mutex_init(&bb->mutex);
194 file->private_data = bb; 200 file->private_data = bb;
195 201
196 /* open succeeded, put active reference and pin attr_sd */ 202 /* open succeeded, put active references */
197 sysfs_put_active(attr_sd); 203 sysfs_put_active_two(attr_sd);
198 sysfs_get(attr_sd);
199 return 0; 204 return 0;
200 205
201 err_out: 206 err_out:
202 sysfs_put_active(attr_sd); 207 sysfs_put_active_two(attr_sd);
203 kfree(bb); 208 kfree(bb);
204 return error; 209 return error;
205} 210}
@@ -211,7 +216,6 @@ static int release(struct inode * inode, struct file * file)
211 216
212 if (bb->mmapped) 217 if (bb->mmapped)
213 sysfs_put_active_two(attr_sd); 218 sysfs_put_active_two(attr_sd);
214 sysfs_put(attr_sd);
215 kfree(bb->buffer); 219 kfree(bb->buffer);
216 kfree(bb); 220 kfree(bb);
217 return 0; 221 return 0;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 83e76b3813c9..9161db4d6b5c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -1,5 +1,13 @@
1/* 1/*
2 * dir.c - Operations for sysfs directories. 2 * fs/sysfs/dir.c - sysfs core and dir operation implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
3 */ 11 */
4 12
5#undef DEBUG 13#undef DEBUG
@@ -11,10 +19,11 @@
11#include <linux/namei.h> 19#include <linux/namei.h>
12#include <linux/idr.h> 20#include <linux/idr.h>
13#include <linux/completion.h> 21#include <linux/completion.h>
14#include <asm/semaphore.h> 22#include <linux/mutex.h>
15#include "sysfs.h" 23#include "sysfs.h"
16 24
17DEFINE_MUTEX(sysfs_mutex); 25DEFINE_MUTEX(sysfs_mutex);
26DEFINE_MUTEX(sysfs_rename_mutex);
18spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; 27spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
19 28
20static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; 29static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
@@ -25,18 +34,28 @@ static DEFINE_IDA(sysfs_ino_ida);
25 * @sd: sysfs_dirent of interest 34 * @sd: sysfs_dirent of interest
26 * 35 *
27 * Link @sd into its sibling list which starts from 36 * Link @sd into its sibling list which starts from
28 * sd->s_parent->s_children. 37 * sd->s_parent->s_dir.children.
29 * 38 *
30 * Locking: 39 * Locking:
31 * mutex_lock(sysfs_mutex) 40 * mutex_lock(sysfs_mutex)
32 */ 41 */
33void sysfs_link_sibling(struct sysfs_dirent *sd) 42static void sysfs_link_sibling(struct sysfs_dirent *sd)
34{ 43{
35 struct sysfs_dirent *parent_sd = sd->s_parent; 44 struct sysfs_dirent *parent_sd = sd->s_parent;
45 struct sysfs_dirent **pos;
36 46
37 BUG_ON(sd->s_sibling); 47 BUG_ON(sd->s_sibling);
38 sd->s_sibling = parent_sd->s_children; 48
39 parent_sd->s_children = sd; 49 /* Store directory entries in order by ino. This allows
50 * readdir to properly restart without having to add a
51 * cursor into the s_dir.children list.
52 */
53 for (pos = &parent_sd->s_dir.children; *pos; pos = &(*pos)->s_sibling) {
54 if (sd->s_ino < (*pos)->s_ino)
55 break;
56 }
57 sd->s_sibling = *pos;
58 *pos = sd;
40} 59}
41 60
42/** 61/**
@@ -44,16 +63,17 @@ void sysfs_link_sibling(struct sysfs_dirent *sd)
44 * @sd: sysfs_dirent of interest 63 * @sd: sysfs_dirent of interest
45 * 64 *
46 * Unlink @sd from its sibling list which starts from 65 * Unlink @sd from its sibling list which starts from
47 * sd->s_parent->s_children. 66 * sd->s_parent->s_dir.children.
48 * 67 *
49 * Locking: 68 * Locking:
50 * mutex_lock(sysfs_mutex) 69 * mutex_lock(sysfs_mutex)
51 */ 70 */
52void sysfs_unlink_sibling(struct sysfs_dirent *sd) 71static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53{ 72{
54 struct sysfs_dirent **pos; 73 struct sysfs_dirent **pos;
55 74
56 for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) { 75 for (pos = &sd->s_parent->s_dir.children; *pos;
76 pos = &(*pos)->s_sibling) {
57 if (*pos == sd) { 77 if (*pos == sd) {
58 *pos = sd->s_sibling; 78 *pos = sd->s_sibling;
59 sd->s_sibling = NULL; 79 sd->s_sibling = NULL;
@@ -67,96 +87,39 @@ void sysfs_unlink_sibling(struct sysfs_dirent *sd)
67 * @sd: sysfs_dirent of interest 87 * @sd: sysfs_dirent of interest
68 * 88 *
69 * Get dentry for @sd. Dentry is looked up if currently not 89 * Get dentry for @sd. Dentry is looked up if currently not
70 * present. This function climbs sysfs_dirent tree till it 90 * present. This function descends from the root looking up
71 * reaches a sysfs_dirent with valid dentry attached and descends 91 * dentry for each step.
72 * down from there looking up dentry for each step.
73 * 92 *
74 * LOCKING: 93 * LOCKING:
75 * Kernel thread context (may sleep) 94 * mutex_lock(sysfs_rename_mutex)
76 * 95 *
77 * RETURNS: 96 * RETURNS:
78 * Pointer to found dentry on success, ERR_PTR() value on error. 97 * Pointer to found dentry on success, ERR_PTR() value on error.
79 */ 98 */
80struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) 99struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
81{ 100{
82 struct sysfs_dirent *cur; 101 struct dentry *dentry = dget(sysfs_sb->s_root);
83 struct dentry *parent_dentry, *dentry;
84 int i, depth;
85
86 /* Find the first parent which has valid s_dentry and get the
87 * dentry.
88 */
89 mutex_lock(&sysfs_mutex);
90 restart0:
91 spin_lock(&sysfs_assoc_lock);
92 restart1:
93 spin_lock(&dcache_lock);
94 102
95 dentry = NULL; 103 while (dentry->d_fsdata != sd) {
96 depth = 0; 104 struct sysfs_dirent *cur;
97 cur = sd; 105 struct dentry *parent;
98 while (!cur->s_dentry || !cur->s_dentry->d_inode) {
99 if (cur->s_flags & SYSFS_FLAG_REMOVED) {
100 dentry = ERR_PTR(-ENOENT);
101 depth = 0;
102 break;
103 }
104 cur = cur->s_parent;
105 depth++;
106 }
107 if (!IS_ERR(dentry))
108 dentry = dget_locked(cur->s_dentry);
109 106
110 spin_unlock(&dcache_lock); 107 /* find the first ancestor which hasn't been looked up */
111 spin_unlock(&sysfs_assoc_lock); 108 cur = sd;
112 109 while (cur->s_parent != dentry->d_fsdata)
113 /* from the found dentry, look up depth times */
114 while (depth--) {
115 /* find and get depth'th ancestor */
116 for (cur = sd, i = 0; cur && i < depth; i++)
117 cur = cur->s_parent; 110 cur = cur->s_parent;
118 111
119 /* This can happen if tree structure was modified due
120 * to move/rename. Restart.
121 */
122 if (i != depth) {
123 dput(dentry);
124 goto restart0;
125 }
126
127 sysfs_get(cur);
128
129 mutex_unlock(&sysfs_mutex);
130
131 /* look it up */ 112 /* look it up */
132 parent_dentry = dentry; 113 parent = dentry;
133 dentry = lookup_one_len_kern(cur->s_name, parent_dentry, 114 mutex_lock(&parent->d_inode->i_mutex);
115 dentry = lookup_one_len_kern(cur->s_name, parent,
134 strlen(cur->s_name)); 116 strlen(cur->s_name));
135 dput(parent_dentry); 117 mutex_unlock(&parent->d_inode->i_mutex);
136 118 dput(parent);
137 if (IS_ERR(dentry)) {
138 sysfs_put(cur);
139 return dentry;
140 }
141 119
142 mutex_lock(&sysfs_mutex); 120 if (IS_ERR(dentry))
143 spin_lock(&sysfs_assoc_lock); 121 break;
144
145 /* This, again, can happen if tree structure has
146 * changed and we looked up the wrong thing. Restart.
147 */
148 if (cur->s_dentry != dentry) {
149 dput(dentry);
150 sysfs_put(cur);
151 goto restart1;
152 }
153
154 spin_unlock(&sysfs_assoc_lock);
155
156 sysfs_put(cur);
157 } 122 }
158
159 mutex_unlock(&sysfs_mutex);
160 return dentry; 123 return dentry;
161} 124}
162 125
@@ -319,7 +282,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
319 parent_sd = sd->s_parent; 282 parent_sd = sd->s_parent;
320 283
321 if (sysfs_type(sd) == SYSFS_KOBJ_LINK) 284 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
322 sysfs_put(sd->s_elem.symlink.target_sd); 285 sysfs_put(sd->s_symlink.target_sd);
323 if (sysfs_type(sd) & SYSFS_COPY_NAME) 286 if (sysfs_type(sd) & SYSFS_COPY_NAME)
324 kfree(sd->s_name); 287 kfree(sd->s_name);
325 kfree(sd->s_iattr); 288 kfree(sd->s_iattr);
@@ -335,22 +298,7 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
335{ 298{
336 struct sysfs_dirent * sd = dentry->d_fsdata; 299 struct sysfs_dirent * sd = dentry->d_fsdata;
337 300
338 if (sd) { 301 sysfs_put(sd);
339 /* sd->s_dentry is protected with sysfs_assoc_lock.
340 * This allows sysfs_drop_dentry() to dereference it.
341 */
342 spin_lock(&sysfs_assoc_lock);
343
344 /* The dentry might have been deleted or another
345 * lookup could have happened updating sd->s_dentry to
346 * point the new dentry. Ignore if it isn't pointing
347 * to this dentry.
348 */
349 if (sd->s_dentry == dentry)
350 sd->s_dentry = NULL;
351 spin_unlock(&sysfs_assoc_lock);
352 sysfs_put(sd);
353 }
354 iput(inode); 302 iput(inode);
355} 303}
356 304
@@ -378,7 +326,6 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
378 326
379 atomic_set(&sd->s_count, 1); 327 atomic_set(&sd->s_count, 1);
380 atomic_set(&sd->s_active, 0); 328 atomic_set(&sd->s_active, 0);
381 atomic_set(&sd->s_event, 1);
382 329
383 sd->s_name = name; 330 sd->s_name = name;
384 sd->s_mode = mode; 331 sd->s_mode = mode;
@@ -393,30 +340,6 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
393 return NULL; 340 return NULL;
394} 341}
395 342
396/**
397 * sysfs_attach_dentry - associate sysfs_dirent with dentry
398 * @sd: target sysfs_dirent
399 * @dentry: dentry to associate
400 *
401 * Associate @sd with @dentry. This is protected by
402 * sysfs_assoc_lock to avoid race with sysfs_d_iput().
403 *
404 * LOCKING:
405 * mutex_lock(sysfs_mutex)
406 */
407static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
408{
409 dentry->d_op = &sysfs_dentry_ops;
410 dentry->d_fsdata = sysfs_get(sd);
411
412 /* protect sd->s_dentry against sysfs_d_iput */
413 spin_lock(&sysfs_assoc_lock);
414 sd->s_dentry = dentry;
415 spin_unlock(&sysfs_assoc_lock);
416
417 d_rehash(dentry);
418}
419
420static int sysfs_ilookup_test(struct inode *inode, void *arg) 343static int sysfs_ilookup_test(struct inode *inode, void *arg)
421{ 344{
422 struct sysfs_dirent *sd = arg; 345 struct sysfs_dirent *sd = arg;
@@ -480,10 +403,8 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
480 * @sd: sysfs_dirent to be added 403 * @sd: sysfs_dirent to be added
481 * 404 *
482 * Get @acxt->parent_sd and set sd->s_parent to it and increment 405 * Get @acxt->parent_sd and set sd->s_parent to it and increment
483 * nlink of parent inode if @sd is a directory. @sd is NOT 406 * nlink of parent inode if @sd is a directory and link into the
484 * linked into the children list of the parent. The caller 407 * children list of the parent.
485 * should invoke sysfs_link_sibling() after this function
486 * completes if @sd needs to be on the children list.
487 * 408 *
488 * This function should be called between calls to 409 * This function should be called between calls to
489 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 410 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
@@ -491,15 +412,30 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
491 * 412 *
492 * LOCKING: 413 * LOCKING:
493 * Determined by sysfs_addrm_start(). 414 * Determined by sysfs_addrm_start().
415 *
416 * RETURNS:
417 * 0 on success, -EEXIST if entry with the given name already
418 * exists.
494 */ 419 */
495void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 420int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
496{ 421{
422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) {
423 printk(KERN_WARNING "sysfs: duplicate filename '%s' "
424 "can not be created\n", sd->s_name);
425 WARN_ON(1);
426 return -EEXIST;
427 }
428
497 sd->s_parent = sysfs_get(acxt->parent_sd); 429 sd->s_parent = sysfs_get(acxt->parent_sd);
498 430
499 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) 431 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
500 inc_nlink(acxt->parent_inode); 432 inc_nlink(acxt->parent_inode);
501 433
502 acxt->cnt++; 434 acxt->cnt++;
435
436 sysfs_link_sibling(sd);
437
438 return 0;
503} 439}
504 440
505/** 441/**
@@ -508,9 +444,7 @@ void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
508 * @sd: sysfs_dirent to be added 444 * @sd: sysfs_dirent to be added
509 * 445 *
510 * Mark @sd removed and drop nlink of parent inode if @sd is a 446 * Mark @sd removed and drop nlink of parent inode if @sd is a
511 * directory. @sd is NOT unlinked from the children list of the 447 * directory. @sd is unlinked from the children list.
512 * parent. The caller is repsonsible for removing @sd from the
513 * children list before calling this function.
514 * 448 *
515 * This function should be called between calls to 449 * This function should be called between calls to
516 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 450 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
@@ -521,7 +455,9 @@ void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
521 */ 455 */
522void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 456void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
523{ 457{
524 BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED)); 458 BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);
459
460 sysfs_unlink_sibling(sd);
525 461
526 sd->s_flags |= SYSFS_FLAG_REMOVED; 462 sd->s_flags |= SYSFS_FLAG_REMOVED;
527 sd->s_sibling = acxt->removed; 463 sd->s_sibling = acxt->removed;
@@ -540,53 +476,49 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
540 * Drop dentry for @sd. @sd must have been unlinked from its 476 * Drop dentry for @sd. @sd must have been unlinked from its
541 * parent on entry to this function such that it can't be looked 477 * parent on entry to this function such that it can't be looked
542 * up anymore. 478 * up anymore.
543 *
544 * @sd->s_dentry which is protected with sysfs_assoc_lock points
545 * to the currently associated dentry but we're not holding a
546 * reference to it and racing with dput(). Grab dcache_lock and
547 * verify dentry before dropping it. If @sd->s_dentry is NULL or
548 * dput() beats us, no need to bother.
549 */ 479 */
550static void sysfs_drop_dentry(struct sysfs_dirent *sd) 480static void sysfs_drop_dentry(struct sysfs_dirent *sd)
551{ 481{
552 struct dentry *dentry = NULL;
553 struct inode *inode; 482 struct inode *inode;
483 struct dentry *dentry;
554 484
555 /* We're not holding a reference to ->s_dentry dentry but the 485 inode = ilookup(sysfs_sb, sd->s_ino);
556 * field will stay valid as long as sysfs_assoc_lock is held. 486 if (!inode)
487 return;
488
489 /* Drop any existing dentries associated with sd.
490 *
491 * For the dentry to be properly freed we need to grab a
492 * reference to the dentry under the dcache lock, unhash it,
493 * and then put it. The playing with the dentry count allows
494 * dput to immediately free the dentry if it is not in use.
557 */ 495 */
558 spin_lock(&sysfs_assoc_lock); 496repeat:
559 spin_lock(&dcache_lock); 497 spin_lock(&dcache_lock);
560 498 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
561 /* drop dentry if it's there and dput() didn't kill it yet */ 499 if (d_unhashed(dentry))
562 if (sd->s_dentry && sd->s_dentry->d_inode) { 500 continue;
563 dentry = dget_locked(sd->s_dentry); 501 dget_locked(dentry);
564 spin_lock(&dentry->d_lock); 502 spin_lock(&dentry->d_lock);
565 __d_drop(dentry); 503 __d_drop(dentry);
566 spin_unlock(&dentry->d_lock); 504 spin_unlock(&dentry->d_lock);
505 spin_unlock(&dcache_lock);
506 dput(dentry);
507 goto repeat;
567 } 508 }
568
569 spin_unlock(&dcache_lock); 509 spin_unlock(&dcache_lock);
570 spin_unlock(&sysfs_assoc_lock);
571
572 /* dentries for shadowed inodes are pinned, unpin */
573 if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
574 dput(dentry);
575 dput(dentry);
576 510
577 /* adjust nlink and update timestamp */ 511 /* adjust nlink and update timestamp */
578 inode = ilookup(sysfs_sb, sd->s_ino); 512 mutex_lock(&inode->i_mutex);
579 if (inode) {
580 mutex_lock(&inode->i_mutex);
581 513
582 inode->i_ctime = CURRENT_TIME; 514 inode->i_ctime = CURRENT_TIME;
515 drop_nlink(inode);
516 if (sysfs_type(sd) == SYSFS_DIR)
583 drop_nlink(inode); 517 drop_nlink(inode);
584 if (sysfs_type(sd) == SYSFS_DIR)
585 drop_nlink(inode);
586 518
587 mutex_unlock(&inode->i_mutex); 519 mutex_unlock(&inode->i_mutex);
588 iput(inode); 520
589 } 521 iput(inode);
590} 522}
591 523
592/** 524/**
@@ -599,11 +531,8 @@ static void sysfs_drop_dentry(struct sysfs_dirent *sd)
599 * 531 *
600 * LOCKING: 532 * LOCKING:
601 * All mutexes acquired by sysfs_addrm_start() are released. 533 * All mutexes acquired by sysfs_addrm_start() are released.
602 *
603 * RETURNS:
604 * Number of added/removed sysfs_dirents since sysfs_addrm_start().
605 */ 534 */
606int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) 535void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
607{ 536{
608 /* release resources acquired by sysfs_addrm_start() */ 537 /* release resources acquired by sysfs_addrm_start() */
609 mutex_unlock(&sysfs_mutex); 538 mutex_unlock(&sysfs_mutex);
@@ -629,8 +558,6 @@ int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
629 sysfs_deactivate(sd); 558 sysfs_deactivate(sd);
630 sysfs_put(sd); 559 sysfs_put(sd);
631 } 560 }
632
633 return acxt->cnt;
634} 561}
635 562
636/** 563/**
@@ -651,8 +578,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
651{ 578{
652 struct sysfs_dirent *sd; 579 struct sysfs_dirent *sd;
653 580
654 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) 581 for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
655 if (sysfs_type(sd) && !strcmp(sd->s_name, name)) 582 if (!strcmp(sd->s_name, name))
656 return sd; 583 return sd;
657 return NULL; 584 return NULL;
658} 585}
@@ -690,28 +617,25 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
690 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 617 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
691 struct sysfs_addrm_cxt acxt; 618 struct sysfs_addrm_cxt acxt;
692 struct sysfs_dirent *sd; 619 struct sysfs_dirent *sd;
620 int rc;
693 621
694 /* allocate */ 622 /* allocate */
695 sd = sysfs_new_dirent(name, mode, SYSFS_DIR); 623 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
696 if (!sd) 624 if (!sd)
697 return -ENOMEM; 625 return -ENOMEM;
698 sd->s_elem.dir.kobj = kobj; 626 sd->s_dir.kobj = kobj;
699 627
700 /* link in */ 628 /* link in */
701 sysfs_addrm_start(&acxt, parent_sd); 629 sysfs_addrm_start(&acxt, parent_sd);
630 rc = sysfs_add_one(&acxt, sd);
631 sysfs_addrm_finish(&acxt);
702 632
703 if (!sysfs_find_dirent(parent_sd, name)) { 633 if (rc == 0)
704 sysfs_add_one(&acxt, sd); 634 *p_sd = sd;
705 sysfs_link_sibling(sd); 635 else
706 }
707
708 if (!sysfs_addrm_finish(&acxt)) {
709 sysfs_put(sd); 636 sysfs_put(sd);
710 return -EEXIST;
711 }
712 637
713 *p_sd = sd; 638 return rc;
714 return 0;
715} 639}
716 640
717int sysfs_create_subdir(struct kobject *kobj, const char *name, 641int sysfs_create_subdir(struct kobject *kobj, const char *name,
@@ -723,24 +647,18 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
723/** 647/**
724 * sysfs_create_dir - create a directory for an object. 648 * sysfs_create_dir - create a directory for an object.
725 * @kobj: object we're creating directory for. 649 * @kobj: object we're creating directory for.
726 * @shadow_parent: parent object.
727 */ 650 */
728int sysfs_create_dir(struct kobject *kobj, 651int sysfs_create_dir(struct kobject * kobj)
729 struct sysfs_dirent *shadow_parent_sd)
730{ 652{
731 struct sysfs_dirent *parent_sd, *sd; 653 struct sysfs_dirent *parent_sd, *sd;
732 int error = 0; 654 int error = 0;
733 655
734 BUG_ON(!kobj); 656 BUG_ON(!kobj);
735 657
736 if (shadow_parent_sd) 658 if (kobj->parent)
737 parent_sd = shadow_parent_sd;
738 else if (kobj->parent)
739 parent_sd = kobj->parent->sd; 659 parent_sd = kobj->parent->sd;
740 else if (sysfs_mount && sysfs_mount->mnt_sb)
741 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
742 else 660 else
743 return -EFAULT; 661 parent_sd = &sysfs_root;
744 662
745 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); 663 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
746 if (!error) 664 if (!error)
@@ -748,39 +666,20 @@ int sysfs_create_dir(struct kobject *kobj,
748 return error; 666 return error;
749} 667}
750 668
751static int sysfs_count_nlink(struct sysfs_dirent *sd)
752{
753 struct sysfs_dirent *child;
754 int nr = 0;
755
756 for (child = sd->s_children; child; child = child->s_sibling)
757 if (sysfs_type(child) == SYSFS_DIR)
758 nr++;
759 return nr + 2;
760}
761
762static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, 669static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 struct nameidata *nd) 670 struct nameidata *nd)
764{ 671{
765 struct dentry *ret = NULL; 672 struct dentry *ret = NULL;
766 struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 673 struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
767 struct sysfs_dirent * sd; 674 struct sysfs_dirent *sd;
768 struct bin_attribute *bin_attr;
769 struct inode *inode; 675 struct inode *inode;
770 int found = 0;
771 676
772 mutex_lock(&sysfs_mutex); 677 mutex_lock(&sysfs_mutex);
773 678
774 for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { 679 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
775 if (sysfs_type(sd) &&
776 !strcmp(sd->s_name, dentry->d_name.name)) {
777 found = 1;
778 break;
779 }
780 }
781 680
782 /* no such entry */ 681 /* no such entry */
783 if (!found) 682 if (!sd)
784 goto out_unlock; 683 goto out_unlock;
785 684
786 /* attach dentry and inode */ 685 /* attach dentry and inode */
@@ -790,33 +689,11 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
790 goto out_unlock; 689 goto out_unlock;
791 } 690 }
792 691
793 if (inode->i_state & I_NEW) { 692 /* instantiate and hash dentry */
794 /* initialize inode according to type */ 693 dentry->d_op = &sysfs_dentry_ops;
795 switch (sysfs_type(sd)) { 694 dentry->d_fsdata = sysfs_get(sd);
796 case SYSFS_DIR: 695 d_instantiate(dentry, inode);
797 inode->i_op = &sysfs_dir_inode_operations; 696 d_rehash(dentry);
798 inode->i_fop = &sysfs_dir_operations;
799 inode->i_nlink = sysfs_count_nlink(sd);
800 break;
801 case SYSFS_KOBJ_ATTR:
802 inode->i_size = PAGE_SIZE;
803 inode->i_fop = &sysfs_file_operations;
804 break;
805 case SYSFS_KOBJ_BIN_ATTR:
806 bin_attr = sd->s_elem.bin_attr.bin_attr;
807 inode->i_size = bin_attr->size;
808 inode->i_fop = &bin_fops;
809 break;
810 case SYSFS_KOBJ_LINK:
811 inode->i_op = &sysfs_symlink_inode_operations;
812 break;
813 default:
814 BUG();
815 }
816 }
817
818 sysfs_instantiate(dentry, inode);
819 sysfs_attach_dentry(sd, dentry);
820 697
821 out_unlock: 698 out_unlock:
822 mutex_unlock(&sysfs_mutex); 699 mutex_unlock(&sysfs_mutex);
@@ -833,7 +710,6 @@ static void remove_dir(struct sysfs_dirent *sd)
833 struct sysfs_addrm_cxt acxt; 710 struct sysfs_addrm_cxt acxt;
834 711
835 sysfs_addrm_start(&acxt, sd->s_parent); 712 sysfs_addrm_start(&acxt, sd->s_parent);
836 sysfs_unlink_sibling(sd);
837 sysfs_remove_one(&acxt, sd); 713 sysfs_remove_one(&acxt, sd);
838 sysfs_addrm_finish(&acxt); 714 sysfs_addrm_finish(&acxt);
839} 715}
@@ -854,15 +730,13 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
854 730
855 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); 731 pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
856 sysfs_addrm_start(&acxt, dir_sd); 732 sysfs_addrm_start(&acxt, dir_sd);
857 pos = &dir_sd->s_children; 733 pos = &dir_sd->s_dir.children;
858 while (*pos) { 734 while (*pos) {
859 struct sysfs_dirent *sd = *pos; 735 struct sysfs_dirent *sd = *pos;
860 736
861 if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { 737 if (sysfs_type(sd) != SYSFS_DIR)
862 *pos = sd->s_sibling;
863 sd->s_sibling = NULL;
864 sysfs_remove_one(&acxt, sd); 738 sysfs_remove_one(&acxt, sd);
865 } else 739 else
866 pos = &(*pos)->s_sibling; 740 pos = &(*pos)->s_sibling;
867 } 741 }
868 sysfs_addrm_finish(&acxt); 742 sysfs_addrm_finish(&acxt);
@@ -890,90 +764,68 @@ void sysfs_remove_dir(struct kobject * kobj)
890 __sysfs_remove_dir(sd); 764 __sysfs_remove_dir(sd);
891} 765}
892 766
893int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, 767int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
894 const char *new_name)
895{ 768{
896 struct sysfs_dirent *sd = kobj->sd; 769 struct sysfs_dirent *sd = kobj->sd;
897 struct dentry *new_parent = NULL; 770 struct dentry *parent = NULL;
898 struct dentry *old_dentry = NULL, *new_dentry = NULL; 771 struct dentry *old_dentry = NULL, *new_dentry = NULL;
899 const char *dup_name = NULL; 772 const char *dup_name = NULL;
900 int error; 773 int error;
901 774
902 /* get dentries */ 775 mutex_lock(&sysfs_rename_mutex);
776
777 error = 0;
778 if (strcmp(sd->s_name, new_name) == 0)
779 goto out; /* nothing to rename */
780
781 /* get the original dentry */
903 old_dentry = sysfs_get_dentry(sd); 782 old_dentry = sysfs_get_dentry(sd);
904 if (IS_ERR(old_dentry)) { 783 if (IS_ERR(old_dentry)) {
905 error = PTR_ERR(old_dentry); 784 error = PTR_ERR(old_dentry);
906 goto out_dput; 785 goto out;
907 }
908
909 new_parent = sysfs_get_dentry(new_parent_sd);
910 if (IS_ERR(new_parent)) {
911 error = PTR_ERR(new_parent);
912 goto out_dput;
913 } 786 }
914 787
915 /* lock new_parent and get dentry for new name */ 788 parent = old_dentry->d_parent;
916 mutex_lock(&new_parent->d_inode->i_mutex);
917 789
918 new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); 790 /* lock parent and get dentry for new name */
919 if (IS_ERR(new_dentry)) { 791 mutex_lock(&parent->d_inode->i_mutex);
920 error = PTR_ERR(new_dentry); 792 mutex_lock(&sysfs_mutex);
921 goto out_unlock;
922 }
923 793
924 /* By allowing two different directories with the same 794 error = -EEXIST;
925 * d_parent we allow this routine to move between different 795 if (sysfs_find_dirent(sd->s_parent, new_name))
926 * shadows of the same directory
927 */
928 error = -EINVAL;
929 if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
930 new_dentry->d_parent->d_inode != new_parent->d_inode ||
931 old_dentry == new_dentry)
932 goto out_unlock; 796 goto out_unlock;
933 797
934 error = -EEXIST; 798 error = -ENOMEM;
935 if (new_dentry->d_inode) 799 new_dentry = d_alloc_name(parent, new_name);
800 if (!new_dentry)
936 goto out_unlock; 801 goto out_unlock;
937 802
938 /* rename kobject and sysfs_dirent */ 803 /* rename kobject and sysfs_dirent */
939 error = -ENOMEM; 804 error = -ENOMEM;
940 new_name = dup_name = kstrdup(new_name, GFP_KERNEL); 805 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
941 if (!new_name) 806 if (!new_name)
942 goto out_drop; 807 goto out_unlock;
943 808
944 error = kobject_set_name(kobj, "%s", new_name); 809 error = kobject_set_name(kobj, "%s", new_name);
945 if (error) 810 if (error)
946 goto out_drop; 811 goto out_unlock;
947
948 mutex_lock(&sysfs_mutex);
949 812
950 dup_name = sd->s_name; 813 dup_name = sd->s_name;
951 sd->s_name = new_name; 814 sd->s_name = new_name;
952 815
953 /* move under the new parent */ 816 /* rename */
954 d_add(new_dentry, NULL); 817 d_add(new_dentry, NULL);
955 d_move(sd->s_dentry, new_dentry); 818 d_move(old_dentry, new_dentry);
956
957 sysfs_unlink_sibling(sd);
958 sysfs_get(new_parent_sd);
959 sysfs_put(sd->s_parent);
960 sd->s_parent = new_parent_sd;
961 sysfs_link_sibling(sd);
962
963 mutex_unlock(&sysfs_mutex);
964 819
965 error = 0; 820 error = 0;
966 goto out_unlock;
967
968 out_drop:
969 d_drop(new_dentry);
970 out_unlock: 821 out_unlock:
971 mutex_unlock(&new_parent->d_inode->i_mutex); 822 mutex_unlock(&sysfs_mutex);
972 out_dput: 823 mutex_unlock(&parent->d_inode->i_mutex);
973 kfree(dup_name); 824 kfree(dup_name);
974 dput(new_parent);
975 dput(old_dentry); 825 dput(old_dentry);
976 dput(new_dentry); 826 dput(new_dentry);
827 out:
828 mutex_unlock(&sysfs_rename_mutex);
977 return error; 829 return error;
978} 830}
979 831
@@ -985,96 +837,69 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
985 struct dentry *old_dentry = NULL, *new_dentry = NULL; 837 struct dentry *old_dentry = NULL, *new_dentry = NULL;
986 int error; 838 int error;
987 839
840 mutex_lock(&sysfs_rename_mutex);
988 BUG_ON(!sd->s_parent); 841 BUG_ON(!sd->s_parent);
989 new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; 842 new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
990 843
844 error = 0;
845 if (sd->s_parent == new_parent_sd)
846 goto out; /* nothing to move */
847
991 /* get dentries */ 848 /* get dentries */
992 old_dentry = sysfs_get_dentry(sd); 849 old_dentry = sysfs_get_dentry(sd);
993 if (IS_ERR(old_dentry)) { 850 if (IS_ERR(old_dentry)) {
994 error = PTR_ERR(old_dentry); 851 error = PTR_ERR(old_dentry);
995 goto out_dput; 852 goto out;
996 } 853 }
997 old_parent = sd->s_parent->s_dentry; 854 old_parent = old_dentry->d_parent;
998 855
999 new_parent = sysfs_get_dentry(new_parent_sd); 856 new_parent = sysfs_get_dentry(new_parent_sd);
1000 if (IS_ERR(new_parent)) { 857 if (IS_ERR(new_parent)) {
1001 error = PTR_ERR(new_parent); 858 error = PTR_ERR(new_parent);
1002 goto out_dput; 859 goto out;
1003 } 860 }
1004 861
1005 if (old_parent->d_inode == new_parent->d_inode) {
1006 error = 0;
1007 goto out_dput; /* nothing to move */
1008 }
1009again: 862again:
1010 mutex_lock(&old_parent->d_inode->i_mutex); 863 mutex_lock(&old_parent->d_inode->i_mutex);
1011 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) { 864 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
1012 mutex_unlock(&old_parent->d_inode->i_mutex); 865 mutex_unlock(&old_parent->d_inode->i_mutex);
1013 goto again; 866 goto again;
1014 } 867 }
868 mutex_lock(&sysfs_mutex);
1015 869
1016 new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name)); 870 error = -EEXIST;
1017 if (IS_ERR(new_dentry)) { 871 if (sysfs_find_dirent(new_parent_sd, sd->s_name))
1018 error = PTR_ERR(new_dentry);
1019 goto out_unlock; 872 goto out_unlock;
1020 } else 873
1021 error = 0; 874 error = -ENOMEM;
875 new_dentry = d_alloc_name(new_parent, sd->s_name);
876 if (!new_dentry)
877 goto out_unlock;
878
879 error = 0;
1022 d_add(new_dentry, NULL); 880 d_add(new_dentry, NULL);
1023 d_move(sd->s_dentry, new_dentry); 881 d_move(old_dentry, new_dentry);
1024 dput(new_dentry); 882 dput(new_dentry);
1025 883
1026 /* Remove from old parent's list and insert into new parent's list. */ 884 /* Remove from old parent's list and insert into new parent's list. */
1027 mutex_lock(&sysfs_mutex);
1028
1029 sysfs_unlink_sibling(sd); 885 sysfs_unlink_sibling(sd);
1030 sysfs_get(new_parent_sd); 886 sysfs_get(new_parent_sd);
1031 sysfs_put(sd->s_parent); 887 sysfs_put(sd->s_parent);
1032 sd->s_parent = new_parent_sd; 888 sd->s_parent = new_parent_sd;
1033 sysfs_link_sibling(sd); 889 sysfs_link_sibling(sd);
1034 890
1035 mutex_unlock(&sysfs_mutex);
1036
1037 out_unlock: 891 out_unlock:
892 mutex_unlock(&sysfs_mutex);
1038 mutex_unlock(&new_parent->d_inode->i_mutex); 893 mutex_unlock(&new_parent->d_inode->i_mutex);
1039 mutex_unlock(&old_parent->d_inode->i_mutex); 894 mutex_unlock(&old_parent->d_inode->i_mutex);
1040 out_dput: 895 out:
1041 dput(new_parent); 896 dput(new_parent);
1042 dput(old_dentry); 897 dput(old_dentry);
1043 dput(new_dentry); 898 dput(new_dentry);
899 mutex_unlock(&sysfs_rename_mutex);
1044 return error; 900 return error;
1045} 901}
1046 902
1047static int sysfs_dir_open(struct inode *inode, struct file *file)
1048{
1049 struct dentry * dentry = file->f_path.dentry;
1050 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1051 struct sysfs_dirent * sd;
1052
1053 sd = sysfs_new_dirent("_DIR_", 0, 0);
1054 if (sd) {
1055 mutex_lock(&sysfs_mutex);
1056 sd->s_parent = sysfs_get(parent_sd);
1057 sysfs_link_sibling(sd);
1058 mutex_unlock(&sysfs_mutex);
1059 }
1060
1061 file->private_data = sd;
1062 return sd ? 0 : -ENOMEM;
1063}
1064
1065static int sysfs_dir_close(struct inode *inode, struct file *file)
1066{
1067 struct sysfs_dirent * cursor = file->private_data;
1068
1069 mutex_lock(&sysfs_mutex);
1070 sysfs_unlink_sibling(cursor);
1071 mutex_unlock(&sysfs_mutex);
1072
1073 release_sysfs_dirent(cursor);
1074
1075 return 0;
1076}
1077
1078/* Relationship between s_mode and the DT_xxx types */ 903/* Relationship between s_mode and the DT_xxx types */
1079static inline unsigned char dt_type(struct sysfs_dirent *sd) 904static inline unsigned char dt_type(struct sysfs_dirent *sd)
1080{ 905{
@@ -1085,232 +910,51 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1085{ 910{
1086 struct dentry *dentry = filp->f_path.dentry; 911 struct dentry *dentry = filp->f_path.dentry;
1087 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 912 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
1088 struct sysfs_dirent *cursor = filp->private_data; 913 struct sysfs_dirent *pos;
1089 struct sysfs_dirent **pos;
1090 ino_t ino; 914 ino_t ino;
1091 int i = filp->f_pos;
1092 915
1093 switch (i) { 916 if (filp->f_pos == 0) {
1094 case 0: 917 ino = parent_sd->s_ino;
1095 ino = parent_sd->s_ino; 918 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
1096 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1097 break;
1098 filp->f_pos++; 919 filp->f_pos++;
1099 i++; 920 }
1100 /* fallthrough */ 921 if (filp->f_pos == 1) {
1101 case 1: 922 if (parent_sd->s_parent)
1102 if (parent_sd->s_parent) 923 ino = parent_sd->s_parent->s_ino;
1103 ino = parent_sd->s_parent->s_ino; 924 else
1104 else 925 ino = parent_sd->s_ino;
1105 ino = parent_sd->s_ino; 926 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
1106 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1107 break;
1108 filp->f_pos++; 927 filp->f_pos++;
1109 i++; 928 }
1110 /* fallthrough */ 929 if ((filp->f_pos > 1) && (filp->f_pos < INT_MAX)) {
1111 default: 930 mutex_lock(&sysfs_mutex);
1112 mutex_lock(&sysfs_mutex);
1113
1114 pos = &parent_sd->s_children;
1115 while (*pos != cursor)
1116 pos = &(*pos)->s_sibling;
1117
1118 /* unlink cursor */
1119 *pos = cursor->s_sibling;
1120
1121 if (filp->f_pos == 2)
1122 pos = &parent_sd->s_children;
1123
1124 for ( ; *pos; pos = &(*pos)->s_sibling) {
1125 struct sysfs_dirent *next = *pos;
1126 const char * name;
1127 int len;
1128
1129 if (!sysfs_type(next))
1130 continue;
1131
1132 name = next->s_name;
1133 len = strlen(name);
1134 ino = next->s_ino;
1135
1136 if (filldir(dirent, name, len, filp->f_pos, ino,
1137 dt_type(next)) < 0)
1138 break;
1139 931
1140 filp->f_pos++; 932 /* Skip the dentries we have already reported */
1141 } 933 pos = parent_sd->s_dir.children;
934 while (pos && (filp->f_pos > pos->s_ino))
935 pos = pos->s_sibling;
1142 936
1143 /* put cursor back in */ 937 for ( ; pos; pos = pos->s_sibling) {
1144 cursor->s_sibling = *pos; 938 const char * name;
1145 *pos = cursor; 939 int len;
1146 940
1147 mutex_unlock(&sysfs_mutex); 941 name = pos->s_name;
1148 } 942 len = strlen(name);
1149 return 0; 943 filp->f_pos = ino = pos->s_ino;
1150}
1151
1152static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
1153{
1154 struct dentry * dentry = file->f_path.dentry;
1155 944
1156 switch (origin) { 945 if (filldir(dirent, name, len, filp->f_pos, ino,
1157 case 1: 946 dt_type(pos)) < 0)
1158 offset += file->f_pos;
1159 case 0:
1160 if (offset >= 0)
1161 break; 947 break;
1162 default:
1163 return -EINVAL;
1164 }
1165 if (offset != file->f_pos) {
1166 mutex_lock(&sysfs_mutex);
1167
1168 file->f_pos = offset;
1169 if (file->f_pos >= 2) {
1170 struct sysfs_dirent *sd = dentry->d_fsdata;
1171 struct sysfs_dirent *cursor = file->private_data;
1172 struct sysfs_dirent **pos;
1173 loff_t n = file->f_pos - 2;
1174
1175 sysfs_unlink_sibling(cursor);
1176
1177 pos = &sd->s_children;
1178 while (n && *pos) {
1179 struct sysfs_dirent *next = *pos;
1180 if (sysfs_type(next))
1181 n--;
1182 pos = &(*pos)->s_sibling;
1183 }
1184
1185 cursor->s_sibling = *pos;
1186 *pos = cursor;
1187 } 948 }
1188 949 if (!pos)
950 filp->f_pos = INT_MAX;
1189 mutex_unlock(&sysfs_mutex); 951 mutex_unlock(&sysfs_mutex);
1190 } 952 }
1191
1192 return offset;
1193}
1194
1195
1196/**
1197 * sysfs_make_shadowed_dir - Setup so a directory can be shadowed
1198 * @kobj: object we're creating shadow of.
1199 */
1200
1201int sysfs_make_shadowed_dir(struct kobject *kobj,
1202 void * (*follow_link)(struct dentry *, struct nameidata *))
1203{
1204 struct dentry *dentry;
1205 struct inode *inode;
1206 struct inode_operations *i_op;
1207
1208 /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
1209 dentry = sysfs_get_dentry(kobj->sd);
1210 if (IS_ERR(dentry))
1211 return PTR_ERR(dentry);
1212
1213 inode = dentry->d_inode;
1214 if (inode->i_op != &sysfs_dir_inode_operations) {
1215 dput(dentry);
1216 return -EINVAL;
1217 }
1218
1219 i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
1220 if (!i_op)
1221 return -ENOMEM;
1222
1223 memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
1224 i_op->follow_link = follow_link;
1225
1226 /* Locking of inode->i_op?
1227 * Since setting i_op is a single word write and they
1228 * are atomic we should be ok here.
1229 */
1230 inode->i_op = i_op;
1231 return 0; 953 return 0;
1232} 954}
1233 955
1234/**
1235 * sysfs_create_shadow_dir - create a shadow directory for an object.
1236 * @kobj: object we're creating directory for.
1237 *
1238 * sysfs_make_shadowed_dir must already have been called on this
1239 * directory.
1240 */
1241
1242struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
1243{
1244 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
1245 struct dentry *dir, *parent, *shadow;
1246 struct inode *inode;
1247 struct sysfs_dirent *sd;
1248 struct sysfs_addrm_cxt acxt;
1249
1250 dir = sysfs_get_dentry(kobj->sd);
1251 if (IS_ERR(dir)) {
1252 sd = (void *)dir;
1253 goto out;
1254 }
1255 parent = dir->d_parent;
1256
1257 inode = dir->d_inode;
1258 sd = ERR_PTR(-EINVAL);
1259 if (!sysfs_is_shadowed_inode(inode))
1260 goto out_dput;
1261
1262 shadow = d_alloc(parent, &dir->d_name);
1263 if (!shadow)
1264 goto nomem;
1265
1266 sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
1267 if (!sd)
1268 goto nomem;
1269 sd->s_elem.dir.kobj = kobj;
1270
1271 sysfs_addrm_start(&acxt, parent_sd);
1272
1273 /* add but don't link into children list */
1274 sysfs_add_one(&acxt, sd);
1275
1276 /* attach and instantiate dentry */
1277 sysfs_attach_dentry(sd, shadow);
1278 d_instantiate(shadow, igrab(inode));
1279 inc_nlink(inode); /* tj: synchronization? */
1280
1281 sysfs_addrm_finish(&acxt);
1282
1283 dget(shadow); /* Extra count - pin the dentry in core */
1284
1285 goto out_dput;
1286
1287 nomem:
1288 dput(shadow);
1289 sd = ERR_PTR(-ENOMEM);
1290 out_dput:
1291 dput(dir);
1292 out:
1293 return sd;
1294}
1295
1296/**
1297 * sysfs_remove_shadow_dir - remove an object's directory.
1298 * @shadow_sd: sysfs_dirent of shadow directory
1299 *
1300 * The only thing special about this is that we remove any files in
1301 * the directory before we remove the directory, and we've inlined
1302 * what used to be sysfs_rmdir() below, instead of calling separately.
1303 */
1304
1305void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
1306{
1307 __sysfs_remove_dir(shadow_sd);
1308}
1309 956
1310const struct file_operations sysfs_dir_operations = { 957const struct file_operations sysfs_dir_operations = {
1311 .open = sysfs_dir_open,
1312 .release = sysfs_dir_close,
1313 .llseek = sysfs_dir_lseek,
1314 .read = generic_read_dir, 958 .read = generic_read_dir,
1315 .readdir = sysfs_readdir, 959 .readdir = sysfs_readdir,
1316}; 960};
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 3e1cc062a740..d3be1e7fb48b 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -1,15 +1,22 @@
1/* 1/*
2 * file.c - operations for regular (text) files. 2 * fs/sysfs/file.c - sysfs regular (text) file implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
3 */ 11 */
4 12
5#include <linux/module.h> 13#include <linux/module.h>
6#include <linux/fsnotify.h>
7#include <linux/kobject.h> 14#include <linux/kobject.h>
8#include <linux/namei.h> 15#include <linux/namei.h>
9#include <linux/poll.h> 16#include <linux/poll.h>
10#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/mutex.h>
11#include <asm/uaccess.h> 19#include <asm/uaccess.h>
12#include <asm/semaphore.h>
13 20
14#include "sysfs.h" 21#include "sysfs.h"
15 22
@@ -50,14 +57,33 @@ static struct sysfs_ops subsys_sysfs_ops = {
50 .store = subsys_attr_store, 57 .store = subsys_attr_store,
51}; 58};
52 59
60/*
61 * There's one sysfs_buffer for each open file and one
62 * sysfs_open_dirent for each sysfs_dirent with one or more open
63 * files.
64 *
65 * filp->private_data points to sysfs_buffer and
66 * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open
67 * is protected by sysfs_open_dirent_lock.
68 */
69static spinlock_t sysfs_open_dirent_lock = SPIN_LOCK_UNLOCKED;
70
71struct sysfs_open_dirent {
72 atomic_t refcnt;
73 atomic_t event;
74 wait_queue_head_t poll;
75 struct list_head buffers; /* goes through sysfs_buffer.list */
76};
77
53struct sysfs_buffer { 78struct sysfs_buffer {
54 size_t count; 79 size_t count;
55 loff_t pos; 80 loff_t pos;
56 char * page; 81 char * page;
57 struct sysfs_ops * ops; 82 struct sysfs_ops * ops;
58 struct semaphore sem; 83 struct mutex mutex;
59 int needs_read_fill; 84 int needs_read_fill;
60 int event; 85 int event;
86 struct list_head list;
61}; 87};
62 88
63/** 89/**
@@ -74,7 +100,7 @@ struct sysfs_buffer {
74static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 100static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
75{ 101{
76 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 102 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
77 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 103 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
78 struct sysfs_ops * ops = buffer->ops; 104 struct sysfs_ops * ops = buffer->ops;
79 int ret = 0; 105 int ret = 0;
80 ssize_t count; 106 ssize_t count;
@@ -88,8 +114,8 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
88 if (!sysfs_get_active_two(attr_sd)) 114 if (!sysfs_get_active_two(attr_sd))
89 return -ENODEV; 115 return -ENODEV;
90 116
91 buffer->event = atomic_read(&attr_sd->s_event); 117 buffer->event = atomic_read(&attr_sd->s_attr.open->event);
92 count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page); 118 count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page);
93 119
94 sysfs_put_active_two(attr_sd); 120 sysfs_put_active_two(attr_sd);
95 121
@@ -128,7 +154,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
128 struct sysfs_buffer * buffer = file->private_data; 154 struct sysfs_buffer * buffer = file->private_data;
129 ssize_t retval = 0; 155 ssize_t retval = 0;
130 156
131 down(&buffer->sem); 157 mutex_lock(&buffer->mutex);
132 if (buffer->needs_read_fill) { 158 if (buffer->needs_read_fill) {
133 retval = fill_read_buffer(file->f_path.dentry,buffer); 159 retval = fill_read_buffer(file->f_path.dentry,buffer);
134 if (retval) 160 if (retval)
@@ -139,7 +165,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 165 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
140 buffer->count); 166 buffer->count);
141out: 167out:
142 up(&buffer->sem); 168 mutex_unlock(&buffer->mutex);
143 return retval; 169 return retval;
144} 170}
145 171
@@ -189,7 +215,7 @@ static int
189flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) 215flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
190{ 216{
191 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 217 struct sysfs_dirent *attr_sd = dentry->d_fsdata;
192 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 218 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
193 struct sysfs_ops * ops = buffer->ops; 219 struct sysfs_ops * ops = buffer->ops;
194 int rc; 220 int rc;
195 221
@@ -197,7 +223,7 @@ flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t
197 if (!sysfs_get_active_two(attr_sd)) 223 if (!sysfs_get_active_two(attr_sd))
198 return -ENODEV; 224 return -ENODEV;
199 225
200 rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count); 226 rc = ops->store(kobj, attr_sd->s_attr.attr, buffer->page, count);
201 227
202 sysfs_put_active_two(attr_sd); 228 sysfs_put_active_two(attr_sd);
203 229
@@ -228,20 +254,102 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
228 struct sysfs_buffer * buffer = file->private_data; 254 struct sysfs_buffer * buffer = file->private_data;
229 ssize_t len; 255 ssize_t len;
230 256
231 down(&buffer->sem); 257 mutex_lock(&buffer->mutex);
232 len = fill_write_buffer(buffer, buf, count); 258 len = fill_write_buffer(buffer, buf, count);
233 if (len > 0) 259 if (len > 0)
234 len = flush_write_buffer(file->f_path.dentry, buffer, len); 260 len = flush_write_buffer(file->f_path.dentry, buffer, len);
235 if (len > 0) 261 if (len > 0)
236 *ppos += len; 262 *ppos += len;
237 up(&buffer->sem); 263 mutex_unlock(&buffer->mutex);
238 return len; 264 return len;
239} 265}
240 266
267/**
268 * sysfs_get_open_dirent - get or create sysfs_open_dirent
269 * @sd: target sysfs_dirent
270 * @buffer: sysfs_buffer for this instance of open
271 *
272 * If @sd->s_attr.open exists, increment its reference count;
273 * otherwise, create one. @buffer is chained to the buffers
274 * list.
275 *
276 * LOCKING:
277 * Kernel thread context (may sleep).
278 *
279 * RETURNS:
280 * 0 on success, -errno on failure.
281 */
282static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
283 struct sysfs_buffer *buffer)
284{
285 struct sysfs_open_dirent *od, *new_od = NULL;
286
287 retry:
288 spin_lock(&sysfs_open_dirent_lock);
289
290 if (!sd->s_attr.open && new_od) {
291 sd->s_attr.open = new_od;
292 new_od = NULL;
293 }
294
295 od = sd->s_attr.open;
296 if (od) {
297 atomic_inc(&od->refcnt);
298 list_add_tail(&buffer->list, &od->buffers);
299 }
300
301 spin_unlock(&sysfs_open_dirent_lock);
302
303 if (od) {
304 kfree(new_od);
305 return 0;
306 }
307
308 /* not there, initialize a new one and retry */
309 new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
310 if (!new_od)
311 return -ENOMEM;
312
313 atomic_set(&new_od->refcnt, 0);
314 atomic_set(&new_od->event, 1);
315 init_waitqueue_head(&new_od->poll);
316 INIT_LIST_HEAD(&new_od->buffers);
317 goto retry;
318}
319
320/**
321 * sysfs_put_open_dirent - put sysfs_open_dirent
322 * @sd: target sysfs_dirent
323 * @buffer: associated sysfs_buffer
324 *
325 * Put @sd->s_attr.open and unlink @buffer from the buffers list.
326 * If reference count reaches zero, disassociate and free it.
327 *
328 * LOCKING:
329 * None.
330 */
331static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
332 struct sysfs_buffer *buffer)
333{
334 struct sysfs_open_dirent *od = sd->s_attr.open;
335
336 spin_lock(&sysfs_open_dirent_lock);
337
338 list_del(&buffer->list);
339 if (atomic_dec_and_test(&od->refcnt))
340 sd->s_attr.open = NULL;
341 else
342 od = NULL;
343
344 spin_unlock(&sysfs_open_dirent_lock);
345
346 kfree(od);
347}
348
241static int sysfs_open_file(struct inode *inode, struct file *file) 349static int sysfs_open_file(struct inode *inode, struct file *file)
242{ 350{
243 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; 351 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
244 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 352 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
245 struct sysfs_buffer * buffer; 353 struct sysfs_buffer * buffer;
246 struct sysfs_ops * ops = NULL; 354 struct sysfs_ops * ops = NULL;
247 int error; 355 int error;
@@ -294,33 +402,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
294 if (!buffer) 402 if (!buffer)
295 goto err_out; 403 goto err_out;
296 404
297 init_MUTEX(&buffer->sem); 405 mutex_init(&buffer->mutex);
298 buffer->needs_read_fill = 1; 406 buffer->needs_read_fill = 1;
299 buffer->ops = ops; 407 buffer->ops = ops;
300 file->private_data = buffer; 408 file->private_data = buffer;
301 409
302 /* open succeeded, put active references and pin attr_sd */ 410 /* make sure we have open dirent struct */
411 error = sysfs_get_open_dirent(attr_sd, buffer);
412 if (error)
413 goto err_free;
414
415 /* open succeeded, put active references */
303 sysfs_put_active_two(attr_sd); 416 sysfs_put_active_two(attr_sd);
304 sysfs_get(attr_sd);
305 return 0; 417 return 0;
306 418
419 err_free:
420 kfree(buffer);
307 err_out: 421 err_out:
308 sysfs_put_active_two(attr_sd); 422 sysfs_put_active_two(attr_sd);
309 return error; 423 return error;
310} 424}
311 425
312static int sysfs_release(struct inode * inode, struct file * filp) 426static int sysfs_release(struct inode *inode, struct file *filp)
313{ 427{
314 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; 428 struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
315 struct sysfs_buffer *buffer = filp->private_data; 429 struct sysfs_buffer *buffer = filp->private_data;
316 430
317 sysfs_put(attr_sd); 431 sysfs_put_open_dirent(sd, buffer);
432
433 if (buffer->page)
434 free_page((unsigned long)buffer->page);
435 kfree(buffer);
318 436
319 if (buffer) {
320 if (buffer->page)
321 free_page((unsigned long)buffer->page);
322 kfree(buffer);
323 }
324 return 0; 437 return 0;
325} 438}
326 439
@@ -335,24 +448,24 @@ static int sysfs_release(struct inode * inode, struct file * filp)
335 * again will not get new data, or reset the state of 'poll'. 448 * again will not get new data, or reset the state of 'poll'.
336 * Reminder: this only works for attributes which actively support 449 * Reminder: this only works for attributes which actively support
337 * it, and it is not possible to test an attribute from userspace 450 * it, and it is not possible to test an attribute from userspace
338 * to see if it supports poll (Nether 'poll' or 'select' return 451 * to see if it supports poll (Neither 'poll' nor 'select' return
339 * an appropriate error code). When in doubt, set a suitable timeout value. 452 * an appropriate error code). When in doubt, set a suitable timeout value.
340 */ 453 */
341static unsigned int sysfs_poll(struct file *filp, poll_table *wait) 454static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
342{ 455{
343 struct sysfs_buffer * buffer = filp->private_data; 456 struct sysfs_buffer * buffer = filp->private_data;
344 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; 457 struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
345 struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; 458 struct sysfs_open_dirent *od = attr_sd->s_attr.open;
346 459
347 /* need parent for the kobj, grab both */ 460 /* need parent for the kobj, grab both */
348 if (!sysfs_get_active_two(attr_sd)) 461 if (!sysfs_get_active_two(attr_sd))
349 goto trigger; 462 goto trigger;
350 463
351 poll_wait(filp, &kobj->poll, wait); 464 poll_wait(filp, &od->poll, wait);
352 465
353 sysfs_put_active_two(attr_sd); 466 sysfs_put_active_two(attr_sd);
354 467
355 if (buffer->event != atomic_read(&attr_sd->s_event)) 468 if (buffer->event != atomic_read(&od->event))
356 goto trigger; 469 goto trigger;
357 470
358 return 0; 471 return 0;
@@ -373,8 +486,17 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr)
373 if (sd && attr) 486 if (sd && attr)
374 sd = sysfs_find_dirent(sd, attr); 487 sd = sysfs_find_dirent(sd, attr);
375 if (sd) { 488 if (sd) {
376 atomic_inc(&sd->s_event); 489 struct sysfs_open_dirent *od;
377 wake_up_interruptible(&k->poll); 490
491 spin_lock(&sysfs_open_dirent_lock);
492
493 od = sd->s_attr.open;
494 if (od) {
495 atomic_inc(&od->event);
496 wake_up_interruptible(&od->poll);
497 }
498
499 spin_unlock(&sysfs_open_dirent_lock);
378 } 500 }
379 501
380 mutex_unlock(&sysfs_mutex); 502 mutex_unlock(&sysfs_mutex);
@@ -397,25 +519,21 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
397 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 519 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
398 struct sysfs_addrm_cxt acxt; 520 struct sysfs_addrm_cxt acxt;
399 struct sysfs_dirent *sd; 521 struct sysfs_dirent *sd;
522 int rc;
400 523
401 sd = sysfs_new_dirent(attr->name, mode, type); 524 sd = sysfs_new_dirent(attr->name, mode, type);
402 if (!sd) 525 if (!sd)
403 return -ENOMEM; 526 return -ENOMEM;
404 sd->s_elem.attr.attr = (void *)attr; 527 sd->s_attr.attr = (void *)attr;
405 528
406 sysfs_addrm_start(&acxt, dir_sd); 529 sysfs_addrm_start(&acxt, dir_sd);
530 rc = sysfs_add_one(&acxt, sd);
531 sysfs_addrm_finish(&acxt);
407 532
408 if (!sysfs_find_dirent(dir_sd, attr->name)) { 533 if (rc)
409 sysfs_add_one(&acxt, sd);
410 sysfs_link_sibling(sd);
411 }
412
413 if (!sysfs_addrm_finish(&acxt)) {
414 sysfs_put(sd); 534 sysfs_put(sd);
415 return -EEXIST;
416 }
417 535
418 return 0; 536 return rc;
419} 537}
420 538
421 539
@@ -457,42 +575,6 @@ int sysfs_add_file_to_group(struct kobject *kobj,
457} 575}
458EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); 576EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
459 577
460
461/**
462 * sysfs_update_file - update the modified timestamp on an object attribute.
463 * @kobj: object we're acting for.
464 * @attr: attribute descriptor.
465 */
466int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
467{
468 struct sysfs_dirent *victim_sd = NULL;
469 struct dentry *victim = NULL;
470 int rc;
471
472 rc = -ENOENT;
473 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
474 if (!victim_sd)
475 goto out;
476
477 victim = sysfs_get_dentry(victim_sd);
478 if (IS_ERR(victim)) {
479 rc = PTR_ERR(victim);
480 victim = NULL;
481 goto out;
482 }
483
484 mutex_lock(&victim->d_inode->i_mutex);
485 victim->d_inode->i_mtime = CURRENT_TIME;
486 fsnotify_modify(victim);
487 mutex_unlock(&victim->d_inode->i_mutex);
488 rc = 0;
489 out:
490 dput(victim);
491 sysfs_put(victim_sd);
492 return rc;
493}
494
495
496/** 578/**
497 * sysfs_chmod_file - update the modified mode value on an object attribute. 579 * sysfs_chmod_file - update the modified mode value on an object attribute.
498 * @kobj: object we're acting for. 580 * @kobj: object we're acting for.
@@ -513,7 +595,9 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
513 if (!victim_sd) 595 if (!victim_sd)
514 goto out; 596 goto out;
515 597
598 mutex_lock(&sysfs_rename_mutex);
516 victim = sysfs_get_dentry(victim_sd); 599 victim = sysfs_get_dentry(victim_sd);
600 mutex_unlock(&sysfs_rename_mutex);
517 if (IS_ERR(victim)) { 601 if (IS_ERR(victim)) {
518 rc = PTR_ERR(victim); 602 rc = PTR_ERR(victim);
519 victim = NULL; 603 victim = NULL;
@@ -521,10 +605,19 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
521 } 605 }
522 606
523 inode = victim->d_inode; 607 inode = victim->d_inode;
608
524 mutex_lock(&inode->i_mutex); 609 mutex_lock(&inode->i_mutex);
610
525 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 611 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
526 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 612 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
527 rc = notify_change(victim, &newattrs); 613 rc = notify_change(victim, &newattrs);
614
615 if (rc == 0) {
616 mutex_lock(&sysfs_mutex);
617 victim_sd->s_mode = newattrs.ia_mode;
618 mutex_unlock(&sysfs_mutex);
619 }
620
528 mutex_unlock(&inode->i_mutex); 621 mutex_unlock(&inode->i_mutex);
529 out: 622 out:
530 dput(victim); 623 dput(victim);
@@ -632,4 +725,3 @@ EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
632 725
633EXPORT_SYMBOL_GPL(sysfs_create_file); 726EXPORT_SYMBOL_GPL(sysfs_create_file);
634EXPORT_SYMBOL_GPL(sysfs_remove_file); 727EXPORT_SYMBOL_GPL(sysfs_remove_file);
635EXPORT_SYMBOL_GPL(sysfs_update_file);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f318b73c790c..d1972374655a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -13,8 +13,6 @@
13#include <linux/dcache.h> 13#include <linux/dcache.h>
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/fs.h>
17#include <asm/semaphore.h>
18#include "sysfs.h" 16#include "sysfs.h"
19 17
20 18
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 10d1b52899f1..9236635111f4 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -1,7 +1,11 @@
1/* 1/*
2 * inode.c - basic inode and dentry operations. 2 * fs/sysfs/inode.c - basic sysfs inode and dentry operations
3 * 3 *
4 * sysfs is Copyright (c) 2001-3 Patrick Mochel 4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
5 * 9 *
6 * Please see Documentation/filesystems/sysfs.txt for more information. 10 * Please see Documentation/filesystems/sysfs.txt for more information.
7 */ 11 */
@@ -14,7 +18,6 @@
14#include <linux/capability.h> 18#include <linux/capability.h>
15#include <linux/errno.h> 19#include <linux/errno.h>
16#include <linux/sched.h> 20#include <linux/sched.h>
17#include <asm/semaphore.h>
18#include "sysfs.h" 21#include "sysfs.h"
19 22
20extern struct super_block * sysfs_sb; 23extern struct super_block * sysfs_sb;
@@ -34,16 +37,6 @@ static const struct inode_operations sysfs_inode_operations ={
34 .setattr = sysfs_setattr, 37 .setattr = sysfs_setattr,
35}; 38};
36 39
37void sysfs_delete_inode(struct inode *inode)
38{
39 /* Free the shadowed directory inode operations */
40 if (sysfs_is_shadowed_inode(inode)) {
41 kfree(inode->i_op);
42 inode->i_op = NULL;
43 }
44 return generic_delete_inode(inode);
45}
46
47int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 40int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
48{ 41{
49 struct inode * inode = dentry->d_inode; 42 struct inode * inode = dentry->d_inode;
@@ -133,8 +126,22 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
133 */ 126 */
134static struct lock_class_key sysfs_inode_imutex_key; 127static struct lock_class_key sysfs_inode_imutex_key;
135 128
129static int sysfs_count_nlink(struct sysfs_dirent *sd)
130{
131 struct sysfs_dirent *child;
132 int nr = 0;
133
134 for (child = sd->s_dir.children; child; child = child->s_sibling)
135 if (sysfs_type(child) == SYSFS_DIR)
136 nr++;
137
138 return nr + 2;
139}
140
136static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 141static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
137{ 142{
143 struct bin_attribute *bin_attr;
144
138 inode->i_blocks = 0; 145 inode->i_blocks = 0;
139 inode->i_mapping->a_ops = &sysfs_aops; 146 inode->i_mapping->a_ops = &sysfs_aops;
140 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 147 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
@@ -150,6 +157,32 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
150 set_inode_attr(inode, sd->s_iattr); 157 set_inode_attr(inode, sd->s_iattr);
151 } else 158 } else
152 set_default_inode_attr(inode, sd->s_mode); 159 set_default_inode_attr(inode, sd->s_mode);
160
161
162 /* initialize inode according to type */
163 switch (sysfs_type(sd)) {
164 case SYSFS_DIR:
165 inode->i_op = &sysfs_dir_inode_operations;
166 inode->i_fop = &sysfs_dir_operations;
167 inode->i_nlink = sysfs_count_nlink(sd);
168 break;
169 case SYSFS_KOBJ_ATTR:
170 inode->i_size = PAGE_SIZE;
171 inode->i_fop = &sysfs_file_operations;
172 break;
173 case SYSFS_KOBJ_BIN_ATTR:
174 bin_attr = sd->s_bin_attr.bin_attr;
175 inode->i_size = bin_attr->size;
176 inode->i_fop = &bin_fops;
177 break;
178 case SYSFS_KOBJ_LINK:
179 inode->i_op = &sysfs_symlink_inode_operations;
180 break;
181 default:
182 BUG();
183 }
184
185 unlock_new_inode(inode);
153} 186}
154 187
155/** 188/**
@@ -177,50 +210,24 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
177 return inode; 210 return inode;
178} 211}
179 212
180/**
181 * sysfs_instantiate - instantiate dentry
182 * @dentry: dentry to be instantiated
183 * @inode: inode associated with @sd
184 *
185 * Unlock @inode if locked and instantiate @dentry with @inode.
186 *
187 * LOCKING:
188 * None.
189 */
190void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
191{
192 BUG_ON(!dentry || dentry->d_inode);
193
194 if (inode->i_state & I_NEW)
195 unlock_new_inode(inode);
196
197 d_instantiate(dentry, inode);
198}
199
200int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) 213int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
201{ 214{
202 struct sysfs_addrm_cxt acxt; 215 struct sysfs_addrm_cxt acxt;
203 struct sysfs_dirent **pos, *sd; 216 struct sysfs_dirent *sd;
204 217
205 if (!dir_sd) 218 if (!dir_sd)
206 return -ENOENT; 219 return -ENOENT;
207 220
208 sysfs_addrm_start(&acxt, dir_sd); 221 sysfs_addrm_start(&acxt, dir_sd);
209 222
210 for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) { 223 sd = sysfs_find_dirent(dir_sd, name);
211 sd = *pos; 224 if (sd)
212 225 sysfs_remove_one(&acxt, sd);
213 if (!sysfs_type(sd)) 226
214 continue; 227 sysfs_addrm_finish(&acxt);
215 if (!strcmp(sd->s_name, name)) {
216 *pos = sd->s_sibling;
217 sd->s_sibling = NULL;
218 sysfs_remove_one(&acxt, sd);
219 break;
220 }
221 }
222 228
223 if (sysfs_addrm_finish(&acxt)) 229 if (sd)
224 return 0; 230 return 0;
225 return -ENOENT; 231 else
232 return -ENOENT;
226} 233}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index fbc7b65fe262..c76c540be3c8 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -1,5 +1,13 @@
1/* 1/*
2 * mount.c - operations for initializing and mounting sysfs. 2 * fs/sysfs/symlink.c - operations for initializing and mounting sysfs
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
3 */ 11 */
4 12
5#define DEBUG 13#define DEBUG
@@ -8,25 +16,25 @@
8#include <linux/mount.h> 16#include <linux/mount.h>
9#include <linux/pagemap.h> 17#include <linux/pagemap.h>
10#include <linux/init.h> 18#include <linux/init.h>
11#include <asm/semaphore.h>
12 19
13#include "sysfs.h" 20#include "sysfs.h"
14 21
15/* Random magic number */ 22/* Random magic number */
16#define SYSFS_MAGIC 0x62656572 23#define SYSFS_MAGIC 0x62656572
17 24
18struct vfsmount *sysfs_mount; 25static struct vfsmount *sysfs_mount;
19struct super_block * sysfs_sb = NULL; 26struct super_block * sysfs_sb = NULL;
20struct kmem_cache *sysfs_dir_cachep; 27struct kmem_cache *sysfs_dir_cachep;
21 28
22static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
23 .statfs = simple_statfs, 30 .statfs = simple_statfs,
24 .drop_inode = sysfs_delete_inode, 31 .drop_inode = generic_delete_inode,
25}; 32};
26 33
27struct sysfs_dirent sysfs_root = { 34struct sysfs_dirent sysfs_root = {
35 .s_name = "",
28 .s_count = ATOMIC_INIT(1), 36 .s_count = ATOMIC_INIT(1),
29 .s_flags = SYSFS_ROOT, 37 .s_flags = SYSFS_DIR,
30 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 38 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
31 .s_ino = 1, 39 .s_ino = 1,
32}; 40};
@@ -50,11 +58,6 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
50 return -ENOMEM; 58 return -ENOMEM;
51 } 59 }
52 60
53 inode->i_op = &sysfs_dir_inode_operations;
54 inode->i_fop = &sysfs_dir_operations;
55 inc_nlink(inode); /* directory, account for "." */
56 unlock_new_inode(inode);
57
58 /* instantiate and link root dentry */ 61 /* instantiate and link root dentry */
59 root = d_alloc_root(inode); 62 root = d_alloc_root(inode);
60 if (!root) { 63 if (!root) {
@@ -62,7 +65,6 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
62 iput(inode); 65 iput(inode);
63 return -ENOMEM; 66 return -ENOMEM;
64 } 67 }
65 sysfs_root.s_dentry = root;
66 root->d_fsdata = &sysfs_root; 68 root->d_fsdata = &sysfs_root;
67 sb->s_root = root; 69 sb->s_root = root;
68 return 0; 70 return 0;
@@ -77,7 +79,7 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
77static struct file_system_type sysfs_fs_type = { 79static struct file_system_type sysfs_fs_type = {
78 .name = "sysfs", 80 .name = "sysfs",
79 .get_sb = sysfs_get_sb, 81 .get_sb = sysfs_get_sb,
80 .kill_sb = kill_litter_super, 82 .kill_sb = kill_anon_super,
81}; 83};
82 84
83int __init sysfs_init(void) 85int __init sysfs_init(void)
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 4ce687f0b5d0..3eac20c63c41 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -1,5 +1,13 @@
1/* 1/*
2 * symlink.c - operations for sysfs symlinks. 2 * fs/sysfs/symlink.c - sysfs symlink implementation
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 *
10 * Please see Documentation/filesystems/sysfs.txt for more information.
3 */ 11 */
4 12
5#include <linux/fs.h> 13#include <linux/fs.h>
@@ -7,7 +15,7 @@
7#include <linux/module.h> 15#include <linux/module.h>
8#include <linux/kobject.h> 16#include <linux/kobject.h>
9#include <linux/namei.h> 17#include <linux/namei.h>
10#include <asm/semaphore.h> 18#include <linux/mutex.h>
11 19
12#include "sysfs.h" 20#include "sysfs.h"
13 21
@@ -60,10 +68,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
60 68
61 BUG_ON(!name); 69 BUG_ON(!name);
62 70
63 if (!kobj) { 71 if (!kobj)
64 if (sysfs_mount && sysfs_mount->mnt_sb) 72 parent_sd = &sysfs_root;
65 parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; 73 else
66 } else
67 parent_sd = kobj->sd; 74 parent_sd = kobj->sd;
68 75
69 error = -EFAULT; 76 error = -EFAULT;
@@ -87,20 +94,15 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
87 if (!sd) 94 if (!sd)
88 goto out_put; 95 goto out_put;
89 96
90 sd->s_elem.symlink.target_sd = target_sd; 97 sd->s_symlink.target_sd = target_sd;
91 target_sd = NULL; /* reference is now owned by the symlink */ 98 target_sd = NULL; /* reference is now owned by the symlink */
92 99
93 sysfs_addrm_start(&acxt, parent_sd); 100 sysfs_addrm_start(&acxt, parent_sd);
101 error = sysfs_add_one(&acxt, sd);
102 sysfs_addrm_finish(&acxt);
94 103
95 if (!sysfs_find_dirent(parent_sd, name)) { 104 if (error)
96 sysfs_add_one(&acxt, sd);
97 sysfs_link_sibling(sd);
98 }
99
100 if (!sysfs_addrm_finish(&acxt)) {
101 error = -EEXIST;
102 goto out_put; 105 goto out_put;
103 }
104 106
105 return 0; 107 return 0;
106 108
@@ -148,7 +150,7 @@ static int sysfs_getlink(struct dentry *dentry, char * path)
148{ 150{
149 struct sysfs_dirent *sd = dentry->d_fsdata; 151 struct sysfs_dirent *sd = dentry->d_fsdata;
150 struct sysfs_dirent *parent_sd = sd->s_parent; 152 struct sysfs_dirent *parent_sd = sd->s_parent;
151 struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd; 153 struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
152 int error; 154 int error;
153 155
154 mutex_lock(&sysfs_mutex); 156 mutex_lock(&sysfs_mutex);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6b8c8d76d308..f0326f281d1c 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,20 +1,39 @@
1/*
2 * fs/sysfs/sysfs.h - sysfs internal header file
3 *
4 * Copyright (c) 2001-3 Patrick Mochel
5 * Copyright (c) 2007 SUSE Linux Products GmbH
6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
7 *
8 * This file is released under the GPLv2.
9 */
10
11struct sysfs_open_dirent;
12
13/* type-specific structures for sysfs_dirent->s_* union members */
1struct sysfs_elem_dir { 14struct sysfs_elem_dir {
2 struct kobject * kobj; 15 struct kobject *kobj;
16 /* children list starts here and goes through sd->s_sibling */
17 struct sysfs_dirent *children;
3}; 18};
4 19
5struct sysfs_elem_symlink { 20struct sysfs_elem_symlink {
6 struct sysfs_dirent * target_sd; 21 struct sysfs_dirent *target_sd;
7}; 22};
8 23
9struct sysfs_elem_attr { 24struct sysfs_elem_attr {
10 struct attribute * attr; 25 struct attribute *attr;
26 struct sysfs_open_dirent *open;
11}; 27};
12 28
13struct sysfs_elem_bin_attr { 29struct sysfs_elem_bin_attr {
14 struct bin_attribute * bin_attr; 30 struct bin_attribute *bin_attr;
15}; 31};
16 32
17/* 33/*
34 * sysfs_dirent - the building block of sysfs hierarchy. Each and
35 * every sysfs node is represented by single sysfs_dirent.
36 *
18 * As long as s_count reference is held, the sysfs_dirent itself is 37 * As long as s_count reference is held, the sysfs_dirent itself is
19 * accessible. Dereferencing s_elem or any other outer entity 38 * accessible. Dereferencing s_elem or any other outer entity
20 * requires s_active reference. 39 * requires s_active reference.
@@ -22,28 +41,43 @@ struct sysfs_elem_bin_attr {
22struct sysfs_dirent { 41struct sysfs_dirent {
23 atomic_t s_count; 42 atomic_t s_count;
24 atomic_t s_active; 43 atomic_t s_active;
25 struct sysfs_dirent * s_parent; 44 struct sysfs_dirent *s_parent;
26 struct sysfs_dirent * s_sibling; 45 struct sysfs_dirent *s_sibling;
27 struct sysfs_dirent * s_children; 46 const char *s_name;
28 const char * s_name;
29 47
30 union { 48 union {
31 struct sysfs_elem_dir dir; 49 struct sysfs_elem_dir s_dir;
32 struct sysfs_elem_symlink symlink; 50 struct sysfs_elem_symlink s_symlink;
33 struct sysfs_elem_attr attr; 51 struct sysfs_elem_attr s_attr;
34 struct sysfs_elem_bin_attr bin_attr; 52 struct sysfs_elem_bin_attr s_bin_attr;
35 } s_elem; 53 };
36 54
37 unsigned int s_flags; 55 unsigned int s_flags;
38 umode_t s_mode;
39 ino_t s_ino; 56 ino_t s_ino;
40 struct dentry * s_dentry; 57 umode_t s_mode;
41 struct iattr * s_iattr; 58 struct iattr *s_iattr;
42 atomic_t s_event;
43}; 59};
44 60
45#define SD_DEACTIVATED_BIAS INT_MIN 61#define SD_DEACTIVATED_BIAS INT_MIN
62
63#define SYSFS_TYPE_MASK 0x00ff
64#define SYSFS_DIR 0x0001
65#define SYSFS_KOBJ_ATTR 0x0002
66#define SYSFS_KOBJ_BIN_ATTR 0x0004
67#define SYSFS_KOBJ_LINK 0x0008
68#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
69
70#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
71#define SYSFS_FLAG_REMOVED 0x0200
72
73static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
74{
75 return sd->s_flags & SYSFS_TYPE_MASK;
76}
46 77
78/*
79 * Context structure to be used while adding/removing nodes.
80 */
47struct sysfs_addrm_cxt { 81struct sysfs_addrm_cxt {
48 struct sysfs_dirent *parent_sd; 82 struct sysfs_dirent *parent_sd;
49 struct inode *parent_inode; 83 struct inode *parent_inode;
@@ -51,63 +85,47 @@ struct sysfs_addrm_cxt {
51 int cnt; 85 int cnt;
52}; 86};
53 87
54extern struct vfsmount * sysfs_mount; 88/*
89 * mount.c
90 */
55extern struct sysfs_dirent sysfs_root; 91extern struct sysfs_dirent sysfs_root;
92extern struct super_block *sysfs_sb;
56extern struct kmem_cache *sysfs_dir_cachep; 93extern struct kmem_cache *sysfs_dir_cachep;
57 94
58extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); 95/*
59extern void sysfs_link_sibling(struct sysfs_dirent *sd); 96 * dir.c
60extern void sysfs_unlink_sibling(struct sysfs_dirent *sd); 97 */
61extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
62extern void sysfs_put_active(struct sysfs_dirent *sd);
63extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
64extern void sysfs_put_active_two(struct sysfs_dirent *sd);
65extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
66 struct sysfs_dirent *parent_sd);
67extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
68 struct sysfs_dirent *sd);
69extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
70 struct sysfs_dirent *sd);
71extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
72
73extern void sysfs_delete_inode(struct inode *inode);
74extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
75extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
76
77extern void release_sysfs_dirent(struct sysfs_dirent * sd);
78extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
79 const unsigned char *name);
80extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
81 const unsigned char *name);
82extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
83 int type);
84
85extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
86 const struct attribute *attr, int type);
87extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
88extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
89
90extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
91 struct sysfs_dirent **p_sd);
92extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
93
94extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
95
96extern spinlock_t sysfs_assoc_lock;
97extern struct mutex sysfs_mutex; 98extern struct mutex sysfs_mutex;
98extern struct super_block * sysfs_sb; 99extern struct mutex sysfs_rename_mutex;
100extern spinlock_t sysfs_assoc_lock;
101
99extern const struct file_operations sysfs_dir_operations; 102extern const struct file_operations sysfs_dir_operations;
100extern const struct file_operations sysfs_file_operations;
101extern const struct file_operations bin_fops;
102extern const struct inode_operations sysfs_dir_inode_operations; 103extern const struct inode_operations sysfs_dir_inode_operations;
103extern const struct inode_operations sysfs_symlink_inode_operations;
104
105static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
106{
107 return sd->s_flags & SYSFS_TYPE_MASK;
108}
109 104
110static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) 105struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
106struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
107void sysfs_put_active(struct sysfs_dirent *sd);
108struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
109void sysfs_put_active_two(struct sysfs_dirent *sd);
110void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
111 struct sysfs_dirent *parent_sd);
112int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
113void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
114void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
115
116struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
117 const unsigned char *name);
118struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
119 const unsigned char *name);
120struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
121
122void release_sysfs_dirent(struct sysfs_dirent *sd);
123
124int sysfs_create_subdir(struct kobject *kobj, const char *name,
125 struct sysfs_dirent **p_sd);
126void sysfs_remove_subdir(struct sysfs_dirent *sd);
127
128static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
111{ 129{
112 if (sd) { 130 if (sd) {
113 WARN_ON(!atomic_read(&sd->s_count)); 131 WARN_ON(!atomic_read(&sd->s_count));
@@ -116,13 +134,33 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
116 return sd; 134 return sd;
117} 135}
118 136
119static inline void sysfs_put(struct sysfs_dirent * sd) 137static inline void sysfs_put(struct sysfs_dirent *sd)
120{ 138{
121 if (sd && atomic_dec_and_test(&sd->s_count)) 139 if (sd && atomic_dec_and_test(&sd->s_count))
122 release_sysfs_dirent(sd); 140 release_sysfs_dirent(sd);
123} 141}
124 142
125static inline int sysfs_is_shadowed_inode(struct inode *inode) 143/*
126{ 144 * inode.c
127 return S_ISDIR(inode->i_mode) && inode->i_op->follow_link; 145 */
128} 146struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
147int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
148int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
149
150/*
151 * file.c
152 */
153extern const struct file_operations sysfs_file_operations;
154
155int sysfs_add_file(struct sysfs_dirent *dir_sd,
156 const struct attribute *attr, int type);
157
158/*
159 * bin.c
160 */
161extern const struct file_operations bin_fops;
162
163/*
164 * symlink.c
165 */
166extern const struct inode_operations sysfs_symlink_inode_operations;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 5f152f60d74d..6f4c29e9c3d9 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -323,17 +323,13 @@ xfs_iomap_valid(
323/* 323/*
324 * BIO completion handler for buffered IO. 324 * BIO completion handler for buffered IO.
325 */ 325 */
326STATIC int 326STATIC void
327xfs_end_bio( 327xfs_end_bio(
328 struct bio *bio, 328 struct bio *bio,
329 unsigned int bytes_done,
330 int error) 329 int error)
331{ 330{
332 xfs_ioend_t *ioend = bio->bi_private; 331 xfs_ioend_t *ioend = bio->bi_private;
333 332
334 if (bio->bi_size)
335 return 1;
336
337 ASSERT(atomic_read(&bio->bi_cnt) >= 1); 333 ASSERT(atomic_read(&bio->bi_cnt) >= 1);
338 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; 334 ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
339 335
@@ -343,7 +339,6 @@ xfs_end_bio(
343 bio_put(bio); 339 bio_put(bio);
344 340
345 xfs_finish_ioend(ioend, 0); 341 xfs_finish_ioend(ioend, 0);
346 return 0;
347} 342}
348 343
349STATIC void 344STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b0f0e58866de..39f44ee572e8 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1103,19 +1103,15 @@ _xfs_buf_ioend(
1103 } 1103 }
1104} 1104}
1105 1105
1106STATIC int 1106STATIC void
1107xfs_buf_bio_end_io( 1107xfs_buf_bio_end_io(
1108 struct bio *bio, 1108 struct bio *bio,
1109 unsigned int bytes_done,
1110 int error) 1109 int error)
1111{ 1110{
1112 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1111 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1113 unsigned int blocksize = bp->b_target->bt_bsize; 1112 unsigned int blocksize = bp->b_target->bt_bsize;
1114 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1113 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1115 1114
1116 if (bio->bi_size)
1117 return 1;
1118
1119 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1115 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1120 bp->b_error = EIO; 1116 bp->b_error = EIO;
1121 1117
@@ -1143,7 +1139,6 @@ xfs_buf_bio_end_io(
1143 1139
1144 _xfs_buf_ioend(bp, 1); 1140 _xfs_buf_ioend(bp, 1);
1145 bio_put(bio); 1141 bio_put(bio);
1146 return 0;
1147} 1142}
1148 1143
1149STATIC void 1144STATIC void